diff --git a/corrlib/find.py b/corrlib/find.py index 3cbe09b..dd3a9a6 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -6,7 +6,6 @@ import numpy as np from .input.implementations import codes from .tools import k2m, get_db_file from .tracker import get -from .integrity import check_time_validity from typing import Any, Optional, Union from pathlib import Path import datetime as dt @@ -71,9 +70,6 @@ def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, cre result = results.iloc[ind] created_at = dt.datetime.fromisoformat(result['created_at']) updated_at = dt.datetime.fromisoformat(result['updated_at']) - db_times_valid = check_time_validity(created_at=created_at, updated_at=updated_at) - if not db_times_valid: - raise ValueError('Time stamps not valid for result with path', result["path"]) if created_before is not None: date_created_before = dt.datetime.fromisoformat(created_before) diff --git a/corrlib/integrity.py b/corrlib/integrity.py deleted file mode 100644 index bf890db..0000000 --- a/corrlib/integrity.py +++ /dev/null @@ -1,5 +0,0 @@ -import datetime as dt - - -def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool: - return not (created_at > updated_at) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 48017a1..be80b6f 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -175,7 +175,7 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = if cache_enabled(path): if not os.path.exists(cache_dir(path, file)): os.makedirs(cache_dir(path, file)) - dump_object(preloaded[file][key], str(cache_path(path, file, key))) + dump_object(preloaded[file][key], cache_path(path, file, key)) return returned_data @@ -195,7 +195,7 @@ def cache_dir(path: Path, file: str) -> Path: The path holding the cached data for the given file. """ cache_path_list = file.split("/")[1:] - cache_path = Path(path) / CACHE_DIR + cache_path = path / CACHE_DIR for directory in cache_path_list: cache_path /= directory return cache_path @@ -238,7 +238,7 @@ def preload(path: Path, file: Path) -> dict[str, Any]: The data read from the file. """ get(path, file) - filedict: dict[str, Any] = pj.load_json_dict(str(path / file)) + filedict: dict[str, Any] = pj.load_json_dict(path / file) print("> read file") return filedict diff --git a/tests/find_test.py b/tests/find_test.py index 573f87e..e895b85 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -3,8 +3,6 @@ import sqlite3 from pathlib import Path import corrlib.initialization as cinit import pytest -import pandas as pd -import datalad.api as dl def make_sql(path: Path) -> Path: @@ -36,34 +34,6 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None: conn.close() -def test_find_project(tmp_path: Path) -> None: - cinit.create(tmp_path) - db = tmp_path / "backlogger.db" - dl.unlock(str(db), dataset=str(tmp_path)) - conn = sqlite3.connect(db) - c = conn.cursor() - uuid = "test_uuid" - alias_str = "fun_project" - tag_str = "tt" - owner = "tester" - code = "test_code" - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - conn.commit() - - assert uuid == find.find_project(tmp_path, "fun_project") - - uuid = "test_uuid2" - alias_str = "fun_project" - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - conn.commit() - - with pytest.raises(Exception): - assert uuid == find._project_lookup_by_alias(tmp_path, "fun_project") - conn.close() - - def test_find_lookup_by_id(tmp_path: Path) -> None: db = make_sql(tmp_path) conn = sqlite3.connect(db) @@ -152,89 +122,3 @@ def test_db_lookup(tmp_path: Path) -> None: assert len(results) == 1 conn.close() - - -def test_time_filter() -> None: - record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created - record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated - record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", - '2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966'] # created and updated later - record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", - '2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966'] - record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", - '2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier - record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", - '2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # this is invalid... - - data = [record_A, record_B, record_C, record_D, record_E] - cols = ["name", - "ensemble", - "code", - "path", - "project", - "parameters", - "parameter_file", - "created_at", - "updated_at"] - df = pd.DataFrame(data,columns=cols) - - results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') - assert results.empty - results = find._time_filter(df, created_before='2027-03-26 12:55:18.229966') - assert len(results) == 5 - results = find._time_filter(df, created_before='2026-03-25 12:55:18.229966') - assert len(results) == 3 - results = find._time_filter(df, created_before='2026-03-26 12:55:18.229965') - assert len(results) == 3 - results = find._time_filter(df, created_before='2025-03-04 12:55:18.229965') - assert len(results) == 1 - - results = find._time_filter(df, created_after='2023-03-26 12:55:18.229966') - assert len(results) == 5 - results = find._time_filter(df, created_after='2027-03-26 12:55:18.229966') - assert results.empty - results = find._time_filter(df, created_after='2026-03-25 12:55:18.229966') - assert len(results) == 2 - results = find._time_filter(df, created_after='2026-03-26 12:55:18.229965') - assert len(results) == 2 - results = find._time_filter(df, created_after='2025-03-04 12:55:18.229965') - assert len(results) == 4 - - results = find._time_filter(df, updated_before='2023-03-26 12:55:18.229966') - assert results.empty - results = find._time_filter(df, updated_before='2027-03-26 12:55:18.229966') - assert len(results) == 5 - results = find._time_filter(df, updated_before='2026-03-25 12:55:18.229966') - assert len(results) == 3 - results = find._time_filter(df, updated_before='2026-03-26 12:55:18.229965') - assert len(results) == 3 - results = find._time_filter(df, updated_before='2025-03-04 12:55:18.229965') - assert len(results) == 1 - - results = find._time_filter(df, updated_after='2023-03-26 12:55:18.229966') - assert len(results) == 5 - results = find._time_filter(df, updated_after='2027-03-26 12:55:18.229966') - assert results.empty - results = find._time_filter(df, updated_after='2026-03-25 12:55:18.229966') - assert len(results) == 2 - results = find._time_filter(df, updated_after='2026-03-26 12:55:18.229965') - assert len(results) == 2 - results = find._time_filter(df, updated_after='2025-03-04 12:55:18.229965') - assert len(results) == 4 - - data = [record_A, record_B, record_C, record_D, record_F] - cols = ["name", - "ensemble", - "code", - "path", - "project", - "parameters", - "parameter_file", - "created_at", - "updated_at"] - df = pd.DataFrame(data,columns=cols) - - with pytest.raises(ValueError): - results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966')