diff --git a/corrlib/find.py b/corrlib/find.py index 7b07321..4c51e05 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -6,12 +6,8 @@ import numpy as np from .input.implementations import codes from .tools import k2m, get_db_file from .tracker import get -from .integrity import has_valid_times from typing import Any, Optional from pathlib import Path -import datetime as dt -from collections.abc import Callable -import warnings def _project_lookup_by_alias(db: Path, alias: str) -> str: @@ -42,7 +38,7 @@ def _project_lookup_by_alias(db: Path, alias: str) -> str: return str(results[0][0]) -def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, ...]]: +def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, str]]: """ Return the project information available in the database by UUID. @@ -66,56 +62,8 @@ def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, ...]]: return results -def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: - """ - Filter the results from the database in terms of the creation and update times. - - Parameters - ---------- - results: pd.DataFrame - The dataframe holding the unfilteres results from the database. - created_before: str - Contraint on the creation date in datetime.datetime.isoformat. Note that this is exclusive. The creation date has to be truly before the date and time given. - created_after: str - Contraint on the creation date in datetime.datetime.isoformat. Note that this is exclusive. The creation date has to be truly after the date and time given. - updated_before: str - Contraint on the creation date in datetime.datetime.isoformat. Note that this is exclusive. The date of the last update has to be truly before the date and time given. - updated_after: str - Contraint on the creation date in datetime.datetime.isoformat. Note that this is exclusive. The date of the last update has to be truly after the date and time given. - """ - drops = [] - for ind in range(len(results)): - result = results.iloc[ind] - created_at = dt.datetime.fromisoformat(result['created_at']) - updated_at = dt.datetime.fromisoformat(result['updated_at']) - db_times_valid = has_valid_times(result) - if not db_times_valid: - raise ValueError('Time stamps not valid for result with path', result["path"]) - - if created_before is not None: - date_created_before = dt.datetime.fromisoformat(created_before) - if date_created_before < created_at: - drops.append(ind) - continue - if created_after is not None: - date_created_after = dt.datetime.fromisoformat(created_after) - if date_created_after > created_at: - drops.append(ind) - continue - if updated_before is not None: - date_updated_before = dt.datetime.fromisoformat(updated_before) - if date_updated_before < updated_at: - drops.append(ind) - continue - if updated_after is not None: - date_updated_after = dt.datetime.fromisoformat(updated_after) - if date_updated_after > updated_at: - drops.append(ind) - continue - return results.drop(drops) - - -def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None) -> pd.DataFrame: +def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, + created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: """ Look up a correlator record in the database by the data given to the method. @@ -157,84 +105,20 @@ def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project search_expr += f" AND code = '{code}'" if parameters: search_expr += f" AND parameters = '{parameters}'" + if created_before: + search_expr += f" AND created_at < '{created_before}'" + if created_after: + search_expr += f" AND created_at > '{created_after}'" + if updated_before: + search_expr += f" AND updated_at < '{updated_before}'" + if updated_after: + search_expr += f" AND updated_at > '{updated_after}'" conn = sqlite3.connect(db) results = pd.read_sql(search_expr, conn) conn.close() return results -def _sfcf_drop(param: dict[str, Any], **kwargs: Any) -> bool: - if 'offset' in kwargs: - if kwargs.get('offset') != param['offset']: - return True - if 'quark_kappas' in kwargs: - kappas = kwargs['quark_kappas'] - if (not np.isclose(kappas[0], param['quarks'][0]['mass']) or not np.isclose(kappas[1], param['quarks'][1]['mass'])): - return True - if 'quark_masses' in kwargs: - masses = kwargs['quark_masses'] - if (not np.isclose(masses[0], k2m(param['quarks'][0]['mass'])) or not np.isclose(masses[1], k2m(param['quarks'][1]['mass']))): - return True - if 'qk1' in kwargs: - quark_kappa1 = kwargs['qk1'] - if not isinstance(quark_kappa1, list): - if (not np.isclose(quark_kappa1, param['quarks'][0]['mass'])): - return True - else: - if len(quark_kappa1) == 2: - if (quark_kappa1[0] > param['quarks'][0]['mass']) or (quark_kappa1[1] < param['quarks'][0]['mass']): - return True - else: - raise ValueError("quark_kappa1 has to have length 2") - if 'qk2' in kwargs: - quark_kappa2 = kwargs['qk2'] - if not isinstance(quark_kappa2, list): - if (not np.isclose(quark_kappa2, param['quarks'][1]['mass'])): - return True - else: - if len(quark_kappa2) == 2: - if (quark_kappa2[0] > param['quarks'][1]['mass']) or (quark_kappa2[1] < param['quarks'][1]['mass']): - return True - else: - raise ValueError("quark_kappa2 has to have length 2") - if 'qm1' in kwargs: - quark_mass1 = kwargs['qm1'] - if not isinstance(quark_mass1, list): - if (not np.isclose(quark_mass1, k2m(param['quarks'][0]['mass']))): - return True - else: - if len(quark_mass1) == 2: - if (quark_mass1[0] > k2m(param['quarks'][0]['mass'])) or (quark_mass1[1] < k2m(param['quarks'][0]['mass'])): - return True - else: - raise ValueError("quark_mass1 has to have length 2") - if 'qm2' in kwargs: - quark_mass2 = kwargs['qm2'] - if not isinstance(quark_mass2, list): - if (not np.isclose(quark_mass2, k2m(param['quarks'][1]['mass']))): - return True - else: - if len(quark_mass2) == 2: - if (quark_mass2[0] > k2m(param['quarks'][1]['mass'])) or (quark_mass2[1] < k2m(param['quarks'][1]['mass'])): - return True - else: - raise ValueError("quark_mass2 has to have length 2") - if 'quark_thetas' in kwargs: - quark_thetas = kwargs['quark_thetas'] - if (quark_thetas[0] != param['quarks'][0]['thetas'] and quark_thetas[1] != param['quarks'][1]['thetas']) or (quark_thetas[0] != param['quarks'][1]['thetas'] and quark_thetas[1] != param['quarks'][0]['thetas']): - return True - # careful, this is not save, when multiple contributions are present! - if 'wf1' in kwargs: - wf1 = kwargs['wf1'] - if not (np.isclose(wf1[0][0], param['wf1'][0][0], 1e-8) and np.isclose(wf1[0][1][0], param['wf1'][0][1][0], 1e-8) and np.isclose(wf1[0][1][1], param['wf1'][0][1][1], 1e-8)): - return True - if 'wf2' in kwargs: - wf2 = kwargs['wf2'] - if not (np.isclose(wf2[0][0], param['wf2'][0][0], 1e-8) and np.isclose(wf2[0][1][0], param['wf2'][0][1][0], 1e-8) and np.isclose(wf2[0][1][1], param['wf2'][0][1][1], 1e-8)): - return True - return False - - def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: r""" Filter method for the Database entries holding SFCF calculations. @@ -252,9 +136,9 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: qk2: float, optional Mass parameter $\kappa_2$ of the first quark. qm1: float, optional - Bare quark mass $m_1$ of the first quark. + Bare quak mass $m_1$ of the first quark. qm2: float, optional - Bare quark mass $m_2$ of the first quark. + Bare quak mass $m_1$ of the first quark. quarks_thetas: list[list[float]], optional wf1: optional wf2: optional @@ -264,80 +148,101 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: results: pd.DataFrame The filtered DataFrame, only holding the records that fit to the parameters given. """ - drops = [] for ind in range(len(results)): result = results.iloc[ind] param = json.loads(result['parameters']) - if _sfcf_drop(param, **kwargs): - drops.append(ind) + if 'offset' in kwargs: + if kwargs.get('offset') != param['offset']: + drops.append(ind) + continue + if 'quark_kappas' in kwargs: + kappas = kwargs['quark_kappas'] + if (not np.isclose(kappas[0], param['quarks'][0]['mass']) or not np.isclose(kappas[1], param['quarks'][1]['mass'])): + drops.append(ind) + continue + if 'quark_masses' in kwargs: + masses = kwargs['quark_masses'] + if (not np.isclose(masses[0], k2m(param['quarks'][0]['mass'])) or not np.isclose(masses[1], k2m(param['quarks'][1]['mass']))): + drops.append(ind) + continue + if 'qk1' in kwargs: + quark_kappa1 = kwargs['qk1'] + if not isinstance(quark_kappa1, list): + if (not np.isclose(quark_kappa1, param['quarks'][0]['mass'])): + drops.append(ind) + continue + else: + if len(quark_kappa1) == 2: + if (quark_kappa1[0] > param['quarks'][0]['mass']) or (quark_kappa1[1] < param['quarks'][0]['mass']): + drops.append(ind) + continue + if 'qk2' in kwargs: + quark_kappa2 = kwargs['qk2'] + if not isinstance(quark_kappa2, list): + if (not np.isclose(quark_kappa2, param['quarks'][1]['mass'])): + drops.append(ind) + continue + else: + if len(quark_kappa2) == 2: + if (quark_kappa2[0] > param['quarks'][1]['mass']) or (quark_kappa2[1] < param['quarks'][1]['mass']): + drops.append(ind) + continue + if 'qm1' in kwargs: + quark_mass1 = kwargs['qm1'] + if not isinstance(quark_mass1, list): + if (not np.isclose(quark_mass1, k2m(param['quarks'][0]['mass']))): + drops.append(ind) + continue + else: + if len(quark_mass1) == 2: + if (quark_mass1[0] > k2m(param['quarks'][0]['mass'])) or (quark_mass1[1] < k2m(param['quarks'][0]['mass'])): + drops.append(ind) + continue + if 'qm2' in kwargs: + quark_mass2 = kwargs['qm2'] + if not isinstance(quark_mass2, list): + if (not np.isclose(quark_mass2, k2m(param['quarks'][1]['mass']))): + drops.append(ind) + continue + else: + if len(quark_mass2) == 2: + if (quark_mass2[0] > k2m(param['quarks'][1]['mass'])) or (quark_mass2[1] < k2m(param['quarks'][1]['mass'])): + drops.append(ind) + continue + if 'quark_thetas' in kwargs: + quark_thetas = kwargs['quark_thetas'] + if (quark_thetas[0] != param['quarks'][0]['thetas'] and quark_thetas[1] != param['quarks'][1]['thetas']) or (quark_thetas[0] != param['quarks'][1]['thetas'] and quark_thetas[1] != param['quarks'][0]['thetas']): + drops.append(ind) + continue + # careful, this is not save, when multiple contributions are present! + if 'wf1' in kwargs: + wf1 = kwargs['wf1'] + if not (np.isclose(wf1[0][0], param['wf1'][0][0], 1e-8) and np.isclose(wf1[0][1][0], param['wf1'][0][1][0], 1e-8) and np.isclose(wf1[0][1][1], param['wf1'][0][1][1], 1e-8)): + drops.append(ind) + continue + if 'wf2' in kwargs: + wf2 = kwargs['wf2'] + if not (np.isclose(wf2[0][0], param['wf2'][0][0], 1e-8) and np.isclose(wf2[0][1][0], param['wf2'][0][1][0], 1e-8) and np.isclose(wf2[0][1][1], param['wf2'][0][1][1], 1e-8)): + drops.append(ind) + continue return results.drop(drops) -def openQCD_filter(results:pd.DataFrame, **kwargs: Any) -> pd.DataFrame: - """ - Filter for parameters of openQCD. - - Parameters - ---------- - results: pd.DataFrame - The unfiltered list of results from the database. - - Returns - ------- - results: pd.DataFrame - The filtered results. - - """ - warnings.warn("A filter for openQCD parameters is no implemented yet.", Warning) - - return results - - -def _code_filter(results: pd.DataFrame, code: str, **kwargs: Any) -> pd.DataFrame: - """ - Abstraction of the filters for the different codes that are available. - At the moment, only openQCD and SFCF are known. - The possible key words for the parameters can be seen in the descriptionso f the code-specific filters. - - Parameters - ---------- - results: pd.DataFrame - The unfiltered list of results from the database. - code: str - The name of the code that produced the record at hand. - kwargs: - The keyworkd args that are handed over to the code-specific filters. - - Returns - ------- - results: pd.DataFrame - The filtered results. - """ - if code == "sfcf": - return sfcf_filter(results, **kwargs) - elif code == "openQCD": - return openQCD_filter(results, **kwargs) - else: - raise ValueError(f"Code {code} is not known.") - - def find_record(path: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, - created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, - revision: Optional[str]=None, - customFilter: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None, - **kwargs: Any) -> pd.DataFrame: + created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, **kwargs: Any) -> pd.DataFrame: db_file = get_db_file(path) db = path / db_file if code not in codes: raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) get(path, db_file) - results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters) - if any([arg is not None for arg in [created_before, created_after, updated_before, updated_after]]): - results = _time_filter(results, created_before, created_after, updated_before, updated_after) - results = _code_filter(results, code, **kwargs) - if customFilter is not None: - results = customFilter(results) + results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters, created_before=created_before, created_after=created_after, updated_before=updated_before, updated_after=updated_after) + if code == "sfcf": + results = sfcf_filter(results, **kwargs) + elif code == "openQCD": + pass + else: + raise Exception print("Found " + str(len(results)) + " result" + ("s" if len(results)>1 else "")) return results.reset_index() diff --git a/corrlib/integrity.py b/corrlib/integrity.py deleted file mode 100644 index d865944..0000000 --- a/corrlib/integrity.py +++ /dev/null @@ -1,45 +0,0 @@ -import datetime as dt -from pathlib import Path -from .tools import get_db_file -import pandas as pd -import sqlite3 - - -def has_valid_times(result: pd.Series) -> bool: - # we expect created_at <= updated_at <= now - created_at = dt.datetime.fromisoformat(result['created_at']) - updated_at = dt.datetime.fromisoformat(result['updated_at']) - if created_at > updated_at: - return False - if updated_at > dt.datetime.now(): - return False - return True - -def are_keys_unique(db: Path, table: str, col: str) -> bool: - conn = sqlite3.connect(db) - c = conn.cursor() - c.execute(f"SELECT COUNT( DISTINCT CAST(path AS nvarchar(4000))), COUNT({col}) FROM {table};") - results = c.fetchall()[0] - conn.close() - return bool(results[0] == results[1]) - - -def check_db_integrity(path: Path) -> None: - db = get_db_file(path) - - if not are_keys_unique(db, 'backlogs', 'path'): - raise Exception("The paths the backlog table of the database links are not unique.") - - search_expr = "SELECT * FROM 'backlogs'" - conn = sqlite3.connect(db) - results = pd.read_sql(search_expr, conn) - - for _, result in results.iterrows(): - if not has_valid_times(result): - raise ValueError(f"Result with id {result[id]} has wrong time signatures.") - - - -def full_integrity_check(path: Path) -> None: - check_db_integrity(path) - diff --git a/tests/find_test.py b/tests/find_test.py index cc455f9..b63b246 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -3,9 +3,6 @@ import sqlite3 from pathlib import Path import corrlib.initialization as cinit import pytest -import pandas as pd -import datalad.api as dl -import datetime as dt def make_sql(path: Path) -> Path: @@ -13,7 +10,6 @@ def make_sql(path: Path) -> Path: cinit._create_db(db) return db - def test_find_lookup_by_one_alias(tmp_path: Path) -> None: db = make_sql(tmp_path) conn = sqlite3.connect(db) @@ -35,398 +31,3 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None: with pytest.raises(Exception): assert uuid == find._project_lookup_by_alias(db, "fun_project") conn.close() - -def test_find_lookup_by_id(tmp_path: Path) -> None: - db = make_sql(tmp_path) - conn = sqlite3.connect(db) - c = conn.cursor() - uuid = "test_uuid" - alias_str = "fun_project" - tag_str = "tt" - owner = "tester" - code = "test_code" - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - conn.commit() - conn.close() - result = find._project_lookup_by_id(db, uuid)[0] - assert uuid == result[0] - assert alias_str == result[1] - assert tag_str == result[2] - assert owner == result[3] - assert code == result[4] - - -def test_time_filter() -> None: - record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf0", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created - record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf1", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated - record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf2", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2026-03-26 12:55:18.229966', '2026-04-14 12:55:18.229966'] # created and updated later - record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf3", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966'] - record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf4", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier - record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf5", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # this is invalid... - record_G = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf2", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2026-03-26 12:55:18.229966', str(dt.datetime.now() + dt.timedelta(days=2, hours=3, minutes=5, seconds=30))] # created and updated later - - data = [record_A, record_B, record_C, record_D, record_E] - cols = ["name", - "ensemble", - "code", - "path", - "project", - "parameters", - "parameter_file", - "created_at", - "updated_at"] - df = pd.DataFrame(data,columns=cols) - - results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') - assert results.empty - results = find._time_filter(df, created_before='2027-03-26 12:55:18.229966') - assert len(results) == 5 - results = find._time_filter(df, created_before='2026-03-25 12:55:18.229966') - assert len(results) == 3 - results = find._time_filter(df, created_before='2026-03-26 12:55:18.229965') - assert len(results) == 3 - results = find._time_filter(df, created_before='2025-03-04 12:55:18.229965') - assert len(results) == 1 - - results = find._time_filter(df, created_after='2023-03-26 12:55:18.229966') - assert len(results) == 5 - results = find._time_filter(df, created_after='2027-03-26 12:55:18.229966') - assert results.empty - results = find._time_filter(df, created_after='2026-03-25 12:55:18.229966') - assert len(results) == 2 - results = find._time_filter(df, created_after='2026-03-26 12:55:18.229965') - assert len(results) == 2 - results = find._time_filter(df, created_after='2025-03-04 12:55:18.229965') - assert len(results) == 4 - - results = find._time_filter(df, updated_before='2023-03-26 12:55:18.229966') - assert results.empty - results = find._time_filter(df, updated_before='2027-03-26 12:55:18.229966') - assert len(results) == 5 - results = find._time_filter(df, updated_before='2026-03-25 12:55:18.229966') - assert len(results) == 3 - results = find._time_filter(df, updated_before='2026-03-26 12:55:18.229965') - assert len(results) == 3 - results = find._time_filter(df, updated_before='2025-03-04 12:55:18.229965') - assert len(results) == 1 - - results = find._time_filter(df, updated_after='2023-03-26 12:55:18.229966') - assert len(results) == 5 - results = find._time_filter(df, updated_after='2027-03-26 12:55:18.229966') - assert results.empty - results = find._time_filter(df, updated_after='2026-03-25 12:55:18.229966') - assert len(results) == 2 - results = find._time_filter(df, updated_after='2026-03-26 12:55:18.229965') - assert len(results) == 2 - results = find._time_filter(df, updated_after='2025-03-04 12:55:18.229965') - assert len(results) == 4 - - data = [record_A, record_B, record_C, record_D, record_F] - cols = ["name", - "ensemble", - "code", - "path", - "project", - "parameters", - "parameter_file", - "created_at", - "updated_at"] - df = pd.DataFrame(data,columns=cols) - - with pytest.raises(ValueError): - results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') - - data = [record_A, record_B, record_C, record_D, record_G] - cols = ["name", - "ensemble", - "code", - "path", - "project", - "parameters", - "parameter_file", - "created_at", - "updated_at"] - df = pd.DataFrame(data,columns=cols) - - with pytest.raises(ValueError): - results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') - - -def test_db_lookup(tmp_path: Path) -> None: - db = make_sql(tmp_path) - conn = sqlite3.connect(db) - c = conn.cursor() - - corr = "f_A" - ensemble = "SF_A" - code = "openQCD" - meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" - uuid = "Project_A" - pars = "{par_A: 3.0, par_B: 5.0}" - parameter_file = "projects/Project_A/myinput.in" - c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) - conn.commit() - - results = find._db_lookup(db, ensemble, corr, code) - assert len(results) == 1 - results = find._db_lookup(db, "SF_B", corr, code) - assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code) - assert results.empty - results = find._db_lookup(db, ensemble, corr, "sfcf") - assert results.empty - results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") - assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") - assert results.empty - results = find._db_lookup(db, ensemble, corr, code, parameters = pars) - assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, code, parameters = '{"par_A": 3.0, "par_B": 4.0}') - assert results.empty - - corr = "g_A" - ensemble = "SF_A" - code = "openQCD" - meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" - uuid = "Project_A" - pars = '{"par_A": 3.0, "par_B": 4.0}' - parameter_file = "projects/Project_A/myinput.in" - c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) - conn.commit() - - corr = "f_A" - results = find._db_lookup(db, ensemble, corr, code) - assert len(results) == 1 - results = find._db_lookup(db, "SF_B", corr, code) - assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code) - assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, "sfcf") - assert results.empty - results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") - assert len(results) == 1 - results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_A") - assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") - assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_B") - assert results.empty - results = find._db_lookup(db, ensemble, corr, code, parameters = pars) - assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code, parameters = '{"par_A": 3.0, "par_B": 4.0}') - assert len(results) == 1 - - conn.close() - - -def test_sfcf_drop() -> None: - parameters0 = { - 'offset': [0,0,0], - 'quarks': [{'mass': 1, 'thetas': [0,0,0]}, {'mass': 2, 'thetas': [0,0,1]}], # m0s = -3.5, -3.75 - 'wf1': [[1, [0, 0]], [0.5, [1, 0]], [.75, [.5, .5]]], - 'wf2': [[1, [2, 1]], [2, [0.5, -0.5]], [.5, [.75, .72]]], - } - - assert not find._sfcf_drop(parameters0, offset=[0,0,0]) - assert find._sfcf_drop(parameters0, offset=[1,0,0]) - - assert not find._sfcf_drop(parameters0, quark_kappas = [1, 2]) - assert find._sfcf_drop(parameters0, quark_kappas = [-3.1, -3.72]) - - assert not find._sfcf_drop(parameters0, quark_masses = [-3.5, -3.75]) - assert find._sfcf_drop(parameters0, quark_masses = [-3.1, -3.72]) - - assert not find._sfcf_drop(parameters0, qk1 = 1) - assert not find._sfcf_drop(parameters0, qk2 = 2) - assert find._sfcf_drop(parameters0, qk1 = 2) - assert find._sfcf_drop(parameters0, qk2 = 1) - - assert not find._sfcf_drop(parameters0, qk1 = [0.5,1.5]) - assert not find._sfcf_drop(parameters0, qk2 = [1.5,2.5]) - assert find._sfcf_drop(parameters0, qk1 = 2) - assert find._sfcf_drop(parameters0, qk2 = 1) - with pytest.raises(ValueError): - assert not find._sfcf_drop(parameters0, qk1 = [0.5,1,5]) - with pytest.raises(ValueError): - assert not find._sfcf_drop(parameters0, qk2 = [1,5,2.5]) - - assert find._sfcf_drop(parameters0, qm1 = 1.2) - assert find._sfcf_drop(parameters0, qm2 = 2.2) - assert not find._sfcf_drop(parameters0, qm1 = -3.5) - assert not find._sfcf_drop(parameters0, qm2 = -3.75) - - assert find._sfcf_drop(parameters0, qm2 = 1.2) - assert find._sfcf_drop(parameters0, qm1 = 2.2) - with pytest.raises(ValueError): - assert not find._sfcf_drop(parameters0, qm1 = [0.5,1,5]) - with pytest.raises(ValueError): - assert not find._sfcf_drop(parameters0, qm2 = [1,5,2.5]) - - -def test_openQCD_filter() -> None: - record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - data = [ - record_0, - record_1, - record_2, - record_3, - ] - cols = ["name", - "ensemble", - "code", - "path", - "project", - "parameters", - "parameter_file", - "created_at", - "updated_at"] - df = pd.DataFrame(data,columns=cols) - - with pytest.warns(Warning): - find.openQCD_filter(df, a = "asdf") - - -def test_code_filter() -> None: - record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_4 = ["f_A", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_5 = ["f_A", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_6 = ["f_P", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_7 = ["f_P", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_8 = ["f_P", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - data = [ - record_0, - record_1, - record_2, - record_3, - ] - cols = ["name", - "ensemble", - "code", - "path", - "project", - "parameters", - "parameter_file", - "created_at", - "updated_at"] - df = pd.DataFrame(data,columns=cols) - - res = find._code_filter(df, "sfcf") - assert len(res) == 4 - - data = [ - record_4, - record_5, - record_6, - record_7, - record_8, - ] - cols = ["name", - "ensemble", - "code", - "path", - "project", - "parameters", - "parameter_file", - "created_at", - "updated_at"] - df = pd.DataFrame(data,columns=cols) - - res = find._code_filter(df, "openQCD") - assert len(res) == 5 - with pytest.raises(ValueError): - res = find._code_filter(df, "asdf") - - -def test_find_record() -> None: - assert True - - -def test_find_project(tmp_path: Path) -> None: - cinit.create(tmp_path) - db = tmp_path / "backlogger.db" - dl.unlock(str(db), dataset=str(tmp_path)) - conn = sqlite3.connect(db) - c = conn.cursor() - uuid = "test_uuid" - alias_str = "fun_project" - tag_str = "tt" - owner = "tester" - code = "test_code" - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - conn.commit() - - assert uuid == find.find_project(tmp_path, "fun_project") - - uuid = "test_uuid2" - alias_str = "fun_project" - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - conn.commit() - - with pytest.raises(Exception): - assert uuid == find._project_lookup_by_alias(tmp_path, "fun_project") - conn.close() - - -def test_list_projects(tmp_path: Path) -> None: - cinit.create(tmp_path) - db = tmp_path / "backlogger.db" - dl.unlock(str(db), dataset=str(tmp_path)) - conn = sqlite3.connect(db) - c = conn.cursor() - uuid = "test_uuid" - alias_str = "fun_project" - tag_str = "tt" - owner = "tester" - code = "test_code" - - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - uuid = "test_uuid2" - alias_str = "fun_project2" - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - uuid = "test_uuid3" - alias_str = "fun_project3" - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - uuid = "test_uuid4" - alias_str = "fun_project4" - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - conn.commit() - conn.close() - results = find.list_projects(tmp_path) - assert len(results) == 4 - for i in range(4): - assert len(results[i]) == 2