diff --git a/corrlib/find.py b/corrlib/find.py index 0c5f9a6..3e73240 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -5,7 +5,7 @@ import json import pandas as pd import numpy as np from .input.implementations import codes -from .tools import k2m +from .tools import k2m, get_file # this will implement the search functionality @@ -143,21 +143,22 @@ def find_record(path, ensemble, correlator_name, code, project=None, parameters= db = path + '/backlogger.db' if code not in codes: raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) - if os.path.exists(db): - dl.get(db, dataset=path) + get_file(path, "backlogger.db") results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters, created_before=created_before, created_after=created_after, updated_before=updated_before, updated_after=updated_after, revision=revision) if code == "sfcf": results = sfcf_filter(results, **kwargs) - print("Found " + str(len(results)) + " results") + print("Found " + str(len(results)) + " result" + ("s" if len(results)>1 else "")) return results.reset_index() -def find_project(db, name): - return _project_lookup_by_alias(db, name) +def find_project(path, name): + get_file(path, "backlogger.db") + return _project_lookup_by_alias(os.path.join(path, "backlogger.db"), name) def list_projects(path): db = path + '/backlogger.db' + get_file(path, "backlogger.db") conn = sqlite3.connect(db) c = conn.cursor() c.execute("SELECT id,aliases FROM projects") diff --git a/corrlib/main.py b/corrlib/main.py index defe03a..fc7663f 100644 --- a/corrlib/main.py +++ b/corrlib/main.py @@ -5,7 +5,7 @@ import os from .git_tools import move_submodule import shutil from .find import _project_lookup_by_id -from .tools import list2str, str2list +from .tools import list2str, str2list, get_file from typing import Union @@ -24,13 +24,15 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni code: str (optional) The code that 
was used to create the measurements. """ - conn = sqlite3.connect(path + "/backlogger.db") + db = path + "/backlogger.db" + get_file(path, "backlogger.db") + conn = sqlite3.connect(db) c = conn.cursor() known_projects = c.execute("SELECT * FROM projects WHERE id=?", (uuid,)) if known_projects.fetchone(): raise ValueError("Project already imported, use update_project() instead.") - dl.unlock(path + "/backlogger.db", dataset=path) + dl.unlock(db, dataset=path) alias_str = None if aliases is not None: alias_str = list2str(aliases) @@ -40,11 +42,12 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code)) conn.commit() conn.close() - dl.save(path + "/backlogger.db", message="Added entry for project " + uuid + " to database", dataset=path) + dl.save(db, message="Added entry for project " + uuid + " to database", dataset=path) -def update_project_data(db, uuid, prop, value = None): - conn = sqlite3.connect(db) +def update_project_data(path, uuid, prop, value = None): + get_file(path, "backlogger.db") + conn = sqlite3.connect(os.path.join(path, "backlogger.db")) c = conn.cursor() c.execute(f"UPDATE projects SET '{prop}' = '{value}' WHERE id == '{uuid}'") conn.commit() @@ -54,6 +57,7 @@ def update_project_data(db, uuid, prop, value = None): def update_aliases(path: str, uuid: str, aliases: list[str]): db = os.path.join(path, "backlogger.db") + get_file(path, "backlogger.db") known_data = _project_lookup_by_id(db, uuid)[0] known_aliases = known_data[1] @@ -73,7 +77,7 @@ def update_aliases(path: str, uuid: str, aliases: list[str]): if not len(new_alias_list) == len(known_alias_list): alias_str = list2str(new_alias_list) dl.unlock(db, dataset=path) - update_project_data(db, uuid, "aliases", alias_str) + update_project_data(path, uuid, "aliases", alias_str) 
dl.save(db, dataset=path) return @@ -117,11 +121,13 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Unio if not uuid: raise ValueError("The dataset does not have a uuid!") if not os.path.exists(path + "/projects/" + uuid): - dl.unlock(path + "/backlogger.db", dataset=path) + db = path + "/backlogger.db" + get_file(path, "backlogger.db") + dl.unlock(db, dataset=path) create_project(path, uuid, owner, tags, aliases, code) move_submodule(path, 'projects/tmp', 'projects/' + uuid) os.mkdir(path + '/import_scripts/' + uuid) - dl.save([path + "/backlogger.db", path + '/projects/' + uuid], message="Import project from " + url, dataset=path) + dl.save([db, path + '/projects/' + uuid], message="Import project from " + url, dataset=path) else: dl.drop(tmp_path, reckless='kill') shutil.rmtree(tmp_path) @@ -134,3 +140,11 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Unio # make this more concrete return uuid + + +def drop_project_data(path: str, uuid: str, path_in_project: str = ""): + """ + Drop (parts of) a project to free up disk space + """ + dl.drop(path + "/projects/" + uuid + "/" + path_in_project) + diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index a78f6f2..80925e1 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -7,7 +7,7 @@ import json from typing import Union, Optional from pyerrors import Obs, Corr, load_object, dump_object from hashlib import sha256, sha1 -from .tools import cached, record2name_key, name_key2record, make_version_hash +from .tools import cached, get_file, record2name_key, name_key2record, make_version_hash from .cache_io import is_in_cache, cache_path, cache_dir, get_version_hash @@ -28,6 +28,7 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O The uuid of the project. 
""" db = os.path.join(path, 'backlogger.db') + get_file(path, "backlogger.db") dl.unlock(db, dataset=path) conn = sqlite3.connect(db) c = conn.cursor() @@ -159,8 +160,9 @@ def load_records(path: str, record_paths: list[str], preloaded = {}) -> list[Uni def preload(path: str, file: str): - dl.get(os.path.join(path, file), dataset=path) + get_file(path, file) filedict = pj.load_json_dict(os.path.join(path, file)) + print("> read file") return filedict @@ -168,6 +170,7 @@ def drop_record(path: str, meas_path: str): file_in_archive, sub_key = record2name_key(meas_path) file = os.path.join(path, file_in_archive) db = os.path.join(path, 'backlogger.db') + get_file(path, 'backlogger.db') dl.unlock(db, dataset=path) conn = sqlite3.connect(db) c = conn.cursor() diff --git a/corrlib/tools.py b/corrlib/tools.py index 43ab1ba..337912e 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -1,5 +1,6 @@ -import hashlib import os +import datalad.api as dl +import hashlib def str2list(string): return string.split(",") @@ -16,20 +17,3 @@ def m2k(m): def k2m(k): return (1/(2*k))-4 - - -def record2name_key(record_path: str): - file = record_path.split("::")[0] - key = record_path.split("::")[1] - return file, key - - -def name_key2record(name: str, key: str): - return name + "::" + key - - -def make_version_hash(path, record): - file, key = record2name_key(record) - with open(os.path.join(path, file), 'rb') as fp: - file_hash = hashlib.file_digest(fp, 'sha1').hexdigest() - return file_hash diff --git a/corrlib/version.py b/corrlib/version.py index bbab024..d31c31e 100644 --- a/corrlib/version.py +++ b/corrlib/version.py @@ -1 +1 @@ -__version__ = "0.1.4" +__version__ = "0.2.3" diff --git a/examples/example_import.toml b/examples/example_import.toml new file mode 100644 index 0000000..f0965fc --- /dev/null +++ b/examples/example_import.toml @@ -0,0 +1,28 @@ +['project'] +url = "git@kuhl-mann.de:lattice/cA_data.git" +code = "sfcf" +[measurements] +[measurements.a] + path = 
"/path/to/measurement" + ensemble = "A1k1" + param_file = "/path/to/file" + version = "1.1" + prefix = "pref" + cfg_seperator = "n" + names = ['list', 'of', 'names'] +[measurements.b] + path = "/path/to/measurement" + ensemble = "A1k1" + param_file = "/path/to/file" + version = "1.1" + prefix = "pref" + cfg_seperator = "n" + names = ['list', 'of', 'names'] +[measurements.c] + path = "/path/to/measurement" + ensemble = "A1k1" + param_file = "/path/to/file" + version = "1.1" + prefix = "pref" + cfg_seperator = "n" + names = ['list', 'of', 'names'] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ed2df7b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = ["setuptools >= 63.0.0", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.ruff.lint] +ignore = ["F403"] \ No newline at end of file diff --git a/tests/import_project_test.py b/tests/import_project_test.py new file mode 100644 index 0000000..ec8272c --- /dev/null +++ b/tests/import_project_test.py @@ -0,0 +1,17 @@ +import corrlib.toml as t + + +def test_toml_check_measurement_data(): + measurements = { + "a": + { + "path": "/path/to/measurement", + "ensemble": "A1k1", + "param_file": "/path/to/file", + "version": "1.1", + "prefix": "pref", + "cfg_seperator": "n", + "names": ['list', 'of', 'names'] + } + } + t.check_measurement_data(measurements)