Compare commits

..

15 commits

30 changed files with 323 additions and 3392 deletions

View file

@ -1,30 +0,0 @@
name: Mypy
on:
push:
pull_request:
workflow_dispatch:
jobs:
mypy:
runs-on: ubuntu-latest
env:
UV_CACHE_DIR: /tmp/.uv-cache
steps:
- name: Install git-annex
run: |
sudo apt-get update
sudo apt-get install -y git-annex
- name: Check out the repository
uses: https://github.com/RouxAntoine/checkout@v4.1.8
with:
show-progress: true
- name: Install uv
uses: astral-sh/setup-uv@v7
with:
python-version: ${{ matrix.python-version }}
enable-cache: true
- name: Install corrlib
run: uv sync --locked --all-extras --dev --python "3.12"
- name: Run tests
run: uv run mypy corrlib

View file

@ -1,39 +0,0 @@
name: Pytest
on:
push:
pull_request:
workflow_dispatch:
schedule:
- cron: '0 4 1 * *'
jobs:
pytest:
strategy:
matrix:
python-version:
- "3.12"
- "3.13"
- "3.14"
runs-on: ubuntu-latest
env:
UV_CACHE_DIR: /tmp/.uv-cache
steps:
- name: Install git-annex
run: |
sudo apt-get update
sudo apt-get install -y git-annex
- name: Check out the repository
uses: https://github.com/RouxAntoine/checkout@v4.1.8
with:
show-progress: true
- name: Install uv
uses: astral-sh/setup-uv@v7
with:
python-version: ${{ matrix.python-version }}
enable-cache: true
- name: Install corrlib
run: uv sync --locked --all-extras --dev --python ${{ matrix.python-version }}
- name: Run tests
run: uv run pytest --cov=corrlib tests

View file

@ -1,30 +0,0 @@
name: Ruff
on:
push:
pull_request:
workflow_dispatch:
jobs:
ruff:
runs-on: ubuntu-latest
env:
UV_CACHE_DIR: /tmp/.uv-cache
steps:
- name: Install git-annex
run: |
sudo apt-get update
sudo apt-get install -y git-annex
- name: Check out the repository
uses: https://github.com/RouxAntoine/checkout@v4.1.8
with:
show-progress: true
- name: Install uv
uses: astral-sh/setup-uv@v7
with:
enable-cache: true
- name: Install corrlib
run: uv sync --locked --all-extras --dev --python "3.12"
- name: Run tests
run: uv run ruff check corrlib

5
.gitignore vendored
View file

@ -2,7 +2,4 @@ pyerrors_corrlib.egg-info
__pycache__ __pycache__
*.egg-info *.egg-info
test.ipynb test.ipynb
.vscode test_ds
.venv
.pytest_cache
.coverage

5
.gitmodules vendored Normal file
View file

@ -0,0 +1,5 @@
[submodule "projects/tmp"]
path = projects/tmp
url = git@kuhl-mann.de:lattice/charm_SF_data.git
datalad-id = 5f402163-77f2-470e-b6f1-64d7bf9f87d4
datalad-url = git@kuhl-mann.de:lattice/charm_SF_data.git

View file

@ -5,3 +5,12 @@ This is done in a reproducible way using `datalad`.
In principle, a dataset is created, that is automatically administered by the backlogger, in which data from differnt projects are held together. In principle, a dataset is created, that is automatically administered by the backlogger, in which data from differnt projects are held together.
Everything is catalogued by a searchable SQL database, which holds the paths to the respective measurements. Everything is catalogued by a searchable SQL database, which holds the paths to the respective measurements.
The original projects can be linked to the dataset and the data may be imported using wrapper functions around the read methonds of pyerrors. The original projects can be linked to the dataset and the data may be imported using wrapper functions around the read methonds of pyerrors.
We work with the following nomenclature in this project:
- Measurement
A set of Observables, including the appropriate metadata.
- Project
A series of measurements that was done by one person as part of their research.
- Record
An entry of a single Correlator in the database of the backlogger.
-
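The record nomenclature above maps onto the path convention used throughout the new code in this comparison: a record path is the measurement file in the archive joined to a parameter hash by "::". A minimal sketch (ensemble, correlator and hash are invented; the helpers mirror record2name_key/name_key2record added in corrlib/tools.py further down):

def record2name_key(record_path: str) -> tuple[str, str]:
    # split "<file in archive>::<parameter hash>" into its two parts
    file, key = record_path.split("::")
    return file, key

def name_key2record(name: str, key: str) -> str:
    # join them back into a record path as stored in the backlogs table
    return name + "::" + key

record = name_key2record("archive/E250/f_P/E250_f_P.json.gz", "3f7a9c")
print(record2name_key(record))
# ('archive/E250/f_P/E250_f_P.json.gz', '3f7a9c')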

25
TODO.md
View file

@ -1,14 +1,21 @@
# TODO # TODO
## Features ## Features
- implement import of non-datalad projects - [ ] implement import of non-datalad projects
- implement a way to use another backlog repo as a project - [ ] implement a way to use another backlog repo as a project
- [ ] make cache deadlock resistant (no read while writing)
- find a way to convey the mathematical structure of what EXACTLY is the form of the correlator in a specific project - [ ] find a way to convey the mathematical structure of what EXACTLY is the form of the correlator in a specific project
- this could e.g. be done along the lines of mandatory documentation - [ ] this could e.g. be done along the lines of mandatory documentation
- keep better track of the versions of the code, that was used for a specific measurement. - [ ] keep better track of the versions of the code, that was used for a specific measurement.
- maybe let this be an input in the project file? - [ ] maybe let this be an input in the project file?
- git repo and commit hash/version tag - [ ] git repo and commit hash/version tag
- [ ] implement a code table?
- [ ] parallel processing of measurements
- [ ] extra SQL table for ensembles with UUID and aliases
## Bugfixes ## Bugfixes
- [ ] revisit the reimport function for single files - [ ] revisit the reimport function for single files
- [ ] drop_record needs to check whether any records are left in a json file.
## Rough Ideas
- [ ] multitable could provide a high-speed implementation of an HDF5-based format
- [ ] also implement a way to include compiled binaries in the archives.

View file

@ -1,5 +1,5 @@
""" """
The aim of this project is to extend pyerrors to be able to collect measurements from different projects and make them easily accessable to The aim of this project is to extend pyerrors to be able to collect measurements from different projects and make them easily accessable to
the research group. The idea is to build a database, in which the researcher can easily search for measurements on a correlator basis, the research group. The idea is to build a database, in which the researcher can easily search for measurements on a correlator basis,
which may be reusable. which may be reusable.
As a standard to store the measurements, we will use the .json.gz format from pyerrors. As a standard to store the measurements, we will use the .json.gz format from pyerrors.
@ -15,10 +15,10 @@ For now, we are interested in collecting primary IObservables only, as these are
__app_name__ = "corrlib" __app_name__ = "corrlib"
from .main import *
from .import input as input from .import input as input
from .initialization import create as create from .initialization import *
from .meas_io import load_record as load_record from .meas_io import *
from .meas_io import load_records as load_records from .cache_io import *
from .find import find_project as find_project from .find import *
from .find import find_record as find_record from .version import __version__
from .find import list_projects as list_projects
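The module docstring above describes the intended workflow: search the SQL database for a correlator, then load it through the pyerrors .json.gz backend. A minimal usage sketch, assuming a backlog dataset at ./corrlib and invented ensemble/correlator names; find_record and load_record are the functions touched further down in this diff:

from corrlib.find import find_record
from corrlib.meas_io import load_record

path = "./corrlib"   # hypothetical location of the backlog dataset
# query the backlogs table for one correlator on one ensemble, measured with sfcf
results = find_record(path, ensemble="E250", correlator_name="f_P", code="sfcf")
# assuming the returned DataFrame keeps the table's 'path' column,
# each row carries a record path of the form "<file>::<parameter hash>"
meas_path = results.iloc[0]["path"]
corr = load_record(path, meas_path)   # returns a pyerrors Corr or Obs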

View file

@ -1,9 +1,8 @@
from corrlib import cli, __app_name__ from corrlib import cli, __app_name__
def main() -> None: def main():
cli.app(prog_name=__app_name__) cli.app(prog_name=__app_name__)
return
if __name__ == "__main__": if __name__ == "__main__":

58
corrlib/cache_io.py Normal file
View file

@ -0,0 +1,58 @@
from typing import Union, Optional
import os
import shutil
from .tools import record2name_key
from pyerrors import dump_object
import datalad.api as dl
import sqlite3
def get_version_hash(path, record):
db = os.path.join(path, "backlogger.db")
dl.get(db, dataset=path)
conn = sqlite3.connect(db)
c = conn.cursor()
c.execute(f"SELECT current_version FROM 'backlogs' WHERE path = '{record}'")
return c.fetchall()[0][0]
def drop_cache_files(path: str, fs: Optional[list[str]]=None):
cache_dir = os.path.join(path, ".cache")
if fs is None:
fs = os.listdir(cache_dir)
for f in fs:
shutil.rmtree(os.path.join(cache_dir, f))
def cache_dir(path, file):
cache_path_list = [path]
cache_path_list.append(".cache")
cache_path_list.extend(file.split("/")[1:])
cache_path = "/".join(cache_path_list)
return cache_path
def cache_path(path, file, sha_hash, key):
cache_path = os.path.join(cache_dir(path, file), key + "_" + sha_hash)
return cache_path
def is_old_version(path, record):
version_hash = get_version_hash(path, record)
file, key = record2name_key(record)
meas_cache_path = os.path.join(cache_dir(path, file))
ls = []
for p, ds, fs in os.walk(meas_cache_path):
ls.extend(fs)
for filename in ls:
if key == filename.split("_")[0]:
if not version_hash == filename.split("_")[1][:-2]:
return True
else:
return False
def is_in_cache(path, record):
version_hash = get_version_hash(path, record)
file, key = record2name_key(record)
return os.path.exists(cache_path(path, file, version_hash, key) + ".p")
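The new cache_io.py ties the pickle cache to the current_version column added to the backlogs table: cached objects live under the measurement file's path inside .cache, and their file names carry both the record key and the version hash. A sketch of the resulting layout, following cache_dir/cache_path above (all names are invented):

import os

path = "./corrlib"                              # backlog dataset
file = "archive/E250/f_P/E250_f_P.json.gz"      # measurement file inside the archive
key = "3f7a9c"                                  # parameter hash of the record
version = "b2d41e"                              # current_version (sha1 of the json file)

# cache_dir() drops the leading "archive/" component and mirrors the rest under .cache
cache_directory = os.path.join(path, ".cache", *file.split("/")[1:])
# cache_path() names the pickle "<key>_<version>"; pyerrors' dump_object appends ".p"
cache_file = os.path.join(cache_directory, key + "_" + version) + ".p"
print(cache_file)
# ./corrlib/.cache/E250/f_P/E250_f_P.json.gz/3f7a9c_b2d41e.p
# Once the json file changes, current_version changes and is_old_version() reports
# the cached pickle as stale.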

View file

@ -1,14 +1,13 @@
from typing import Optional from typing import Optional
import typer import typer
from corrlib import __app_name__ from corrlib import __app_name__, __version__
from .initialization import create from .initialization import create
from .toml import import_tomls, update_project, reimport_project from .toml import import_tomls, update_project, reimport_project
from .find import find_record, list_projects from .find import find_record, list_projects
from .tools import str2list from .tools import str2list
from .main import update_aliases from .main import update_aliases
from .meas_io import drop_cache as mio_drop_cache from .cache_io import drop_cache_files as cio_drop_cache_files
import os import os
from importlib.metadata import version
app = typer.Typer() app = typer.Typer()
@ -16,7 +15,7 @@ app = typer.Typer()
def _version_callback(value: bool) -> None: def _version_callback(value: bool) -> None:
if value: if value:
print(__app_name__, version(__app_name__)) typer.echo(f"{__app_name__} v{__version__}")
raise typer.Exit() raise typer.Exit()
@ -26,9 +25,9 @@ def update(
str('./corrlib'), str('./corrlib'),
"--dataset", "--dataset",
"-d", "-d",
), ),
uuid: str = typer.Argument(), uuid: str = typer.Argument(),
) -> None: ) -> None:
""" """
Update a project by it's UUID. Update a project by it's UUID.
""" """
@ -43,7 +42,7 @@ def list(
"-d", "-d",
), ),
entities: str = typer.Argument('ensembles'), entities: str = typer.Argument('ensembles'),
) -> None: ) -> None:
""" """
List entities. (ensembles, projects) List entities. (ensembles, projects)
""" """
@ -72,10 +71,10 @@ def alias_add(
str('./corrlib'), str('./corrlib'),
"--dataset", "--dataset",
"-d", "-d",
), ),
uuid: str = typer.Argument(), uuid: str = typer.Argument(),
alias: str = typer.Argument(), alias: str = typer.Argument(),
) -> None: ) -> None:
""" """
Add an alias to a project UUID. Add an alias to a project UUID.
""" """
@ -90,11 +89,11 @@ def find(
str('./corrlib'), str('./corrlib'),
"--dataset", "--dataset",
"-d", "-d",
), ),
ensemble: str = typer.Argument(), ensemble: str = typer.Argument(),
corr: str = typer.Argument(), corr: str = typer.Argument(),
code: str = typer.Argument(), code: str = typer.Argument(),
) -> None: ) -> None:
""" """
Find a record in the backlog at hand. Through specifying it's ensemble and the measured correlator. Find a record in the backlog at hand. Through specifying it's ensemble and the measured correlator.
""" """
@ -108,15 +107,15 @@ def importer(
str('./corrlib'), str('./corrlib'),
"--dataset", "--dataset",
"-d", "-d",
), ),
files: str = typer.Argument( files: str = typer.Argument(
), ),
copy_file: bool = typer.Option( copy_file: bool = typer.Option(
bool(True), bool(True),
"--save", "--save",
"-s", "-s",
), ),
) -> None: ) -> None:
""" """
Import a project from a .toml-file via CLI. Import a project from a .toml-file via CLI.
""" """
@ -152,17 +151,12 @@ def init(
str('./corrlib'), str('./corrlib'),
"--dataset", "--dataset",
"-d", "-d",
), ),
tracker: str = typer.Option( ) -> None:
str('datalad'),
"--tracker",
"-t",
),
) -> None:
""" """
Initialize a new backlog-database. Initialize a new backlog-database.
""" """
create(path, tracker) create(path)
return return
@ -172,12 +166,12 @@ def drop_cache(
str('./corrlib'), str('./corrlib'),
"--dataset", "--dataset",
"-d", "-d",
), ),
) -> None: ) -> None:
""" """
Drop the currect cache directory of the dataset. Drop the currect cache directory of the dataset.
""" """
mio_drop_cache(path) cio_drop_cache_files(path)
return return
@ -190,6 +184,6 @@ def main(
help="Show the application's version and exit.", help="Show the application's version and exit.",
callback=_version_callback, callback=_version_callback,
is_eager=True, is_eager=True,
) )
) -> None: ) -> None:
return return

View file

@ -1,31 +1,29 @@
import sqlite3 import sqlite3
import datalad.api as dl
import os import os
import json import json
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from .input.implementations import codes from .input.implementations import codes
from .tools import k2m, get_db_file from .tools import k2m, get_file
from .tracker import get
from typing import Any, Optional
# this will implement the search functionality # this will implement the search functionality
def _project_lookup_by_alias(db: str, alias: str) -> str: def _project_lookup_by_alias(db, alias):
# this will lookup the project name based on the alias # this will lookup the project name based on the alias
conn = sqlite3.connect(db) conn = sqlite3.connect(db)
c = conn.cursor() c = conn.cursor()
c.execute(f"SELECT * FROM 'projects' WHERE alias = '{alias}'") c.execute(f"SELECT * FROM 'projects' WHERE alias = '{alias}'")
results = c.fetchall() results = c.fetchall()
conn.close() conn.close()
if len(results)>1: if len(results) > 1:
print("Error: multiple projects found with alias " + alias) print("Error: multiple projects found with alias " + alias)
elif len(results) == 0: elif len(results) == 0:
raise Exception("Error: no project found with alias " + alias) raise Exception("Error: no project found with alias " + alias)
return str(results[0][0]) return results[0][0]
def _project_lookup_by_id(db: str, uuid: str) -> list[tuple[str, str]]: def _project_lookup_by_id(db, uuid):
conn = sqlite3.connect(db) conn = sqlite3.connect(db)
c = conn.cursor() c = conn.cursor()
c.execute(f"SELECT * FROM 'projects' WHERE id = '{uuid}'") c.execute(f"SELECT * FROM 'projects' WHERE id = '{uuid}'")
@ -34,8 +32,7 @@ def _project_lookup_by_id(db: str, uuid: str) -> list[tuple[str, str]]:
return results return results
def _db_lookup(db: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, def _db_lookup(db, ensemble, correlator_name,code, project=None, parameters=None, created_before=None, created_after=None, updated_before=None, updated_after=None, revision=None):
created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame:
project_str = project project_str = project
search_expr = f"SELECT * FROM 'backlogs' WHERE name = '{correlator_name}' AND ensemble = '{ensemble}'" search_expr = f"SELECT * FROM 'backlogs' WHERE name = '{correlator_name}' AND ensemble = '{ensemble}'"
@ -59,7 +56,7 @@ def _db_lookup(db: str, ensemble: str, correlator_name: str, code: str, project:
return results return results
def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: def sfcf_filter(results, **kwargs):
drops = [] drops = []
for ind in range(len(results)): for ind in range(len(results)):
result = results.iloc[ind] result = results.iloc[ind]
@ -142,30 +139,27 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame:
return results.drop(drops) return results.drop(drops)
def find_record(path: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, def find_record(path, ensemble, correlator_name, code, project=None, parameters=None, created_before=None, created_after=None, updated_before=None, updated_after=None, revision=None, **kwargs):
created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, **kwargs: Any) -> pd.DataFrame: db = path + '/backlogger.db'
db_file = get_db_file(path)
db = os.path.join(path, db_file)
if code not in codes: if code not in codes:
raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes))
get(path, db_file) get_file(path, "backlogger.db")
results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters, created_before=created_before, created_after=created_after, updated_before=updated_before, updated_after=updated_after) results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters, created_before=created_before, created_after=created_after, updated_before=updated_before, updated_after=updated_after, revision=revision)
if code == "sfcf": if code == "sfcf":
results = sfcf_filter(results, **kwargs) results = sfcf_filter(results, **kwargs)
print("Found " + str(len(results)) + " result" + ("s" if len(results)>1 else "")) print("Found " + str(len(results)) + " result" + ("s" if len(results)>1 else ""))
return results.reset_index() return results.reset_index()
def find_project(path: str, name: str) -> str: def find_project(path, name):
db_file = get_db_file(path) get_file(path, "backlogger.db")
get(path, db_file) return _project_lookup_by_alias(os.path.join(path, "backlogger.db"), name)
return _project_lookup_by_alias(os.path.join(path, db_file), name)
def list_projects(path: str) -> list[tuple[str, str]]: def list_projects(path):
db_file = get_db_file(path) db = path + '/backlogger.db'
get(path, db_file) get_file(path, "backlogger.db")
conn = sqlite3.connect(os.path.join(path, db_file)) conn = sqlite3.connect(db)
c = conn.cursor() c = conn.cursor()
c.execute("SELECT id,aliases FROM projects") c.execute("SELECT id,aliases FROM projects")
results = c.fetchall() results = c.fetchall()

View file

@ -1,11 +1,11 @@
import os import os
from .tracker import save import datalad.api as dl
import git import git
GITMODULES_FILE = '.gitmodules' GITMODULES_FILE = '.gitmodules'
def move_submodule(repo_path: str, old_path: str, new_path: str) -> None: def move_submodule(repo_path, old_path, new_path):
""" """
Move a submodule to a new location. Move a submodule to a new location.
@ -40,6 +40,4 @@ def move_submodule(repo_path: str, old_path: str, new_path: str) -> None:
repo = git.Repo(repo_path) repo = git.Repo(repo_path)
repo.git.add('.gitmodules') repo.git.add('.gitmodules')
# save new state of the dataset # save new state of the dataset
save(repo_path, message=f"Move module from {old_path} to {new_path}", files=['.gitmodules', repo_path]) dl.save(repo_path, message=f"Move module from {old_path} to {new_path}", dataset=repo_path)
return

View file

@ -1,10 +1,9 @@
from configparser import ConfigParser
import sqlite3 import sqlite3
import datalad.api as dl
import os import os
from .tracker import save, init
def _create_db(db: str) -> None: def _create_db(db):
""" """
Create the database file and the table. Create the database file and the table.
@ -22,7 +21,8 @@ def _create_db(db: str) -> None:
parameters TEXT, parameters TEXT,
parameter_file TEXT, parameter_file TEXT,
created_at TEXT, created_at TEXT,
updated_at TEXT)''') updated_at TEXT,
current_version TEXT)''')
c.execute('''CREATE TABLE IF NOT EXISTS projects c.execute('''CREATE TABLE IF NOT EXISTS projects
(id TEXT PRIMARY KEY, (id TEXT PRIMARY KEY,
aliases TEXT, aliases TEXT,
@ -33,55 +33,21 @@ def _create_db(db: str) -> None:
updated_at TEXT)''') updated_at TEXT)''')
conn.commit() conn.commit()
conn.close() conn.close()
return
def _create_config(path: str, tracker: str, cached: bool) -> ConfigParser: def create(path):
"""
Create the config file for backlogger.
"""
config = ConfigParser()
config['core'] = {
'version': '1.0',
'tracker': tracker,
'cached': str(cached),
}
config['paths'] = {
'db': 'backlogger.db',
'projects_path': 'projects',
'archive_path': 'archive',
'toml_imports_path': 'toml_imports',
'import_scripts_path': 'import_scripts',
}
return config
def _write_config(path: str, config: ConfigParser) -> None:
"""
Write the config file to disk.
"""
with open(os.path.join(path, '.corrlib'), 'w') as configfile:
config.write(configfile)
return
def create(path: str, tracker: str = 'datalad', cached: bool = True) -> None:
""" """
Create folder of backlogs. Create folder of backlogs.
""" """
config = _create_config(path, tracker, cached) dl.create(path)
init(path, tracker) _create_db(path + '/backlogger.db')
_write_config(path, config) os.chmod(path + '/backlogger.db', 0o666) # why does this not work?
_create_db(os.path.join(path, config['paths']['db'])) os.makedirs(path + '/projects')
os.chmod(os.path.join(path, config['paths']['db']), 0o666) os.makedirs(path + '/archive')
os.makedirs(os.path.join(path, config['paths']['projects_path'])) os.makedirs(path + '/toml_imports')
os.makedirs(os.path.join(path, config['paths']['archive_path'])) os.makedirs(path + '/import_scripts/template.py')
os.makedirs(os.path.join(path, config['paths']['toml_imports_path'])) with open(path + "/.gitignore", "w") as fp:
os.makedirs(os.path.join(path, config['paths']['import_scripts_path'], 'template.py'))
with open(os.path.join(path, ".gitignore"), "w") as fp:
fp.write(".cache") fp.write(".cache")
fp.close() fp.close()
save(path, message="Initialized correlator library") dl.save(path, dataset=path, message="Initialize backlogger directory.")
return

View file

@ -2,6 +2,6 @@
Import functions for different codes. Import functions for different codes.
""" """
from . import sfcf as sfcf from . import sfcf
from . import openQCD as openQCD from . import openQCD
from . import implementations as implementations from . import implementations

View file

@ -2,7 +2,7 @@ import pyerrors.input.openQCD as input
import datalad.api as dl import datalad.api as dl
import os import os
import fnmatch import fnmatch
from typing import Any, Optional from typing import Any
def read_ms1_param(path: str, project: str, file_in_project: str) -> dict[str, Any]: def read_ms1_param(path: str, project: str, file_in_project: str) -> dict[str, Any]:
@ -67,7 +67,7 @@ def read_ms3_param(path: str, project: str, file_in_project: str) -> dict[str, A
return param return param
def read_rwms(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, postfix: str="ms1", version: str='2.0', names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: def read_rwms(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, postfix: str="ms1", version: str='2.0', names: list[str]=None, files: list[str]=None) -> dict[str, Any]:
dataset = os.path.join(path, "projects", project) dataset = os.path.join(path, "projects", project)
directory = os.path.join(dataset, dir_in_project) directory = os.path.join(dataset, dir_in_project)
if files is None: if files is None:
@ -94,7 +94,7 @@ def read_rwms(path: str, project: str, dir_in_project: str, param: dict[str, Any
return rw_dict return rw_dict
def extract_t0(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str="", names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: def extract_t0(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str=None, names: list[str]=None, files: list[str]=None) -> dict[str, Any]:
dataset = os.path.join(path, "projects", project) dataset = os.path.join(path, "projects", project)
directory = os.path.join(dataset, dir_in_project) directory = os.path.join(dataset, dir_in_project)
if files is None: if files is None:
@ -132,7 +132,7 @@ def extract_t0(path: str, project: str, dir_in_project: str, param: dict[str, An
return t0_dict return t0_dict
def extract_t1(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str = "", names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: def extract_t1(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str = None, names: list[str]=None, files: list[str]=None) -> dict[str, Any]:
directory = os.path.join(path, "projects", project, dir_in_project) directory = os.path.join(path, "projects", project, dir_in_project)
if files is None: if files is None:
files = [] files = []

View file

@ -5,7 +5,7 @@ import os
from typing import Any from typing import Any
bi_corrs: list[str] = ["f_P", "fP", "f_p", bi_corrs: list = ["f_P", "fP", "f_p",
"g_P", "gP", "g_p", "g_P", "gP", "g_p",
"fA0", "f_A", "f_a", "fA0", "f_A", "f_a",
"gA0", "g_A", "g_a", "gA0", "g_A", "g_a",
@ -43,7 +43,7 @@ bi_corrs: list[str] = ["f_P", "fP", "f_p",
"l3A2", "l3_A2", "g_av23", "l3A2", "l3_A2", "g_av23",
] ]
bb_corrs: list[str] = [ bb_corrs: list = [
'F1', 'F1',
'F_1', 'F_1',
'f_1', 'f_1',
@ -64,7 +64,7 @@ bb_corrs: list[str] = [
'F_sPdP_d', 'F_sPdP_d',
] ]
bib_corrs: list[str] = [ bib_corrs: list = [
'F_V0', 'F_V0',
'K_V0', 'K_V0',
] ]
@ -184,7 +184,7 @@ def read_param(path: str, project: str, file_in_project: str) -> dict[str, Any]:
return params return params
def _map_params(params: dict[str, Any], spec_list: list[str]) -> dict[str, Any]: def _map_params(params: dict, spec_list: list) -> dict[str, Any]:
""" """
Map the extracted parameters to the extracted data. Map the extracted parameters to the extracted data.
@ -194,7 +194,7 @@ def _map_params(params: dict[str, Any], spec_list: list[str]) -> dict[str, Any]:
The parameters extracted from the parameter (input) file. in the dict form given by read_param. The parameters extracted from the parameter (input) file. in the dict form given by read_param.
spec_list: list spec_list: list
The list of specifications that belongs to the dorrelator in question. The list of specifications that belongs to the dorrelator in question.
Return Return
------ ------
new_specs: dict new_specs: dict
@ -228,7 +228,7 @@ def _map_params(params: dict[str, Any], spec_list: list[str]) -> dict[str, Any]:
return new_specs return new_specs
def get_specs(key: str, parameters: dict[str, Any], sep: str = '/') -> str: def get_specs(key, parameters, sep='/') -> str:
key_parts = key.split(sep) key_parts = key.split(sep)
if corr_types[key_parts[0]] == 'bi': if corr_types[key_parts[0]] == 'bi':
param = _map_params(parameters, key_parts[1:-1]) param = _map_params(parameters, key_parts[1:-1])
@ -238,7 +238,7 @@ def get_specs(key: str, parameters: dict[str, Any], sep: str = '/') -> str:
return s return s
def read_data(path: str, project: str, dir_in_project: str, prefix: str, param: dict[str, Any], version: str = '1.0c', cfg_seperator: str = 'n', sep: str = '/', **kwargs: Any) -> dict[str, Any]: def read_data(path, project, dir_in_project, prefix, param, version='1.0c', cfg_seperator='n', sep='/', **kwargs) -> dict:
""" """
Extract the data from the sfcf file. Extract the data from the sfcf file.

View file

@ -5,12 +5,11 @@ import os
from .git_tools import move_submodule from .git_tools import move_submodule
import shutil import shutil
from .find import _project_lookup_by_id from .find import _project_lookup_by_id
from .tools import list2str, str2list, get_db_file from .tools import list2str, str2list, get_file
from .tracker import get, save, unlock, clone, drop from typing import Union
from typing import Union, Optional
def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Union[list[str], None]=None, aliases: Union[list[str], None]=None, code: Union[str, None]=None) -> None: def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Union[str, None]=None, aliases: Union[str, None]=None, code: Union[str, None]=None):
""" """
Create a new project entry in the database. Create a new project entry in the database.
@ -25,32 +24,30 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni
code: str (optional) code: str (optional)
The code that was used to create the measurements. The code that was used to create the measurements.
""" """
db_file = get_db_file(path) db = path + "/backlogger.db"
db = os.path.join(path, db_file) get_file(path, "backlogger.db")
get(path, db_file)
conn = sqlite3.connect(db) conn = sqlite3.connect(db)
c = conn.cursor() c = conn.cursor()
known_projects = c.execute("SELECT * FROM projects WHERE id=?", (uuid,)) known_projects = c.execute("SELECT * FROM projects WHERE id=?", (uuid,))
if known_projects.fetchone(): if known_projects.fetchone():
raise ValueError("Project already imported, use update_project() instead.") raise ValueError("Project already imported, use update_project() instead.")
unlock(path, db_file) dl.unlock(db, dataset=path)
alias_str = "" alias_str = None
if aliases is not None: if aliases is not None:
alias_str = list2str(aliases) alias_str = list2str(aliases)
tag_str = "" tag_str = None
if tags is not None: if tags is not None:
tag_str = list2str(tags) tag_str = list2str(tags)
c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code)) c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code))
conn.commit() conn.commit()
conn.close() conn.close()
save(path, message="Added entry for project " + uuid + " to database", files=[db_file]) dl.save(db, message="Added entry for project " + uuid + " to database", dataset=path)
def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None] = None) -> None: def update_project_data(path, uuid, prop, value = None):
db_file = get_db_file(path) get_file(path, "backlogger.db")
get(path, db_file) conn = sqlite3.connect(os.path.join(path, "backlogger.db"))
conn = sqlite3.connect(os.path.join(path, db_file))
c = conn.cursor() c = conn.cursor()
c.execute(f"UPDATE projects SET '{prop}' = '{value}' WHERE id == '{uuid}'") c.execute(f"UPDATE projects SET '{prop}' = '{value}' WHERE id == '{uuid}'")
conn.commit() conn.commit()
@ -58,10 +55,9 @@ def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None]
return return
def update_aliases(path: str, uuid: str, aliases: list[str]) -> None: def update_aliases(path: str, uuid: str, aliases: list[str]):
db_file = get_db_file(path) db = os.path.join(path, "backlogger.db")
db = os.path.join(path, db_file) get_file(path, "backlogger.db")
get(path, db_file)
known_data = _project_lookup_by_id(db, uuid)[0] known_data = _project_lookup_by_id(db, uuid)[0]
known_aliases = known_data[1] known_aliases = known_data[1]
@ -80,13 +76,13 @@ def update_aliases(path: str, uuid: str, aliases: list[str]) -> None:
if not len(new_alias_list) == len(known_alias_list): if not len(new_alias_list) == len(known_alias_list):
alias_str = list2str(new_alias_list) alias_str = list2str(new_alias_list)
unlock(path, db_file) dl.unlock(db, dataset=path)
update_project_data(path, uuid, "aliases", alias_str) update_project_data(path, uuid, "aliases", alias_str)
save(path, message="Updated aliases for project " + uuid, files=[db_file]) dl.save(db, dataset=path)
return return
def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Optional[list[str]]=None, aliases: Optional[list[str]]=None, code: Optional[str]=None, isDataset: bool=True) -> str: def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Union[str, None]=None, aliases: Union[str, None]=None, code: Union[str, None]=None, isDataset: bool=True):
""" """
Parameters Parameters
---------- ----------
@ -112,39 +108,43 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Opti
in order to receive a uuid and have a consistent interface. in order to receive a uuid and have a consistent interface.
""" """
tmp_path = os.path.join(path, 'projects/tmp') tmp_path = path + '/projects/tmp'
clone(path, source=url, target=tmp_path) if not isDataset:
dl.create(tmp_path, dataset=path)
shutil.copytree(url + "/*", path + '/projects/tmp/')
dl.save(tmp_path, dataset=path)
else:
dl.install(path=tmp_path, source=url, dataset=path)
tmp_ds = dl.Dataset(tmp_path) tmp_ds = dl.Dataset(tmp_path)
conf = dlc.ConfigManager(tmp_ds) conf = dlc.ConfigManager(tmp_ds)
uuid = str(conf.get("datalad.dataset.id")) uuid = conf.get("datalad.dataset.id")
if not uuid: if not uuid:
raise ValueError("The dataset does not have a uuid!") raise ValueError("The dataset does not have a uuid!")
if not os.path.exists(path + "/projects/" + uuid): if not os.path.exists(path + "/projects/" + uuid):
db_file = get_db_file(path) db = path + "/backlogger.db"
get(path, db_file) get_file(path, "backlogger.db")
unlock(path, db_file) dl.unlock(db, dataset=path)
create_project(path, uuid, owner, tags, aliases, code) create_project(path, uuid, owner, tags, aliases, code)
move_submodule(path, 'projects/tmp', 'projects/' + uuid) move_submodule(path, 'projects/tmp', 'projects/' + uuid)
os.mkdir(path + '/import_scripts/' + uuid) os.mkdir(path + '/import_scripts/' + uuid)
save(path, message="Import project from " + url, files=['projects/' + uuid, db_file]) dl.save([db, path + '/projects/' + uuid], message="Import project from " + url, dataset=path)
else: else:
dl.drop(tmp_path, reckless='kill') dl.drop(tmp_path, reckless='kill')
shutil.rmtree(tmp_path) shutil.rmtree(tmp_path)
if aliases is not None: if aliases is not None:
if isinstance(aliases, str): if isinstance(aliases, str):
alias_list = [aliases] alias_list = [aliases]
else: else:
alias_list = aliases alias_list = aliases
update_aliases(path, uuid, alias_list) update_aliases(path, uuid, alias_list)
# make this more concrete # make this more concrete
return uuid return uuid
def drop_project_data(path: str, uuid: str, path_in_project: str = "") -> None: def drop_project_data(path: str, uuid: str, path_in_project: str = ""):
""" """
Drop (parts of) a project to free up diskspace Drop (parts of) a prject to free up diskspace
""" """
drop(path + "/projects/" + uuid + "/" + path_in_project) dl.drop(path + "/projects/" + uuid + "/" + path_in_project)
return
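import_project above installs a datalad dataset under projects/tmp, reads its datalad UUID, registers the project in the database and moves the submodule to projects/<uuid>. A usage sketch with an invented URL, owner and aliases:

from corrlib.main import import_project, update_aliases

path = "./corrlib"                                  # backlog dataset
url = "git@example.com:lattice/my_project.git"      # datalad dataset to import (hypothetical)

uuid = import_project(path, url, owner="jdoe", aliases=["charm_SF"], code="sfcf")
# aliases are stored as a comma-separated string and can be extended later
update_aliases(path, uuid, ["charm_SF", "run1"])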

View file

@ -1,18 +1,17 @@
from pyerrors.input import json as pj from pyerrors.input import json as pj
import os import os
import datalad.api as dl
import sqlite3 import sqlite3
from .input import sfcf,openQCD from .input import sfcf,openQCD
import json import json
from typing import Union from typing import Union, Optional
from pyerrors import Obs, Corr, dump_object, load_object from pyerrors import Obs, Corr, load_object, dump_object
from hashlib import sha256 from hashlib import sha256, sha1
from .tools import get_db_file, cache_enabled from .tools import cached, get_file, record2name_key, name_key2record, make_version_hash
from .tracker import get, save, unlock from .cache_io import is_in_cache, cache_path, cache_dir, get_version_hash
import shutil
from typing import Any
def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: str) -> None: def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: Optional[str]=None):
""" """
Write a measurement to the backlog. Write a measurement to the backlog.
If the file for the measurement already exists, update the measurement. If the file for the measurement already exists, update the measurement.
@ -28,10 +27,9 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str,
uuid: str uuid: str
The uuid of the project. The uuid of the project.
""" """
db_file = get_db_file(path) db = os.path.join(path, 'backlogger.db')
db = os.path.join(path, db_file) get_file(path, "backlogger.db")
get(path, db_file) dl.unlock(db, dataset=path)
unlock(path, db_file)
conn = sqlite3.connect(db) conn = sqlite3.connect(db)
c = conn.cursor() c = conn.cursor()
files = [] files = []
@ -44,7 +42,7 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str,
os.makedirs(os.path.join(path, '.', 'archive', ensemble, corr)) os.makedirs(os.path.join(path, '.', 'archive', ensemble, corr))
else: else:
if os.path.exists(file): if os.path.exists(file):
unlock(path, file_in_archive) dl.unlock(file, dataset=path)
known_meas = pj.load_json_dict(file) known_meas = pj.load_json_dict(file)
if code == "sfcf": if code == "sfcf":
parameters = sfcf.read_param(path, uuid, parameter_file) parameters = sfcf.read_param(path, uuid, parameter_file)
@ -60,7 +58,7 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str,
pars = {} pars = {}
subkeys = [] subkeys = []
for i in range(len(parameters["rw_fcts"])): for i in range(len(parameters["rw_fcts"])):
par_list = [] par_list = []
for k in parameters["rw_fcts"][i].keys(): for k in parameters["rw_fcts"][i].keys():
par_list.append(str(parameters["rw_fcts"][i][k])) par_list.append(str(parameters["rw_fcts"][i][k]))
subkey = "/".join(par_list) subkey = "/".join(par_list)
@ -81,25 +79,26 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str,
subkey = "/".join(par_list) subkey = "/".join(par_list)
subkeys = [subkey] subkeys = [subkey]
pars[subkey] = json.dumps(parameters) pars[subkey] = json.dumps(parameters)
for subkey in subkeys:
parHash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest()
meas_path = file_in_archive + "::" + parHash
known_meas[parHash] = measurement[corr][subkey] meas_paths = []
for subkey in subkeys:
if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None: par_hash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest()
c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, )) meas_path = name_key2record(file_in_archive, par_hash)
else: meas_paths.append(meas_path)
c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", known_meas[par_hash] = measurement[corr][subkey]
data_hash = make_version_hash(path, meas_path)
if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is None:
c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))",
(corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file)) (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file))
conn.commit() c.execute("UPDATE backlogs SET current_version = ?, updated_at = datetime('now') WHERE path = ?", (data_hash, meas_path))
pj.dump_dict_to_json(known_meas, file) pj.dump_dict_to_json(known_meas, file)
files.append(os.path.join(path, db_file)) conn.commit()
files.append(db)
conn.close() conn.close()
save(path, message="Add measurements to database", files=files) dl.save(files, message="Add measurements to database", dataset=path)
def load_record(path: str, meas_path: str) -> Union[Corr, Obs]: def load_record(path: str, meas_path: str):
""" """
Load a list of records by their paths. Load a list of records by their paths.
@ -109,7 +108,7 @@ def load_record(path: str, meas_path: str) -> Union[Corr, Obs]:
Path of the correlator library. Path of the correlator library.
meas_path: str meas_path: str
The path to the correlator in the backlog system. The path to the correlator in the backlog system.
Returns Returns
------- -------
co : Corr or Obs co : Corr or Obs
@ -118,7 +117,7 @@ def load_record(path: str, meas_path: str) -> Union[Corr, Obs]:
return load_records(path, [meas_path])[0] return load_records(path, [meas_path])[0]
def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]: def load_records(path: str, record_paths: list[str], preloaded = {}) -> list[Union[Corr, Obs]]:
""" """
Load a list of records by their paths. Load a list of records by their paths.
@ -128,62 +127,51 @@ def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = {
Path of the correlator library. Path of the correlator library.
meas_paths: list[str] meas_paths: list[str]
A list of the paths to the correlator in the backlog system. A list of the paths to the correlator in the backlog system.
Returns Returns
------- -------
List List
""" """
needed_data: dict[str, list[str]] = {} needed_data: dict[str, list[str]] = {}
for mpath in meas_paths: for rpath in record_paths:
file = mpath.split("::")[0] file, key = record2name_key(rpath)
if file not in needed_data.keys(): if file not in needed_data.keys():
needed_data[file] = [] needed_data[file] = []
key = mpath.split("::")[1]
needed_data[file].append(key) needed_data[file].append(key)
returned_data: list[Any] = [] returned_data: list = []
for file in needed_data.keys(): for file in needed_data.keys():
for key in list(needed_data[file]): for key in list(needed_data[file]):
if os.path.exists(cache_path(path, file, key) + ".p"): record = name_key2record(file, key)
returned_data.append(load_object(cache_path(path, file, key) + ".p")) current_version = get_version_hash(path, record)
if is_in_cache(path, record):
returned_data.append(load_object(cache_path(path, file, current_version, key) + ".p"))
else: else:
if file not in preloaded: if file not in preloaded:
preloaded[file] = preload(path, file) preloaded[file] = preload(path, file)
returned_data.append(preloaded[file][key]) returned_data.append(preloaded[file][key])
if cache_enabled(path): if cached:
if not os.path.exists(cache_dir(path, file)): if not is_in_cache(path, record):
os.makedirs(cache_dir(path, file)) file, key = record2name_key(record)
dump_object(preloaded[file][key], cache_path(path, file, key)) if not os.path.exists(cache_dir(path, file)):
os.makedirs(cache_dir(path, file))
current_version = get_version_hash(path, record)
dump_object(preloaded[file][key], cache_path(path, file, current_version, key))
return returned_data return returned_data
def cache_dir(path: str, file: str) -> str: def preload(path: str, file: str):
cache_path_list = [path] get_file(path, file)
cache_path_list.append(".cache") filedict = pj.load_json_dict(os.path.join(path, file))
cache_path_list.extend(file.split("/")[1:])
cache_path = "/".join(cache_path_list)
return cache_path
def cache_path(path: str, file: str, key: str) -> str:
cache_path = os.path.join(cache_dir(path, file), key)
return cache_path
def preload(path: str, file: str) -> dict[str, Any]:
get(path, file)
filedict: dict[str, Any] = pj.load_json_dict(os.path.join(path, file))
print("> read file") print("> read file")
return filedict return filedict
def drop_record(path: str, meas_path: str) -> None: def drop_record(path: str, meas_path: str):
file_in_archive = meas_path.split("::")[0] file_in_archive, sub_key = record2name_key(meas_path)
file = os.path.join(path, file_in_archive) file = os.path.join(path, file_in_archive)
db_file = get_db_file(path) db = os.path.join(path, 'backlogger.db')
db = os.path.join(path, db_file) get_file(path, 'backlogger.db')
get(path, db_file) dl.unlock(db, dataset=path)
sub_key = meas_path.split("::")[1]
unlock(path, db_file)
conn = sqlite3.connect(db) conn = sqlite3.connect(db)
c = conn.cursor() c = conn.cursor()
if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path, )).fetchone() is not None: if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path, )).fetchone() is not None:
@ -195,16 +183,10 @@ def drop_record(path: str, meas_path: str) -> None:
known_meas = pj.load_json_dict(file) known_meas = pj.load_json_dict(file)
if sub_key in known_meas: if sub_key in known_meas:
del known_meas[sub_key] del known_meas[sub_key]
unlock(path, file_in_archive) dl.unlock(file, dataset=path)
pj.dump_dict_to_json(known_meas, file) pj.dump_dict_to_json(known_meas, file)
save(path, message="Drop measurements to database", files=[db, file]) dl.save([db, file], message="Drop measurements to database", dataset=path)
return return
else: else:
raise ValueError("This measurement does not exist as a file!") raise ValueError("This measurement does not exist as a file!")
def drop_cache(path: str) -> None:
cache_dir = os.path.join(path, ".cache")
for f in os.listdir(cache_dir):
shutil.rmtree(os.path.join(cache_dir, f))
return

View file

@ -10,26 +10,21 @@ the import of projects via TOML.
import tomllib as toml import tomllib as toml
import shutil import shutil
import datalad.api as dl
from .tracker import save
from .input import sfcf, openQCD from .input import sfcf, openQCD
from .main import import_project, update_aliases from .main import import_project, update_aliases
from .meas_io import write_measurement from .meas_io import write_measurement
import datalad.api as dl
import os import os
from .input.implementations import codes as known_codes from .input.implementations import codes as known_codes
from typing import Any
def replace_string(string: str, name: str, val: str):
def replace_string(string: str, name: str, val: str) -> str:
if '{' + name + '}' in string: if '{' + name + '}' in string:
n = string.replace('{' + name + '}', val) n = string.replace('{' + name + '}', val)
return n return n
else: else:
return string return string
def replace_in_meas(measurements: dict, vars: dict[str, str]):
def replace_in_meas(measurements: dict[str, dict[str, Any]], vars: dict[str, str]) -> dict[str, dict[str, Any]]:
# replace global variables # replace global variables
for name, value in vars.items(): for name, value in vars.items():
for m in measurements.keys(): for m in measurements.keys():
@ -41,8 +36,7 @@ def replace_in_meas(measurements: dict[str, dict[str, Any]], vars: dict[str, str
measurements[m][key][i] = replace_string(measurements[m][key][i], name, value) measurements[m][key][i] = replace_string(measurements[m][key][i], name, value)
return measurements return measurements
def fill_cons(measurements, constants):
def fill_cons(measurements: dict[str, dict[str, Any]], constants: dict[str, str]) -> dict[str, dict[str, Any]]:
for m in measurements.keys(): for m in measurements.keys():
for name, val in constants.items(): for name, val in constants.items():
if name not in measurements[m].keys(): if name not in measurements[m].keys():
@ -50,7 +44,7 @@ def fill_cons(measurements: dict[str, dict[str, Any]], constants: dict[str, str]
return measurements return measurements
def check_project_data(d: dict[str, dict[str, str]]) -> None: def check_project_data(d: dict) -> None:
if 'project' not in d.keys() or 'measurements' not in d.keys() or len(list(d.keys())) > 4: if 'project' not in d.keys() or 'measurements' not in d.keys() or len(list(d.keys())) > 4:
raise ValueError('There should only be maximally be four keys on the top level, "project" and "measurements" are mandatory, "contants" is optional!') raise ValueError('There should only be maximally be four keys on the top level, "project" and "measurements" are mandatory, "contants" is optional!')
project_data = d['project'] project_data = d['project']
@ -63,7 +57,7 @@ def check_project_data(d: dict[str, dict[str, str]]) -> None:
return return
def check_measurement_data(measurements: dict[str, dict[str, str]], code: str) -> None: def check_measurement_data(measurements: dict, code: str) -> None:
var_names: list[str] = [] var_names: list[str] = []
if code == "sfcf": if code == "sfcf":
var_names = ["path", "ensemble", "param_file", "version", "prefix", "cfg_seperator", "names"] var_names = ["path", "ensemble", "param_file", "version", "prefix", "cfg_seperator", "names"]
@ -97,14 +91,14 @@ def import_toml(path: str, file: str, copy_file: bool=True) -> None:
with open(file, 'rb') as fp: with open(file, 'rb') as fp:
toml_dict = toml.load(fp) toml_dict = toml.load(fp)
check_project_data(toml_dict) check_project_data(toml_dict)
project: dict[str, Any] = toml_dict['project'] project: dict = toml_dict['project']
if project['code'] not in known_codes: if project['code'] not in known_codes:
raise ValueError('Code' + project['code'] + 'has no import implementation!') raise ValueError('Code' + project['code'] + 'has no import implementation!')
measurements: dict[str, dict[str, Any]] = toml_dict['measurements'] measurements: dict = toml_dict['measurements']
measurements = fill_cons(measurements, toml_dict['constants'] if 'constants' in toml_dict else {}) measurements = fill_cons(measurements, toml_dict['constants'] if 'constants' in toml_dict else {})
measurements = replace_in_meas(measurements, toml_dict['replace'] if 'replace' in toml_dict else {}) measurements = replace_in_meas(measurements, toml_dict['replace'] if 'replace' in toml_dict else {})
check_measurement_data(measurements, project['code']) check_measurement_data(measurements, project['code'])
aliases = project.get('aliases', []) aliases = project.get('aliases', None)
uuid = project.get('uuid', None) uuid = project.get('uuid', None)
if uuid is not None: if uuid is not None:
if not os.path.exists(path + "/projects/" + uuid): if not os.path.exists(path + "/projects/" + uuid):
@ -139,29 +133,29 @@ def import_toml(path: str, file: str, copy_file: bool=True) -> None:
for rwp in ["integrator", "eps", "ntot", "dnms"]: for rwp in ["integrator", "eps", "ntot", "dnms"]:
param[rwp] = "Unknown" param[rwp] = "Unknown"
param['type'] = 't0' param['type'] = 't0'
measurement = openQCD.extract_t0(path, uuid, md['path'], param, str(md["prefix"]), int(md["dtr_read"]), int(md["xmin"]), int(md["spatial_extent"]), measurement = openQCD.extract_t0(path, uuid, md['path'], param, md["prefix"], md["dtr_read"], md["xmin"], md["spatial_extent"],
fit_range=int(md.get('fit_range', 5)), postfix=str(md.get('postfix', '')), names=md.get('names', []), files=md.get('files', [])) fit_range=md.get('fit_range', 5), postfix=md.get('postfix', None), names=md.get('names', None), files=md.get('files', None))
elif md['measurement'] == 't1': elif md['measurement'] == 't1':
if 'param_file' in md: if 'param_file' in md:
param = openQCD.read_ms3_param(path, uuid, md['param_file']) param = openQCD.read_ms3_param(path, uuid, md['param_file'])
param['type'] = 't1' param['type'] = 't1'
measurement = openQCD.extract_t1(path, uuid, md['path'], param, str(md["prefix"]), int(md["dtr_read"]), int(md["xmin"]), int(md["spatial_extent"]), measurement = openQCD.extract_t1(path, uuid, md['path'], param, md["prefix"], md["dtr_read"], md["xmin"], md["spatial_extent"],
fit_range=int(md.get('fit_range', 5)), postfix=str(md.get('postfix', '')), names=md.get('names', []), files=md.get('files', [])) fit_range=md.get('fit_range', 5), postfix=md.get('postfix', None), names=md.get('names', None), files=md.get('files', None))
write_measurement(path, ensemble, measurement, uuid, project['code'], (md['param_file'] if 'param_file' in md else '')) write_measurement(path, ensemble, measurement, uuid, project['code'], (md['param_file'] if 'param_file' in md else None))
if not os.path.exists(os.path.join(path, "toml_imports", uuid)): if not os.path.exists(os.path.join(path, "toml_imports", uuid)):
os.makedirs(os.path.join(path, "toml_imports", uuid)) os.makedirs(os.path.join(path, "toml_imports", uuid))
if copy_file: if copy_file:
import_file = os.path.join(path, "toml_imports", uuid, file.split("/")[-1]) import_file = os.path.join(path, "toml_imports", uuid, file.split("/")[-1])
shutil.copy(file, import_file) shutil.copy(file, import_file)
save(path, files=[import_file], message="Import using " + import_file) dl.save(import_file, message="Import using " + import_file, dataset=path)
print("File copied to " + import_file) print("File copied to " + import_file)
print("Imported project.") print("Imported project.")
return return
def reimport_project(path: str, uuid: str) -> None: def reimport_project(path, uuid):
""" """
Reimport an existing project using the files that are already available for this project. Reimport an existing project using the files that are already available for this project.
@ -179,7 +173,6 @@ def reimport_project(path: str, uuid: str) -> None:
return return
def update_project(path: str, uuid: str) -> None: def update_project(path, uuid):
dl.update(how='merge', follow='sibling', dataset=os.path.join(path, "projects", uuid)) dl.update(how='merge', follow='sibling', dataset=os.path.join(path, "projects", uuid))
# reimport_project(path, uuid) # reimport_project(path, uuid)
return
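import_toml parses a file with a mandatory [project] table and [measurements.*] tables, plus optional [constants] and [replace] tables. A sketch of what such a file could look like for an sfcf project, parsed with the same tomllib call used above; all values are invented, and the measurement keys follow check_measurement_data:

import tomllib

toml_text = """
[project]
url = "git@example.com:lattice/my_project.git"
code = "sfcf"
aliases = ["charm_SF"]

[measurements.run1]
path = "data/run1"
ensemble = "E250"
param_file = "infiles/run1.in"
version = "2.0c"
prefix = "my_run"
cfg_seperator = "n"
names = ["E250r001"]
"""

toml_dict = tomllib.loads(toml_text)
print(sorted(toml_dict.keys()))   # ['measurements', 'project']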

View file

@ -1,54 +1,45 @@
import os import os
from configparser import ConfigParser import datalad.api as dl
from typing import Any import hashlib
CONFIG_FILENAME = ".corrlib" def str2list(string):
def str2list(string: str) -> list[str]:
return string.split(",") return string.split(",")
def list2str(mylist: list[str]) -> str: def list2str(mylist):
s = ",".join(mylist) s = ",".join(mylist)
return s return s
cached: bool = True cached = True
def m2k(m: float) -> float: def m2k(m):
return 1/(2*m+8) return 1/(2*m+8)
def k2m(k: float) -> float: def k2m(k):
return (1/(2*k))-4 return (1/(2*k))-4
def set_config(path: str, section: str, option: str, value: Any) -> None: def get_file(path, file):
config_path = os.path.join(path, '.corrlib') if file == "backlogger.db":
config = ConfigParser() print("Downloading database...")
if os.path.exists(config_path): else:
config.read(config_path) print("Downloading data...")
if not config.has_section(section): dl.get(os.path.join(path, file), dataset=path)
config.add_section(section) print("> downloaded file")
config.set(section, option, value)
with open(config_path, 'w') as configfile:
config.write(configfile)
return
def get_db_file(path: str) -> str: def record2name_key(record_path: str):
config_path = os.path.join(path, CONFIG_FILENAME) file = record_path.split("::")[0]
config = ConfigParser() key = record_path.split("::")[1]
if os.path.exists(config_path): return file, key
config.read(config_path)
db_file = config.get('paths', 'db', fallback='backlogger.db')
return db_file
def cache_enabled(path: str) -> bool: def name_key2record(name: str, key: str):
config_path = os.path.join(path, CONFIG_FILENAME) return name + "::" + key
config = ConfigParser()
if os.path.exists(config_path):
config.read(config_path) def make_version_hash(path, record):
cached_str = config.get('core', 'cached', fallback='True') file, key = record2name_key(record)
cached_bool = cached_str == ('True') with open(os.path.join(path, file), 'rb') as fp:
return cached_bool file_hash = hashlib.file_digest(fp, 'sha1').hexdigest()
return file_hash
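make_version_hash is what fills the new current_version column: it is simply the sha1 digest of the measurement file a record lives in, so any change to that file produces a new version and invalidates cached copies. A standalone equivalent (hashlib.file_digest requires Python 3.11+):

import hashlib
import os

def file_version(path: str, file: str) -> str:
    # same digest as make_version_hash() above: sha1 over the raw file contents
    with open(os.path.join(path, file), "rb") as fp:
        return hashlib.file_digest(fp, "sha1").hexdigest()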

View file

@ -1,169 +0,0 @@
import os
from configparser import ConfigParser
import datalad.api as dl
from typing import Optional
import shutil
from .tools import get_db_file
def get_tracker(path: str) -> str:
"""
Get the tracker used in the dataset located at path.
Parameters
----------
path: str
The path to the backlogger folder.
Returns
-------
tracker: str
The tracker used in the dataset.
"""
config_path = os.path.join(path, '.corrlib')
config = ConfigParser()
if os.path.exists(config_path):
config.read(config_path)
else:
raise FileNotFoundError(f"No config file found in {path}.")
tracker = config.get('core', 'tracker', fallback='datalad')
return tracker
def get(path: str, file: str) -> None:
"""
Wrapper function to get a file from the dataset located at path with the specified tracker.
Parameters
----------
path: str
The path to the backlogger folder.
file: str
The file to get.
"""
tracker = get_tracker(path)
if tracker == 'datalad':
if file == get_db_file(path):
print("Downloading database...")
else:
print("Downloading data...")
dl.get(os.path.join(path, file), dataset=path)
print("> downloaded file")
elif tracker == 'None':
pass
else:
raise ValueError(f"Tracker {tracker} is not supported.")
return
def save(path: str, message: str, files: Optional[list[str]]=None) -> None:
"""
Wrapper function to save a file to the dataset located at path with the specified tracker.
Parameters
----------
path: str
The path to the backlogger folder.
message: str
The commit message.
files: list[str], optional
The files to save. If None, all changes are saved.
"""
tracker = get_tracker(path)
if tracker == 'datalad':
if files is not None:
files = [os.path.join(path, f) for f in files]
dl.save(files, message=message, dataset=path)
elif tracker == 'None':
Warning("Tracker 'None' does not implement save.")
pass
else:
raise ValueError(f"Tracker {tracker} is not supported.")
def init(path: str, tracker: str='datalad') -> None:
"""
Initialize a dataset at the specified path with the specified tracker.
Parameters
----------
path: str
The path to initialize the dataset.
tracker: str
The tracker to use. Currently only 'datalad' and 'None' are supported.
"""
if tracker == 'datalad':
dl.create(path)
elif tracker == 'None':
os.makedirs(path, exist_ok=True)
else:
raise ValueError(f"Tracker {tracker} is not supported.")
return
def unlock(path: str, file: str) -> None:
"""
Wrapper function to unlock a file in the dataset located at path with the specified tracker.
Parameters
----------
path : str
The path to the backlogger folder.
file : str
The file to unlock.
"""
tracker = get_tracker(path)
if tracker == 'datalad':
dl.unlock(file, dataset=path)
elif tracker == 'None':
Warning("Tracker 'None' does not implement unlock.")
pass
else:
raise ValueError(f"Tracker {tracker} is not supported.")
return
def clone(path: str, source: str, target: str) -> None:
"""
Wrapper function to clone a dataset from source to target with the specified tracker.
Parameters
----------
path: str
The path to the backlogger folder.
source: str
The source dataset to clone.
target: str
The target path to clone the dataset to.
"""
tracker = get_tracker(path)
if tracker == 'datalad':
dl.clone(target=target, source=source, dataset=path)
elif tracker == 'None':
os.makedirs(path, exist_ok=True)
# Implement a simple clone by copying files
shutil.copytree(source, target, dirs_exist_ok=False)
else:
raise ValueError(f"Tracker {tracker} is not supported.")
return
def drop(path: str, reckless: Optional[str]=None) -> None:
"""
Wrapper function to drop data from a dataset located at path with the specified tracker.
Parameters
----------
path: str
The path to the backlogger folder.
reckless: Optional[str]
The datalad's reckless option for dropping data.
"""
tracker = get_tracker(path)
if tracker == 'datalad':
dl.drop(path, reckless=reckless)
elif tracker == 'None':
Warning("Tracker 'None' does not implement drop.")
pass
else:
raise ValueError(f"Tracker {tracker} is not supported.")
return
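
All of these wrappers dispatch on the tracker value that get_tracker reads from the `.corrlib` config file in the dataset root. A minimal, self-contained sketch of that lookup follows; the directory and the tracker value are made up for illustration and this is not part of the diff.

import os
from configparser import ConfigParser

path = "/tmp/backlog_demo"           # hypothetical dataset location
os.makedirs(path, exist_ok=True)
with open(os.path.join(path, ".corrlib"), "w") as f:
    f.write("[core]\ntracker = None\n")

config = ConfigParser()
config.read(os.path.join(path, ".corrlib"))
print(config.get("core", "tracker", fallback="datalad"))   # -> "None"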

View file

@ -1,34 +1 @@
# new content:
__version__ = "0.2.3"

# old content (generated by setuptools-scm):
# file generated by setuptools-scm
# don't change, don't track in version control
__all__ = [
"__version__",
"__version_tuple__",
"version",
"version_tuple",
"__commit_id__",
"commit_id",
]
TYPE_CHECKING = False
if TYPE_CHECKING:
from typing import Tuple
from typing import Union
VERSION_TUPLE = Tuple[Union[int, str], ...]
COMMIT_ID = Union[str, None]
else:
VERSION_TUPLE = object
COMMIT_ID = object
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID
__version__ = version = '0.2.4.dev14+g602324f84.d20251202'
__version_tuple__ = version_tuple = (0, 2, 4, 'dev14', 'g602324f84.d20251202')
__commit_id__ = commit_id = 'g602324f84'

View file

@ -1,52 +1,6 @@
# new content:
[build-system]
requires = ["setuptools >= 63.0.0", "wheel"]
build-backend = "setuptools.build_meta"

[tool.ruff.lint]
ignore = ["F403"]

# old content:
[build-system]
requires = ["setuptools >= 63.0.0", "wheel", "setuptools-scm"]
build-backend = "setuptools.build_meta"
[project]
requires-python = ">=3.10"
name = "corrlib"
dynamic = ["version"]
dependencies = [
"gitpython>=3.1.45",
'pyerrors>=2.11.1',
"datalad>=1.1.0",
'typer>=0.12.5',
]
description = "Python correlation library"
authors = [
{ name = 'Justus Kuhlmann', email = 'j_kuhl19@uni-muenster.de'}
]
[project.scripts]
pcl = "corrlib.cli:app"
[tool.setuptools.packages.find]
include = ["corrlib", "corrlib.*"]
[tool.setuptools_scm]
write_to = "corrlib/version.py"
[tool.ruff.lint]
ignore = ["E501"]
extend-select = [
"YTT",
"E",
"W",
"F",
]
[tool.mypy]
strict = true
implicit_reexport = false
follow_untyped_imports = false
ignore_missing_imports = true
[dependency-groups]
dev = [
"mypy>=1.19.0",
"pandas-stubs>=2.3.3.251201",
"pytest>=9.0.1",
"pytest-cov>=7.0.0",
"pytest-pretty>=1.3.0",
"ruff>=0.14.7",
]

18
setup.py Normal file
View file

@ -0,0 +1,18 @@
from setuptools import setup
from distutils.util import convert_path
version = {}
with open(convert_path('corrlib/version.py')) as ver_file:
exec(ver_file.read(), version)
setup(name='pycorrlib',
version=version['__version__'],
author='Justus Kuhlmann',
author_email='j_kuhl19@uni-muenster.de',
install_requires=['pyerrors>=2.11.1', 'datalad>=1.1.0', 'typer>=0.12.5', 'gitpython>=3.1.45'],
entry_points = {
'console_scripts': ['pcl=corrlib.cli:app'],
},
packages=['corrlib', 'corrlib.input']
)
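
The new setup.py recovers the version by exec-ing corrlib/version.py into a dict instead of asking setuptools-scm, while keeping the same pcl console script pointing at corrlib.cli:app. A minimal sketch of that exec pattern follows, using a throwaway stand-in file so it runs without the package installed; the paths are made up for illustration.

import os
import tempfile

with tempfile.TemporaryDirectory() as d:
    # stand-in for corrlib/version.py with the new static version string
    ver_path = os.path.join(d, "version.py")
    with open(ver_path, "w") as f:
        f.write('__version__ = "0.2.3"\n')

    version = {}
    with open(ver_path) as ver_file:
        exec(ver_file.read(), version)   # populates version["__version__"]
    print(version["__version__"])        # -> 0.2.3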

View file

@ -1,91 +0,0 @@
from typer.testing import CliRunner
from corrlib.cli import app
import os
import sqlite3 as sql
runner = CliRunner()
def test_version():
result = runner.invoke(app, ["--version"])
assert result.exit_code == 0
assert "corrlib" in result.output
def test_init_folders(tmp_path):
dataset_path = tmp_path / "test_dataset"
result = runner.invoke(app, ["init", "--dataset", str(dataset_path)])
assert result.exit_code == 0
assert os.path.exists(str(dataset_path))
assert os.path.exists(str(dataset_path / "backlogger.db"))
def test_init_db(tmp_path):
dataset_path = tmp_path / "test_dataset"
result = runner.invoke(app, ["init", "--dataset", str(dataset_path)])
assert result.exit_code == 0
assert os.path.exists(str(dataset_path / "backlogger.db"))
conn = sql.connect(str(dataset_path / "backlogger.db"))
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()
expected_tables = [
'projects',
'backlogs',
]
table_names = [table[0] for table in tables]
for expected_table in expected_tables:
assert expected_table in table_names
cursor.execute("SELECT * FROM projects;")
projects = cursor.fetchall()
assert len(projects) == 0
cursor.execute("SELECT * FROM backlogs;")
backlogs = cursor.fetchall()
assert len(backlogs) == 0
cursor.execute("PRAGMA table_info('projects');")
project_columns = cursor.fetchall()
expected_project_columns = [
"id",
"aliases",
"customTags",
"owner",
"code",
"created_at",
"updated_at"
]
project_column_names = [col[1] for col in project_columns]
for expected_col in expected_project_columns:
assert expected_col in project_column_names
cursor.execute("PRAGMA table_info('backlogs');")
backlog_columns = cursor.fetchall()
expected_backlog_columns = [
"id",
"name",
"ensemble",
"code",
"path",
"project",
"customTags",
"parameters",
"parameter_file",
"created_at",
"updated_at"
]
backlog_column_names = [col[1] for col in backlog_columns]
for expected_col in expected_backlog_columns:
assert expected_col in backlog_column_names
def test_list(tmp_path):
dataset_path = tmp_path / "test_dataset"
result = runner.invoke(app, ["init", "--dataset", str(dataset_path)])
assert result.exit_code == 0
result = runner.invoke(app, ["list", "--dataset", str(dataset_path), "ensembles"])
assert result.exit_code == 0
result = runner.invoke(app, ["list", "--dataset", str(dataset_path), "projects"])
assert result.exit_code == 0

View file

@ -14,4 +14,4 @@ def test_toml_check_measurement_data():
"names": ['list', 'of', 'names'] "names": ['list', 'of', 'names']
} }
} }
t.check_measurement_data(measurements, "sfcf") t.check_measurement_data(measurements)

View file

@ -1,93 +0,0 @@
import corrlib.initialization as init
import os
import sqlite3 as sql
def test_init_folders(tmp_path):
dataset_path = tmp_path / "test_dataset"
init.create(str(dataset_path))
assert os.path.exists(str(dataset_path))
assert os.path.exists(str(dataset_path / "backlogger.db"))
def test_init_folders_no_tracker(tmp_path):
dataset_path = tmp_path / "test_dataset"
init.create(str(dataset_path), tracker="None")
assert os.path.exists(str(dataset_path))
assert os.path.exists(str(dataset_path / "backlogger.db"))
def test_init_config(tmp_path):
dataset_path = tmp_path / "test_dataset"
init.create(str(dataset_path), tracker="None")
config_path = dataset_path / ".corrlib"
assert os.path.exists(str(config_path))
from configparser import ConfigParser
config = ConfigParser()
config.read(str(config_path))
assert config.get("core", "tracker") == "None"
assert config.get("core", "version") == "1.0"
assert config.get("core", "cached") == "True"
assert config.get("paths", "db") == "backlogger.db"
assert config.get("paths", "projects_path") == "projects"
assert config.get("paths", "archive_path") == "archive"
assert config.get("paths", "toml_imports_path") == "toml_imports"
assert config.get("paths", "import_scripts_path") == "import_scripts"
def test_init_db(tmp_path):
dataset_path = tmp_path / "test_dataset"
init.create(str(dataset_path))
assert os.path.exists(str(dataset_path / "backlogger.db"))
conn = sql.connect(str(dataset_path / "backlogger.db"))
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()
expected_tables = [
'projects',
'backlogs',
]
table_names = [table[0] for table in tables]
for expected_table in expected_tables:
assert expected_table in table_names
cursor.execute("SELECT * FROM projects;")
projects = cursor.fetchall()
assert len(projects) == 0
cursor.execute("SELECT * FROM backlogs;")
backlogs = cursor.fetchall()
assert len(backlogs) == 0
cursor.execute("PRAGMA table_info('projects');")
project_columns = cursor.fetchall()
expected_project_columns = [
"id",
"aliases",
"customTags",
"owner",
"code",
"created_at",
"updated_at"
]
project_column_names = [col[1] for col in project_columns]
for expected_col in expected_project_columns:
assert expected_col in project_column_names
cursor.execute("PRAGMA table_info('backlogs');")
backlog_columns = cursor.fetchall()
expected_backlog_columns = [
"id",
"name",
"ensemble",
"code",
"path",
"project",
"customTags",
"parameters",
"parameter_file",
"created_at",
"updated_at"
]
backlog_column_names = [col[1] for col in backlog_columns]
for expected_col in expected_backlog_columns:
assert expected_col in backlog_column_names

View file

@ -1,31 +0,0 @@
from corrlib import tools as tl
def test_m2k():
for m in [0.1, 0.5, 1.0]:
expected_k = 1 / (2 * m + 8)
assert tl.m2k(m) == expected_k
def test_k2m():
for m in [0.1, 0.5, 1.0]:
assert tl.k2m(m) == (1/(2*m))-4
def test_k2m_m2k():
for m in [0.1, 0.5, 1.0]:
k = tl.m2k(m)
m_converted = tl.k2m(k)
assert abs(m - m_converted) < 1e-9
def test_str2list():
assert tl.str2list("a,b,c") == ["a", "b", "c"]
assert tl.str2list("1,2,3") == ["1", "2", "3"]
def test_list2str():
assert tl.list2str(["a", "b", "c"]) == "a,b,c"
assert tl.list2str(["1", "2", "3"]) == "1,2,3"
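
The removed tests above pin down the mass/hopping-parameter conversion in corrlib.tools: k = 1/(2m + 8) and m = 1/(2k) - 4. A self-contained re-statement of that relation and its round trip follows; the functions are reimplemented here for illustration rather than imported from corrlib.

def m2k(m):
    # kappa = 1 / (2 m + 8), as checked by test_m2k
    return 1.0 / (2.0 * m + 8.0)


def k2m(k):
    # inverse relation m = 1 / (2 kappa) - 4, as checked by test_k2m
    return 1.0 / (2.0 * k) - 4.0


for m in (0.1, 0.5, 1.0):
    assert abs(k2m(m2k(m)) - m) < 1e-12
print("round trip ok")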

2518
uv.lock generated

File diff suppressed because it is too large