Merge pull request 'feat/minteg' (#39) from feat/minteg into develop
All checks were successful
Pytest / pytest (3.12) (push) Successful in 1m17s
Mypy / mypy (push) Successful in 1m9s
Pytest / pytest (3.13) (push) Successful in 1m9s
Pytest / pytest (3.14) (push) Successful in 1m12s
Ruff / ruff (push) Successful in 59s

Reviewed-on: #39
This commit is contained in:
Justus Kuhlmann 2026-05-06 09:37:35 +02:00
commit 3aba39fd9d
2 changed files with 153 additions and 6 deletions

View file

@ -108,7 +108,7 @@ def find(
), ),
) -> None: ) -> None:
""" """
Find a record in the backlog at hand. Through specifying it's ensemble and the measured correlator. Find a record in the given backlog.
""" """
results = find_record(path, ensemble, corr, code) results = find_record(path, ensemble, corr, code)
if results.empty: if results.empty:
@ -147,6 +147,9 @@ def check(path: Path = typer.Option(
"-d", "-d",
), ),
) -> None: ) -> None:
"""
Check the integrity of the repository.
"""
full_integrity_check(path) full_integrity_check(path)

View file

@ -1,15 +1,34 @@
import datetime as dt import datetime as dt
from pathlib import Path from pathlib import Path
from .tools import get_db_file from .tools import get_db_file, CONFIG_FILENAME
import pandas as pd import pandas as pd
import sqlite3 import sqlite3
from .tracker import get from .tracker import get
import pyerrors.input.json as pj import pyerrors.input.json as pj
import os
from configparser import ConfigParser
from typing import Any from typing import Any
path_opts = ['db', 'projects_path', 'archive_path', 'toml_imports_path', 'import_scripts_path']
def has_valid_times(result: pd.Series) -> bool: def has_valid_times(result: pd.Series) -> bool:
"""
Check, whether the result at hand has time-stamps that are sensible:
A record is created first, then updated, with both times lying in the past.
Parameters
----------
result: pd.Series
The result to check
Returns
-------
b: bool
True, if the timestamps make sense.
"""
# we expect created_at <= updated_at <= now # we expect created_at <= updated_at <= now
created_at = dt.datetime.fromisoformat(result['created_at']) created_at = dt.datetime.fromisoformat(result['created_at'])
updated_at = dt.datetime.fromisoformat(result['updated_at']) updated_at = dt.datetime.fromisoformat(result['updated_at'])
@ -20,15 +39,41 @@ def has_valid_times(result: pd.Series) -> bool:
return True return True
def are_keys_unique(db: Path, table: str, col: str) -> bool: def are_keys_unique(db: Path, table: str, col: str) -> bool:
"""
Check whether the strings listed in a column of a given table are unique.
Parameters
----------
db: Path
The database to check.
table: str
The table to check.
col: str
The column to be checked for uniqueness.
Returns
-------
b: bool
True, if the strings are unique.
"""
conn = sqlite3.connect(db) conn = sqlite3.connect(db)
c = conn.cursor() c = conn.cursor()
c.execute(f"SELECT COUNT( DISTINCT CAST(path AS nvarchar(4000))), COUNT({col}) FROM {table};") c.execute(f"SELECT COUNT( DISTINCT CAST({col} AS nvarchar(4000))), COUNT({col}) FROM {table};")
results = c.fetchall()[0] results = c.fetchall()[0]
conn.close() conn.close()
return bool(results[0] == results[1]) return bool(results[0] == results[1])
def check_db_integrity(path: Path) -> None: def check_db_integrity(path: Path) -> None:
"""
Check integrity of the database by checking the uniqueness of the record keys used to load the records
and ensuring that the timestamps of each record are sensible. Throws an error, if issues are detected.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
db = get_db_file(path) db = get_db_file(path)
if not are_keys_unique(path / db, 'backlogs', 'path'): if not are_keys_unique(path / db, 'backlogs', 'path'):
@ -41,11 +86,21 @@ def check_db_integrity(path: Path) -> None:
for _, result in results.iterrows(): for _, result in results.iterrows():
if not has_valid_times(result): if not has_valid_times(result):
raise ValueError(f"Result with id {result[id]} has wrong time signatures.") raise ValueError(f"Result with id {result[id]} has wrong time signatures.")
print("DB:\t")
return return
def _check_db2paths(path: Path, meas_paths: list[str]) -> None: def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
"""
Check whether, for each record given by meas_paths, we can find the data in the file as we expect.
Also check whether there are unreachable records in the files. If either of the issues arises, throws an error.
Parameters
----------
path: Path
Path to the backlog-library to check.
meas_paths: list[str]
List of measurement paths to check.
"""
needed_data: dict[str, list[str]] = {} needed_data: dict[str, list[str]] = {}
for mpath in meas_paths: for mpath in meas_paths:
file = mpath.split("::")[0] file = mpath.split("::")[0]
@ -67,11 +122,19 @@ def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
for key in needed_data[file]: for key in needed_data[file]:
if key not in filedict.keys(): if key not in filedict.keys():
raise ValueError(f"Did not find data for key {key} that should be in file {file}.") raise ValueError(f"Did not find data for key {key} that should be in file {file}.")
print("Links:\t")
return return
def check_db_file_links(path: Path) -> None: def check_db_file_links(path: Path) -> None:
"""
Check whether, for each record in the given correlator library, we can find the data in the file as we expect.
Also check whether there are unreachable records in the files. If either of the issues arises, throws an error.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
db = get_db_file(path) db = get_db_file(path)
search_expr = "SELECT path FROM 'backlogs'" search_expr = "SELECT path FROM 'backlogs'"
conn = sqlite3.connect(path / db) conn = sqlite3.connect(path / db)
@ -79,9 +142,90 @@ def check_db_file_links(path: Path) -> None:
_check_db2paths(path, list(results)) _check_db2paths(path, list(results))
def check_path_and_config(path: Path) -> None:
    """
    Check whether the given path exists and the configuration file can be found.

    Parameters
    ----------
    path: Path
        Path to the backlog-library to check.

    Raises
    ------
    FileNotFoundError
        If the library path or its configuration file does not exist.
    """
    # Use pathlib directly instead of os.path, consistent with the Path-based API.
    if not path.exists():
        raise FileNotFoundError(f"Corrlib path {path} does not exist.")
    config_path = path / CONFIG_FILENAME
    if not config_path.exists():
        raise FileNotFoundError(f"Configuration file {config_path} not found.")
def check_config_validity(path: Path) -> None:
    """
    Check whether the configuration file of the given corrlib-dataset path is valid.

    A valid configuration contains a 'core' section with the options
    'version', 'tracker' and 'cached', and a 'paths' section with all
    options listed in ``path_opts``.

    Parameters
    ----------
    path: Path
        Path to the backlog-library to check.

    Raises
    ------
    FileNotFoundError
        If the configuration file is missing.
    ValueError
        If a required section or option is missing.
    """
    config = ConfigParser()
    config_path = path / CONFIG_FILENAME
    if os.path.exists(config_path):
        config.read(config_path)
    else:
        raise FileNotFoundError("Configuration file not found.")
    # A completely absent section used to be silently accepted; an
    # integrity check has to reject that as well as missing options.
    if not config.has_section('core'):
        raise ValueError("The 'core' section is missing from the configuration file.")
    core_opts = ['version', 'tracker', 'cached']
    if not all(config.has_option('core', opt) for opt in core_opts):
        raise ValueError("One of the options in the 'core' section ('version', 'tracker', 'cached') is missing.")
    if not config.has_section('paths'):
        raise ValueError("The 'paths' section is missing from the configuration file.")
    if not all(config.has_option('paths', opt) for opt in path_opts):
        raise ValueError("One of the options in the 'path' section ('db', 'projects_path', 'archive_path', 'toml_imports_path', 'import_scripts_path') is missing.")
def check_paths(path: Path) -> None:
    """
    Check whether all paths demanded by the 'paths' section of the configuration-file exist.

    Parameters
    ----------
    path: Path
        Path to the backlog-library to check.

    Raises
    ------
    FileNotFoundError
        If the configuration file or any configured path is absent.
    """
    parser = ConfigParser()
    config_file = path / CONFIG_FILENAME
    # Guard clause: without a config file there is nothing to verify.
    if not os.path.exists(config_file):
        raise FileNotFoundError("Configuration file not found.")
    parser.read(config_file)
    every_path_present = all(
        os.path.exists(path / parser.get('paths', opt)) for opt in path_opts
    )
    if not every_path_present:
        raise FileNotFoundError("One of the paths specified in the configuration file is not present.")
def full_integrity_check(path: Path) -> None: def full_integrity_check(path: Path) -> None:
"""
Aggregate all checks for easy validation of the backlog-library.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
print("Run full integrity check...")
check_path_and_config(path)
print("(1/5) Path and config-file exist: ✅")
check_config_validity(path)
print("(2/5) Configuration is valid: ✅")
check_paths(path)
print("(3/5) Needed paths exist: ✅")
check_db_integrity(path) check_db_integrity(path)
print("(4/5) Database is sane: ✅")
check_db_file_links(path) check_db_file_links(path)
print("Full:\t") print("(5/5) DB2File and File2DB-links are sound: ✅")
print("Full integrity check: ✅")