add docs, add check for needed paths

This commit is contained in:
Justus Kuhlmann 2026-05-05 17:15:16 +02:00
commit c3bf36bf52
Signed by: jkuhl
GPG key ID: 00ED992DD79B85A6

View file

@ -12,6 +12,20 @@ from typing import Any
def has_valid_times(result: pd.Series) -> bool: def has_valid_times(result: pd.Series) -> bool:
"""
Check whether the result at hand has timestamps that are sensible:
A record is created first, then updated, with both times lying in the past.
Parameters
----------
result: pd.Series
The result to check
Returns
-------
b: bool
True, if the timestamps make sense.
"""
# we expect created_at <= updated_at <= now # we expect created_at <= updated_at <= now
created_at = dt.datetime.fromisoformat(result['created_at']) created_at = dt.datetime.fromisoformat(result['created_at'])
updated_at = dt.datetime.fromisoformat(result['updated_at']) updated_at = dt.datetime.fromisoformat(result['updated_at'])
@ -22,15 +36,41 @@ def has_valid_times(result: pd.Series) -> bool:
return True return True
def are_keys_unique(db: Path, table: str, col: str) -> bool: def are_keys_unique(db: Path, table: str, col: str) -> bool:
"""
Check whether the strings listed in a column of a given table are unique.
Parameters
----------
db: Path
The database to check.
table: str
The table to check.
col: str
The column to be checked for uniqueness.
Returns
-------
b: bool
True, if the strings are unique.
"""
conn = sqlite3.connect(db) conn = sqlite3.connect(db)
c = conn.cursor() c = conn.cursor()
c.execute(f"SELECT COUNT( DISTINCT CAST(path AS nvarchar(4000))), COUNT({col}) FROM {table};") c.execute(f"SELECT COUNT( DISTINCT CAST({col} AS nvarchar(4000))), COUNT({col}) FROM {table};")
results = c.fetchall()[0] results = c.fetchall()[0]
conn.close() conn.close()
return bool(results[0] == results[1]) return bool(results[0] == results[1])
def check_db_integrity(path: Path) -> None: def check_db_integrity(path: Path) -> None:
"""
Check integrity of the database by checking the uniqueness of the record keys used to load the records
and ensuring that the timestamps of each record are sensible. Throws an error, if issues are detected.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
db = get_db_file(path) db = get_db_file(path)
if not are_keys_unique(path / db, 'backlogs', 'path'): if not are_keys_unique(path / db, 'backlogs', 'path'):
@ -47,6 +87,17 @@ def check_db_integrity(path: Path) -> None:
def _check_db2paths(path: Path, meas_paths: list[str]) -> None: def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
"""
Check whether, for each record given by meas_paths, we can find the data in the file as we expect.
Also check whether there are unreachable records in the files. If either issue arises, throws an error.
Parameters
----------
path: Path
Path to the backlog-library to check.
meas_paths: list[str]
List of measurement paths to check.
"""
needed_data: dict[str, list[str]] = {} needed_data: dict[str, list[str]] = {}
for mpath in meas_paths: for mpath in meas_paths:
file = mpath.split("::")[0] file = mpath.split("::")[0]
@ -72,6 +123,15 @@ def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
def check_db_file_links(path: Path) -> None: def check_db_file_links(path: Path) -> None:
"""
Check whether, for each record in the given correlator library, we can find the data in the file as we expect.
Also check whether there are unreachable records in the files. If either issue arises, throws an error.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
db = get_db_file(path) db = get_db_file(path)
search_expr = "SELECT path FROM 'backlogs'" search_expr = "SELECT path FROM 'backlogs'"
conn = sqlite3.connect(path / db) conn = sqlite3.connect(path / db)
@ -80,6 +140,14 @@ def check_db_file_links(path: Path) -> None:
def check_path_and_config(path: Path) -> None: def check_path_and_config(path: Path) -> None:
"""
Check whether the given path exists and the configuration file can be found.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
if not os.path.exists(path): if not os.path.exists(path):
raise FileNotFoundError(f"Corrlib path {path} does not exist.") raise FileNotFoundError(f"Corrlib path {path} does not exist.")
config_path = path / CONFIG_FILENAME config_path = path / CONFIG_FILENAME
@ -88,6 +156,14 @@ def check_path_and_config(path: Path) -> None:
def check_config_validity(path: Path) -> None: def check_config_validity(path: Path) -> None:
"""
Check whether the configuration file of the given corrlib-dataset path is valid.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
config = ConfigParser() config = ConfigParser()
config_path = path / CONFIG_FILENAME config_path = path / CONFIG_FILENAME
if os.path.exists(config_path): if os.path.exists(config_path):
@ -107,8 +183,20 @@ def check_config_validity(path: Path) -> None:
if not all(has_path_opts): if not all(has_path_opts):
raise ValueError("One of the options in the 'path' section ('db', 'projects_path', 'archive_path', 'toml_imports_path', 'import_scripts_path') is missing.") raise ValueError("One of the options in the 'path' section ('db', 'projects_path', 'archive_path', 'toml_imports_path', 'import_scripts_path') is missing.")
has_paths = [os.path.exists(path / config.get('paths', opt)) for opt in path_opts]
if not all(has_paths):
raise FileNotFoundError("one of the paths needed by the configuration file is not present.")
def full_integrity_check(path: Path) -> None: def full_integrity_check(path: Path) -> None:
"""
Aggregate all checks for easy validation of the backlog-library.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
check_path_and_config(path) check_path_and_config(path)
print("Path and config-file exist:\t") print("Path and config-file exist:\t")
check_config_validity(path) check_config_validity(path)