add docs, add check for needed paths

This commit is contained in:
Justus Kuhlmann 2026-05-05 17:15:16 +02:00
commit c3bf36bf52
Signed by: jkuhl
GPG key ID: 00ED992DD79B85A6

View file

@ -12,6 +12,20 @@ from typing import Any
def has_valid_times(result: pd.Series) -> bool:
"""
Check, whether the result at hand has time-stamps that are sensible:
A record is created first, then updated, with both times lying in the past.
Parameters
----------
result: pd.Series
The result to check
Returns
-------
b: bool
True, if the timestamps make sense.
"""
# we expect created_at <= updated_at <= now
created_at = dt.datetime.fromisoformat(result['created_at'])
updated_at = dt.datetime.fromisoformat(result['updated_at'])
@ -22,15 +36,41 @@ def has_valid_times(result: pd.Series) -> bool:
return True
def are_keys_unique(db: Path, table: str, col: str) -> bool:
    """
    Check whether the values listed in a column of a given table are unique.

    The values are compared after being cast to text, and NULL entries are
    ignored by both counts.

    Parameters
    ----------
    db: Path
        The SQLite database file to check.
    table: str
        The table to check.
    col: str
        The column to be checked for uniqueness.

    Returns
    -------
    b: bool
        True, if the number of distinct values equals the number of values in the column.
    """
    # NOTE: SQL identifiers cannot be bound as query parameters, so `table` and
    # `col` are interpolated directly. They must come from trusted code.
    query = f"SELECT COUNT( DISTINCT CAST({col} AS nvarchar(4000))), COUNT({col}) FROM {table};"
    conn = sqlite3.connect(db)
    try:
        # An aggregate query without GROUP BY always yields exactly one row.
        n_distinct, n_total = conn.execute(query).fetchone()
    finally:
        # Release the connection even if the query fails (e.g. unknown column).
        conn.close()
    return bool(n_distinct == n_total)
def check_db_integrity(path: Path) -> None:
"""
Check the integrity of the database by checking the uniqueness of the record keys used to load the records
and ensuring that the timestamps of each record are sensible. Throws an error if issues are detected.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
db = get_db_file(path)
if not are_keys_unique(path / db, 'backlogs', 'path'):
@ -47,6 +87,17 @@ def check_db_integrity(path: Path) -> None:
def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
"""
Check whether, for each record given by meas_paths, we can find the data in the file as we expect.
Also check whether there are unreachable records in the files. If either issue arises, an error is thrown.
Parameters
----------
path: Path
Path to the backlog-library to check.
meas_paths: list[str]
List of measurement paths to check.
"""
needed_data: dict[str, list[str]] = {}
for mpath in meas_paths:
file = mpath.split("::")[0]
@ -72,6 +123,15 @@ def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
def check_db_file_links(path: Path) -> None:
"""
Check whether for each record in the given correlator library, we can find the data in the file as we expect.
Also check whether there are unreachable records in the files. If either issue arises, an error is thrown.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
db = get_db_file(path)
search_expr = "SELECT path FROM 'backlogs'"
conn = sqlite3.connect(path / db)
@ -80,6 +140,14 @@ def check_db_file_links(path: Path) -> None:
def check_path_and_config(path: Path) -> None:
"""
Check whether the given path exists and the configuration file can be found.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
if not os.path.exists(path):
raise FileNotFoundError(f"Corrlib path {path} does not exist.")
config_path = path / CONFIG_FILENAME
@ -88,6 +156,14 @@ def check_path_and_config(path: Path) -> None:
def check_config_validity(path: Path) -> None:
"""
Check whether the configuration file of the given corrlib-dataset path is valid.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
config = ConfigParser()
config_path = path / CONFIG_FILENAME
if os.path.exists(config_path):
@ -107,8 +183,20 @@ def check_config_validity(path: Path) -> None:
if not all(has_path_opts):
raise ValueError("One of the options in the 'path' section ('db', 'projects_path', 'archive_path', 'toml_imports_path', 'import_scripts_path') is missing.")
has_paths = [os.path.exists(path / config.get('paths', opt)) for opt in path_opts]
if not all(has_paths):
raise FileNotFoundError("one of the paths needed by the configuration file is not present.")
def full_integrity_check(path: Path) -> None:
"""
Aggregate all checks for easy validation of the backlog-library.
Parameters
----------
path: Path
Path to the backlog-library to check.
"""
check_path_and_config(path)
print("Path and config-file exist:\t")
check_config_validity(path)