add docs, add check for needed paths
This commit is contained in:
parent
656f99a13c
commit
c3bf36bf52
1 changed file with 89 additions and 1 deletion
|
|
@ -12,6 +12,20 @@ from typing import Any
|
||||||
|
|
||||||
|
|
||||||
def has_valid_times(result: pd.Series) -> bool:
|
def has_valid_times(result: pd.Series) -> bool:
|
||||||
|
"""
|
||||||
|
Check, whether the result at hand has time-stamps that are sensible:
|
||||||
|
A recored is created first, then updated, with both times laying in the past.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
result: pd.Series
|
||||||
|
The result to check
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
b: bool
|
||||||
|
True, if the timestamps make sense.
|
||||||
|
"""
|
||||||
# we expect created_at <= updated_at <= now
|
# we expect created_at <= updated_at <= now
|
||||||
created_at = dt.datetime.fromisoformat(result['created_at'])
|
created_at = dt.datetime.fromisoformat(result['created_at'])
|
||||||
updated_at = dt.datetime.fromisoformat(result['updated_at'])
|
updated_at = dt.datetime.fromisoformat(result['updated_at'])
|
||||||
|
|
@ -22,15 +36,41 @@ def has_valid_times(result: pd.Series) -> bool:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def are_keys_unique(db: Path, table: str, col: str) -> bool:
    """
    Check whether the strings listed in a column of a given table are unique.

    Uniqueness is established by comparing the number of distinct values
    in the column against the total number of (non-NULL) values.

    Parameters
    ----------
    db: Path
        The database to check.
    table: str
        The table to check.
    col: str
        The column to be checked for uniqueness.

    Returns
    -------
    b: bool
        True, if the strings are unique.
    """
    # NOTE(review): `table` and `col` are interpolated directly into the SQL
    # statement (identifiers cannot be bound as parameters). Callers must only
    # pass trusted, internally-defined names here.
    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()
        c.execute(
            f"SELECT COUNT( DISTINCT CAST({col} AS nvarchar(4000))), COUNT({col}) FROM {table};"
        )
        # fetchall() yields a single row: (distinct count, total count).
        distinct_count, total_count = c.fetchall()[0]
    finally:
        # Close the connection even if the query fails, so the handle
        # is never leaked on error (the original leaked it on exception).
        conn.close()
    return bool(distinct_count == total_count)
|
||||||
|
|
||||||
|
|
||||||
def check_db_integrity(path: Path) -> None:
|
def check_db_integrity(path: Path) -> None:
|
||||||
|
"""
|
||||||
|
Check intergrity of the database by checking the uniqueness of the record keys used to load the records
|
||||||
|
and ensuring that the timestamps of each record is sensible. Throws an error, if issues are detected.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
path: Path
|
||||||
|
Path to the backlog-library to check.
|
||||||
|
"""
|
||||||
db = get_db_file(path)
|
db = get_db_file(path)
|
||||||
|
|
||||||
if not are_keys_unique(path / db, 'backlogs', 'path'):
|
if not are_keys_unique(path / db, 'backlogs', 'path'):
|
||||||
|
|
@ -47,6 +87,17 @@ def check_db_integrity(path: Path) -> None:
|
||||||
|
|
||||||
|
|
||||||
def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
|
def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
|
||||||
|
"""
|
||||||
|
Check whether for each record in the given by meas_paths, we can find the data in the file as we expect.
|
||||||
|
Also check, whether there are unreachable records in the files. If either of the issues arise, throws an error.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
path: Path
|
||||||
|
Path to the backlog-library to check.
|
||||||
|
meas_paths: list[str]
|
||||||
|
List of measurement paths to check.
|
||||||
|
"""
|
||||||
needed_data: dict[str, list[str]] = {}
|
needed_data: dict[str, list[str]] = {}
|
||||||
for mpath in meas_paths:
|
for mpath in meas_paths:
|
||||||
file = mpath.split("::")[0]
|
file = mpath.split("::")[0]
|
||||||
|
|
@ -72,6 +123,15 @@ def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
|
||||||
|
|
||||||
|
|
||||||
def check_db_file_links(path: Path) -> None:
|
def check_db_file_links(path: Path) -> None:
|
||||||
|
"""
|
||||||
|
Check whether for each record in the given correlator library, we can find the data in the file as we expect.
|
||||||
|
Also check, whether there are unreachable records in the files. If either of the issues arise, throws an error.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
path: Path
|
||||||
|
Path to the backlog-library to check.
|
||||||
|
"""
|
||||||
db = get_db_file(path)
|
db = get_db_file(path)
|
||||||
search_expr = "SELECT path FROM 'backlogs'"
|
search_expr = "SELECT path FROM 'backlogs'"
|
||||||
conn = sqlite3.connect(path / db)
|
conn = sqlite3.connect(path / db)
|
||||||
|
|
@ -80,6 +140,14 @@ def check_db_file_links(path: Path) -> None:
|
||||||
|
|
||||||
|
|
||||||
def check_path_and_config(path: Path) -> None:
|
def check_path_and_config(path: Path) -> None:
|
||||||
|
"""
|
||||||
|
Check whether the given path exists and the cinfigureation file can be found.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
path: Path
|
||||||
|
Path to the backlog-library to check.
|
||||||
|
"""
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
raise FileNotFoundError(f"Corrlib path {path} does not exist.")
|
raise FileNotFoundError(f"Corrlib path {path} does not exist.")
|
||||||
config_path = path / CONFIG_FILENAME
|
config_path = path / CONFIG_FILENAME
|
||||||
|
|
@ -88,6 +156,14 @@ def check_path_and_config(path: Path) -> None:
|
||||||
|
|
||||||
|
|
||||||
def check_config_validity(path: Path) -> None:
|
def check_config_validity(path: Path) -> None:
|
||||||
|
"""
|
||||||
|
Check whether the configuration file of the given corrlib-dataset path is valid.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
path: Path
|
||||||
|
Path to the backlog-library to check.
|
||||||
|
"""
|
||||||
config = ConfigParser()
|
config = ConfigParser()
|
||||||
config_path = path / CONFIG_FILENAME
|
config_path = path / CONFIG_FILENAME
|
||||||
if os.path.exists(config_path):
|
if os.path.exists(config_path):
|
||||||
|
|
@ -107,8 +183,20 @@ def check_config_validity(path: Path) -> None:
|
||||||
if not all(has_path_opts):
|
if not all(has_path_opts):
|
||||||
raise ValueError("One of the options in the 'path' section ('db', 'projects_path', 'archive_path', 'toml_imports_path', 'import_scripts_path') is missing.")
|
raise ValueError("One of the options in the 'path' section ('db', 'projects_path', 'archive_path', 'toml_imports_path', 'import_scripts_path') is missing.")
|
||||||
|
|
||||||
|
has_paths = [os.path.exists(path / config.get('paths', opt)) for opt in path_opts]
|
||||||
|
if not all(has_paths):
|
||||||
|
raise FileNotFoundError("one of the paths needed by the configuration file is not present.")
|
||||||
|
|
||||||
|
|
||||||
def full_integrity_check(path: Path) -> None:
|
def full_integrity_check(path: Path) -> None:
|
||||||
|
"""
|
||||||
|
Aggregate all checks for easy validation of the backlog-library.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
path: Path
|
||||||
|
Path to the backlog-library to check.
|
||||||
|
"""
|
||||||
check_path_and_config(path)
|
check_path_and_config(path)
|
||||||
print("Path and config-file exist:\t✅")
|
print("Path and config-file exist:\t✅")
|
||||||
check_config_validity(path)
|
check_config_validity(path)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue