Merge pull request 'feat/minteg' (#39) from feat/minteg into develop
Reviewed-on: #39
This commit is contained in:
commit
3aba39fd9d
2 changed files with 153 additions and 6 deletions
|
|
@ -108,7 +108,7 @@ def find(
|
|||
),
|
||||
) -> None:
|
||||
"""
|
||||
Find a record in the backlog at hand. Through specifying it's ensemble and the measured correlator.
|
||||
Find a record in the given backlog.
|
||||
"""
|
||||
results = find_record(path, ensemble, corr, code)
|
||||
if results.empty:
|
||||
|
|
@ -147,6 +147,9 @@ def check(path: Path = typer.Option(
|
|||
"-d",
|
||||
),
|
||||
) -> None:
|
||||
"""
|
||||
Check the integrity of the repository.
|
||||
"""
|
||||
full_integrity_check(path)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,34 @@
|
|||
import datetime as dt
|
||||
from pathlib import Path
|
||||
from .tools import get_db_file
|
||||
from .tools import get_db_file, CONFIG_FILENAME
|
||||
import pandas as pd
|
||||
import sqlite3
|
||||
from .tracker import get
|
||||
import pyerrors.input.json as pj
|
||||
import os
|
||||
from configparser import ConfigParser
|
||||
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Options looked up in the 'paths' section of the configuration file by the checks below.
path_opts = ['db', 'projects_path', 'archive_path', 'toml_imports_path', 'import_scripts_path']
|
||||
|
||||
|
||||
def has_valid_times(result: pd.Series) -> bool:
|
||||
"""
|
||||
Check, whether the result at hand has time-stamps that are sensible:
|
||||
A recored is created first, then updated, with both times laying in the past.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
result: pd.Series
|
||||
The result to check
|
||||
|
||||
Returns
|
||||
-------
|
||||
b: bool
|
||||
True, if the timestamps make sense.
|
||||
"""
|
||||
# we expect created_at <= updated_at <= now
|
||||
created_at = dt.datetime.fromisoformat(result['created_at'])
|
||||
updated_at = dt.datetime.fromisoformat(result['updated_at'])
|
||||
|
|
@ -20,15 +39,41 @@ def has_valid_times(result: pd.Series) -> bool:
|
|||
return True
|
||||
|
||||
def are_keys_unique(db: Path, table: str, col: str) -> bool:
    """
    Check whether the strings listed in a column of a given table are unique.

    Parameters
    ----------
    db: Path
        The database to check.
    table: str
        The table to check.
    col: str
        The column to be checked for uniqueness.

    Returns
    -------
    b: bool
        True, if the strings are unique.

    Notes
    -----
    ``table`` and ``col`` are interpolated into the SQL text, because identifiers cannot be
    bound as query parameters. They must therefore come from trusted code, not from user input.
    """
    # Compare the number of distinct entries in the column with the total number of entries.
    query = f"SELECT COUNT( DISTINCT CAST({col} AS nvarchar(4000))), COUNT({col}) FROM {table};"
    conn = sqlite3.connect(db)
    try:
        n_distinct, n_total = conn.execute(query).fetchone()
    finally:
        # Release the connection even if the query fails (e.g. unknown table or column).
        conn.close()
    return bool(n_distinct == n_total)
|
||||
|
||||
|
||||
def check_db_integrity(path: Path) -> None:
|
||||
"""
|
||||
Check intergrity of the database by checking the uniqueness of the record keys used to load the records
|
||||
and ensuring that the timestamps of each record is sensible. Throws an error, if issues are detected.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path: Path
|
||||
Path to the backlog-library to check.
|
||||
"""
|
||||
db = get_db_file(path)
|
||||
|
||||
if not are_keys_unique(path / db, 'backlogs', 'path'):
|
||||
|
|
@ -41,11 +86,21 @@ def check_db_integrity(path: Path) -> None:
|
|||
for _, result in results.iterrows():
|
||||
if not has_valid_times(result):
|
||||
raise ValueError(f"Result with id {result[id]} has wrong time signatures.")
|
||||
print("DB:\t✅")
|
||||
return
|
||||
|
||||
|
||||
def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
|
||||
"""
|
||||
Check whether for each record in the given by meas_paths, we can find the data in the file as we expect.
|
||||
Also check, whether there are unreachable records in the files. If either of the issues arise, throws an error.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path: Path
|
||||
Path to the backlog-library to check.
|
||||
meas_paths: list[str]
|
||||
List of measurement paths to check.
|
||||
"""
|
||||
needed_data: dict[str, list[str]] = {}
|
||||
for mpath in meas_paths:
|
||||
file = mpath.split("::")[0]
|
||||
|
|
@ -67,11 +122,19 @@ def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
|
|||
for key in needed_data[file]:
|
||||
if key not in filedict.keys():
|
||||
raise ValueError(f"Did not find data for key {key} that should be in file {file}.")
|
||||
print("Links:\t✅")
|
||||
return
|
||||
|
||||
|
||||
def check_db_file_links(path: Path) -> None:
|
||||
"""
|
||||
Check whether for each record in the given correlator library, we can find the data in the file as we expect.
|
||||
Also check, whether there are unreachable records in the files. If either of the issues arise, throws an error.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path: Path
|
||||
Path to the backlog-library to check.
|
||||
"""
|
||||
db = get_db_file(path)
|
||||
search_expr = "SELECT path FROM 'backlogs'"
|
||||
conn = sqlite3.connect(path / db)
|
||||
|
|
@ -79,9 +142,90 @@ def check_db_file_links(path: Path) -> None:
|
|||
_check_db2paths(path, list(results))
|
||||
|
||||
|
||||
def check_path_and_config(path: Path) -> None:
    """
    Check whether the given path exists and the configuration file can be found in it.

    Parameters
    ----------
    path: Path
        Path to the backlog-library to check.

    Raises
    ------
    FileNotFoundError
        If either the library path or the configuration file inside it does not exist.
    """
    library_missing = not os.path.exists(path)
    if library_missing:
        raise FileNotFoundError(f"Corrlib path {path} does not exist.")

    config_file = path / CONFIG_FILENAME
    if os.path.exists(config_file):
        return
    raise FileNotFoundError(f"Configuration file {config_file} not found.")
|
||||
|
||||
|
||||
def check_config_validity(path: Path) -> None:
    """
    Check whether the configuration file of the given corrlib-dataset path is valid.

    The configuration is considered valid if it has a 'core' section providing the options
    'version', 'tracker' and 'cached', and a 'paths' section providing every option
    listed in ``path_opts``.

    Parameters
    ----------
    path: Path
        Path to the backlog-library to check.

    Raises
    ------
    FileNotFoundError
        If the configuration file does not exist.
    ValueError
        If a required section, or one of the options required within it, is missing.
    """
    config_path = path / CONFIG_FILENAME
    if not os.path.exists(config_path):
        raise FileNotFoundError("Configuration file not found.")
    config = ConfigParser()
    config.read(config_path)

    required_opts = {
        'core': ['version', 'tracker', 'cached'],
        'paths': path_opts,
    }
    for section, opts in required_opts.items():
        # A missing section must not pass silently: nothing inside it could be validated.
        if not config.has_section(section):
            raise ValueError(f"The '{section}' section is missing in the configuration file.")
        missing = [opt for opt in opts if not config.has_option(section, opt)]
        if missing:
            raise ValueError(f"Missing option(s) in the '{section}' section: {', '.join(missing)}.")
|
||||
|
||||
|
||||
def check_paths(path: Path) -> None:
    """
    Check whether all paths demanded by the 'paths' section of the configuration-file exist.

    Every option listed in ``path_opts`` is read from the 'paths' section, joined onto
    ``path`` and tested for existence.

    Parameters
    ----------
    path: Path
        Path to the backlog-library to check.

    Raises
    ------
    FileNotFoundError
        If the configuration file or at least one of the configured paths does not exist.
    """
    config_path = path / CONFIG_FILENAME
    if not os.path.exists(config_path):
        raise FileNotFoundError("Configuration file not found.")
    config = ConfigParser()
    config.read(config_path)

    # Read all entries first, then collect every missing one so the error can name them.
    configured = {opt: config.get('paths', opt) for opt in path_opts}
    missing = [f"{opt} ({value})" for opt, value in configured.items() if not os.path.exists(path / value)]
    if missing:
        raise FileNotFoundError(
            "One of the paths specified in the configuration file is not present: " + ", ".join(missing)
        )
|
||||
|
||||
|
||||
def full_integrity_check(path: Path) -> None:
    """
    Aggregate all checks for easy validation of the backlog-library.

    The checks run in a fixed order and a numbered status line is printed after each one
    that returns. An exception raised by a check is not caught here, so it ends the run.

    Parameters
    ----------
    path: Path
        Path to the backlog-library to check.
    """
    # Ordered so that later checks can rely on what earlier ones verified
    # (path and config exist -> config is valid -> paths exist -> database -> file links).
    steps = (
        (check_path_and_config, "Path and config-file exist"),
        (check_config_validity, "Configuration is valid"),
        (check_paths, "Needed paths exist"),
        (check_db_integrity, "Database is sane"),
        (check_db_file_links, "DB2File and File2DB-links are sound"),
    )
    print("Run full integrity check...")
    for number, (run_check, label) in enumerate(steps, start=1):
        run_check(path)
        print(f"({number}/{len(steps)}) {label}: ✅")
    print("Full integrity check: ✅")
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue