Merge pull request 'feat/minteg' (#39) from feat/minteg into develop
Reviewed-on: #39
This commit is contained in:
commit
3aba39fd9d
2 changed files with 153 additions and 6 deletions
|
|
@ -108,7 +108,7 @@ def find(
|
||||||
),
|
),
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Find a record in the backlog at hand. Through specifying it's ensemble and the measured correlator.
|
Find a record in the given backlog.
|
||||||
"""
|
"""
|
||||||
results = find_record(path, ensemble, corr, code)
|
results = find_record(path, ensemble, corr, code)
|
||||||
if results.empty:
|
if results.empty:
|
||||||
|
|
@ -147,6 +147,9 @@ def check(path: Path = typer.Option(
|
||||||
"-d",
|
"-d",
|
||||||
),
|
),
|
||||||
) -> None:
|
) -> None:
|
||||||
|
"""
|
||||||
|
Check the integrity of the repository.
|
||||||
|
"""
|
||||||
full_integrity_check(path)
|
full_integrity_check(path)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,34 @@
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from .tools import get_db_file
|
from .tools import get_db_file, CONFIG_FILENAME
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from .tracker import get
|
from .tracker import get
|
||||||
import pyerrors.input.json as pj
|
import pyerrors.input.json as pj
|
||||||
|
import os
|
||||||
|
from configparser import ConfigParser
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
path_opts = ['db', 'projects_path', 'archive_path', 'toml_imports_path', 'import_scripts_path']
|
||||||
|
|
||||||
|
|
||||||
def has_valid_times(result: pd.Series) -> bool:
|
def has_valid_times(result: pd.Series) -> bool:
|
||||||
|
"""
|
||||||
|
Check, whether the result at hand has time-stamps that are sensible:
|
||||||
|
A record is created first, then updated, with both times lying in the past.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
result: pd.Series
|
||||||
|
The result to check
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
b: bool
|
||||||
|
True, if the timestamps make sense.
|
||||||
|
"""
|
||||||
# we expect created_at <= updated_at <= now
|
# we expect created_at <= updated_at <= now
|
||||||
created_at = dt.datetime.fromisoformat(result['created_at'])
|
created_at = dt.datetime.fromisoformat(result['created_at'])
|
||||||
updated_at = dt.datetime.fromisoformat(result['updated_at'])
|
updated_at = dt.datetime.fromisoformat(result['updated_at'])
|
||||||
|
|
@ -20,15 +39,41 @@ def has_valid_times(result: pd.Series) -> bool:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def are_keys_unique(db: Path, table: str, col: str) -> bool:
    """
    Check whether the strings listed in a column of a given table are unique.

    Parameters
    ----------
    db: Path
        The database to check.
    table: str
        The table to check.
    col: str
        The column to be checked for uniqueness.

    Returns
    -------
    b: bool
        True, if the strings are unique.
    """
    # NOTE(review): `table` and `col` are interpolated directly into the SQL;
    # SQLite cannot bind identifiers as parameters, so callers must pass
    # trusted values only.
    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()
        # Uniqueness holds iff the distinct count equals the total count.
        # The CAST compares values as text, matching the original query.
        c.execute(f"SELECT COUNT( DISTINCT CAST({col} AS nvarchar(4000))), COUNT({col}) FROM {table};")
        distinct_count, total_count = c.fetchall()[0]
    finally:
        # Close the connection even if the query raises, so no handle leaks.
        conn.close()
    return bool(distinct_count == total_count)
|
||||||
|
|
||||||
|
|
||||||
def check_db_integrity(path: Path) -> None:
|
def check_db_integrity(path: Path) -> None:
|
||||||
|
"""
|
||||||
|
Check integrity of the database by checking the uniqueness of the record keys used to load the records
|
||||||
|
and ensuring that the timestamps of each record is sensible. Throws an error, if issues are detected.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
path: Path
|
||||||
|
Path to the backlog-library to check.
|
||||||
|
"""
|
||||||
db = get_db_file(path)
|
db = get_db_file(path)
|
||||||
|
|
||||||
if not are_keys_unique(path / db, 'backlogs', 'path'):
|
if not are_keys_unique(path / db, 'backlogs', 'path'):
|
||||||
|
|
@ -41,11 +86,21 @@ def check_db_integrity(path: Path) -> None:
|
||||||
for _, result in results.iterrows():
|
for _, result in results.iterrows():
|
||||||
if not has_valid_times(result):
|
if not has_valid_times(result):
|
||||||
raise ValueError(f"Result with id {result[id]} has wrong time signatures.")
|
raise ValueError(f"Result with id {result[id]} has wrong time signatures.")
|
||||||
print("DB:\t✅")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
|
def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
|
||||||
|
"""
|
||||||
|
Check whether for each record in the given by meas_paths, we can find the data in the file as we expect.
|
||||||
|
Also check, whether there are unreachable records in the files. If either of the issues arise, throws an error.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
path: Path
|
||||||
|
Path to the backlog-library to check.
|
||||||
|
meas_paths: list[str]
|
||||||
|
List of measurement paths to check.
|
||||||
|
"""
|
||||||
needed_data: dict[str, list[str]] = {}
|
needed_data: dict[str, list[str]] = {}
|
||||||
for mpath in meas_paths:
|
for mpath in meas_paths:
|
||||||
file = mpath.split("::")[0]
|
file = mpath.split("::")[0]
|
||||||
|
|
@ -67,11 +122,19 @@ def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
|
||||||
for key in needed_data[file]:
|
for key in needed_data[file]:
|
||||||
if key not in filedict.keys():
|
if key not in filedict.keys():
|
||||||
raise ValueError(f"Did not find data for key {key} that should be in file {file}.")
|
raise ValueError(f"Did not find data for key {key} that should be in file {file}.")
|
||||||
print("Links:\t✅")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def check_db_file_links(path: Path) -> None:
|
def check_db_file_links(path: Path) -> None:
|
||||||
|
"""
|
||||||
|
Check whether for each record in the given correlator library, we can find the data in the file as we expect.
|
||||||
|
Also check, whether there are unreachable records in the files. If either of the issues arise, throws an error.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
path: Path
|
||||||
|
Path to the backlog-library to check.
|
||||||
|
"""
|
||||||
db = get_db_file(path)
|
db = get_db_file(path)
|
||||||
search_expr = "SELECT path FROM 'backlogs'"
|
search_expr = "SELECT path FROM 'backlogs'"
|
||||||
conn = sqlite3.connect(path / db)
|
conn = sqlite3.connect(path / db)
|
||||||
|
|
@ -79,9 +142,90 @@ def check_db_file_links(path: Path) -> None:
|
||||||
_check_db2paths(path, list(results))
|
_check_db2paths(path, list(results))
|
||||||
|
|
||||||
|
|
||||||
|
def check_path_and_config(path: Path) -> None:
    """
    Check whether the given path exists and the configuration file can be found.

    Parameters
    ----------
    path: Path
        Path to the backlog-library to check.

    Raises
    ------
    FileNotFoundError
        If the library path or its configuration file does not exist.
    """
    # Use pathlib directly instead of os.path on a Path object.
    if not path.exists():
        raise FileNotFoundError(f"Corrlib path {path} does not exist.")
    config_path = path / CONFIG_FILENAME
    if not config_path.exists():
        raise FileNotFoundError(f"Configuration file {config_path} not found.")
|
||||||
|
|
||||||
|
|
||||||
|
def check_config_validity(path: Path) -> None:
    """
    Check whether the configuration file of the given corrlib-dataset path is valid.

    Parameters
    ----------
    path: Path
        Path to the backlog-library to check.

    Raises
    ------
    FileNotFoundError
        If the configuration file does not exist.
    ValueError
        If a required section or option is missing.
    """
    config = ConfigParser()
    config_path = path / CONFIG_FILENAME
    if os.path.exists(config_path):
        config.read(config_path)
    else:
        raise FileNotFoundError("Configuration file not found.")

    # A config without the required sections used to pass silently; for an
    # integrity check a missing section is itself an error.
    if not config.has_section('core'):
        raise ValueError("The 'core' section is missing from the configuration file.")
    core_opts = ['version', 'tracker', 'cached']
    if not all(config.has_option('core', opt) for opt in core_opts):
        raise ValueError("One of the options in the 'core' section ('version', 'tracker', 'cached') is missing.")

    if not config.has_section('paths'):
        raise ValueError("The 'paths' section is missing from the configuration file.")
    if not all(config.has_option('paths', opt) for opt in path_opts):
        # FIX: the message previously said "'path' section"; the section is 'paths'.
        # The option list is derived from `path_opts` so it cannot drift.
        opts_listing = ", ".join(repr(opt) for opt in path_opts)
        raise ValueError(f"One of the options in the 'paths' section ({opts_listing}) is missing.")
|
||||||
|
|
||||||
|
|
||||||
|
def check_paths(path: Path) -> None:
    """
    Check whether all paths demanded by the 'paths' section of the configuration-file exist.

    Parameters
    ----------
    path: Path
        Path to the backlog-library to check.

    Raises
    ------
    FileNotFoundError
        If the configuration file or one of the configured paths is missing.
    """
    config = ConfigParser()
    config_path = path / CONFIG_FILENAME
    if os.path.exists(config_path):
        config.read(config_path)
    else:
        raise FileNotFoundError("Configuration file not found.")
    # Collect every missing path so the error can name the offending
    # option(s) instead of only saying "one of" them is absent.
    missing = [opt for opt in path_opts if not os.path.exists(path / config.get('paths', opt))]
    if missing:
        raise FileNotFoundError(
            "One of the paths specified in the configuration file is not present: "
            + ", ".join(missing)
        )
|
||||||
|
|
||||||
|
|
||||||
def full_integrity_check(path: Path) -> None:
    """
    Aggregate all checks for easy validation of the backlog-library.

    Parameters
    ----------
    path: Path
        Path to the backlog-library to check.
    """
    # Each step pairs a check with the confirmation printed on success;
    # any failing check raises and aborts the sequence.
    steps = [
        (check_path_and_config, "(1/5) Path and config-file exist: ✅"),
        (check_config_validity, "(2/5) Configuration is valid: ✅"),
        (check_paths, "(3/5) Needed paths exist: ✅"),
        (check_db_integrity, "(4/5) Database is sane: ✅"),
        (check_db_file_links, "(5/5) DB2File and File2DB-links are sound: ✅"),
    ]
    print("Run full integrity check...")
    for check, confirmation in steps:
        check(path)
        print(confirmation)
    print("Full integrity check: ✅")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue