Merge pull request 'integ/links' (#34) from integ/links into develop
Some checks failed
Mypy / mypy (push) Failing after 39s
Pytest / pytest (3.12) (push) Failing after 38s
Pytest / pytest (3.13) (push) Failing after 37s
Pytest / pytest (3.14) (push) Failing after 40s
Ruff / ruff (push) Failing after 38s

Reviewed-on: https://www.kuhl-mann.de/git/git/jkuhl/corrlib/pulls/34
This commit is contained in:
Justus Kuhlmann 2026-04-17 18:09:17 +02:00
commit 702010c8fc
2 changed files with 50 additions and 4 deletions

View file

@@ -3,6 +3,10 @@ from pathlib import Path
from .tools import get_db_file
import pandas as pd
import sqlite3
from .tracker import get
import pyerrors.input.json as pj
from typing import Any
def has_valid_times(result: pd.Series) -> bool:
@@ -38,10 +42,46 @@ def check_db_integrity(path: Path) -> None:
if not has_valid_times(result):
raise ValueError(f"Result with id {result[id]} has wrong time signatures.")
print("DB:\t")
return
def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
    """
    Check that the measurement paths recorded in the database and the
    contents of the backing files agree exactly.

    Parameters
    ----------
    path: Path
        Path of the correlator library.
    meas_paths: list[str]
        Measurement paths of the form "<file>::<key>".

    Raises
    ------
    ValueError
        If a file contains a key that no database entry references, or
        if a referenced key is missing from its file.
    """
    # Group the referenced keys by the file they live in.  A set avoids
    # duplicate entries and gives O(1) membership for the checks below.
    needed_data: dict[str, set[str]] = {}
    for mpath in meas_paths:
        parts = mpath.split("::")
        needed_data.setdefault(parts[0], set()).add(parts[1])
    totf = len(needed_data)
    # start=1 so the progress line reads "1/totf" .. "totf/totf".
    for i, (file, needed_keys) in enumerate(needed_data.items(), start=1):
        print(f"Check against file {i}/{totf}: {file}")
        get(path, Path(file))
        filedict: dict[str, Any] = pj.load_json_dict(str(path / file))
        file_keys = set(filedict.keys())
        # Keys present in the file but never referenced by the database.
        unintended = file_keys - needed_keys
        if unintended:
            key = next(iter(unintended))
            raise ValueError(f"Found unintended key {key} in file {file}.")
        # Keys the database references but the file does not provide.
        missing = needed_keys - file_keys
        if missing:
            key = next(iter(missing))
            raise ValueError(f"Did not find data for key {key} that should be in file {file}.")
    print("Links:\t")
    return
def check_db_file_links(path: Path) -> None:
    """
    Verify that every path recorded in the 'backlogs' table of the
    database resolves to data actually present in the backing files.

    Parameters
    ----------
    path: Path
        Path of the correlator library.

    Raises
    ------
    ValueError
        Propagated from _check_db2paths on any mismatch between the
        database and the files.
    """
    db = get_db_file(path)
    search_expr = "SELECT path FROM 'backlogs'"
    conn = sqlite3.connect(path / db)
    try:
        results = pd.read_sql(search_expr, conn)['path'].values
    finally:
        # Always release the database handle, even if the query fails
        # (the original leaked the connection).
        conn.close()
    _check_db2paths(path, list(results))
def full_integrity_check(path: Path) -> None:
    """
    Run every integrity check on the correlator library.

    First the database-level consistency check, then the check that the
    database entries and the data files reference each other correctly.

    Parameters
    ----------
    path: Path
        Path of the correlator library.
    """
    # Order matters: validate the database itself before following its
    # links into the data files.
    check_db_integrity(path)
    check_db_file_links(path)
    print("Full:\t")

View file

@@ -11,6 +11,7 @@ from .tracker import get, save, unlock
import shutil
from typing import Any
from pathlib import Path
from .integrity import _check_db2paths
CACHE_DIR = ".cache"
@@ -153,7 +154,7 @@ def load_record(path: Path, meas_path: str) -> Union[Corr, Obs]:
return load_records(path, [meas_path])[0]
def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]:
def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}, dry_run: bool = False) -> list[Union[Corr, Obs]]:
"""
Load a list of records by their paths.
@@ -163,14 +164,19 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] =
Path of the correlator library.
meas_paths: list[str]
A list of the paths to the correlator in the backlog system.
perloaded: dict[str, Any]
The data that is already prelaoded. Of interest if data has alread been loaded in the same script.
preloaded: dict[str, Any]
The data that is already preloaded. Of interest if data has alread been loaded in the same script.
dry_run: bool
Do not load datda, just check whether we can reach the data we are interested in.
Returns
-------
retruned_data: list
returned_data: list
The loaded records.
"""
if dry_run:
_check_db2paths(path, meas_paths)
return []
needed_data: dict[str, list[str]] = {}
for mpath in meas_paths:
file = mpath.split("::")[0]