Merge pull request 'integ/links' (#34) from integ/links into develop
Some checks failed
Mypy / mypy (push) Failing after 39s
Pytest / pytest (3.12) (push) Failing after 38s
Pytest / pytest (3.13) (push) Failing after 37s
Pytest / pytest (3.14) (push) Failing after 40s
Ruff / ruff (push) Failing after 38s

Reviewed-on: https://www.kuhl-mann.de/git/git/jkuhl/corrlib/pulls/34
This commit is contained in:
Justus Kuhlmann 2026-04-17 18:09:17 +02:00
commit 702010c8fc
2 changed files with 50 additions and 4 deletions

View file

@@ -3,6 +3,10 @@ from pathlib import Path
from .tools import get_db_file
import pandas as pd
import sqlite3
from .tracker import get
import pyerrors.input.json as pj
from typing import Any
def has_valid_times(result: pd.Series) -> bool:
@@ -38,10 +42,46 @@ def check_db_integrity(path: Path) -> None:
if not has_valid_times(result):
raise ValueError(f"Result with id {result[id]} has wrong time signatures.")
print("DB:\t")
return
def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
    """
    Check that the measurement paths recorded in the database and the
    contents of the backing files agree exactly.

    Parameters
    ----------
    path: Path
        Path of the correlator library.
    meas_paths: list[str]
        Measurement paths of the form "<file>::<key>".

    Raises
    ------
    ValueError
        If a file contains a key that no database entry references, or
        if a referenced key is missing from its file.
    """
    # Group the referenced keys by the file they live in.  A set avoids
    # duplicate entries and gives O(1) membership for the checks below.
    needed_data: dict[str, set[str]] = {}
    for mpath in meas_paths:
        parts = mpath.split("::")
        needed_data.setdefault(parts[0], set()).add(parts[1])
    totf = len(needed_data)
    # start=1 so the progress line reads "1/totf" .. "totf/totf".
    for i, (file, needed_keys) in enumerate(needed_data.items(), start=1):
        print(f"Check against file {i}/{totf}: {file}")
        get(path, Path(file))
        filedict: dict[str, Any] = pj.load_json_dict(str(path / file))
        file_keys = set(filedict.keys())
        # Keys present in the file but never referenced by the database.
        unintended = file_keys - needed_keys
        if unintended:
            key = next(iter(unintended))
            raise ValueError(f"Found unintended key {key} in file {file}.")
        # Keys the database references but the file does not provide.
        missing = needed_keys - file_keys
        if missing:
            key = next(iter(missing))
            raise ValueError(f"Did not find data for key {key} that should be in file {file}.")
    print("Links:\t")
    return
def check_db_file_links(path: Path) -> None:
    """
    Verify that every path recorded in the 'backlogs' table of the
    database resolves to data actually present in the backing files.

    Parameters
    ----------
    path: Path
        Path of the correlator library.

    Raises
    ------
    ValueError
        Propagated from _check_db2paths on any mismatch between the
        database and the files.
    """
    db = get_db_file(path)
    search_expr = "SELECT path FROM 'backlogs'"
    conn = sqlite3.connect(path / db)
    try:
        results = pd.read_sql(search_expr, conn)['path'].values
    finally:
        # Always release the database handle, even if the query fails
        # (the original leaked the connection).
        conn.close()
    _check_db2paths(path, list(results))
def full_integrity_check(path: Path) -> None:
    """
    Run every integrity check on the correlator library.

    First the database-level consistency check, then the check that the
    database entries and the data files reference each other correctly.

    Parameters
    ----------
    path: Path
        Path of the correlator library.
    """
    # Order matters: validate the database itself before following its
    # links into the data files.
    check_db_integrity(path)
    check_db_file_links(path)
    print("Full:\t")

View file

@@ -11,6 +11,7 @@ from .tracker import get, save, unlock
import shutil
from typing import Any
from pathlib import Path
from .integrity import _check_db2paths
CACHE_DIR = ".cache"
@@ -153,7 +154,7 @@ def load_record(path: Path, meas_path: str) -> Union[Corr, Obs]:
return load_records(path, [meas_path])[0]
def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]:
def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}, dry_run: bool = False) -> list[Union[Corr, Obs]]:
"""
Load a list of records by their paths.
@@ -163,14 +164,19 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] =
Path of the correlator library.
meas_paths: list[str]
A list of the paths to the correlator in the backlog system.
perloaded: dict[str, Any]
The data that is already prelaoded. Of interest if data has alread been loaded in the same script.
preloaded: dict[str, Any]
The data that is already preloaded. Of interest if data has alread been loaded in the same script.
dry_run: bool
Do not load datda, just check whether we can reach the data we are interested in.
Returns
-------
retruned_data: list
returned_data: list
The loaded records.
"""
if dry_run:
_check_db2paths(path, meas_paths)
return []
needed_data: dict[str, list[str]] = {}
for mpath in meas_paths:
file = mpath.split("::")[0]