merge with develop

This commit is contained in:
jkuhl-uni 2022-02-08 11:16:20 +01:00
commit 71fe86b8ba
59 changed files with 5367 additions and 1798 deletions

View file

@@ -1,6 +1,7 @@
import os
import h5py
import numpy as np
from collections import Counter
from ..obs import Obs, CObs
from ..correlators import Corr
@@ -32,6 +33,10 @@ def _get_files(path, filestem, idl):
filtered_files.append(line)
cnfg_numbers.append(no)
if idl:
if Counter(list(idl)) != Counter(cnfg_numbers):
raise Exception("Not all configurations specified in idl found, configurations " + str(list(Counter(list(idl)) - Counter(cnfg_numbers))) + " are missing.")
# Check that configurations are evenly spaced
dc = np.unique(np.diff(cnfg_numbers))
if np.any(dc < 0):
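The Counter comparison above reports exactly which requested configurations are absent from the directory. A minimal sketch of the idea (values invented):

from collections import Counter

idl = [10, 20, 30]
cnfg_numbers = [10, 30]
missing = list(Counter(idl) - Counter(cnfg_numbers))
print(missing)  # [20]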
@@ -58,16 +63,13 @@ def read_meson_hd5(path, filestem, ens_id, meson='meson_0', tree='meson', idl=No
meson : str
label of the meson to be extracted, default value meson_0, which
corresponds to the pseudoscalar-pseudoscalar two-point function.
tree : str
Label of the topmost directory in the hdf5 file, default 'meson'
for outputs of the Meson module. Can be altered to read input
from other modules with similar structures.
idl : range
If specified only configurations in the given range are read in.
"""
files, idx = _get_files(path, filestem, idl)
tree = meson.rsplit('_')[0]
corr_data = []
infos = []
for hd5_file in files:
@@ -155,7 +157,7 @@ def read_ExternalLeg_hd5(path, filestem, ens_id, idl=None):
raw_data = file['ExternalLeg/corr'][0][0].view('complex')
corr_data.append(raw_data)
if mom is None:
mom = np.array(str(file['ExternalLeg/info'].attrs['pIn'])[3:-2].strip().split(' '), dtype=int)
mom = np.array(str(file['ExternalLeg/info'].attrs['pIn'])[3:-2].strip().split(), dtype=float)
file.close()
corr_data = np.array(corr_data)
@@ -200,9 +202,9 @@ def read_Bilinear_hd5(path, filestem, ens_id, idl=None):
raw_data = file['Bilinear/Bilinear_' + str(i) + '/corr'][0][0].view('complex')
corr_data[name].append(raw_data)
if mom_in is None:
mom_in = np.array(str(file['Bilinear/Bilinear_' + str(i) + '/info'].attrs['pIn'])[3:-2].strip().split(' '), dtype=int)
mom_in = np.array(str(file['Bilinear/Bilinear_' + str(i) + '/info'].attrs['pIn'])[3:-2].strip().split(), dtype=float)
if mom_out is None:
mom_out = np.array(str(file['Bilinear/Bilinear_' + str(i) + '/info'].attrs['pOut'])[3:-2].strip().split(' '), dtype=int)
mom_out = np.array(str(file['Bilinear/Bilinear_' + str(i) + '/info'].attrs['pOut'])[3:-2].strip().split(), dtype=float)
file.close()
@@ -265,9 +267,9 @@ def read_Fourquark_hd5(path, filestem, ens_id, idl=None, vertices=["VA", "AV"]):
raw_data = file[tree + str(i) + '/corr'][0][0].view('complex')
corr_data[name].append(raw_data)
if mom_in is None:
mom_in = np.array(str(file[tree + str(i) + '/info'].attrs['pIn'])[3:-2].strip().split(' '), dtype=int)
mom_in = np.array(str(file[tree + str(i) + '/info'].attrs['pIn'])[3:-2].strip().split(), dtype=float)
if mom_out is None:
mom_out = np.array(str(file[tree + str(i) + '/info'].attrs['pOut'])[3:-2].strip().split(' '), dtype=int)
mom_out = np.array(str(file[tree + str(i) + '/info'].attrs['pOut'])[3:-2].strip().split(), dtype=float)
file.close()
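The switch from split(' ') with dtype=int to split() with dtype=float is what makes the momentum parsing robust: split(' ') yields empty strings for runs of whitespace, and dtype=int fails on fractional momentum components. A minimal sketch (the attribute value is an invented example of a stringified hdf5 attribute):

import numpy as np

raw = str(b'[ 0.5  0.5  0.5  0.5 ]')       # -> "b'[ 0.5  0.5  0.5  0.5 ]'"
inner = raw[3:-2].strip()                   # drop the leading b'[ and the trailing ]'
mom = np.array(inner.split(), dtype=float)  # split() collapses repeated whitespace
print(mom)  # [0.5 0.5 0.5 0.5]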

View file

@@ -6,8 +6,11 @@ import socket
import datetime
import platform
import warnings
import re
from ..obs import Obs
from ..covobs import Covobs
from ..correlators import Corr
from ..misc import _assert_equal_properties
from .. import version as pyerrorsversion
@@ -18,8 +21,8 @@ def create_json_string(ol, description='', indent=1):
Parameters
----------
ol : list
List of objects that will be exported. At the moments, these objects can be
either of: Obs, list, numpy.ndarray.
List of objects that will be exported. At the moment, these objects can be
either of: Obs, list, numpy.ndarray, Corr.
All Obs inside a structure have to be defined on the same set of configurations.
description : str
Optional string that describes the contents of the json file.
@@ -103,20 +106,6 @@ def create_json_string(ol, description='', indent=1):
dl.append(ed)
return dl
def _assert_equal_properties(ol, otype=Obs):
for o in ol:
if not isinstance(o, otype):
raise Exception("Wrong data type in list.")
for o in ol[1:]:
if not ol[0].is_merged == o.is_merged:
raise Exception("All Obs in list have to be defined on the same set of configs.")
if not ol[0].reweighted == o.reweighted:
raise Exception("All Obs in list have to have the same property 'reweighted'.")
if not ol[0].e_content == o.e_content:
raise Exception("All Obs in list have to be defined on the same set of configs.")
if not ol[0].idl == o.idl:
raise Exception("All Obs in list have to be defined on the same set of configurations.")
def write_Obs_to_dict(o):
d = {}
d['type'] = 'Obs'
@@ -173,12 +162,44 @@ def create_json_string(ol, description='', indent=1):
d['cdata'] = cdata
return d
def _nan_Obs_like(obs):
samples = []
names = []
idl = []
for key, value in obs.idl.items():
samples.append([np.nan] * len(value))
names.append(key)
idl.append(value)
my_obs = Obs(samples, names, idl)
my_obs.reweighted = obs.reweighted
my_obs.is_merged = obs.is_merged
return my_obs
def write_Corr_to_dict(my_corr):
first_not_none = next(i for i, j in enumerate(my_corr.content) if np.all(j))
dummy_array = np.empty((my_corr.N, my_corr.N), dtype=object)
dummy_array[:] = _nan_Obs_like(my_corr.content[first_not_none].ravel()[0])
content = [o if o is not None else dummy_array for o in my_corr.content]
dat = write_Array_to_dict(np.array(content, dtype=object))
dat['type'] = 'Corr'
corr_meta_data = str(my_corr.tag)
if 'tag' in dat.keys():
dat['tag'].append(corr_meta_data)
else:
dat['tag'] = [corr_meta_data]
taglist = dat['tag']
dat['tag'] = {} # tag is now a dictionary that contains the previous taglist under the key "tag"
dat['tag']['tag'] = taglist
if my_corr.prange is not None:
dat['tag']['prange'] = my_corr.prange
return dat
if not isinstance(ol, list):
ol = [ol]
d = {}
d['program'] = 'pyerrors %s' % (pyerrorsversion.__version__)
d['version'] = '0.1'
d['version'] = '0.2'
d['who'] = getpass.getuser()
d['date'] = datetime.datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %z')
d['host'] = socket.gethostname() + ', ' + platform.platform()
@@ -193,6 +214,10 @@ def create_json_string(ol, description='', indent=1):
d['obsdata'].append(write_List_to_dict(io))
elif isinstance(io, np.ndarray):
d['obsdata'].append(write_Array_to_dict(io))
elif isinstance(io, Corr):
d['obsdata'].append(write_Corr_to_dict(io))
else:
raise Exception("Unknown datatype.")
jsonstring = json.dumps(d, indent=indent, cls=my_encoder, ensure_ascii=False)
@@ -212,6 +237,7 @@ def create_json_string(ol, description='', indent=1):
return '\n'.join(split)
jsonstring = remove_quotationmarks(jsonstring)
jsonstring = jsonstring.replace('nan', 'NaN')
return jsonstring
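With the new write_Corr_to_dict branch, a Corr can be serialized directly. A hedged usage sketch (ensemble name and sample data are invented):

import numpy as np
import pyerrors as pe

content = [pe.Obs([np.random.normal(0.5, 0.01, 100)], ['ensemble1']) for _ in range(8)]
my_corr = pe.Corr(content)
my_corr.tag = 'pseudoscalar two-point function'
json_string = pe.input.json.create_json_string(my_corr, description='demo correlator')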
@@ -221,8 +247,8 @@ def dump_to_json(ol, fname, description='', indent=1, gz=True):
Parameters
----------
ol : list
List of objects that will be exported. At the moments, these objects can be
either of: Obs, list, numpy.ndarray.
List of objects that will be exported. At the moment, these objects can be
either of: Obs, list, numpy.ndarray, Corr.
All Obs inside a structure have to be defined on the same set of configurations.
fname : str
Filename of the output file.
@@ -255,7 +281,7 @@ def dump_to_json(ol, fname, description='', indent=1, gz=True):
def import_json_string(json_string, verbose=True, full_output=False):
"""Reconstruct a list of Obs or structures containing Obs from a json string.
The following structures are supported: Obs, list, numpy.ndarray
The following structures are supported: Obs, list, numpy.ndarray, Corr
If the list contains only one element, it is unpacked from the list.
Parameters
@@ -374,6 +400,33 @@ def import_json_string(json_string, verbose=True, full_output=False):
ret[-1].tag = taglist[i]
return np.reshape(ret, layout)
def get_Corr_from_dict(o):
if isinstance(o.get('tag'), list): # supports the old format in which tag was a plain list
taglist = o.get('tag')
temp_prange = None
elif isinstance(o.get('tag'), dict):
tagdic = o.get('tag')
taglist = tagdic['tag']
if 'prange' in tagdic:
temp_prange = tagdic['prange']
else:
temp_prange = None
else:
raise Exception("The tag is neither a list nor a dict")
corr_tag = taglist[-1]
tmp_o = o
tmp_o['tag'] = taglist[:-1]
if len(tmp_o['tag']) == 0:
del tmp_o['tag']
dat = get_Array_from_dict(tmp_o)
my_corr = Corr([None if np.isnan(o.ravel()[0].value) else o for o in list(dat)])
if corr_tag != 'None':
my_corr.tag = corr_tag
my_corr.prange = temp_prange
return my_corr
json_dict = json.loads(json_string)
prog = json_dict.get('program', '')
@@ -400,6 +453,10 @@ def import_json_string(json_string, verbose=True, full_output=False):
ol.append(get_List_from_dict(io))
elif io['type'] == 'Array':
ol.append(get_Array_from_dict(io))
elif io['type'] == 'Corr':
ol.append(get_Corr_from_dict(io))
else:
raise Exception("Unknown datatype.")
if full_output:
retd = {}
@@ -420,9 +477,9 @@ def import_json_string(json_string, verbose=True, full_output=False):
def load_json(fname, verbose=True, gz=True, full_output=False):
"""Import a list of Obs or structures containing Obs from a .json.gz file.
"""Import a list of Obs or structures containing Obs from a .json(.gz) file.
The following structures are supported: Obs, list, numpy.ndarray
The following structures are supported: Obs, list, numpy.ndarray, Corr
If the list contains only one element, it is unpacked from the list.
Parameters
@@ -451,3 +508,215 @@ def load_json(fname, verbose=True, gz=True, full_output=False):
d = fin.read()
return import_json_string(d, verbose, full_output)
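A short round-trip sketch for the file interface (file and ensemble names are invented; it is assumed that the .json(.gz) extension is appended when missing):

import numpy as np
import pyerrors as pe

my_obs = pe.Obs([np.random.normal(1.0, 0.1, 500)], ['ensemble1'])
pe.input.json.dump_to_json([my_obs], 'test_file', description='demo', gz=True)
reloaded = pe.input.json.load_json('test_file')  # single-element lists are unpacked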
def _ol_from_dict(ind, reps='DICTOBS'):
"""Convert a dictionary of Obs objects to a list and a dictionary that contains
placeholders instead of the Obs objects.
Parameters
----------
ind : dict
Dict of JSON valid structures and objects that will be exported.
At the moment, these objects can be either of: Obs, list, numpy.ndarray, Corr.
All Obs inside a structure have to be defined on the same set of configurations.
reps : str
Specify the structure of the placeholder in the exported dict to be reps[0-9]+.
"""
obstypes = (Obs, Corr, np.ndarray)
if not reps.isalnum():
raise Exception('Placeholder string has to be alphanumeric!')
ol = []
counter = 0
def dict_replace_obs(d):
nonlocal ol
nonlocal counter
x = {}
for k, v in d.items():
if isinstance(v, dict):
v = dict_replace_obs(v)
elif isinstance(v, list) and all([isinstance(o, Obs) for o in v]):
v = obslist_replace_obs(v)
elif isinstance(v, list):
v = list_replace_obs(v)
elif isinstance(v, obstypes):
ol.append(v)
v = reps + '%d' % (counter)
counter += 1
elif isinstance(v, str):
if bool(re.match(r'%s[0-9]+' % (reps), v)):
raise Exception('Dict contains the string %s, which matches the placeholder pattern %s[0-9]+ and can therefore not be safely exported.' % (v, reps))
x[k] = v
return x
def list_replace_obs(li):
nonlocal ol
nonlocal counter
x = []
for e in li:
# the check for a pure list of Obs has to come first, otherwise the generic list branch shadows it
if isinstance(e, list) and all([isinstance(o, Obs) for o in e]):
e = obslist_replace_obs(e)
elif isinstance(e, list):
e = list_replace_obs(e)
elif isinstance(e, dict):
e = dict_replace_obs(e)
elif isinstance(e, obstypes):
ol.append(e)
e = reps + '%d' % (counter)
counter += 1
elif isinstance(e, str):
if bool(re.match(r'%s[0-9]+' % (reps), e)):
raise Exception('Dict contains the string %s, which matches the placeholder pattern %s[0-9]+ and can therefore not be safely exported.' % (e, reps))
x.append(e)
return x
def obslist_replace_obs(li):
nonlocal ol
nonlocal counter
il = []
for e in li:
il.append(e)
ol.append(il)
x = reps + '%d' % (counter)
counter += 1
return x
nd = dict_replace_obs(ind)
return ol, nd
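In effect, _ol_from_dict pulls every Obs-like object out of the nested dict and leaves a placeholder string behind. A sketch of the expected behaviour (the helper is private and shown for illustration only; values are invented):

import numpy as np
import pyerrors as pe
from pyerrors.input.json import _ol_from_dict

my_obs = pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ensemble1'])
ol, nd = _ol_from_dict({'t0': my_obs, 'meta': {'beta': 3.7}})
# ol -> [my_obs]
# nd -> {'t0': 'DICTOBS0', 'meta': {'beta': 3.7}}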
def dump_dict_to_json(od, fname, description='', indent=1, reps='DICTOBS', gz=True):
"""Export a dict of Obs or structures containing Obs to a .json(.gz) file
Parameters
----------
od : dict
Dict of JSON valid structures and objects that will be exported.
At the moment, these objects can be either of: Obs, list, numpy.ndarray, Corr.
All Obs inside a structure have to be defined on the same set of configurations.
fname : str
Filename of the output file.
description : str
Optional string that describes the contents of the json file.
indent : int
Specify the indentation level of the json file. None or 0 is permissible and
saves disk space.
reps : str
Specify the structure of the placeholder in the exported dict to be reps[0-9]+.
gz : bool
If True, the output is a gzipped json. If False, the output is a json file.
"""
if not isinstance(od, dict):
raise Exception('od has to be a dictionary. Did you want to use dump_to_json?')
infostring = ('This JSON file contains a python dictionary that has been parsed to a list of structures. '
'OBSDICT contains the dictionary, where Obs or other structures have been replaced by '
'' + reps + '[0-9]+. The field description contains the additional description of this JSON file. '
'This file may be parsed to a dict with the pyerrors routine load_json_dict.')
desc_dict = {'INFO': infostring, 'OBSDICT': {}, 'description': description}
ol, desc_dict['OBSDICT'] = _ol_from_dict(od, reps=reps)
dump_to_json(ol, fname, description=desc_dict, indent=indent, gz=gz)
def _od_from_list_and_dict(ol, ind, reps='DICTOBS'):
"""Parse a list of Obs or structures containing Obs and an accompanying
dict, where the structures have been replaced by placeholders to a
dict that contains the structures.
The following structures are supported: Obs, list, numpy.ndarray, Corr
Parameters
----------
ol : list
List of objects that substitute the placeholders in ind.
At the moment, these objects can be either of: Obs, list, numpy.ndarray, Corr.
All Obs inside a structure have to be defined on the same set of configurations.
ind : dict
Dict that defines the structure of the resulting dict and contains placeholders
reps : str
Specify the structure of the placeholder in the imported dict to be reps[0-9]+.
"""
if not reps.isalnum():
raise Exception('Placeholder string has to be alphanumeric!')
counter = 0
def dict_replace_string(d):
nonlocal counter
nonlocal ol
x = {}
for k, v in d.items():
if isinstance(v, dict):
v = dict_replace_string(v)
elif isinstance(v, list):
v = list_replace_string(v)
elif isinstance(v, str) and bool(re.match(r'%s[0-9]+' % (reps), v)):
index = int(v[len(reps):])
v = ol[index]
counter += 1
x[k] = v
return x
def list_replace_string(li):
nonlocal counter
nonlocal ol
x = []
for e in li:
if isinstance(e, list):
e = list_replace_string(e)
elif isinstance(e, dict):
e = dict_replace_string(e)
elif isinstance(e, str) and bool(re.match(r'%s[0-9]+' % (reps), e)):
index = int(e[len(reps):])
e = ol[index]
counter += 1
x.append(e)
return x
nd = dict_replace_string(ind)
if counter == 0:
raise Exception('No placeholder has been replaced! Check if reps is set correctly.')
return nd
def load_json_dict(fname, verbose=True, gz=True, full_output=False, reps='DICTOBS'):
"""Import a dict of Obs or structures containing Obs from a .json(.gz) file.
The following structures are supported: Obs, list, numpy.ndarray, Corr
Parameters
----------
fname : str
Filename of the input file.
verbose : bool
Print additional information that was written to the file.
gz : bool
If True, assumes that data is gzipped. If False, assumes JSON file.
full_output : bool
If True, a dict containing auxiliary information and the data is returned.
If False, only the data is returned.
reps : str
Specify the structure of the placeholder in the imported dict to be reps[0-9]+.
"""
indata = load_json(fname, verbose=verbose, gz=gz, full_output=True)
description = indata['description']['description']
indict = indata['description']['OBSDICT']
ol = indata['obsdata']
od = _od_from_list_and_dict(ol, indict, reps=reps)
if full_output:
indata['description'] = description
indata['obsdata'] = od
return indata
else:
return od
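A round-trip sketch for the dictionary interface (keys and values are invented):

import numpy as np
import pyerrors as pe

od = {'t0': pe.Obs([np.random.normal(1.0, 0.1, 100)], ['ensemble1']),
      'meta': {'beta': 3.7, 'action': 'wilson'}}
pe.input.json.dump_dict_to_json(od, 'test_dict', description='demo')
reloaded = pe.input.json.load_json_dict('test_dict')  # same nesting, placeholders resolved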

View file

@@ -1,6 +1,3 @@
#!/usr/bin/env python
# coding: utf-8
import os
import fnmatch
import re
@@ -12,11 +9,12 @@ from ..obs import Obs
def read_pbp(path, prefix, **kwargs):
"""Read pbp format from given folder structure. Returns a list of length nrw
Keyword arguments
-----------------
r_start -- list which contains the first config to be read for each replicum
r_stop -- list which contains the last config to be read for each replicum
Parameters
----------
r_start : list
list which contains the first config to be read for each replicum
r_stop : list
list which contains the last config to be read for each replicum
"""
extract_nfct = 1
@@ -66,7 +64,6 @@ def read_pbp(path, prefix, **kwargs):
tmp_array = []
with open(path + '/' + ls[rep], 'rb') as fp:
# header
t = fp.read(4) # number of reweighting factors
if rep == 0:
nrw = struct.unpack('i', t)[0]
@@ -74,7 +71,7 @@ def read_pbp(path, prefix, **kwargs):
deltas.append([])
else:
if nrw != struct.unpack('i', t)[0]:
raise Exception('Error: different number of reweighting factors for replicum', rep)
raise Exception('Error: different number of factors for replicum', rep)
for k in range(nrw):
tmp_array.append([])
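The header logic above boils down to reading one little-endian 4-byte integer per file and checking that all replica agree on it. A minimal sketch (the file name is invented):

import struct

with open('ens1r0.pbp.dat', 'rb') as fp:
    nrw = struct.unpack('i', fp.read(4))[0]  # number of factors stored in the header
print(nrw)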

View file

@@ -1,11 +1,11 @@
#!/usr/bin/env python
# coding: utf-8
import os
import fnmatch
import re
import struct
import numpy as np
import warnings
import matplotlib.pyplot as plt
from matplotlib import gridspec
from ..obs import Obs
from ..fits import fit_lin
@@ -15,16 +15,31 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
Parameters
----------
path : str
path that contains the data files
prefix : str
all files in path that start with prefix are considered as input files.
May be used together with postfix to consider only specific file endings.
Prefix is ignored, if the keyword 'files' is used.
version : str
version of openQCD, default 2.0
names : list
list of names that is assigned to the data according
to the order in the file list. Use with care if you do not provide file names!
r_start : list
list which contains the first config to be read for each replicum
r_stop : list
list which contains the last config to be read for each replicum
r_step : int
integer that defines a fixed step size between two measurements (in units of configs)
If not given, r_step=1 is assumed.
postfix : str
postfix of the file to read, e.g. '.ms1' for openQCD-files
idl_offsets : list
offsets to the idl range of the observables. Useful if the measurements of the reweighting factors only start at a later configuration, e.g. cfg 20.
files : list
list which contains the filenames to be read. No automatic detection of
files is performed if given.
print_err : bool
Print additional information that is useful for debugging.
"""
known_oqcd_versions = ['1.4', '1.6', '2.0']
if not (version in known_oqcd_versions):
@@ -44,7 +59,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
if 'files' in kwargs:
ls = kwargs.get('files')
else:
# Exclude files with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
ls = list(set(ls) - set([exc]))
@@ -56,8 +70,7 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
r_start = kwargs.get('r_start')
if len(r_start) != replica:
raise Exception('r_start does not match number of replicas')
# Adjust Configuration numbering to python index
r_start = [o - 1 if o else None for o in r_start]
r_start = [o if o else None for o in r_start]
else:
r_start = [None] * replica
@@ -68,16 +81,22 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
else:
r_stop = [None] * replica
if 'r_step' in kwargs:
r_step = kwargs.get('r_step')
else:
r_step = 1
print('Read reweighting factors from', prefix[:-1], ',',
replica, 'replica', end='')
# Adjust replica names to new bookmarking system
if names is None:
rep_names = []
for entry in ls:
truncated_entry = entry.split('.')[0]
idx = truncated_entry.index('r')
rep_names.append(truncated_entry[:idx] + '|' + truncated_entry[idx:])
else:
rep_names = names
print_err = 0
if 'print_err' in kwargs:
@@ -86,11 +105,14 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
deltas = []
configlist = []
r_start_index = []
r_stop_index = []
for rep in range(replica):
tmp_array = []
with open(path + '/' + ls[rep], 'rb') as fp:
# header
t = fp.read(4) # number of reweighting factors
if rep == 0:
nrw = struct.unpack('i', t)[0]
@@ -99,7 +121,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
for k in range(nrw):
deltas.append([])
else:
# little weird if-clause due to the /2 operation needed.
if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')):
raise Exception('Error: different number of reweighting factors for replicum', rep)
@@ -112,8 +133,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
for i in range(nrw):
t = fp.read(4)
nfct.append(struct.unpack('i', t)[0])
# print('nfct: ', nfct) # Hasenbusch factor,
# 1 for rat reweighting
else:
for i in range(nrw):
nfct.append(1)
@@ -126,13 +145,13 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
if not struct.unpack('i', fp.read(4))[0] == 0:
print('something is wrong!')
# body
configlist.append([])
while 0 < 1:
t = fp.read(4)
if len(t) < 4:
break
if print_err:
config_no = struct.unpack('i', t)
config_no = struct.unpack('i', t)[0]
configlist[-1].append(config_no)
for i in range(nrw):
if(version == '2.0'):
tmpd = _read_array_openQCD2(fp)
@@ -163,8 +182,32 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
print('Partial factor:', tmp_nfct)
tmp_array[i].append(tmp_nfct)
if r_start[rep] is None:
r_start_index.append(0)
else:
try:
r_start_index.append(configlist[-1].index(r_start[rep]))
except ValueError:
raise Exception('Config %d not in file with range [%d, %d]' % (
r_start[rep], configlist[-1][0], configlist[-1][-1])) from None
if r_stop[rep] is None:
r_stop_index.append(len(configlist[-1]) - 1)
else:
try:
r_stop_index.append(configlist[-1].index(r_stop[rep]))
except ValueError:
raise Exception('Config %d not in file with range [%d, %d]' % (
r_stop[rep], configlist[-1][0], configlist[-1][-1])) from None
for k in range(nrw):
deltas[k].append(tmp_array[k][r_start[rep]:r_stop[rep]])
deltas[k].append(tmp_array[k][r_start_index[rep]:r_stop_index[rep]][::r_step])
if np.any([len(np.unique(np.diff(cl))) != 1 for cl in configlist]):
raise Exception('Irregular spaced data in input file!', [len(np.unique(np.diff(cl))) for cl in configlist])
stepsizes = [list(np.unique(np.diff(cl)))[0] for cl in configlist]
if np.any([step != 1 for step in stepsizes]):
warnings.warn('Stepsize between configurations is greater than one! ' + str(stepsizes), RuntimeWarning)
print(',', nrw, 'reweighting factors with', nsrc, 'sources')
if "idl_offsets" in kwargs:
@@ -172,15 +215,9 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
else:
idl_offsets = np.ones(nrw, dtype=int)
result = []
idl = [range(configlist[rep][r_start_index[rep]], configlist[rep][r_stop_index[rep]], r_step) for rep in range(replica)]
for t in range(nrw):
idl = []
for rep in range(replica):
idl.append(range(idl_offsets[rep], len(deltas[t][rep] + idl_offsets[rep])))
if names is None:
result.append(Obs(deltas[t], rep_names, idl=idl))
else:
print(names)
result.append(Obs(deltas[t], names, idl=idl))
result.append(Obs(deltas[t], rep_names, idl=idl))
return result
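A hedged usage sketch of the updated interface (path, prefix and config ranges are invented):

import pyerrors as pe

# expects e.g. ./data/ens1r0.ms1.dat and ./data/ens1r1.ms1.dat
rwms = pe.input.openQCD.read_rwms('./data', 'ens1', version='2.0',
                                  r_start=[1, 1], r_stop=[500, 500], r_step=2)
for rw in rwms:
    rw.gamma_method()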
@@ -193,7 +230,12 @@ def extract_t0(path, prefix, dtr_read, xmin,
The data around the zero crossing of t^2<E> - 0.3
is fitted with a linear function
from which the exact root is extracted.
Only works with openQCD v 1.2.
Only works with openQCD.
It is assumed that one measurement is performed for each config.
If this is not the case, the resulting idl, as well as the handling
of r_start, r_stop and r_step, is wrong and the user has to correct
this in the resulting observable.
Parameters
----------
@@ -215,10 +257,26 @@ def extract_t0(path, prefix, dtr_read, xmin,
crossing to be included in the linear fit. (Default: 5)
r_start : list
list which contains the first config to be read for each replicum.
r_stop: list
r_stop : list
list which contains the last config to be read for each replicum.
r_step : int
integer that defines a fixed step size between two measurements (in units of configs)
If not given, r_step=1 is assumed.
plaquette : bool
If true extract the plaquette estimate of t0 instead.
names : list
list of names that is assigned to the data according
to the order in the file list. Use with care if you do not provide file names!
files : list
list which contains the filenames to be read. No automatic detection of
files is performed if given.
plot_fit : bool
If true, the fit for the extraction of t0 is shown together with the data.
assume_thermalization : bool
If True: if the first record number divided by the distance between two
measurements is larger than 1, it is assumed that this is due to thermalization
and that the first measurement belongs to the first config (default).
If False: the config numbers are assumed to be traj_number // difference.
"""
ls = []
@@ -229,20 +287,21 @@ def extract_t0(path, prefix, dtr_read, xmin,
if not ls:
raise Exception('Error, directory not found')
# Exclude files with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*.ms.dat'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
if 'files' in kwargs:
ls = kwargs.get('files')
else:
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*.ms.dat'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
replica = len(ls)
if 'r_start' in kwargs:
r_start = kwargs.get('r_start')
if len(r_start) != replica:
raise Exception('r_start does not match number of replicas')
# Adjust Configuration numbering to python index
r_start = [o - 1 if o else None for o in r_start]
r_start = [o if o else None for o in r_start]
else:
r_start = [None] * replica
@@ -253,14 +312,31 @@ def extract_t0(path, prefix, dtr_read, xmin,
else:
r_stop = [None] * replica
if 'r_step' in kwargs:
r_step = kwargs.get('r_step')
else:
r_step = 1
print('Extract t0 from', prefix, ',', replica, 'replica')
if 'names' in kwargs:
rep_names = kwargs.get('names')
else:
rep_names = []
for entry in ls:
truncated_entry = entry.split('.')[0]
idx = truncated_entry.index('r')
rep_names.append(truncated_entry[:idx] + '|' + truncated_entry[idx:])
Ysum = []
configlist = []
r_start_index = []
r_stop_index = []
for rep in range(replica):
with open(path + '/' + ls[rep], 'rb') as fp:
# Read header
t = fp.read(12)
header = struct.unpack('iii', t)
if rep == 0:
@@ -279,12 +355,13 @@ def extract_t0(path, prefix, dtr_read, xmin,
Ysl = []
# Read body
configlist.append([])
while 0 < 1:
t = fp.read(4)
if(len(t) < 4):
break
nc = struct.unpack('i', t)[0]
configlist[-1].append(nc)
t = fp.read(8 * tmax * (nn + 1))
if kwargs.get('plaquette'):
@@ -302,6 +379,38 @@ def extract_t0(path, prefix, dtr_read, xmin,
current + tmax - xmin])
for current in range(0, len(item), tmax)])
diffmeas = configlist[-1][-1] - configlist[-1][-2]
configlist[-1] = [item // diffmeas for item in configlist[-1]]
if kwargs.get('assume_thermalization', True) and configlist[-1][0] > 1:
warnings.warn('Assume thermalization and that the first measurement belongs to the first config.')
offset = configlist[-1][0] - 1
configlist[-1] = [item - offset for item in configlist[-1]]
if r_start[rep] is None:
r_start_index.append(0)
else:
try:
r_start_index.append(configlist[-1].index(r_start[rep]))
except ValueError:
raise Exception('Config %d not in file with range [%d, %d]' % (
r_start[rep], configlist[-1][0], configlist[-1][-1])) from None
if r_stop[rep] is None:
r_stop_index.append(len(configlist[-1]) - 1)
else:
try:
r_stop_index.append(configlist[-1].index(r_stop[rep]))
except ValueError:
raise Exception('Config %d not in file with range [%d, %d]' % (
r_stop[rep], configlist[-1][0], configlist[-1][-1])) from None
if np.any([len(np.unique(np.diff(cl))) != 1 for cl in configlist]):
raise Exception('Irregular spaced data in input file!', [len(np.unique(np.diff(cl))) for cl in configlist])
stepsizes = [list(np.unique(np.diff(cl)))[0] for cl in configlist]
if np.any([step != 1 for step in stepsizes]):
warnings.warn('Stepsize between configurations is greater than one! ' + str(stepsizes), RuntimeWarning)
idl = [range(configlist[rep][r_start_index[rep]], configlist[rep][r_stop_index[rep]], r_step) for rep in range(replica)]
t2E_dict = {}
for n in range(nn + 1):
samples = []
@@ -309,8 +418,8 @@ def extract_t0(path, prefix, dtr_read, xmin,
samples.append([])
for cnfg in rep:
samples[-1].append(cnfg[n])
samples[-1] = samples[-1][r_start[nrep]:r_stop[nrep]]
new_obs = Obs(samples, [(w.split('.'))[0] for w in ls])
samples[-1] = samples[-1][r_start_index[nrep]:r_stop_index[nrep]][::r_step]
new_obs = Obs(samples, rep_names, idl=idl)
t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs / (spatial_extent ** 3) - 0.3
zero_crossing = np.argmax(np.array(
@@ -323,32 +432,62 @@ def extract_t0(path, prefix, dtr_read, xmin,
[o.gamma_method() for o in y]
fit_result = fit_lin(x, y)
if kwargs.get('plot_fit'):
plt.figure()
gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], wspace=0.0, hspace=0.0)
ax0 = plt.subplot(gs[0])
xmore = list(t2E_dict.keys())[zero_crossing - fit_range - 2: zero_crossing + fit_range + 2]
ymore = list(t2E_dict.values())[zero_crossing - fit_range - 2: zero_crossing + fit_range + 2]
[o.gamma_method() for o in ymore]
ax0.errorbar(xmore, [yi.value for yi in ymore], yerr=[yi.dvalue for yi in ymore], fmt='x')
xplot = np.linspace(np.min(x), np.max(x))
yplot = [fit_result[0] + fit_result[1] * xi for xi in xplot]
[yi.gamma_method() for yi in yplot]
ax0.fill_between(xplot, y1=[yi.value - yi.dvalue for yi in yplot], y2=[yi.value + yi.dvalue for yi in yplot])
retval = (-fit_result[0] / fit_result[1])
retval.gamma_method()
ylim = ax0.get_ylim()
ax0.fill_betweenx(ylim, x1=retval.value - retval.dvalue, x2=retval.value + retval.dvalue, color='gray', alpha=0.4)
ax0.set_ylim(ylim)
ax0.set_ylabel(r'$t^2 \langle E(t) \rangle - 0.3 $')
xlim = ax0.get_xlim()
fit_res = [fit_result[0] + fit_result[1] * xi for xi in x]
residuals = (np.asarray([o.value for o in y]) - [o.value for o in fit_res]) / np.asarray([o.dvalue for o in y])
ax1 = plt.subplot(gs[1])
ax1.plot(x, residuals, 'ko', ls='none', markersize=5)
ax1.tick_params(direction='out')
ax1.tick_params(axis="x", bottom=True, top=True, labelbottom=True)
ax1.axhline(y=0.0, ls='--', color='k')
ax1.fill_between(xlim, -1.0, 1.0, alpha=0.1, facecolor='k')
ax1.set_xlim(xlim)
ax1.set_ylabel('Residuals')
ax1.set_xlabel(r'$t/a^2$')
plt.show()
return -fit_result[0] / fit_result[1]
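A hedged usage sketch (all arguments are invented; spatial_extent is assumed to be the remaining positional parameter of the truncated signature above):

import pyerrors as pe

t0 = pe.input.openQCD.extract_t0('./data', 'ens1', dtr_read=1, xmin=5,
                                 spatial_extent=32, fit_range=5, plot_fit=True)
t0.gamma_method()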
def _parse_array_openQCD2(d, n, size, wa, quadrupel=False):
arr = []
if d == 2:
tot = 0
for i in range(n[d - 1] - 1):
for i in range(n[0]):
tmp = wa[i * n[1]:(i + 1) * n[1]]
if quadrupel:
tmp = wa[tot:n[d - 1]]
tmp2 = []
for i in range(len(tmp)):
if i % 2 == 0:
tmp2.append(tmp[i])
for j in range(0, len(tmp), 2):
tmp2.append(tmp[j])
arr.append(tmp2)
else:
arr.append(np.asarray(wa[tot:n[d - 1]]))
arr.append(np.asarray(tmp))
else:
raise Exception('Only two-dimensional arrays supported!')
return arr
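Both loop variants select the even-indexed entries of each row; the new range-based loop additionally avoids shadowing the outer loop variable i. A quick equivalence check (values invented):

tmp = [0.1, 0.2, 0.3, 0.4]
assert [tmp[j] for j in range(0, len(tmp), 2)] == [t for i, t in enumerate(tmp) if i % 2 == 0]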
# Mimics the read_array routine of openQCD-2.0.
# fp is the opened file handle,
# returns a dict that contains the array.
# At this point we only parse two-dimensional arrays with
# d = 2
# n = [nfct[irw], 2*nsrc[irw]]
def _read_array_openQCD2(fp):
t = fp.read(4)
d = struct.unpack('i', t)[0]
@@ -380,42 +519,40 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
Parameters
----------
path:
path : str
path of the measurement files
prefix:
prefix : str
prefix of the measurement files, e.g. <prefix>_id0_r0.ms.dat
c: double
c : double
Smearing radius in units of the lattice extent, c = sqrt(8 t0) / L
dtr_cnfg: int
dtr_cnfg : int
(optional) parameter that specifies the number of trajectories
between two configs.
If it is not set, the distance between two measurements
in the file is assumed to be
the distance between two configurations.
steps: int
steps : int
(optional) (maybe only necessary for openQCD2.0)
nt step size, guessed if not given
version: str
version : str
version string of the openQCD (sfqcd) version used to create
the ensemble
L: int
L : int
spatial length of the lattice in L/a.
HAS to be set if version != sfqcd, since openQCD does not provide
this in the header
r_start: list
r_start : list
offset of the first ensemble, making it easier to match
later on with other Obs
r_stop: list
r_stop : list
last configurations that need to be read (per replicum)
files: list
files : list
specify the exact files that need to be read
from path, pratical if e.g. only one replicum is needed
names: list
from path, practical if e.g. only one replicum is needed
names : list
Alternative labeling for replicas/ensembles.
Has to have the appropriate length
"""
# one could read L from the header in case of sfQCD
# c = 0.35
known_versions = ["1.0", "1.2", "1.4", "1.6", "2.0", "sfqcd"]
if version not in known_versions:
@@ -435,11 +572,9 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
r_start = kwargs.get("r_start")
if "r_stop" in kwargs:
r_stop = kwargs.get("r_stop")
# if one wants to read specific files with this method...
if "files" in kwargs:
files = kwargs.get("files")
else:
# find files in path
found = []
files = []
for (dirpath, dirnames, filenames) in os.walk(path + "/"):
@@ -450,14 +585,12 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
if fnmatch.fnmatch(f, prefix + "*" + ".ms.dat"):
files.append(f)
print(files)
# now that we found our files, we dechiffer them...
rep_names = []
deltas = []
idl = []
for rep, file in enumerate(files):
with open(path + "/" + file, "rb") as fp:
# header
t = fp.read(12)
header = struct.unpack('<iii', t)
# step size in integration steps "dnms"
@@ -486,7 +619,6 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
Q = []
ncs = []
while 0 < 1:
# int nt
t = fp.read(4)
if(len(t) < 4):
break
@@ -533,8 +665,6 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
if len(Q_round) != len(ncs) // dtr_cnfg:
raise Exception("qtops and ncs don't have the same length")
# replica = len(files)
truncated_file = file[:-7]
print(truncated_file)
idl_start = 1
@@ -562,17 +692,24 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
rep_names = names
deltas.append(np.array(Q_round))
idl.append(range(idl_start, idl_stop))
# print(idl)
result = Obs(deltas, rep_names, idl=idl)
return result
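A hedged usage sketch (arguments invented; L is required here because non-sfqcd headers do not store the spatial extent):

import pyerrors as pe

qtop = pe.input.openQCD.read_qtop('./data', 'ens1', c=0.35, dtr_cnfg=1,
                                  version='1.6', L=32)
qtop.gamma_method()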
def read_qtop_sector(target=0, **kwargs):
"""target: int
specifies the topological sector to be reweighted to (default 0)
q_top: Obs
alternatively takes args of read_qtop method as kwargs
"""Constructs reweighting factors to a specified topological sector.
Parameters
----------
target : int
Specifies the topological sector to be reweighted to (default 0)
q_top : Obs
Alternatively takes args of read_qtop method as kwargs
"""
if not isinstance(target, int):
raise Exception("'target' has to be an integer.")
if "q_top" in kwargs:
qtop = kwargs.get("q_top")
else:
@@ -603,7 +740,6 @@ def read_qtop_sector(target=0, **kwargs):
dtr_cnfg = 1
qtop = read_qtop(path, prefix, c, dtr_cnfg=dtr_cnfg,
version=version, **kwargs)
# unpack to original values, project onto target sector
names = qtop.names
print(names)
print(qtop.deltas.keys())
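The sector reweighting can then be driven entirely through kwargs that are forwarded to read_qtop, as sketched here (all arguments invented):

import pyerrors as pe

rw = pe.input.openQCD.read_qtop_sector(target=0, path='./data', prefix='ens1',
                                       c=0.35, version='1.6', L=32)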

View file

@@ -2,11 +2,20 @@
def check_idl(idl, che):
"""Checks if a list of configurations is contained in an idl.
Parameters
----------
idl : range or list
idl of the current replicum
che : list
list of configurations to be checked against
"""
missing = []
for c in che:
if c not in idl:
missing.append(c)
# print missing such that it can directly be parsed to slurm terminal
# print missing configurations such that they can directly be passed to the slurm terminal
if not (len(missing) == 0):
print(len(missing), "configs missing")
miss_str = str(missing[0])
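A small sketch of the intended use (values invented):

check_idl(range(1, 100, 2), [1, 3, 4])
# reports config 4 as missing, since this idl only contains odd numbers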