pyerrors.input.sfcf

  1import os
  2import fnmatch
  3import re
  4import numpy as np  # Thinly-wrapped numpy
  5from ..obs import Obs
  6from . import utils
  7
  8
  9def read_sfcf(path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", **kwargs):
 10    """Read sfcf c format from given folder structure.
 11
 12    Parameters
 13    ----------
 14    path : str
 15        Path to the sfcf files.
 16    prefix : str
 17        Prefix of the sfcf files.
 18    name : str
 19        Name of the correlation function to read.
 20    quarks : str
 21        Label of the quarks used in the sfcf input file. e.g. "quark quark"
 22        for version 0.0 this does NOT need to be given with the typical " - "
 23        that is present in the output file,
 24        this is done automatically for this version
 25    corr_type : str
 26        Type of correlation function to read. Can be
 27        - 'bi' for boundary-inner
 28        - 'bb' for boundary-boundary
 29        - 'bib' for boundary-inner-boundary
 30    noffset : int
 31        Offset of the source (only relevant when wavefunctions are used)
 32    wf : int
 33        ID of wave function
 34    wf2 : int
 35        ID of the second wavefunction
 36        (only relevant for boundary-to-boundary correlation functions)
 37    im : bool
 38        if True, read imaginary instead of real part
 39        of the correlation function.
 40    names : list
 41        Alternative labeling for replicas/ensembles.
 42        Has to have the appropriate length
 43    ens_name : str
 44        replaces the name of the ensemble
 45    version: str
 46        version of SFCF, with which the measurement was done.
 47        if the compact output option (-c) was specified,
 48        append a "c" to the version (e.g. "1.0c")
 49        if the append output option (-a) was specified,
 50        append an "a" to the version
 51    cfg_separator : str
 52        String that separates the ensemble identifier from the configuration number (default 'n').
 53    replica: list
 54        list of replica to be read, default is all
 55    files: list
 56        list of files to be read per replica, default is all.
 57        for non-compact output format, hand the folders to be read here.
 58    check_configs:
 59        list of list of supposed configs, eg. [range(1,1000)]
 60        for one replicum with 1000 configs
 61    """
 62    if kwargs.get('im'):
 63        im = 1
 64        part = 'imaginary'
 65    else:
 66        im = 0
 67        part = 'real'
 68    if "replica" in kwargs:
 69        reps = kwargs.get("replica")
 70    if corr_type == 'bb':
 71        b2b = True
 72        single = True
 73    elif corr_type == 'bib':
 74        b2b = True
 75        single = False
 76    else:
 77        b2b = False
 78        single = False
 79    compact = True
 80    appended = False
 81    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
 82
 83    if version not in known_versions:
 84        raise Exception("This version is not known!")
 85    if (version[-1] == "c"):
 86        appended = False
 87        compact = True
 88        version = version[:-1]
 89    elif (version[-1] == "a"):
 90        appended = True
 91        compact = False
 92        version = version[:-1]
 93    else:
 94        compact = False
 95        appended = False
 96    read = 0
 97    T = 0
 98    start = 0
 99    ls = []
100    if "replica" in kwargs:
101        ls = reps
102    else:
103        for (dirpath, dirnames, filenames) in os.walk(path):
104            if not appended:
105                ls.extend(dirnames)
106            else:
107                ls.extend(filenames)
108            break
109        if not ls:
110            raise Exception('Error, directory not found')
111        # Exclude folders with different names
112        for exc in ls:
113            if not fnmatch.fnmatch(exc, prefix + '*'):
114                ls = list(set(ls) - set([exc]))
115
116    if not appended:
117        if len(ls) > 1:
118            # New version, to cope with ids, etc.
119            ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
120        replica = len(ls)
121    else:
122        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
123    print('Read', part, 'part of', name, 'from', prefix[:-1],
124          ',', replica, 'replica')
125    if 'names' in kwargs:
126        new_names = kwargs.get('names')
127        if len(new_names) != len(set(new_names)):
128            raise Exception("names are not unique!")
129        if len(new_names) != replica:
130            raise Exception('Names does not have the required length', replica)
131    else:
132        new_names = []
133        if not appended:
134            for entry in ls:
135                try:
136                    idx = entry.index('r')
137                except Exception:
138                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
139
140                if 'ens_name' in kwargs:
141                    new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
142                else:
143                    new_names.append(entry[:idx] + '|' + entry[idx:])
144        else:
145
146            for exc in ls:
147                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
148                    ls = list(set(ls) - set([exc]))
149            ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
150            for entry in ls:
151                myentry = entry[:-len(name) - 1]
152                try:
153                    idx = myentry.index('r')
154                except Exception:
155                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
156
157                if 'ens_name' in kwargs:
158                    new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
159                else:
160                    new_names.append(myentry[:idx] + '|' + myentry[idx:])
161    idl = []
162    if not appended:
163        for i, item in enumerate(ls):
164            sub_ls = []
165            if "files" in kwargs:
166                sub_ls = kwargs.get("files")
167                sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
168            else:
169                for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
170                    if compact:
171                        sub_ls.extend(filenames)
172                    else:
173                        sub_ls.extend(dirnames)
174                    break
175                if compact:
176                    for exc in sub_ls:
177                        if not fnmatch.fnmatch(exc, prefix + '*'):
178                            sub_ls = list(set(sub_ls) - set([exc]))
179                    sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
180                else:
181                    for exc in sub_ls:
182                        if not fnmatch.fnmatch(exc, 'cfg*'):
183                            sub_ls = list(set(sub_ls) - set([exc]))
184                    sub_ls.sort(key=lambda x: int(x[3:]))
185            rep_idl = []
186            no_cfg = len(sub_ls)
187            for cfg in sub_ls:
188                try:
189                    if compact:
190                        rep_idl.append(int(cfg.split(cfg_separator)[-1]))
191                    else:
192                        rep_idl.append(int(cfg[3:]))
193                except Exception:
194                    raise Exception("Couldn't parse idl from directroy, problem with file " + cfg)
195            rep_idl.sort()
196            # maybe there is a better way to print the idls
197            print(item, ':', no_cfg, ' configurations')
198            idl.append(rep_idl)
199            # here we have found all the files we need to look into.
200            if i == 0:
201                # here, we want to find the place within the file,
202                # where the correlator we need is stored.
203                # to do so, the pattern needed is put together
204                # from the input values
205                if version == "0.0":
206                    pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
207                    # if b2b, a second wf is needed
208                    if b2b:
209                        pattern += ", wf_2 " + str(wf2)
210                    qs = quarks.split(" ")
211                    pattern += " : " + qs[0] + " - " + qs[1]
212                    file = open(path + '/' + item + '/' + sub_ls[0] + '/' + name, "r")
213                    for k, line in enumerate(file):
214                        if read == 1 and not line.strip() and k > start + 1:
215                            break
216                        if read == 1 and k >= start:
217                            T += 1
218                        if pattern in line:
219                            read = 1
220                            start = k + 1
221                    print(str(T) + " entries found.")
222                    file.close()
223                else:
224                    pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
225                    if b2b:
226                        pattern += '\nwf_2      ' + str(wf2)
227                    # and the file is parsed through to find the pattern
228                    if compact:
229                        file = open(path + '/' + item + '/' + sub_ls[0], "r")
230                    else:
231                        # for non-compactified versions of the files
232                        file = open(path + '/' + item + '/' + sub_ls[0] + '/' + name, "r")
233
234                    content = file.read()
235                    match = re.search(pattern, content)
236                    if match:
237                        start_read = content.count('\n', 0, match.start()) + 5 + b2b
238                        end_match = re.search(r'\n\s*\n', content[match.start():])
239                        T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
240                        assert T > 0
241                        print(T, 'entries, starting to read in line', start_read)
242                        file.close()
243                    else:
244                        file.close()
245                        raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
246
247                # we found where the correlator
248                # that is to be read is in the files
249                # after preparing the datastructure
250                # the correlators get parsed into...
251                deltas = []
252                for j in range(T):
253                    deltas.append([])
254
255            for t in range(T):
256                deltas[t].append(np.zeros(no_cfg))
257            if compact:
258                for cfg in range(no_cfg):
259                    with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
260                        lines = fp.readlines()
261                        # check, if the correlator is in fact
262                        # printed completely
263                        if (start_read + T > len(lines)):
264                            raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
265                        # and start to read the correlator.
266                        # the range here is chosen like this,
267                        # since this allows for implementing
268                        # a security check for every read correlator later...
269                        for k in range(start_read - 6, start_read + T):
270                            if k == start_read - 5 - b2b:
271                                if lines[k].strip() != 'name      ' + name:
272                                    raise Exception('Wrong format', sub_ls[cfg])
273                            if (k >= start_read and k < start_read + T):
274                                floats = list(map(float, lines[k].split()))
275                                deltas[k - start_read][i][cfg] = floats[-2:][im]
276            else:
277                for cnfg, subitem in enumerate(sub_ls):
278                    with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
279                        # since the non-compatified files
280                        # are typically not so long,
281                        # we can iterate over the whole file.
282                        # here one can also implement the chekc from above.
283                        for k, line in enumerate(fp):
284                            if (k >= start_read and k < start_read + T):
285                                floats = list(map(float, line.split()))
286                                if version == "0.0":
287                                    deltas[k - start][i][cnfg] = floats[im - single]
288                                else:
289                                    deltas[k - start_read][i][cnfg] = floats[1 + im - single]
290
291    else:
292        if "files" in kwargs:
293            ls = kwargs.get("files")
294        else:
295            for exc in ls:
296                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
297                    ls = list(set(ls) - set([exc]))
298                ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
299        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
300        if b2b:
301            pattern += '\nwf_2      ' + str(wf2)
302        for rep, file in enumerate(ls):
303            rep_idl = []
304            with open(path + '/' + file, 'r') as fp:
305                content = fp.readlines()
306                data_starts = []
307                for linenumber, line in enumerate(content):
308                    if "[run]" in line:
309                        data_starts.append(linenumber)
310                if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
311                    raise Exception("Irregularities in file structure found, not all runs have the same output length")
312                chunk = content[:data_starts[1]]
313                for linenumber, line in enumerate(chunk):
314                    if line.startswith("gauge_name"):
315                        gauge_line = linenumber
316                    elif line.startswith("[correlator]"):
317                        corr_line = linenumber
318                        found_pat = ""
319                        for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
320                            found_pat += li
321                        if re.search(pattern, found_pat):
322                            start_read = corr_line + 7 + b2b
323                            break
324                endline = corr_line + 6 + b2b
325                while not chunk[endline] == "\n":
326                    endline += 1
327                T = endline - start_read
328                if rep == 0:
329                    deltas = []
330                    for t in range(T):
331                        deltas.append([])
332                for t in range(T):
333                    deltas[t].append(np.zeros(len(data_starts)))
334                # all other chunks should follow the same structure
335                for cnfg in range(len(data_starts)):
336                    start = data_starts[cnfg]
337                    stop = start + data_starts[1]
338                    chunk = content[start:stop]
339                    try:
340                        rep_idl.append(int(chunk[gauge_line].split(cfg_separator)[-1]))
341                    except Exception:
342                        raise Exception("Couldn't parse idl from directory, problem with chunk around line ", gauge_line)
343
344                    found_pat = ""
345                    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
346                        found_pat += li
347                    if re.search(pattern, found_pat):
348                        for t, line in enumerate(chunk[start_read:start_read + T]):
349                            floats = list(map(float, line.split()))
350                            deltas[t][rep][cnfg] = floats[im + 1 - single]
351            idl.append(rep_idl)
352
353    if "check_configs" in kwargs:
354        print("Checking for missing configs...")
355        che = kwargs.get("check_configs")
356        if not (len(che) == len(idl)):
357            raise Exception("check_configs has to be the same length as replica!")
358        for r in range(len(idl)):
359            print("checking " + new_names[r])
360            utils.check_idl(idl[r], che[r])
361        print("Done")
362    result = []
363    for t in range(T):
364        result.append(Obs(deltas[t], new_names, idl=idl))
365    return result
def read_sfcf( path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version='1.0c', cfg_separator='n', **kwargs):
 10def read_sfcf(path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", **kwargs):
 11    """Read sfcf c format from given folder structure.
 12
 13    Parameters
 14    ----------
 15    path : str
 16        Path to the sfcf files.
 17    prefix : str
 18        Prefix of the sfcf files.
 19    name : str
 20        Name of the correlation function to read.
 21    quarks : str
 22        Label of the quarks used in the sfcf input file. e.g. "quark quark"
 23        for version 0.0 this does NOT need to be given with the typical " - "
 24        that is present in the output file,
 25        this is done automatically for this version
 26    corr_type : str
 27        Type of correlation function to read. Can be
 28        - 'bi' for boundary-inner
 29        - 'bb' for boundary-boundary
 30        - 'bib' for boundary-inner-boundary
 31    noffset : int
 32        Offset of the source (only relevant when wavefunctions are used)
 33    wf : int
 34        ID of wave function
 35    wf2 : int
 36        ID of the second wavefunction
 37        (only relevant for boundary-to-boundary correlation functions)
 38    im : bool
 39        if True, read imaginary instead of real part
 40        of the correlation function.
 41    names : list
 42        Alternative labeling for replicas/ensembles.
 43        Has to have the appropriate length
 44    ens_name : str
 45        replaces the name of the ensemble
 46    version: str
 47        version of SFCF, with which the measurement was done.
 48        if the compact output option (-c) was specified,
 49        append a "c" to the version (e.g. "1.0c")
 50        if the append output option (-a) was specified,
 51        append an "a" to the version
 52    cfg_separator : str
 53        String that separates the ensemble identifier from the configuration number (default 'n').
 54    replica: list
 55        list of replica to be read, default is all
 56    files: list
 57        list of files to be read per replica, default is all.
 58        for non-compact output format, hand the folders to be read here.
 59    check_configs:
 60        list of list of supposed configs, eg. [range(1,1000)]
 61        for one replicum with 1000 configs
 62    """
 63    if kwargs.get('im'):
 64        im = 1
 65        part = 'imaginary'
 66    else:
 67        im = 0
 68        part = 'real'
 69    if "replica" in kwargs:
 70        reps = kwargs.get("replica")
 71    if corr_type == 'bb':
 72        b2b = True
 73        single = True
 74    elif corr_type == 'bib':
 75        b2b = True
 76        single = False
 77    else:
 78        b2b = False
 79        single = False
 80    compact = True
 81    appended = False
 82    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
 83
 84    if version not in known_versions:
 85        raise Exception("This version is not known!")
 86    if (version[-1] == "c"):
 87        appended = False
 88        compact = True
 89        version = version[:-1]
 90    elif (version[-1] == "a"):
 91        appended = True
 92        compact = False
 93        version = version[:-1]
 94    else:
 95        compact = False
 96        appended = False
 97    read = 0
 98    T = 0
 99    start = 0
100    ls = []
101    if "replica" in kwargs:
102        ls = reps
103    else:
104        for (dirpath, dirnames, filenames) in os.walk(path):
105            if not appended:
106                ls.extend(dirnames)
107            else:
108                ls.extend(filenames)
109            break
110        if not ls:
111            raise Exception('Error, directory not found')
112        # Exclude folders with different names
113        for exc in ls:
114            if not fnmatch.fnmatch(exc, prefix + '*'):
115                ls = list(set(ls) - set([exc]))
116
117    if not appended:
118        if len(ls) > 1:
119            # New version, to cope with ids, etc.
120            ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
121        replica = len(ls)
122    else:
123        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
124    print('Read', part, 'part of', name, 'from', prefix[:-1],
125          ',', replica, 'replica')
126    if 'names' in kwargs:
127        new_names = kwargs.get('names')
128        if len(new_names) != len(set(new_names)):
129            raise Exception("names are not unique!")
130        if len(new_names) != replica:
131            raise Exception('Names does not have the required length', replica)
132    else:
133        new_names = []
134        if not appended:
135            for entry in ls:
136                try:
137                    idx = entry.index('r')
138                except Exception:
139                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
140
141                if 'ens_name' in kwargs:
142                    new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
143                else:
144                    new_names.append(entry[:idx] + '|' + entry[idx:])
145        else:
146
147            for exc in ls:
148                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
149                    ls = list(set(ls) - set([exc]))
150            ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
151            for entry in ls:
152                myentry = entry[:-len(name) - 1]
153                try:
154                    idx = myentry.index('r')
155                except Exception:
156                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
157
158                if 'ens_name' in kwargs:
159                    new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
160                else:
161                    new_names.append(myentry[:idx] + '|' + myentry[idx:])
162    idl = []
163    if not appended:
164        for i, item in enumerate(ls):
165            sub_ls = []
166            if "files" in kwargs:
167                sub_ls = kwargs.get("files")
168                sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
169            else:
170                for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
171                    if compact:
172                        sub_ls.extend(filenames)
173                    else:
174                        sub_ls.extend(dirnames)
175                    break
176                if compact:
177                    for exc in sub_ls:
178                        if not fnmatch.fnmatch(exc, prefix + '*'):
179                            sub_ls = list(set(sub_ls) - set([exc]))
180                    sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
181                else:
182                    for exc in sub_ls:
183                        if not fnmatch.fnmatch(exc, 'cfg*'):
184                            sub_ls = list(set(sub_ls) - set([exc]))
185                    sub_ls.sort(key=lambda x: int(x[3:]))
186            rep_idl = []
187            no_cfg = len(sub_ls)
188            for cfg in sub_ls:
189                try:
190                    if compact:
191                        rep_idl.append(int(cfg.split(cfg_separator)[-1]))
192                    else:
193                        rep_idl.append(int(cfg[3:]))
194                except Exception:
195                    raise Exception("Couldn't parse idl from directroy, problem with file " + cfg)
196            rep_idl.sort()
197            # maybe there is a better way to print the idls
198            print(item, ':', no_cfg, ' configurations')
199            idl.append(rep_idl)
200            # here we have found all the files we need to look into.
201            if i == 0:
202                # here, we want to find the place within the file,
203                # where the correlator we need is stored.
204                # to do so, the pattern needed is put together
205                # from the input values
206                if version == "0.0":
207                    pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
208                    # if b2b, a second wf is needed
209                    if b2b:
210                        pattern += ", wf_2 " + str(wf2)
211                    qs = quarks.split(" ")
212                    pattern += " : " + qs[0] + " - " + qs[1]
213                    file = open(path + '/' + item + '/' + sub_ls[0] + '/' + name, "r")
214                    for k, line in enumerate(file):
215                        if read == 1 and not line.strip() and k > start + 1:
216                            break
217                        if read == 1 and k >= start:
218                            T += 1
219                        if pattern in line:
220                            read = 1
221                            start = k + 1
222                    print(str(T) + " entries found.")
223                    file.close()
224                else:
225                    pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
226                    if b2b:
227                        pattern += '\nwf_2      ' + str(wf2)
228                    # and the file is parsed through to find the pattern
229                    if compact:
230                        file = open(path + '/' + item + '/' + sub_ls[0], "r")
231                    else:
232                        # for non-compactified versions of the files
233                        file = open(path + '/' + item + '/' + sub_ls[0] + '/' + name, "r")
234
235                    content = file.read()
236                    match = re.search(pattern, content)
237                    if match:
238                        start_read = content.count('\n', 0, match.start()) + 5 + b2b
239                        end_match = re.search(r'\n\s*\n', content[match.start():])
240                        T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
241                        assert T > 0
242                        print(T, 'entries, starting to read in line', start_read)
243                        file.close()
244                    else:
245                        file.close()
246                        raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
247
248                # we found where the correlator
249                # that is to be read is in the files
250                # after preparing the datastructure
251                # the correlators get parsed into...
252                deltas = []
253                for j in range(T):
254                    deltas.append([])
255
256            for t in range(T):
257                deltas[t].append(np.zeros(no_cfg))
258            if compact:
259                for cfg in range(no_cfg):
260                    with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
261                        lines = fp.readlines()
262                        # check, if the correlator is in fact
263                        # printed completely
264                        if (start_read + T > len(lines)):
265                            raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
266                        # and start to read the correlator.
267                        # the range here is chosen like this,
268                        # since this allows for implementing
269                        # a security check for every read correlator later...
270                        for k in range(start_read - 6, start_read + T):
271                            if k == start_read - 5 - b2b:
272                                if lines[k].strip() != 'name      ' + name:
273                                    raise Exception('Wrong format', sub_ls[cfg])
274                            if (k >= start_read and k < start_read + T):
275                                floats = list(map(float, lines[k].split()))
276                                deltas[k - start_read][i][cfg] = floats[-2:][im]
277            else:
278                for cnfg, subitem in enumerate(sub_ls):
279                    with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
280                        # since the non-compatified files
281                        # are typically not so long,
282                        # we can iterate over the whole file.
283                        # here one can also implement the chekc from above.
284                        for k, line in enumerate(fp):
285                            if (k >= start_read and k < start_read + T):
286                                floats = list(map(float, line.split()))
287                                if version == "0.0":
288                                    deltas[k - start][i][cnfg] = floats[im - single]
289                                else:
290                                    deltas[k - start_read][i][cnfg] = floats[1 + im - single]
291
292    else:
293        if "files" in kwargs:
294            ls = kwargs.get("files")
295        else:
296            for exc in ls:
297                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
298                    ls = list(set(ls) - set([exc]))
299                ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
300        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
301        if b2b:
302            pattern += '\nwf_2      ' + str(wf2)
303        for rep, file in enumerate(ls):
304            rep_idl = []
305            with open(path + '/' + file, 'r') as fp:
306                content = fp.readlines()
307                data_starts = []
308                for linenumber, line in enumerate(content):
309                    if "[run]" in line:
310                        data_starts.append(linenumber)
311                if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
312                    raise Exception("Irregularities in file structure found, not all runs have the same output length")
313                chunk = content[:data_starts[1]]
314                for linenumber, line in enumerate(chunk):
315                    if line.startswith("gauge_name"):
316                        gauge_line = linenumber
317                    elif line.startswith("[correlator]"):
318                        corr_line = linenumber
319                        found_pat = ""
320                        for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
321                            found_pat += li
322                        if re.search(pattern, found_pat):
323                            start_read = corr_line + 7 + b2b
324                            break
325                endline = corr_line + 6 + b2b
326                while not chunk[endline] == "\n":
327                    endline += 1
328                T = endline - start_read
329                if rep == 0:
330                    deltas = []
331                    for t in range(T):
332                        deltas.append([])
333                for t in range(T):
334                    deltas[t].append(np.zeros(len(data_starts)))
335                # all other chunks should follow the same structure
336                for cnfg in range(len(data_starts)):
337                    start = data_starts[cnfg]
338                    stop = start + data_starts[1]
339                    chunk = content[start:stop]
340                    try:
341                        rep_idl.append(int(chunk[gauge_line].split(cfg_separator)[-1]))
342                    except Exception:
343                        raise Exception("Couldn't parse idl from directory, problem with chunk around line ", gauge_line)
344
345                    found_pat = ""
346                    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
347                        found_pat += li
348                    if re.search(pattern, found_pat):
349                        for t, line in enumerate(chunk[start_read:start_read + T]):
350                            floats = list(map(float, line.split()))
351                            deltas[t][rep][cnfg] = floats[im + 1 - single]
352            idl.append(rep_idl)
353
354    if "check_configs" in kwargs:
355        print("Checking for missing configs...")
356        che = kwargs.get("check_configs")
357        if not (len(che) == len(idl)):
358            raise Exception("check_configs has to be the same length as replica!")
359        for r in range(len(idl)):
360            print("checking " + new_names[r])
361            utils.check_idl(idl[r], che[r])
362        print("Done")
363    result = []
364    for t in range(T):
365        result.append(Obs(deltas[t], new_names, idl=idl))
366    return result

Read sfcf c format from given folder structure.

Parameters
  • path (str): Path to the sfcf files.
  • prefix (str): Prefix of the sfcf files.
  • name (str): Name of the correlation function to read.
  • quarks (str): Label of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 this does NOT need to be given with the typical " - " that is present in the output file; this is added automatically for this version.
  • corr_type (str): Type of correlation function to read. Can be
    • 'bi' for boundary-inner
    • 'bb' for boundary-boundary
    • 'bib' for boundary-inner-boundary
  • noffset (int): Offset of the source (only relevant when wavefunctions are used)
  • wf (int): ID of wave function
  • wf2 (int): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
  • im (bool): if True, read imaginary instead of real part of the correlation function.
  • names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length
  • ens_name (str): replaces the name of the ensemble
  • version (str): Version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version.
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): list of replica to be read, default is all
  • files (list): list of files to be read per replica, default is all. for non-compact output format, hand the folders to be read here.
  • check_configs (list): List of lists of the expected configs, e.g. [range(1,1000)] for one replicum with 1000 configs.