pyerrors.input.sfcf

  1import os
  2import fnmatch
  3import re
  4import numpy as np  # Thinly-wrapped numpy
  5from ..obs import Obs
  6from .utils import sort_names, check_idl
  7import itertools
  8
  9
 10sep = "/"
 11
 12
 13def read_sfcf(path, prefix, name, quarks='.*', corr_type="bi", noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", silent=False, **kwargs):
 14    """Read sfcf files from given folder structure.
 15
 16    Parameters
 17    ----------
 18    path : str
 19        Path to the sfcf files.
 20    prefix : str
 21        Prefix of the sfcf files.
 22    name : str
 23        Name of the correlation function to read.
 24    quarks : str
 25        Label of the quarks used in the sfcf input file, e.g. "quark quark".
 26        For version 0.0 this does NOT need to be given with the typical " - "
 27        that is present in the output file; it is added automatically for
 28        this version.
 29    corr_type : str
 30        Type of correlation function to read. Can be
 31        - 'bi' for boundary-inner
 32        - 'bb' for boundary-boundary
 33        - 'bib' for boundary-inner-boundary
 34    noffset : int
 35        Offset of the source (only relevant when wavefunctions are used)
 36    wf : int
 37        ID of wave function
 38    wf2 : int
 39        ID of the second wavefunction
 40        (only relevant for boundary-to-boundary correlation functions)
 41    im : bool
 42        if True, read imaginary instead of real part
 43        of the correlation function.
 44    names : list
 45        Alternative labeling for replicas/ensembles.
 46        Has to have one entry per replicum.
 47    ens_name : str
 48        replaces the name of the ensemble
 49    version: str
 50        version of SFCF, with which the measurement was done.
 51        if the compact output option (-c) was specified,
 52        append a "c" to the version (e.g. "1.0c")
 53        if the append output option (-a) was specified,
 54        append an "a" to the version
 55    cfg_separator : str
 56        String that separates the ensemble identifier from the configuration number (default 'n').
 57    replica: list
 58        list of replica to be read, default is all
 59    files: list
 60        list of files to be read per replica, default is all.
 61        for non-compact output format, hand the folders to be read here.
 62    check_configs: list[list[int]]
 63        list of list of supposed configs, eg. [range(1,1000)]
 64        for one replicum with 1000 configs
 65
 66    Returns
 67    -------
 68    result: list[Obs]
 69        list of Observables with length T, one observable per timeslice.
 70        bb-type correlators have length 1.
 71    """
 72    ret = read_sfcf_multi(path, prefix, [name], quarks_list=[quarks], corr_type_list=[corr_type],
 73                          noffset_list=[noffset], wf_list=[wf], wf2_list=[wf2], version=version,
 74                          cfg_separator=cfg_separator, silent=silent, **kwargs)
 75    return ret[name][quarks][str(noffset)][str(wf)][str(wf2)]
 76
 77
 78def read_sfcf_multi(path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version="1.0c", cfg_separator="n", silent=False, keyed_out=False, **kwargs):
 79    """Read sfcf files from given folder structure.
 80
 81    Parameters
 82    ----------
 83    path : str
 84        Path to the sfcf files.
 85    prefix : str
 86        Prefix of the sfcf files.
 87    name_list : list[str]
 88        Names of the correlation functions to read.
 89    quarks_list : list[str]
 90        Labels of the quarks used in the sfcf input file, e.g. "quark quark".
 91        For version 0.0 these do NOT need to be given with the typical " - "
 92        that is present in the output file; it is added automatically for
 93        this version.
 94    corr_type_list : list[str]
 95        Type of correlation function to read. Can be
 96        - 'bi' for boundary-inner
 97        - 'bb' for boundary-boundary
 98        - 'bib' for boundary-inner-boundary
 99    noffset_list : list[int]
100        Offset of the source (only relevant when wavefunctions are used)
101    wf_list : list[int]
102        IDs of the wave functions
103    wf2_list : list[int]
104        ID of the second wavefunction
105        (only relevant for boundary-to-boundary correlation functions)
106    im : bool
107        if True, read imaginary instead of real part
108        of the correlation function.
109    names : list
110        Alternative labeling for replicas/ensembles.
111        Has to have one entry per replicum.
112    ens_name : str
113        replaces the name of the ensemble
114    version: str
115        version of SFCF, with which the measurement was done.
116        if the compact output option (-c) was specified,
117        append a "c" to the version (e.g. "1.0c")
118        if the append output option (-a) was specified,
119        append an "a" to the version
120    cfg_separator : str
121        String that separates the ensemble identifier from the configuration number (default 'n').
122    replica: list
123        list of replica to be read, default is all
124    files: list[list[str]] or list[str]
125        list of files to be read per replica, default is all.
126        for non-compact output format, hand the folders to be read here.
127    check_configs: list[list[int]]
128        list of list of supposed configs, eg. [range(1,1000)]
129        for one replicum with 1000 configs
130
131    Returns
132    -------
133    result: dict[list[Obs]]
134        dict with one of the following properties:
135        if keyed_out:
136            dict[key] = list[Obs]
137            where key has the form name/quarks/offset/wf/wf2
138        if not keyed_out:
139            dict[name][quarks][offset][wf][wf2] = list[Obs]
140    """
141
142    if kwargs.get('im'):
143        im = 1
144        part = 'imaginary'
145    else:
146        im = 0
147        part = 'real'
148
149    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
150
151    if version not in known_versions:
152        raise Exception("This version is not known!")
153    if (version[-1] == "c"):
154        appended = False
155        compact = True
156        version = version[:-1]
157    elif (version[-1] == "a"):
158        appended = True
159        compact = False
160        version = version[:-1]
161    else:
162        compact = False
163        appended = False
164    ls = []
165    if "replica" in kwargs:
166        ls = kwargs.get("replica")
167    else:
168        for (dirpath, dirnames, filenames) in os.walk(path):
169            if not appended:
170                ls.extend(dirnames)
171            else:
172                ls.extend(filenames)
173            break
174        if not ls:
175            raise Exception('Error, directory not found')
176        # Exclude folders with different names
177        for exc in ls:
178            if not fnmatch.fnmatch(exc, prefix + '*'):
179                ls = list(set(ls) - set([exc]))
180
181    if not appended:
182        ls = sort_names(ls)
183        replica = len(ls)
184
185    else:
186        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
187    if not silent:
188        print('Read', part, 'part of', name_list, 'from', prefix[:-1], ',', replica, 'replica')
189
190    if 'names' in kwargs:
191        new_names = kwargs.get('names')
192        if len(new_names) != len(set(new_names)):
193            raise Exception("names are not unique!")
194        if len(new_names) != replica:
195            raise Exception('names should have the length', replica)
196
197    else:
198        ens_name = kwargs.get("ens_name")
199        if not appended:
200            new_names = _get_rep_names(ls, ens_name)
201        else:
202            new_names = _get_appended_rep_names(ls, prefix, name_list[0], ens_name)
203        new_names = sort_names(new_names)
204
205    idl = []
206
207    noffset_list = [str(x) for x in noffset_list]
208    wf_list = [str(x) for x in wf_list]
209    wf2_list = [str(x) for x in wf2_list]
210
211    # setup dict structures
212    intern = {}
213    for name, corr_type in zip(name_list, corr_type_list):
214        intern[name] = {}
215        b2b, single = _extract_corr_type(corr_type)
216        intern[name]["b2b"] = b2b
217        intern[name]["single"] = single
218        intern[name]["spec"] = {}
219        for quarks in quarks_list:
220            intern[name]["spec"][quarks] = {}
221            for off in noffset_list:
222                intern[name]["spec"][quarks][off] = {}
223                for w in wf_list:
224                    intern[name]["spec"][quarks][off][w] = {}
225                    for w2 in wf2_list:
226                        intern[name]["spec"][quarks][off][w][w2] = {}
227                        intern[name]["spec"][quarks][off][w][w2]["pattern"] = _make_pattern(version, name, off, w, w2, intern[name]['b2b'], quarks)
228
229    internal_ret_dict = {}
230    needed_keys = _lists2key(name_list, quarks_list, noffset_list, wf_list, wf2_list)
231    for key in needed_keys:
232        internal_ret_dict[key] = []
233
234    if not appended:
235        for i, item in enumerate(ls):
236            rep_path = path + '/' + item
237            if "files" in kwargs:
238                files = kwargs.get("files")
239                if isinstance(files, list):
240                    if all(isinstance(f, list) for f in files):
241                        files = files[i]
242                    elif all(isinstance(f, str) for f in files):
243                        files = files
244                    else:
245                        raise TypeError("files has to be of type list[list[str]] or list[str]!")
246                else:
247                    raise TypeError("files has to be of type list[list[str]] or list[str]!")
248
249            else:
250                files = []
251            sub_ls = _find_files(rep_path, prefix, compact, files)
252            rep_idl = []
253            no_cfg = len(sub_ls)
254            for cfg in sub_ls:
255                try:
256                    if compact:
257                        rep_idl.append(int(cfg.split(cfg_separator)[-1]))
258                    else:
259                        rep_idl.append(int(cfg[3:]))
260                except Exception:
261                    raise Exception("Couldn't parse idl from directory, problem with file " + cfg)
262            rep_idl.sort()
263            # maybe there is a better way to print the idls
264            if not silent:
265                print(item, ':', no_cfg, ' configurations')
266            idl.append(rep_idl)
267            # here we have found all the files we need to look into.
268            if i == 0:
269                if version != "0.0" and compact:
270                    file = path + '/' + item + '/' + sub_ls[0]
271                for name in name_list:
272                    if version == "0.0" or not compact:
273                        file = path + '/' + item + '/' + sub_ls[0] + '/' + name
274                    for key in _lists2key(quarks_list, noffset_list, wf_list, wf2_list):
275                        specs = _key2specs(key)
276                        quarks = specs[0]
277                        off = specs[1]
278                        w = specs[2]
279                        w2 = specs[3]
280                        # here, we want to find the place within the file,
281                        # where the correlator we need is stored.
282                        # to do so, the pattern needed is put together
283                        # from the input values
284                        start_read, T = _find_correlator(file, version, intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["pattern"], intern[name]['b2b'], silent=silent)
285                        intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["start"] = start_read
286                        intern[name]["T"] = T
287                        # preparing the datastructure
288                        # the correlators get parsed into...
289                        deltas = []
290                        for j in range(intern[name]["T"]):
291                            deltas.append([])
292                        internal_ret_dict[sep.join([name, key])] = deltas
293
294            if compact:
295                rep_deltas = _read_compact_rep(path, item, sub_ls, intern, needed_keys, im)
296                for key in needed_keys:
297                    name = _key2specs(key)[0]
298                    for t in range(intern[name]["T"]):
299                        internal_ret_dict[key][t].append(rep_deltas[key][t])
300            else:
301                for key in needed_keys:
302                    rep_data = []
303                    name = _key2specs(key)[0]
304                    for subitem in sub_ls:
305                        cfg_path = path + '/' + item + '/' + subitem
306                        file_data = _read_o_file(cfg_path, name, needed_keys, intern, version, im)
307                        rep_data.append(file_data)
309                    for t in range(intern[name]["T"]):
310                        internal_ret_dict[key][t].append([])
311                        for cfg in range(no_cfg):
312                            internal_ret_dict[key][t][i].append(rep_data[cfg][key][t])
313    else:
314        for key in needed_keys:
315            specs = _key2specs(key)
316            name = specs[0]
317            quarks = specs[1]
318            off = specs[2]
319            w = specs[3]
320            w2 = specs[4]
321            if "files" in kwargs:
322                if isinstance(kwargs.get("files"), list) and all(isinstance(f, str) for f in kwargs.get("files")):
323                    name_ls = kwargs.get("files")
324                else:
325                    raise TypeError("In append mode, files has to be of type list[str]!")
326            else:
327                name_ls = ls
328                for exc in name_ls:
329                    if not fnmatch.fnmatch(exc, prefix + '*.' + name):
330                        name_ls = list(set(name_ls) - set([exc]))
331            name_ls = sort_names(name_ls)
332            pattern = intern[name]['spec'][quarks][off][w][w2]['pattern']
333            deltas = []
334            for rep, file in enumerate(name_ls):
335                rep_idl = []
336                filename = path + '/' + file
337                T, rep_idl, rep_data = _read_append_rep(filename, pattern, intern[name]['b2b'], cfg_separator, im, intern[name]['single'])
338                if rep == 0:
339                    intern[name]['T'] = T
340                    for t in range(intern[name]['T']):
341                        deltas.append([])
342                for t in range(intern[name]['T']):
343                    deltas[t].append(rep_data[t])
344                internal_ret_dict[key] = deltas
345                if name == name_list[0]:
346                    idl.append(rep_idl)
347
348    if kwargs.get("check_configs") is not None:
349        if not silent:
350            print("Checking for missing configs...")
351        che = kwargs.get("check_configs")
352        if not (len(che) == len(idl)):
353            raise Exception("check_configs has to be the same length as replica!")
354        for r in range(len(idl)):
355            if not silent:
356                print("checking " + new_names[r])
357            check_idl(idl[r], che[r])
358        if not silent:
359            print("Done")
360
361    result_dict = {}
362    if keyed_out:
363        for key in needed_keys:
364            result = []
365            for t in range(intern[_key2specs(key)[0]]["T"]):
366                result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
367            result_dict[key] = result
368    else:
369        for name in name_list:
370            result_dict[name] = {}
371            for quarks in quarks_list:
372                result_dict[name][quarks] = {}
373                for off in noffset_list:
374                    result_dict[name][quarks][off] = {}
375                    for w in wf_list:
376                        result_dict[name][quarks][off][w] = {}
377                        for w2 in wf2_list:
378                            key = _specs2key(name, quarks, off, w, w2)
379                            result = []
380                            for t in range(intern[name]["T"]):
381                                result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
382                            result_dict[name][quarks][str(off)][str(w)][str(w2)] = result
383    return result_dict
384
385
386def _lists2key(*lists):
387    keys = []
388    for tup in itertools.product(*lists):
389        keys.append(sep.join(tup))
390    return keys
391
392
393def _key2specs(key):
394    return key.split(sep)
395
396
397def _specs2key(*specs):
398    return sep.join(specs)
399
400
401def _read_o_file(cfg_path, name, needed_keys, intern, version, im):
402    return_vals = {}
403    for key in needed_keys:
404        file = cfg_path + '/' + name
405        specs = _key2specs(key)
406        if specs[0] == name:
407            with open(file) as fp:
408                lines = fp.readlines()
409                quarks = specs[1]
410                off = specs[2]
411                w = specs[3]
412                w2 = specs[4]
413                T = intern[name]["T"]
414                start_read = intern[name]["spec"][quarks][off][w][w2]["start"]
415                deltas = []
416                for line in lines[start_read:start_read + T]:
417                    floats = list(map(float, line.split()))
418                    if version == "0.0":
419                        deltas.append(floats[im - intern[name]["single"]])
420                    else:
421                        deltas.append(floats[1 + im - intern[name]["single"]])
422                return_vals[key] = deltas
423    return return_vals
424
425
426def _extract_corr_type(corr_type):
427    if corr_type == 'bb':
428        b2b = True
429        single = True
430    elif corr_type == 'bib':
431        b2b = True
432        single = False
433    else:
434        b2b = False
435        single = False
436    return b2b, single
437
438
439def _find_files(rep_path, prefix, compact, files=[]):
440    sub_ls = []
441    if not files == []:
442        files.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
443    else:
444        for (dirpath, dirnames, filenames) in os.walk(rep_path):
445            if compact:
446                sub_ls.extend(filenames)
447            else:
448                sub_ls.extend(dirnames)
449            break
450        if compact:
451            for exc in sub_ls:
452                if not fnmatch.fnmatch(exc, prefix + '*'):
453                    sub_ls = list(set(sub_ls) - set([exc]))
454            sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
455        else:
456            for exc in sub_ls:
457                if not fnmatch.fnmatch(exc, 'cfg*'):
458                    sub_ls = list(set(sub_ls) - set([exc]))
459            sub_ls.sort(key=lambda x: int(x[3:]))
460        files = sub_ls
461    if len(files) == 0:
462        raise FileNotFoundError("Did not find files in", rep_path, "with prefix", prefix, "and the given structure.")
463    return files
464
465
466def _make_pattern(version, name, noffset, wf, wf2, b2b, quarks):
467    if version == "0.0":
468        pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
469        if b2b:
470            pattern += ", wf_2 " + str(wf2)
471        qs = quarks.split(" ")
472        pattern += " : " + qs[0] + " - " + qs[1]
473    else:
474        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
475        if b2b:
476            pattern += '\nwf_2      ' + str(wf2)
477    return pattern
478
479
480def _find_correlator(file_name, version, pattern, b2b, silent=False):
481    T = 0
482
483    with open(file_name, "r") as my_file:
484
485        content = my_file.read()
486        match = re.search(pattern, content)
487        if match:
488            if version == "0.0":
489                start_read = content.count('\n', 0, match.start()) + 1
490                T = content.count('\n', start_read)
491            else:
492                start_read = content.count('\n', 0, match.start()) + 5 + b2b
493                end_match = re.search(r'\n\s*\n', content[match.start():])
494                T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
495            if not T > 0:
496                raise ValueError("Correlator with pattern\n" + pattern + "\nis empty!")
497            if not silent:
498                print(T, 'entries, starting to read in line', start_read)
499
500        else:
501            raise ValueError('Correlator with pattern\n' + pattern + '\nnot found.')
502
503    return start_read, T
504
505
506def _read_compact_file(rep_path, cfg_file, intern, needed_keys, im):
507    return_vals = {}
508    with open(rep_path + cfg_file) as fp:
509        lines = fp.readlines()
510        for key in needed_keys:
511            keys = _key2specs(key)
512            name = keys[0]
513            quarks = keys[1]
514            off = keys[2]
515            w = keys[3]
516            w2 = keys[4]
517
518            T = intern[name]["T"]
519            start_read = intern[name]["spec"][quarks][off][w][w2]["start"]
520            # check, if the correlator is in fact
521            # printed completely
522            if (start_read + T + 1 > len(lines)):
523                raise Exception("EOF before end of correlator data! Maybe " + rep_path + cfg_file + " is corrupted?")
524            corr_lines = lines[start_read - 6: start_read + T]
525            t_vals = []
526
527            if corr_lines[1 - intern[name]["b2b"]].strip() != 'name      ' + name:
528                raise Exception('Wrong format in file', cfg_file)
529
530            for k in range(6, T + 6):
531                floats = list(map(float, corr_lines[k].split()))
532                t_vals.append(floats[-2:][im])
533            return_vals[key] = t_vals
534    return return_vals
535
536
537def _read_compact_rep(path, rep, sub_ls, intern, needed_keys, im):
538    rep_path = path + '/' + rep + '/'
539    no_cfg = len(sub_ls)
540
541    return_vals = {}
542    for key in needed_keys:
543        name = _key2specs(key)[0]
544        deltas = []
545        for t in range(intern[name]["T"]):
546            deltas.append(np.zeros(no_cfg))
547        return_vals[key] = deltas
548
549    for cfg in range(no_cfg):
550        cfg_file = sub_ls[cfg]
551        cfg_data = _read_compact_file(rep_path, cfg_file, intern, needed_keys, im)
552        for key in needed_keys:
553            name = _key2specs(key)[0]
554            for t in range(intern[name]["T"]):
555                return_vals[key][t][cfg] = cfg_data[key][t]
556    return return_vals
557
558
559def _read_chunk(chunk, gauge_line, cfg_sep, start_read, T, corr_line, b2b, pattern, im, single):
560    try:
561        idl = int(chunk[gauge_line].split(cfg_sep)[-1])
562    except Exception:
563        raise Exception("Couldn't parse idl from directory, problem with chunk around line ", gauge_line)
564
565    found_pat = ""
566    data = []
567    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
568        found_pat += li
569    if re.search(pattern, found_pat):
570        for t, line in enumerate(chunk[start_read:start_read + T]):
571            floats = list(map(float, line.split()))
572            data.append(floats[im + 1 - single])
573    return idl, data
574
575
576def _read_append_rep(filename, pattern, b2b, cfg_separator, im, single):
577    with open(filename, 'r') as fp:
578        content = fp.readlines()
579        data_starts = []
580        for linenumber, line in enumerate(content):
581            if "[run]" in line:
582                data_starts.append(linenumber)
583        if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
584            raise Exception("Irregularities in file structure found, not all runs have the same output length")
585        chunk = content[:data_starts[1]]
586        for linenumber, line in enumerate(chunk):
587            if line.startswith("gauge_name"):
588                gauge_line = linenumber
589            elif line.startswith("[correlator]"):
590                corr_line = linenumber
591                found_pat = ""
592                for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
593                    found_pat += li
594                if re.search(pattern, found_pat):
595                    start_read = corr_line + 7 + b2b
596                    break
597                else:
598                    raise ValueError("Did not find pattern\n", pattern, "\nin\n", filename)
599        endline = corr_line + 6 + b2b
600        while not chunk[endline] == "\n":
601            endline += 1
602        T = endline - start_read
603
604        # all other chunks should follow the same structure
605        rep_idl = []
606        rep_data = []
607
608        for cnfg in range(len(data_starts)):
609            start = data_starts[cnfg]
610            stop = start + data_starts[1]
611            chunk = content[start:stop]
612            idl, data = _read_chunk(chunk, gauge_line, cfg_separator, start_read, T, corr_line, b2b, pattern, im, single)
613            rep_idl.append(idl)
614            rep_data.append(data)
615
616        data = []
617
618        for t in range(T):
619            data.append([])
620            for c in range(len(rep_data)):
621                data[t].append(rep_data[c][t])
622        return T, rep_idl, data
623
624
625def _get_rep_names(ls, ens_name=None):
626    new_names = []
627    for entry in ls:
628        try:
629            idx = entry.index('r')
630        except Exception:
631            raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
632
633        if ens_name:
634            new_names.append(ens_name + '|' + entry[idx:])
635        else:
636            new_names.append(entry[:idx] + '|' + entry[idx:])
637    return new_names
638
639
640def _get_appended_rep_names(ls, prefix, name, ens_name=None):
641    new_names = []
642    for exc in ls:
643        if not fnmatch.fnmatch(exc, prefix + '*.' + name):
644            ls = list(set(ls) - set([exc]))
645    ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
646    for entry in ls:
647        myentry = entry[:-len(name) - 1]
648        try:
649            idx = myentry.index('r')
650        except Exception:
651            raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
652
653        if ens_name:
654            new_names.append(ens_name + '|' + myentry[idx:])
655        else:
656            new_names.append(myentry[:idx] + '|' + myentry[idx:])
657    return new_names
sep = '/'
def read_sfcf(path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version='1.0c', cfg_separator='n', silent=False, **kwargs):

Read sfcf files from given folder structure.

Parameters
  • path (str): Path to the sfcf files.
  • prefix (str): Prefix of the sfcf files.
  • name (str): Name of the correlation function to read.
  • quarks (str): Label of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 this does NOT need to be given with the typical " - " that is present in the output file; it is added automatically for this version
  • corr_type (str): Type of correlation function to read. Can be
    • 'bi' for boundary-inner
    • 'bb' for boundary-boundary
    • 'bib' for boundary-inner-boundary
  • noffset (int): Offset of the source (only relevant when wavefunctions are used)
  • wf (int): ID of wave function
  • wf2 (int): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
  • im (bool): if True, read imaginary instead of real part of the correlation function.
  • names (list): Alternative labeling for replicas/ensembles. Has to have one entry per replicum
  • ens_name (str): replaces the name of the ensemble
  • version (str): Version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): list of replica to be read, default is all
  • files (list): list of files to be read per replica, default is all. for non-compact output format, hand the folders to be read here.
  • check_configs (list[list[int]]): list of list of supposed configs, eg. [range(1,1000)] for one replicum with 1000 configs
Returns
  • result (list[Obs]): list of Observables with length T, one observable per timeslice. bb-type correlators have length 1.
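
A minimal usage sketch (the directory, prefix and correlator name below are placeholders, not values prescribed by the library):

    from pyerrors.input.sfcf import read_sfcf

    # Hypothetical measurement directory "./sfcf_data" with replica folders
    # starting with "data_", containing a boundary-to-inner correlator "f_A"
    # written by SFCF 2.0 with the compact output option (-c).
    f_A = read_sfcf("./sfcf_data", "data_", "f_A",
                    quarks="lquark lquark", corr_type="bi", version="2.0c")
    print(len(f_A))  # T entries, one Obs per timeslice
    print(f_A[1])    # Obs at timeslice 1
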
def read_sfcf_multi(path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version='1.0c', cfg_separator='n', silent=False, keyed_out=False, **kwargs):

Read sfcf files from given folder structure.

Parameters
  • path (str): Path to the sfcf files.
  • prefix (str): Prefix of the sfcf files.
  • name_list (list[str]): Names of the correlation functions to read.
  • quarks_list (list[str]): Labels of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 these do NOT need to be given with the typical " - " that is present in the output file; it is added automatically for this version
  • corr_type_list (list[str]): Type of correlation function to read. Can be
    • 'bi' for boundary-inner
    • 'bb' for boundary-boundary
    • 'bib' for boundary-inner-boundary
  • noffset_list (list[int]): Offset of the source (only relevant when wavefunctions are used)
  • wf_list (list[int]): IDs of the wave functions
  • wf2_list (list[int]): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
  • im (bool): if True, read imaginary instead of real part of the correlation function.
  • names (list): Alternative labeling for replicas/ensembles. Has to have one entry per replicum
  • ens_name (str): replaces the name of the ensemble
  • version (str): Version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): list of replica to be read, default is all
  • files (list[list[str]] or list[str]): list of files to be read per replica, default is all. for non-compact output format, hand the folders to be read here.
  • check_configs (list[list[int]]): list of list of supposed configs, eg. [range(1,1000)] for one replicum with 1000 configs
Returns
  • result (dict[list[Obs]]): dict with one of the following structures:
    • if keyed_out: dict[key] = list[Obs], where key has the form name/quarks/offset/wf/wf2
    • if not keyed_out: dict[name][quarks][offset][wf][wf2] = list[Obs]
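
A minimal usage sketch for the multi-correlator reader, showing how the returned dict is indexed (all paths, prefixes and labels are placeholders):

    from pyerrors.input.sfcf import read_sfcf_multi

    # Hypothetical directory "./sfcf_data" with prefix "data_" containing the
    # correlators "f_A" (boundary-inner) and "f_1" (boundary-boundary),
    # measured with SFCF 2.0 in compact output format.
    ret = read_sfcf_multi("./sfcf_data", "data_", ["f_A", "f_1"],
                          quarks_list=["lquark lquark"],
                          corr_type_list=["bi", "bb"], version="2.0c")
    f_A = ret["f_A"]["lquark lquark"]["0"]["0"]["0"]  # list[Obs] of length T
    f_1 = ret["f_1"]["lquark lquark"]["0"]["0"]["0"]  # length 1 for bb-type

    # With keyed_out=True the same data is returned under flat keys of the
    # form "name/quarks/offset/wf/wf2".
    ret_flat = read_sfcf_multi("./sfcf_data", "data_", ["f_A", "f_1"],
                               quarks_list=["lquark lquark"],
                               corr_type_list=["bi", "bb"], version="2.0c",
                               keyed_out=True)
    f_A_flat = ret_flat["f_A/lquark lquark/0/0/0"]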