pyerrors.input.sfcf

  1import os
  2import fnmatch
  3import re
  4import numpy as np
  5from ..obs import Obs
  6from .utils import sort_names, check_idl
  7import itertools
  8
  9
 10sep = "/"
 11
 12
 13def read_sfcf(path, prefix, name, quarks='.*', corr_type="bi", noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", silent=False, **kwargs):
 14    """Read sfcf files from given folder structure.
 15
 16    Parameters
 17    ----------
 18    path : str
 19        Path to the sfcf files.
 20    prefix : str
 21        Prefix of the sfcf files.
 22    name : str
 23        Name of the correlation function to read.
 24    quarks : str
 25        Label of the quarks used in the sfcf input file. e.g. "quark quark"
 26        for version 0.0 this does NOT need to be given with the typical " - "
 27        that is present in the output file,
 28        this is done automatically for this version
 29    corr_type : str
 30        Type of correlation function to read. Can be
 31        - 'bi' for boundary-inner
 32        - 'bb' for boundary-boundary
 33        - 'bib' for boundary-inner-boundary
 34    noffset : int
 35        Offset of the source (only relevant when wavefunctions are used)
 36    wf : int
 37        ID of wave function
 38    wf2 : int
 39        ID of the second wavefunction
 40        (only relevant for boundary-to-boundary correlation functions)
 41    im : bool
 42        if True, read imaginary instead of real part
 43        of the correlation function.
 44    names : list
 45        Alternative labeling for replicas/ensembles.
 46        Has to have the appropriate length
 47    ens_name : str
 48        replaces the name of the ensemble
 49    version: str
 50        version of SFCF, with which the measurement was done.
 51        if the compact output option (-c) was specified,
 52        append a "c" to the version (e.g. "1.0c")
 53        if the append output option (-a) was specified,
 54        append an "a" to the version
 55    cfg_separator : str
 56        String that separates the ensemble identifier from the configuration number (default 'n').
 57    replica: list
 58        list of replica to be read, default is all
 59    files: list
 60        list of files to be read per replica, default is all.
 61        for non-compact output format, hand the folders to be read here.
 62    check_configs: list[list[int]]
 63        list of list of supposed configs, eg. [range(1,1000)]
 64        for one replicum with 1000 configs
 65
 66    Returns
 67    -------
 68    result: list[Obs]
 69        list of Observables with length T, observable per timeslice.
 70        bb-type correlators have length 1.
 71    """
 72    ret = read_sfcf_multi(path, prefix, [name], quarks_list=[quarks], corr_type_list=[corr_type],
 73                          noffset_list=[noffset], wf_list=[wf], wf2_list=[wf2], version=version,
 74                          cfg_separator=cfg_separator, silent=silent, **kwargs)
 75    return ret[name][quarks][str(noffset)][str(wf)][str(wf2)]
 76
 77
 78def read_sfcf_multi(path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version="1.0c", cfg_separator="n", silent=False, keyed_out=False, **kwargs):
 79    """Read sfcf files from given folder structure.
 80
 81    Parameters
 82    ----------
 83    path : str
 84        Path to the sfcf files.
 85    prefix : str
 86        Prefix of the sfcf files.
 87    name_list : list[str]
 88        Names of the correlation functions to read.
 89    quarks_list : list[str]
 90        Label of the quarks used in the sfcf input file. e.g. "quark quark"
 91        for version 0.0 this does NOT need to be given with the typical " - "
 92        that is present in the output file,
 93        this is done automatically for this version
 94    corr_type_list : list[str]
 95        Type of correlation function to read. Can be
 96        - 'bi' for boundary-inner
 97        - 'bb' for boundary-boundary
 98        - 'bib' for boundary-inner-boundary
 99    noffset_list : list[int]
100        Offset of the source (only relevant when wavefunctions are used)
101    wf_list : list[int]
102        IDs of the wave functions
103    wf2_list : list[int]
104        ID of the second wavefunction
105        (only relevant for boundary-to-boundary correlation functions)
106    im : bool
107        if True, read imaginary instead of real part
108        of the correlation function.
109    names : list
110        Alternative labeling for replicas/ensembles.
111        Has to have the appropriate length
112    ens_name : str
113        replaces the name of the ensemble
114    version: str
115        version of SFCF, with which the measurement was done.
116        if the compact output option (-c) was specified,
117        append a "c" to the version (e.g. "1.0c")
118        if the append output option (-a) was specified,
119        append an "a" to the version
120    cfg_separator : str
121        String that separates the ensemble identifier from the configuration number (default 'n').
122    replica: list
123        list of replica to be read, default is all
124    files: list[list[str]]
125        list of files to be read per replica, default is all.
126        for non-compact output format, hand the folders to be read here.
127    check_configs: list[list[int]]
128        list of list of supposed configs, eg. [range(1,1000)]
129        for one replicum with 1000 configs
130
131    Returns
132    -------
133    result: dict[list[Obs]]
134        dict with one of the following properties:
135        if keyed_out:
136            dict[key] = list[Obs]
137            where key has the form name/quarks/offset/wf/wf2
138        if not keyed_out:
139            dict[name][quarks][offset][wf][wf2] = list[Obs]
140    """
141
142    if kwargs.get('im'):
143        im = 1
144        part = 'imaginary'
145    else:
146        im = 0
147        part = 'real'
148
149    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
150
151    if version not in known_versions:
152        raise Exception("This version is not known!")
153    if (version[-1] == "c"):
154        appended = False
155        compact = True
156        version = version[:-1]
157    elif (version[-1] == "a"):
158        appended = True
159        compact = False
160        version = version[:-1]
161    else:
162        compact = False
163        appended = False
164    ls = []
165    if "replica" in kwargs:
166        ls = kwargs.get("replica")
167    else:
168        for (dirpath, dirnames, filenames) in os.walk(path):
169            if not appended:
170                ls.extend(dirnames)
171            else:
172                ls.extend(filenames)
173            break
174        if not ls:
175            raise Exception('Error, directory not found')
176        # Exclude folders with different names
177        for exc in ls:
178            if not fnmatch.fnmatch(exc, prefix + '*'):
179                ls = list(set(ls) - set([exc]))
180
181    if not appended:
182        ls = sort_names(ls)
183        replica = len(ls)
184
185    else:
186        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
187    if replica == 0:
188        raise Exception('No replica found in directory')
189    if not silent:
190        print('Read', part, 'part of', name_list, 'from', prefix[:-1], ',', replica, 'replica')
191
192    if 'names' in kwargs:
193        new_names = kwargs.get('names')
194        if len(new_names) != len(set(new_names)):
195            raise Exception("names are not unique!")
196        if len(new_names) != replica:
197            raise Exception('names should have the length', replica)
198
199    else:
200        ens_name = kwargs.get("ens_name")
201        if not appended:
202            new_names = _get_rep_names(ls, ens_name)
203        else:
204            new_names = _get_appended_rep_names(ls, prefix, name_list[0], ens_name)
205        new_names = sort_names(new_names)
206
207    idl = []
208
209    noffset_list = [str(x) for x in noffset_list]
210    wf_list = [str(x) for x in wf_list]
211    wf2_list = [str(x) for x in wf2_list]
212
213    # setup dict structures
214    intern = {}
215    for name, corr_type in zip(name_list, corr_type_list):
216        intern[name] = {}
217        b2b, single = _extract_corr_type(corr_type)
218        intern[name]["b2b"] = b2b
219        intern[name]["single"] = single
220        intern[name]["spec"] = {}
221        for quarks in quarks_list:
222            intern[name]["spec"][quarks] = {}
223            for off in noffset_list:
224                intern[name]["spec"][quarks][off] = {}
225                for w in wf_list:
226                    intern[name]["spec"][quarks][off][w] = {}
227                    for w2 in wf2_list:
228                        intern[name]["spec"][quarks][off][w][w2] = {}
229                        intern[name]["spec"][quarks][off][w][w2]["pattern"] = _make_pattern(version, name, off, w, w2, intern[name]['b2b'], quarks)
230
231    internal_ret_dict = {}
232    needed_keys = _lists2key(name_list, quarks_list, noffset_list, wf_list, wf2_list)
233    for key in needed_keys:
234        internal_ret_dict[key] = []
235
236    if not appended:
237        for i, item in enumerate(ls):
238            rep_path = path + '/' + item
239            if "files" in kwargs:
240                files = kwargs.get("files")
241                if isinstance(files, list):
242                    if all(isinstance(f, list) for f in files):
243                        files = files[i]
244                    elif all(isinstance(f, str) for f in files):
245                        files = files
246                    else:
247                        raise TypeError("files has to be of type list[list[str]] or list[str]!")
248                else:
249                    raise TypeError("files has to be of type list[list[str]] or list[str]!")
250
251            else:
252                files = []
253            sub_ls = _find_files(rep_path, prefix, compact, files)
254            rep_idl = []
255            no_cfg = len(sub_ls)
256            for cfg in sub_ls:
257                try:
258                    if compact:
259                        rep_idl.append(int(cfg.split(cfg_separator)[-1]))
260                    else:
261                        rep_idl.append(int(cfg[3:]))
262                except Exception:
263                    raise Exception("Couldn't parse idl from directory, problem with file " + cfg)
264            rep_idl.sort()
265            # maybe there is a better way to print the idls
266            if not silent:
267                print(item, ':', no_cfg, ' configurations')
268            idl.append(rep_idl)
269            # here we have found all the files we need to look into.
270            if i == 0:
271                if version != "0.0" and compact:
272                    file = path + '/' + item + '/' + sub_ls[0]
273                for name in name_list:
274                    if version == "0.0" or not compact:
275                        file = path + '/' + item + '/' + sub_ls[0] + '/' + name
276                    for key in _lists2key(quarks_list, noffset_list, wf_list, wf2_list):
277                        specs = _key2specs(key)
278                        quarks = specs[0]
279                        off = specs[1]
280                        w = specs[2]
281                        w2 = specs[3]
282                        # here, we want to find the place within the file,
283                        # where the correlator we need is stored.
284                        # to do so, the pattern needed is put together
285                        # from the input values
286                        start_read, T = _find_correlator(file, version, intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["pattern"], intern[name]['b2b'], silent=silent)
287                        intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["start"] = start_read
288                        intern[name]["T"] = T
289                        # preparing the datastructure
290                        # the correlators get parsed into...
291                        deltas = []
292                        for j in range(intern[name]["T"]):
293                            deltas.append([])
294                        internal_ret_dict[sep.join([name, key])] = deltas
295
296            if compact:
297                rep_deltas = _read_compact_rep(path, item, sub_ls, intern, needed_keys, im)
298                for key in needed_keys:
299                    name = _key2specs(key)[0]
300                    for t in range(intern[name]["T"]):
301                        internal_ret_dict[key][t].append(rep_deltas[key][t])
302            else:
303                for key in needed_keys:
304                    rep_data = []
305                    name = _key2specs(key)[0]
306                    for subitem in sub_ls:
307                        cfg_path = path + '/' + item + '/' + subitem
308                        file_data = _read_o_file(cfg_path, name, needed_keys, intern, version, im)
309                        rep_data.append(file_data)
311                    for t in range(intern[name]["T"]):
312                        internal_ret_dict[key][t].append([])
313                        for cfg in range(no_cfg):
314                            internal_ret_dict[key][t][i].append(rep_data[cfg][key][t])
315    else:
316        for key in needed_keys:
317            specs = _key2specs(key)
318            name = specs[0]
319            quarks = specs[1]
320            off = specs[2]
321            w = specs[3]
322            w2 = specs[4]
323            if "files" in kwargs:
324                if isinstance(kwargs.get("files"), list) and all(isinstance(f, str) for f in kwargs.get("files")):
325                    name_ls = kwargs.get("files")
326                else:
327                    raise TypeError("In append mode, files has to be of type list[str]!")
328            else:
329                name_ls = ls
330                for exc in name_ls:
331                    if not fnmatch.fnmatch(exc, prefix + '*.' + name):
332                        name_ls = list(set(name_ls) - set([exc]))
333            name_ls = sort_names(name_ls)
334            pattern = intern[name]['spec'][quarks][off][w][w2]['pattern']
335            deltas = []
336            for rep, file in enumerate(name_ls):
337                rep_idl = []
338                filename = path + '/' + file
339                T, rep_idl, rep_data = _read_append_rep(filename, pattern, intern[name]['b2b'], cfg_separator, im, intern[name]['single'])
340                if rep == 0:
341                    intern[name]['T'] = T
342                    for t in range(intern[name]['T']):
343                        deltas.append([])
344                for t in range(intern[name]['T']):
345                    deltas[t].append(rep_data[t])
346                internal_ret_dict[key] = deltas
347                if name == name_list[0]:
348                    idl.append(rep_idl)
349
350    if "check_configs" in kwargs:
351        if not silent:
352            print("Checking for missing configs...")
353        che = kwargs.get("check_configs")
354        if not (len(che) == len(idl)):
355            raise Exception("check_configs has to be the same length as replica!")
356        for r in range(len(idl)):
357            if not silent:
358                print("checking " + new_names[r])
359            check_idl(idl[r], che[r])
360        if not silent:
361            print("Done")
362
363    result_dict = {}
364    if keyed_out:
365        for key in needed_keys:
366            result = []
367            for t in range(intern[_key2specs(key)[0]]["T"]):
368                result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
369            result_dict[key] = result
370    else:
371        for name in name_list:
372            result_dict[name] = {}
373            for quarks in quarks_list:
374                result_dict[name][quarks] = {}
375                for off in noffset_list:
376                    result_dict[name][quarks][off] = {}
377                    for w in wf_list:
378                        result_dict[name][quarks][off][w] = {}
379                        for w2 in wf2_list:
380                            key = _specs2key(name, quarks, off, w, w2)
381                            result = []
382                            for t in range(intern[name]["T"]):
383                                result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
384                            result_dict[name][quarks][str(off)][str(w)][str(w2)] = result
385    return result_dict
386
387
388def _lists2key(*lists):
389    keys = []
390    for tup in itertools.product(*lists):
391        keys.append(sep.join(tup))
392    return keys
393
394
395def _key2specs(key):
396    return key.split(sep)
397
398
399def _specs2key(*specs):
400    return sep.join(specs)
401
402
403def _read_o_file(cfg_path, name, needed_keys, intern, version, im):
404    return_vals = {}
405    for key in needed_keys:
406        file = cfg_path + '/' + name
407        specs = _key2specs(key)
408        if specs[0] == name:
409            with open(file) as fp:
410                lines = fp.readlines()
411                quarks = specs[1]
412                off = specs[2]
413                w = specs[3]
414                w2 = specs[4]
415                T = intern[name]["T"]
416                start_read = intern[name]["spec"][quarks][off][w][w2]["start"]
417                deltas = []
418                for line in lines[start_read:start_read + T]:
419                    floats = list(map(float, line.split()))
420                    if version == "0.0":
421                        deltas.append(floats[im - intern[name]["single"]])
422                    else:
423                        deltas.append(floats[1 + im - intern[name]["single"]])
424                return_vals[key] = deltas
425    return return_vals
426
427
428def _extract_corr_type(corr_type):
429    if corr_type == 'bb':
430        b2b = True
431        single = True
432    elif corr_type == 'bib':
433        b2b = True
434        single = False
435    else:
436        b2b = False
437        single = False
438    return b2b, single
439
440
441def _find_files(rep_path, prefix, compact, files=[]):
442    sub_ls = []
443    if not files == []:
444        files.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
445    else:
446        for (dirpath, dirnames, filenames) in os.walk(rep_path):
447            if compact:
448                sub_ls.extend(filenames)
449            else:
450                sub_ls.extend(dirnames)
451            break
452        if compact:
453            for exc in sub_ls:
454                if not fnmatch.fnmatch(exc, prefix + '*'):
455                    sub_ls = list(set(sub_ls) - set([exc]))
456            sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
457        else:
458            for exc in sub_ls:
459                if not fnmatch.fnmatch(exc, 'cfg*'):
460                    sub_ls = list(set(sub_ls) - set([exc]))
461            sub_ls.sort(key=lambda x: int(x[3:]))
462        files = sub_ls
463    if len(files) == 0:
464        raise FileNotFoundError("Did not find files in", rep_path, "with prefix", prefix, "and the given structure.")
465    return files
466
467
468def _make_pattern(version, name, noffset, wf, wf2, b2b, quarks):
469    if version == "0.0":
470        pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
471        if b2b:
472            pattern += ", wf_2 " + str(wf2)
473        qs = quarks.split(" ")
474        pattern += " : " + qs[0] + " - " + qs[1]
475    else:
476        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
477        if b2b:
478            pattern += '\nwf_2      ' + str(wf2)
479    return pattern
480
481
482def _find_correlator(file_name, version, pattern, b2b, silent=False):
483    T = 0
484
485    with open(file_name, "r") as my_file:
486
487        content = my_file.read()
488        match = re.search(pattern, content)
489        if match:
490            if version == "0.0":
491                start_read = content.count('\n', 0, match.start()) + 1
492                T = content.count('\n', start_read)
493            else:
494                start_read = content.count('\n', 0, match.start()) + 5 + b2b
495                end_match = re.search(r'\n\s*\n', content[match.start():])
496                T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
497            if not T > 0:
498                raise ValueError("Correlator with pattern\n" + pattern + "\nis empty!")
499            if not silent:
500                print(T, 'entries, starting to read in line', start_read)
501
502        else:
503            raise ValueError('Correlator with pattern\n' + pattern + '\nnot found.')
504
505    return start_read, T
506
507
508def _read_compact_file(rep_path, cfg_file, intern, needed_keys, im):
509    return_vals = {}
510    with open(rep_path + cfg_file) as fp:
511        lines = fp.readlines()
512        for key in needed_keys:
513            keys = _key2specs(key)
514            name = keys[0]
515            quarks = keys[1]
516            off = keys[2]
517            w = keys[3]
518            w2 = keys[4]
519
520            T = intern[name]["T"]
521            start_read = intern[name]["spec"][quarks][off][w][w2]["start"]
522            # check, if the correlator is in fact
523            # printed completely
524            if (start_read + T + 1 > len(lines)):
525                raise Exception("EOF before end of correlator data! Maybe " + rep_path + cfg_file + " is corrupted?")
526            corr_lines = lines[start_read - 6: start_read + T]
527            t_vals = []
528
529            if corr_lines[1 - intern[name]["b2b"]].strip() != 'name      ' + name:
530                raise Exception('Wrong format in file', cfg_file)
531
532            for k in range(6, T + 6):
533                floats = list(map(float, corr_lines[k].split()))
534                t_vals.append(floats[-2:][im])
535            return_vals[key] = t_vals
536    return return_vals
537
538
539def _read_compact_rep(path, rep, sub_ls, intern, needed_keys, im):
540    rep_path = path + '/' + rep + '/'
541    no_cfg = len(sub_ls)
542
543    return_vals = {}
544    for key in needed_keys:
545        name = _key2specs(key)[0]
546        deltas = []
547        for t in range(intern[name]["T"]):
548            deltas.append(np.zeros(no_cfg))
549        return_vals[key] = deltas
550
551    for cfg in range(no_cfg):
552        cfg_file = sub_ls[cfg]
553        cfg_data = _read_compact_file(rep_path, cfg_file, intern, needed_keys, im)
554        for key in needed_keys:
555            name = _key2specs(key)[0]
556            for t in range(intern[name]["T"]):
557                return_vals[key][t][cfg] = cfg_data[key][t]
558    return return_vals
559
560
561def _read_chunk(chunk, gauge_line, cfg_sep, start_read, T, corr_line, b2b, pattern, im, single):
562    try:
563        idl = int(chunk[gauge_line].split(cfg_sep)[-1])
564    except Exception:
565        raise Exception("Couldn't parse idl from directory, problem with chunk around line ", gauge_line)
566
567    found_pat = ""
568    data = []
569    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
570        found_pat += li
571    if re.search(pattern, found_pat):
572        for t, line in enumerate(chunk[start_read:start_read + T]):
573            floats = list(map(float, line.split()))
574            data.append(floats[im + 1 - single])
575    return idl, data
576
577
578def _read_append_rep(filename, pattern, b2b, cfg_separator, im, single):
579    with open(filename, 'r') as fp:
580        content = fp.readlines()
581        data_starts = []
582        for linenumber, line in enumerate(content):
583            if "[run]" in line:
584                data_starts.append(linenumber)
585        if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
586            raise Exception("Irregularities in file structure found, not all runs have the same output length")
587        chunk = content[:data_starts[1]]
588        for linenumber, line in enumerate(chunk):
589            if line.startswith("gauge_name"):
590                gauge_line = linenumber
591            elif line.startswith("[correlator]"):
592                corr_line = linenumber
593                found_pat = ""
594                for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
595                    found_pat += li
596                if re.search(pattern, found_pat):
597                    start_read = corr_line + 7 + b2b
598                    break
599                else:
600                    raise ValueError("Did not find pattern\n", pattern, "\nin\n", filename)
601        endline = corr_line + 6 + b2b
602        while not chunk[endline] == "\n":
603            endline += 1
604        T = endline - start_read
605
606        # all other chunks should follow the same structure
607        rep_idl = []
608        rep_data = []
609
610        for cnfg in range(len(data_starts)):
611            start = data_starts[cnfg]
612            stop = start + data_starts[1]
613            chunk = content[start:stop]
614            idl, data = _read_chunk(chunk, gauge_line, cfg_separator, start_read, T, corr_line, b2b, pattern, im, single)
615            rep_idl.append(idl)
616            rep_data.append(data)
617
618        data = []
619
620        for t in range(T):
621            data.append([])
622            for c in range(len(rep_data)):
623                data[t].append(rep_data[c][t])
624        return T, rep_idl, data
625
626
627def _get_rep_names(ls, ens_name=None):
628    new_names = []
629    for entry in ls:
630        try:
631            idx = entry.index('r')
632        except Exception:
633            raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
634
635        if ens_name:
636            new_names.append(ens_name + '|' + entry[idx:])
637        else:
638            new_names.append(entry[:idx] + '|' + entry[idx:])
639    return new_names
640
641
642def _get_appended_rep_names(ls, prefix, name, ens_name=None):
643    new_names = []
644    for exc in ls:
645        if not fnmatch.fnmatch(exc, prefix + '*.' + name):
646            ls = list(set(ls) - set([exc]))
647    ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
648    for entry in ls:
649        myentry = entry[:-len(name) - 1]
650        try:
651            idx = myentry.index('r')
652        except Exception:
653            raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
654
655        if ens_name:
656            new_names.append(ens_name + '|' + entry[idx:])
657        else:
658            new_names.append(myentry[:idx] + '|' + myentry[idx:])
659    return new_names
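
The flat keys used internally by read_sfcf_multi are built by the private helpers _lists2key and _specs2key defined above, joined with sep. A short illustration with hypothetical correlator and quark names:

from pyerrors.input.sfcf import _lists2key, _specs2key

_specs2key("f_A", "lquark lquark", "0", "0", "0")
# -> 'f_A/lquark lquark/0/0/0'

_lists2key(["f_A", "f_P"], ["lquark lquark"], ["0"], ["0"], ["0"])
# -> ['f_A/lquark lquark/0/0/0', 'f_P/lquark lquark/0/0/0']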
sep = '/'
def read_sfcf( path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version='1.0c', cfg_separator='n', silent=False, **kwargs):

Read sfcf files from given folder structure.

Parameters
  • path (str): Path to the sfcf files.
  • prefix (str): Prefix of the sfcf files.
  • name (str): Name of the correlation function to read.
  • quarks (str): Label of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 this does NOT need to be given with the typical " - " that is present in the output file; this is done automatically for this version.
  • corr_type (str): Type of correlation function to read. Can be
    • 'bi' for boundary-inner
    • 'bb' for boundary-boundary
    • 'bib' for boundary-inner-boundary
  • noffset (int): Offset of the source (only relevant when wavefunctions are used)
  • wf (int): ID of wave function
  • wf2 (int): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
  • im (bool): if True, read imaginary instead of real part of the correlation function.
  • names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length
  • ens_name (str): replaces the name of the ensemble
  • version (str): Version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version.
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): list of replica to be read, default is all
  • files (list): List of files to be read per replica, default is all. For the non-compact output format, hand the folders to be read here.
  • check_configs (list[list[int]]): list of list of supposed configs, eg. [range(1,1000)] for one replicum with 1000 configs
Returns
  • result (list[Obs]): list of Observables with length T, observable per timeslice. bb-type correlators have length 1.
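
A minimal usage sketch; the data directory, prefix, correlator name and quark label below are hypothetical and have to match the actual SFCF output (compact format "1.0c" assumed):

import pyerrors.input.sfcf as sfcf

# assumed layout: ./data/sfcf_run_r0/, ./data/sfcf_run_r1/, ... containing compact output files
f_A = sfcf.read_sfcf("./data", "sfcf_run_", "f_A",
                     quarks="lquark lquark", corr_type="bi",
                     noffset=0, wf=0, version="1.0c",
                     cfg_separator="n", silent=True)
# f_A is a list[Obs] of length T, one observable per timeslice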
def read_sfcf_multi( path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version='1.0c', cfg_separator='n', silent=False, keyed_out=False, **kwargs):

Read sfcf files from given folder structure.

Parameters
  • path (str): Path to the sfcf files.
  • prefix (str): Prefix of the sfcf files.
  • name_list (list[str]): Names of the correlation functions to read.
  • quarks_list (list[str]): Labels of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 these do NOT need to be given with the typical " - " that is present in the output file; this is done automatically for this version.
  • corr_type_list (list[str]): Type of correlation function to read. Can be
    • 'bi' for boundary-inner
    • 'bb' for boundary-boundary
    • 'bib' for boundary-inner-boundary
  • noffset_list (list[int]): Offset of the source (only relevant when wavefunctions are used)
  • wf_list (list[int]): IDs of the wave functions
  • wf2_list (list[int]): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
  • im (bool): if True, read imaginary instead of real part of the correlation function.
  • names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length
  • ens_name (str): replaces the name of the ensemble
  • version (str): Version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version.
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): list of replica to be read, default is all
  • files (list[list[str]]): List of files to be read per replica, default is all. For the non-compact output format, hand the folders to be read here.
  • check_configs (list[list[int]]): list of list of supposed configs, eg. [range(1,1000)] for one replicum with 1000 configs
Returns
  • result (dict[list[Obs]]): dict with one of the following properties:
    • if keyed_out: dict[key] = list[Obs], where key has the form name/quarks/offset/wf/wf2
    • if not keyed_out: dict[name][quarks][offset][wf][wf2] = list[Obs]
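
A sketch of reading several correlators in one call and accessing the nested result dict; the directory, prefix, correlator names and quark label are illustrative only:

import pyerrors.input.sfcf as sfcf

res = sfcf.read_sfcf_multi("./data", "sfcf_run_", ["f_A", "f_P"],
                           quarks_list=["lquark lquark"],
                           corr_type_list=["bi", "bi"],
                           noffset_list=[0], wf_list=[0], wf2_list=[0],
                           version="1.0c", silent=True)

# nested access: result_dict[name][quarks][offset][wf][wf2] -> list[Obs]
# (offset and wave-function indices are string keys)
f_P = res["f_P"]["lquark lquark"]["0"]["0"]["0"]

# with keyed_out=True the same data is returned under flat keys,
# e.g. res["f_P/lquark lquark/0/0/0"]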