diff --git a/pyerrors/input/sfcf.py b/pyerrors/input/sfcf.py index 706e26a9..5915c56e 100644 --- a/pyerrors/input/sfcf.py +++ b/pyerrors/input/sfcf.py @@ -8,11 +8,11 @@ import numpy as np # Thinly-wrapped numpy from ..obs import Obs -def read_sfcf(path, prefix, name, **kwargs): - """Read sfcf C format from given folder structure. +def read_sfcf_old(path, prefix, name, quarks, noffset = 0, wf=0, wf2=0, **kwargs): + """Read sfcf format (from around 2012) from given folder structure. - Parameters - ---------- + Keyword arguments + ----------------- im -- if True, read imaginary instead of real part of the correlation function. single -- if True, read a boundary-to-boundary correlation function with a single value b2b -- if True, read a time-dependent boundary-to-boundary correlation function @@ -24,17 +24,13 @@ def read_sfcf(path, prefix, name, **kwargs): else: im = 0 part = 'real' - - if kwargs.get('single'): - b2b = 1 - single = 1 - else: - b2b = 0 - single = 0 + + b2b = 0 if kwargs.get('b2b'): b2b = 1 - + + quarks = quarks.split(" ") read = 0 T = 0 start = 0 @@ -43,7 +39,8 @@ def read_sfcf(path, prefix, name, **kwargs): ls.extend(dirnames) break if not ls: - raise Exception('Error, directory not found') + print('Error, directory not found') + #sys.exit() for exc in ls: if fnmatch.fnmatch(exc, prefix + '*'): ls = list(set(ls) - set(exc)) @@ -56,17 +53,12 @@ def read_sfcf(path, prefix, name, **kwargs): if len(new_names) != replica: raise Exception('Names does not have the required length', replica) else: - # Adjust replica names to new bookmarking system - new_names = [] - for entry in ls: - idx = entry.index('r') - new_names.append(entry[:idx] + '|' + entry[idx:]) - + new_names = ls print(replica, 'replica') for i, item in enumerate(ls): print(item) sub_ls = [] - for (dirpath, dirnames, filenames) in os.walk(path + '/' + item): + for (dirpath, dirnames, filenames) in os.walk(path+'/'+item): sub_ls.extend(dirnames) break for exc in sub_ls: @@ -75,18 +67,25 @@ def read_sfcf(path, prefix, name, **kwargs): sub_ls.sort(key=lambda x: int(x[3:])) no_cfg = len(sub_ls) print(no_cfg, 'configurations') - if i == 0: with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp: for k, line in enumerate(fp): + #check if this is really the right file + pattern = "# "+name+" : offset "+str(noffset)+", wf "+"0" + #if b2b, a second wf is needed + if b2b: + pattern+=", wf_2 "+"0" + pattern+=" : "+quarks[0]+" - "+quarks[1] + if read == 1 and not line.strip() and k > start + 1: break if read == 1 and k >= start: T += 1 - if '[correlator]' in line: + if pattern in line: + #print(line) read = 1 - start = k + 7 + b2b - T -= b2b + start = k+1 + print(str(T)+" entries found.") deltas = [] for j in range(T): @@ -97,11 +96,12 @@ def read_sfcf(path, prefix, name, **kwargs): deltas[j].append(np.zeros(sublength)) for cnfg, subitem in enumerate(sub_ls): - with open(path + '/' + item + '/' + subitem + '/' + name) as fp: + with open(path + '/' + item + '/' + subitem + '/'+name) as fp: for k, line in enumerate(fp): if(k >= start and k < start + T): floats = list(map(float, line.split())) - deltas[k - start][i][cnfg] = floats[1 + im - single] + deltas[k-start][i][cnfg] = floats[im] + result = [] for t in range(T): @@ -110,7 +110,7 @@ def read_sfcf(path, prefix, name, **kwargs): return result -def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs): +def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs): """Read sfcf c format from given folder structure. Parameters @@ -121,11 +121,11 @@ def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwarg wf2 -- ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions) im -- if True, read imaginary instead of real part of the correlation function. b2b -- if True, read a time-dependent boundary-to-boundary correlation function + single -- if True, read time independent boundary to boundary correlation function names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length ens_name : str replaces the name of the ensemble """ - if kwargs.get('im'): im = 1 part = 'imaginary' @@ -133,12 +133,38 @@ def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwarg im = 0 part = 'real' - if kwargs.get('b2b'): + if kwargs.get('single'): b2b = 1 + single = 1 else: - b2b = 0 + if kwargs.get('b2b'): + b2b = 1 + else: + b2b = 0 + single = 0 + files = [] + if "files" in kwargs: + files = kwargs.get("files") + + #due to higher usage in current projects, compact file format is default + compact = True + #get version string + version = "1.0" + known_versions = ["0.0","1.0","2.0","1.0c","2.0c"] + if "version" in kwargs: + version = kwargs.get("version") + if not version in known_versions: + raise Exception("This version is not known!") + #if the letter c is appended to the version, the compact fileformat is used (former read_sfcf_c) + if(version[-1] == "c"): + compact = True + version = version[:-1] + else: + compact = False + read = 0 T = 0 + start = 0 ls = [] for (dirpath, dirnames, filenames) in os.walk(path): ls.extend(dirnames) @@ -146,12 +172,17 @@ def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwarg if not ls: raise Exception('Error, directory not found') # Exclude folders with different names - for exc in ls: - if not fnmatch.fnmatch(exc, prefix + '*'): - ls = list(set(ls) - set([exc])) + if len(files) != 0: + ls = files + else: + for exc in ls: + if not fnmatch.fnmatch(exc, prefix + '*'): + ls = list(set(ls) - set([exc])) if len(ls) > 1: ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc. replica = len(ls) + print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica') + if 'names' in kwargs: new_names = kwargs.get('names') if len(new_names) != replica: @@ -160,52 +191,99 @@ def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwarg # Adjust replica names to new bookmarking system new_names = [] for entry in ls: - idx = entry.index('r') + try: + idx = entry.index('r') + except: + idx = len(entry)-2 if 'ens_name' in kwargs: new_names.append(kwargs.get('ens_name') + '|' + entry[idx:]) else: new_names.append(entry[:idx] + '|' + entry[idx:]) - - print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica') for i, item in enumerate(ls): sub_ls = [] for (dirpath, dirnames, filenames) in os.walk(path + '/' + item): - sub_ls.extend(filenames) + if compact: + sub_ls.extend(filenames) + else: + sub_ls.extend(dirnames) break - for exc in sub_ls: - if not fnmatch.fnmatch(exc, prefix + '*'): - sub_ls = list(set(sub_ls) - set([exc])) - sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + + #print(sub_ls) + for exc in sub_ls: + if compact: + if not fnmatch.fnmatch(exc, prefix + '*'): + sub_ls = list(set(sub_ls) - set([exc])) + sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + else: + if not fnmatch.fnmatch(exc, 'cfg*'): + sub_ls = list(set(sub_ls) - set([exc])) + sub_ls.sort(key=lambda x: int(x[3:])) + + if compact: + first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1]) - first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1]) + last_cfg = len(sub_ls) + first_cfg - 1 - last_cfg = len(sub_ls) + first_cfg - 1 + for cfg in range(1, len(sub_ls)): + if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg: + last_cfg = cfg + first_cfg - 1 + break - for cfg in range(1, len(sub_ls)): - if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg: - last_cfg = cfg + first_cfg - 1 - break - - no_cfg = last_cfg - first_cfg + 1 - print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n') + no_cfg = last_cfg - first_cfg + 1 + print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n') + else: + no_cfg = len(sub_ls) + print(no_cfg, 'configurations') + #here we have found all the files we need to look into. if i == 0: - pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) - if b2b: - pattern += '\nwf_2 ' + str(wf2) + if compact: + + pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) + if b2b: + pattern += '\nwf_2 ' + str(wf2) - with open(path + '/' + item + '/' + sub_ls[0], 'r') as file: - content = file.read() - match = re.search(pattern, content) - if match: - start_read = content.count('\n', 0, match.start()) + 5 + b2b - end_match = re.search(r'\n\s*\n', content[match.start():]) - T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b - assert T > 0 - print(T, 'entries, starting to read in line', start_read) - else: - raise Exception('Correlator with pattern\n' + pattern + '\nnot found.') + with open(path + '/' + item + '/' + sub_ls[0], 'r') as file: + content = file.read() + match = re.search(pattern, content) + if match: + start_read = content.count('\n', 0, match.start()) + 5 + b2b + end_match = re.search(r'\n\s*\n', content[match.start():]) + T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b + assert T > 0 + print(T, 'entries, starting to read in line', start_read) + else: + raise Exception('Correlator with pattern\n' + pattern + '\nnot found.') + else: + #print(path + '/' + item + '/')# + sub_ls[0] + '/' + name) + with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp: + for k, line in enumerate(fp): + if version == "0.0": + #check if this is really the right file + pattern = "# "+name+" : offset "+str(noffset)+", wf "+str(wf) + #if b2b, a second wf is needed + if b2b: + pattern+=", wf_2 "+str(wf2) + qs = quarks.split(" ") + pattern+=" : "+qs[0]+" - "+qs[1] + #print(pattern) + if read == 1 and not line.strip() and k > start + 1: + break + if read == 1 and k >= start: + T += 1 + if version == "0.0": + if pattern in line: + #print(line) + read = 1 + start = k+1 + else: + if '[correlator]' in line: + read = 1 + start = k + 7 + b2b + T -= b2b + print(str(T)+" entries found.") + #we found where the correlator that is to be read is in the files deltas = [] for j in range(T): deltas.append([]) @@ -213,16 +291,30 @@ def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwarg sublength = no_cfg for j in range(T): deltas[j].append(np.zeros(sublength)) + if compact: + for cfg in range(no_cfg): + with open(path + '/' + item + '/' + sub_ls[cfg]) as fp: + lines = fp.readlines() + if(start_read + T>len(lines)): + raise Exception("EOF before end of correlator data! Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" is corrupted?") + for k in range(start_read - 6,start_read + T): + if k == start_read - 5 - b2b: + if lines[k].strip() != 'name ' + name: + raise Exception('Wrong format', sub_ls[cfg]) + if(k >= start_read and k < start_read + T): + floats = list(map(float, lines[k].split())) + deltas[k - start_read][i][cfg] = floats[-2:][im] + else: + for cnfg, subitem in enumerate(sub_ls): + with open(path + '/' + item + '/' + subitem + '/' + name) as fp: + for k, line in enumerate(fp): + if(k >= start and k < start + T): + floats = list(map(float, line.split())) + if version == "0.0": + deltas[k-start][i][cnfg] = floats[im] + else: + deltas[k - start][i][cnfg] = floats[1 + im - single] - for cfg in range(no_cfg): - with open(path + '/' + item + '/' + sub_ls[cfg]) as fp: - for k, line in enumerate(fp): - if k == start_read - 5 - b2b: - if line.strip() != 'name ' + name: - raise Exception('Wrong format', sub_ls[cfg]) - if(k >= start_read and k < start_read + T): - floats = list(map(float, line.split())) - deltas[k - start_read][i][cfg] = floats[-2:][im] result = [] for t in range(T):