Initial public release

This commit is contained in:
fjosw 2020-10-13 16:53:00 +02:00
commit d9b2077d2c
24 changed files with 6794 additions and 0 deletions

View file

@ -0,0 +1,2 @@
from .input import *
from . import bdio

628
pyerrors/input/bdio.py Normal file
View file

@ -0,0 +1,628 @@
#!/usr/bin/env python
# coding: utf-8
import ctypes
import hashlib
import autograd.numpy as np # Thinly-wrapped numpy
from ..pyerrors import Obs
def read_ADerrors(file_path, bdio_path='./libbdio.so', **kwargs):
    """ Extract generic MCMC data from a bdio file

    read_ADerrors requires bdio to be compiled into a shared library. This can be achieved by
    adding the flag -fPIC to CC and changing the all target to

    all:           bdio.o $(LIBDIR)
                   gcc -shared -Wl,-soname,libbdio.so -o $(BUILDDIR)/libbdio.so $(BUILDDIR)/bdio.o
                   cp $(BUILDDIR)/libbdio.so $(LIBDIR)/

    Parameters
    ----------
    file_path -- path to the bdio file
    bdio_path -- path to the shared bdio library libbdio.so (default ./libbdio.so)

    Returns
    -------
    List with one Obs per data record found in the file.

    Raises
    ------
    Exception -- if the bdio library reports that the file could not be opened.
    """
    bdio = ctypes.cdll.LoadLibrary(bdio_path)

    bdio_open = bdio.bdio_open
    bdio_open.restype = ctypes.c_void_p

    bdio_close = bdio.bdio_close
    bdio_close.restype = ctypes.c_int
    bdio_close.argtypes = [ctypes.c_void_p]

    bdio_seek_record = bdio.bdio_seek_record
    bdio_seek_record.restype = ctypes.c_int
    bdio_seek_record.argtypes = [ctypes.c_void_p]

    bdio_get_ruinfo = bdio.bdio_get_ruinfo
    bdio_get_ruinfo.restype = ctypes.c_int
    bdio_get_ruinfo.argtypes = [ctypes.c_void_p]

    bdio_read_f64 = bdio.bdio_read_f64
    bdio_read_f64.restype = ctypes.c_size_t
    bdio_read_f64.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p]

    bdio_read_int32 = bdio.bdio_read_int32
    bdio_read_int32.restype = ctypes.c_size_t
    bdio_read_int32.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p]

    b_path = file_path.encode('utf-8')
    b_read = 'r'.encode('utf-8')

    fbdio = bdio_open(ctypes.c_char_p(b_path), ctypes.c_char_p(b_read), None)
    if not fbdio:
        # A NULL handle would otherwise be handed back to the C library.
        raise Exception('Could not open bdio file ' + str(file_path))

    def read_c_double():
        # Read a single 8 byte float from the current record.
        buf = ctypes.c_double()
        bdio_read_f64(ctypes.c_void_p(ctypes.addressof(buf)), ctypes.c_size_t(8), ctypes.c_void_p(fbdio))
        return buf.value

    def read_c_size_t():
        # Read a single 4 byte integer from the current record. The buffer has
        # exactly the size of the item, so the result does not depend on where
        # the four bytes end up inside a wider integer.
        buf = ctypes.c_uint32()
        bdio_read_int32(ctypes.c_void_p(ctypes.addressof(buf)), ctypes.c_size_t(4), ctypes.c_void_p(fbdio))
        return buf.value

    return_list = []

    print('Reading of bdio file started')
    try:
        while True:
            bdio_seek_record(fbdio)
            ruinfo = bdio_get_ruinfo(fbdio)

            if ruinfo == 7:
                print('MD5sum found')  # For now we just ignore these entries and do not perform any checks on them
                continue

            if ruinfo < 0:
                # EOF reached
                break

            # The values are read strictly in the order in which they are stored.
            mean = read_c_double()
            print('mean', mean)

            neid = read_c_size_t()
            print('neid', neid)

            ndata = [read_c_size_t() for _ in range(neid)]
            print('ndata', ndata)

            nrep = [read_c_size_t() for _ in range(neid)]
            print('nrep', nrep)

            vrep = [[read_c_size_t() for _ in range(nrep[index])] for index in range(neid)]
            print('vrep', vrep)

            ids = [read_c_size_t() for _ in range(neid)]
            print('ids', ids)

            nt = [read_c_size_t() for _ in range(neid)]
            print('nt', nt)

            zero = [read_c_double() for _ in range(neid)]
            print('zero', zero)

            four = [read_c_double() for _ in range(neid)]
            print('four', four)

            # All fluctuations are stored in one contiguous block of doubles.
            n_total = sum(ndata)
            pd_buf = (ctypes.c_double * n_total)()
            bdio_read_f64(ctypes.c_void_p(ctypes.addressof(pd_buf)), ctypes.c_size_t(8 * n_total), ctypes.c_void_p(fbdio))
            delta = pd_buf[:]

            # Split the block into one array per replicum, using the replica lengths in vrep.
            samples = np.split(np.asarray(delta) + mean, np.cumsum([a for su in vrep for a in su])[:-1])
            no_reps = [len(o) for o in vrep]
            assert len(ids) == len(no_reps)
            tmp_names = []
            ens_length = max([len(str(o)) for o in ids])
            for loc_id, reps in zip(ids, no_reps):
                for index in range(reps):
                    # Pad the ids such that all names have the same length.
                    missing_chars = ens_length - len(str(loc_id))
                    tmp_names.append(str(loc_id) + ' ' * missing_chars + 'r' + '{0:03d}'.format(index))

            return_list.append(Obs(samples, tmp_names))
    finally:
        # Close the file even if a record could not be processed.
        bdio_close(fbdio)

    print()
    print(len(return_list), 'observable(s) extracted.')
    return return_list
def write_ADerrors(obs_list, file_path, bdio_path='./libbdio.so', **kwargs):
    """ Write Obs to a bdio file according to ADerrors conventions

    write_ADerrors requires bdio to be compiled into a shared library. This can be achieved by
    adding the flag -fPIC to CC and changing the all target to

    all:           bdio.o $(LIBDIR)
                   gcc -shared -Wl,-soname,libbdio.so -o $(BUILDDIR)/libbdio.so $(BUILDDIR)/bdio.o
                   cp $(BUILDDIR)/libbdio.so $(LIBDIR)/

    Parameters
    ----------
    obs_list -- list of Obs to be written, the gamma method has to be applied to all of them beforehand
    file_path -- path to the bdio file
    bdio_path -- path to the shared bdio library libbdio.so (default ./libbdio.so)

    Returns
    -------
    0 after all observables have been written.

    Raises
    ------
    Exception -- if one of the Obs has no e_names or if the file could not be opened.
    """

    for obs in obs_list:
        if not obs.e_names:
            raise Exception('Run the gamma method first for all obs.')

    bdio = ctypes.cdll.LoadLibrary(bdio_path)

    bdio_open = bdio.bdio_open
    bdio_open.restype = ctypes.c_void_p

    bdio_close = bdio.bdio_close
    bdio_close.restype = ctypes.c_int
    bdio_close.argtypes = [ctypes.c_void_p]

    bdio_start_record = bdio.bdio_start_record
    bdio_start_record.restype = ctypes.c_int
    bdio_start_record.argtypes = [ctypes.c_size_t, ctypes.c_size_t, ctypes.c_void_p]

    bdio_flush_record = bdio.bdio_flush_record
    bdio_flush_record.restype = ctypes.c_int
    bdio_flush_record.argtypes = [ctypes.c_void_p]

    bdio_write_f64 = bdio.bdio_write_f64
    bdio_write_f64.restype = ctypes.c_size_t
    bdio_write_f64.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p]

    bdio_write_int32 = bdio.bdio_write_int32
    bdio_write_int32.restype = ctypes.c_size_t
    bdio_write_int32.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p]

    b_path = file_path.encode('utf-8')
    b_write = 'w'.encode('utf-8')
    b_form = 'pyerrors ADerror export'.encode('utf-8')

    fbdio = bdio_open(ctypes.c_char_p(b_path), ctypes.c_char_p(b_write), b_form)
    if not fbdio:
        # A NULL handle would otherwise be handed back to the C library.
        raise Exception('Could not open bdio file ' + str(file_path))

    def write_c_double(double):
        # Append a single 8 byte float to the current record.
        buf = ctypes.c_double(double)
        bdio_write_f64(ctypes.c_void_p(ctypes.addressof(buf)), ctypes.c_size_t(8), ctypes.c_void_p(fbdio))

    def write_c_size_t(int32):
        # Append a single 4 byte integer to the current record. The buffer has
        # exactly the size of the item, so the bytes written do not depend on
        # the byte order of a wider integer.
        buf = ctypes.c_uint32(int32)
        bdio_write_int32(ctypes.c_void_p(ctypes.addressof(buf)), ctypes.c_size_t(4), ctypes.c_void_p(fbdio))

    try:
        for obs in obs_list:
            neid = len(obs.e_names)
            # Length of every replicum, grouped by ensemble.
            vrep = [[obs.shape[o] for o in sl] for sl in list(obs.e_content.values())]
            vrep_write = [item for sublist in vrep for item in sublist]
            ndata = [np.sum(o) for o in vrep]
            nrep = [len(o) for o in vrep]
            print('ndata', ndata)
            print('nrep', nrep)
            print('vrep', vrep)
            keys = list(obs.e_content.keys())
            ids = []
            for key in keys:
                try:  # Try to convert key to integer
                    ids.append(int(key))
                except (ValueError, TypeError):  # If not possible construct a hash
                    ids.append(int(hashlib.sha256(key.encode('utf-8')).hexdigest(), 16) % 10 ** 8)
            print('ids', ids)
            nt = []
            for e_name in obs.e_names:
                r_length = [len(obs.deltas[r_name]) for r_name in obs.e_content[e_name]]
                nt.append(max(r_length) // 2)
            print('nt', nt)
            zero = neid * [0.0]
            four = neid * [4.0]
            print('zero', zero)
            print('four', four)
            # Fluctuations of all replica in the same order as vrep_write.
            delta = np.concatenate([obs.deltas[o] for sl in list(obs.e_content.values()) for o in sl])

            bdio_start_record(0x00, 8, fbdio)

            write_c_double(obs.value)
            write_c_size_t(neid)

            for element in ndata:
                write_c_size_t(element)
            for element in nrep:
                write_c_size_t(element)
            for element in vrep_write:
                write_c_size_t(element)
            for element in ids:
                write_c_size_t(element)
            for element in nt:
                write_c_size_t(element)

            for element in zero:
                write_c_double(element)
            for element in four:
                write_c_double(element)

            for element in delta:
                write_c_double(element)
    finally:
        # Close the file even if writing one of the observables failed.
        bdio_close(fbdio)

    return 0
def _get_kwd(string, key):
return (string.split(key, 1)[1]).split(" ", 1)[0]
def _get_corr_name(string, key):
return (string.split(key, 1)[1]).split(' NDIM=', 1)[0]
def read_mesons(file_path, bdio_path='./libbdio.so', **kwargs):
    """ Extract mesons data from a bdio file and return it as a dictionary

    The dictionary can be accessed with a tuple consisting of (type, source_position, kappa1, kappa2)

    read_mesons requires bdio to be compiled into a shared library. This can be achieved by
    adding the flag -fPIC to CC and changing the all target to

    all:           bdio.o $(LIBDIR)
                   gcc -shared -Wl,-soname,libbdio.so -o $(BUILDDIR)/libbdio.so $(BUILDDIR)/bdio.o
                   cp $(BUILDDIR)/libbdio.so $(LIBDIR)/

    Parameters
    ----------
    file_path -- path to the bdio file
    bdio_path -- path to the shared bdio library libbdio.so (default ./libbdio.so)
    stop -- stops reading at given configuration number (default None)
    alternative_ensemble_name -- Manually overwrite ensemble name

    Returns
    -------
    Dictionary which maps (name, source_position, kappa1, kappa2) to a list of Obs,
    one for each of the d0 - 2 time values which are kept.

    Raises
    ------
    Exception -- if the file could not be opened, if it contains no configuration record,
                 if the source positions of the two propagators of a correlator differ or
                 if the correlators do not all have the same number of configurations.
    """
    bdio = ctypes.cdll.LoadLibrary(bdio_path)

    bdio_open = bdio.bdio_open
    bdio_open.restype = ctypes.c_void_p

    bdio_close = bdio.bdio_close
    bdio_close.restype = ctypes.c_int
    bdio_close.argtypes = [ctypes.c_void_p]

    bdio_seek_record = bdio.bdio_seek_record
    bdio_seek_record.restype = ctypes.c_int
    bdio_seek_record.argtypes = [ctypes.c_void_p]

    bdio_get_rlen = bdio.bdio_get_rlen
    bdio_get_rlen.restype = ctypes.c_int
    bdio_get_rlen.argtypes = [ctypes.c_void_p]

    bdio_get_ruinfo = bdio.bdio_get_ruinfo
    bdio_get_ruinfo.restype = ctypes.c_int
    bdio_get_ruinfo.argtypes = [ctypes.c_void_p]

    bdio_read = bdio.bdio_read
    bdio_read.restype = ctypes.c_size_t
    bdio_read.argtypes = [ctypes.c_char_p, ctypes.c_size_t, ctypes.c_void_p]

    bdio_read_f64 = bdio.bdio_read_f64
    bdio_read_f64.restype = ctypes.c_size_t
    bdio_read_f64.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p]

    b_path = file_path.encode('utf-8')
    b_read = 'r'.encode('utf-8')
    b_form = 'Generic Correlator Format 1.0'.encode('utf-8')

    ensemble_name = ''
    volume = []  # lattice volume
    boundary_conditions = []
    corr_name = []  # Contains correlator names
    corr_type = []  # Contains correlator data type (important for reading out numerical data)
    corr_props = []  # Contains propagator types (Component of corr_kappa)
    d0 = 0  # tvals
    d1 = 0  # nnoise
    prop_kappa = []  # Contains propagator kappas (Component of corr_kappa)
    prop_source = []  # Contains propagator source positions
    # Check noise type for multiple replica?

    cnfg_no = -1
    corr_no = -1
    no_corrs = None  # Set as soon as the first configuration record is found
    data = []

    fbdio = bdio_open(ctypes.c_char_p(b_path), ctypes.c_char_p(b_read), ctypes.c_char_p(b_form))
    if not fbdio:
        # A NULL handle would otherwise be handed back to the C library.
        raise Exception('Could not open bdio file ' + str(file_path))

    print('Reading of bdio file started')
    try:
        while True:
            bdio_seek_record(fbdio)
            ruinfo = bdio_get_ruinfo(fbdio)
            if ruinfo < 0:
                # EOF reached
                break
            rlen = bdio_get_rlen(fbdio)
            if ruinfo == 5:
                # Numerical data of one correlator on the current configuration.
                # The buffer is never smaller than the number of bytes requested
                # from the library, the analysis only uses the nominal part.
                nominal = 2 + d0 * d1 * 2
                pd_buf = (ctypes.c_double * max(nominal, (rlen + 7) // 8))()
                bdio_read_f64(ctypes.c_void_p(ctypes.addressof(pd_buf)), ctypes.c_size_t(rlen), ctypes.c_void_p(fbdio))
                values = pd_buf[:nominal]

                # The first and the last time value are dropped, the remaining
                # entries are averaged over the d1 random sources.
                if corr_type[corr_no] == 'complex':
                    tmp_mean = np.mean(np.asarray(np.split(np.asarray(values[2 + 2 * d1:-2 * d1:2]), d0 - 2)), axis=1)
                else:
                    tmp_mean = np.mean(np.asarray(np.split(np.asarray(values[2 + d1:-d0 * d1 - d1]), d0 - 2)), axis=1)

                data[corr_no].append(tmp_mean)
                corr_no += 1
            else:
                # Text record. The buffer grows with the record so that long
                # records cannot be written past its end.
                alt_buf = ctypes.create_string_buffer(max(rlen, 1024))
                iread = bdio_read(ctypes.c_char_p(ctypes.addressof(alt_buf)), ctypes.c_size_t(rlen), ctypes.c_void_p(fbdio))
                if rlen != iread:
                    print('Error')

                # Null bytes separate the entries, treat them as blanks.
                tmp_string = alt_buf.raw.replace(b'\x00', b' ').decode("utf-8").rstrip()

                if ruinfo == 0:
                    ensemble_name = _get_kwd(tmp_string, 'ENSEMBLE=')
                    volume.append(int(_get_kwd(tmp_string, 'L0=')))
                    volume.append(int(_get_kwd(tmp_string, 'L1=')))
                    volume.append(int(_get_kwd(tmp_string, 'L2=')))
                    volume.append(int(_get_kwd(tmp_string, 'L3=')))
                    boundary_conditions.append(_get_kwd(tmp_string, 'BC0='))
                    boundary_conditions.append(_get_kwd(tmp_string, 'BC1='))
                    boundary_conditions.append(_get_kwd(tmp_string, 'BC2='))
                    boundary_conditions.append(_get_kwd(tmp_string, 'BC3='))

                if ruinfo == 1:
                    corr_name.append(_get_corr_name(tmp_string, 'CORR_NAME='))
                    corr_type.append(_get_kwd(tmp_string, 'DATATYPE='))
                    corr_props.append([_get_kwd(tmp_string, 'PROP0='), _get_kwd(tmp_string, 'PROP1=')])
                    if d0 == 0:
                        d0 = int(_get_kwd(tmp_string, 'D0='))
                    elif d0 != int(_get_kwd(tmp_string, 'D0=')):
                        print('Error: Varying number of time values')
                    if d1 == 0:
                        d1 = int(_get_kwd(tmp_string, 'D1='))
                    elif d1 != int(_get_kwd(tmp_string, 'D1=')):
                        print('Error: Varying number of random sources')

                if ruinfo == 2:
                    prop_kappa.append(_get_kwd(tmp_string, 'KAPPA='))
                    prop_source.append(_get_kwd(tmp_string, 'x0='))

                if ruinfo == 4:
                    # Start of a new configuration.
                    if 'stop' in kwargs:
                        if cnfg_no >= kwargs.get('stop') - 1:
                            break
                    cnfg_no += 1
                    print('\r%s %i' % ('Reading configuration', cnfg_no + 1), end='\r')
                    if cnfg_no == 0:
                        no_corrs = len(corr_name)
                        data = [[] for _ in range(no_corrs)]

                    # The data records of a configuration start with the first correlator.
                    corr_no = 0
    finally:
        # Close the file even if a record could not be processed.
        bdio_close(fbdio)

    print('\nEnsemble: ', ensemble_name)
    if 'alternative_ensemble_name' in kwargs:
        ensemble_name = kwargs.get('alternative_ensemble_name')
        print('Ensemble name overwritten to', ensemble_name)
    print('Lattice volume: ', volume)
    print('Boundary conditions: ', boundary_conditions)
    print('Number of time values: ', d0)
    print('Number of random sources: ', d1)
    print('Number of corrs: ', len(corr_name))
    print('Number of configurations: ', cnfg_no + 1)

    corr_kappa = []  # Contains kappa values for both propagators of given correlation function
    corr_source = []
    for item in corr_props:
        corr_kappa.append([float(prop_kappa[int(item[0])]), float(prop_kappa[int(item[1])])])
        if prop_source[int(item[0])] != prop_source[int(item[1])]:
            raise Exception('Source position do not match for correlator' + str(item))
        corr_source.append(int(prop_source[int(item[0])]))

    if no_corrs is None:
        raise Exception('No configuration records found in bdio file.')

    result = {}
    for c in range(no_corrs):
        tmp_corr = []
        for t in range(d0 - 2):
            tmp_corr.append(Obs([np.asarray(data[c])[:, t]], [ensemble_name]))
        result[(corr_name[c], corr_source[c]) + tuple(sorted(corr_kappa[c]))] = tmp_corr

    # Check that all data entries have the same number of configurations
    if len(set([o[0].N for o in list(result.values())])) != 1:
        raise Exception('Error: Not all correlators have the same number of configurations. bdio file is possibly corrupted.')

    return result
def read_dSdm(file_path, bdio_path='./libbdio.so', **kwargs):
    """ Extract dSdm data from a bdio file and return it as a dictionary

    The dictionary can be accessed with a tuple consisting of (type, kappa)

    read_dSdm requires bdio to be compiled into a shared library. This can be achieved by
    adding the flag -fPIC to CC and changing the all target to

    all:           bdio.o $(LIBDIR)
                   gcc -shared -Wl,-soname,libbdio.so -o $(BUILDDIR)/libbdio.so $(BUILDDIR)/bdio.o
                   cp $(BUILDDIR)/libbdio.so $(LIBDIR)/

    Parameters
    ----------
    file_path -- path to the bdio file
    bdio_path -- path to the shared bdio library libbdio.so (default ./libbdio.so)
    stop -- stops reading at given configuration number (default None)

    Returns
    -------
    Dictionary which maps (name, kappa as string) to an Obs.

    Raises
    ------
    Exception -- if the file could not be opened, if it contains no configuration record or
                 if the entries do not all have the same number of configurations.
    """
    bdio = ctypes.cdll.LoadLibrary(bdio_path)

    bdio_open = bdio.bdio_open
    bdio_open.restype = ctypes.c_void_p

    bdio_close = bdio.bdio_close
    bdio_close.restype = ctypes.c_int
    bdio_close.argtypes = [ctypes.c_void_p]

    bdio_seek_record = bdio.bdio_seek_record
    bdio_seek_record.restype = ctypes.c_int
    bdio_seek_record.argtypes = [ctypes.c_void_p]

    bdio_get_rlen = bdio.bdio_get_rlen
    bdio_get_rlen.restype = ctypes.c_int
    bdio_get_rlen.argtypes = [ctypes.c_void_p]

    bdio_get_ruinfo = bdio.bdio_get_ruinfo
    bdio_get_ruinfo.restype = ctypes.c_int
    bdio_get_ruinfo.argtypes = [ctypes.c_void_p]

    bdio_read = bdio.bdio_read
    bdio_read.restype = ctypes.c_size_t
    bdio_read.argtypes = [ctypes.c_char_p, ctypes.c_size_t, ctypes.c_void_p]

    bdio_read_f64 = bdio.bdio_read_f64
    bdio_read_f64.restype = ctypes.c_size_t
    bdio_read_f64.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p]

    b_path = file_path.encode('utf-8')
    b_read = 'r'.encode('utf-8')
    b_form = 'Generic Correlator Format 1.0'.encode('utf-8')

    creator = ''
    ensemble_name = ''
    volume = []  # lattice volume
    boundary_conditions = []
    corr_name = []  # Contains correlator names
    corr_type = []  # Contains correlator data type (important for reading out numerical data)
    corr_props = []  # Contains propagator types (Component of corr_kappa)
    d0 = 0  # tvals
    prop_kappa = []  # Contains propagator kappas (Component of corr_kappa)
    # Check noise type for multiple replica?

    cnfg_no = -1
    corr_no = -1
    no_corrs = None  # Set as soon as the first configuration record is found
    data = []

    fbdio = bdio_open(ctypes.c_char_p(b_path), ctypes.c_char_p(b_read), ctypes.c_char_p(b_form))
    if not fbdio:
        # A NULL handle would otherwise be handed back to the C library.
        raise Exception('Could not open bdio file ' + str(file_path))

    print('Reading of bdio file started')
    try:
        while True:
            bdio_seek_record(fbdio)
            ruinfo = bdio_get_ruinfo(fbdio)
            if ruinfo < 0:
                # EOF reached
                break
            rlen = bdio_get_rlen(fbdio)
            if ruinfo == 5:
                # Numerical data of one entry on the current configuration.
                # The buffer is never smaller than the number of bytes requested
                # from the library, the analysis only uses the nominal part.
                nominal = 2 + d0
                pd_buf = (ctypes.c_double * max(nominal, (rlen + 7) // 8))()
                bdio_read_f64(ctypes.c_void_p(ctypes.addressof(pd_buf)), ctypes.c_size_t(rlen), ctypes.c_void_p(fbdio))
                # The first two entries are skipped, the rest is averaged.
                tmp_mean = np.mean(np.asarray(pd_buf[2:nominal]))

                data[corr_no].append(tmp_mean)
                corr_no += 1
            else:
                # Text record. The buffer grows with the record so that long
                # records cannot be written past its end.
                alt_buf = ctypes.create_string_buffer(max(rlen, 1024))
                iread = bdio_read(ctypes.c_char_p(ctypes.addressof(alt_buf)), ctypes.c_size_t(rlen), ctypes.c_void_p(fbdio))
                if rlen != iread:
                    print('Error')

                # Null bytes separate the entries, treat them as blanks.
                tmp_string = alt_buf.raw.replace(b'\x00', b' ').decode("utf-8").rstrip()

                if ruinfo == 0:
                    creator = _get_kwd(tmp_string, 'CREATOR=')
                    ensemble_name = _get_kwd(tmp_string, 'ENSEMBLE=')
                    volume.append(int(_get_kwd(tmp_string, 'L0=')))
                    volume.append(int(_get_kwd(tmp_string, 'L1=')))
                    volume.append(int(_get_kwd(tmp_string, 'L2=')))
                    volume.append(int(_get_kwd(tmp_string, 'L3=')))
                    boundary_conditions.append(_get_kwd(tmp_string, 'BC0='))
                    boundary_conditions.append(_get_kwd(tmp_string, 'BC1='))
                    boundary_conditions.append(_get_kwd(tmp_string, 'BC2='))
                    boundary_conditions.append(_get_kwd(tmp_string, 'BC3='))

                if ruinfo == 1:
                    corr_name.append(_get_corr_name(tmp_string, 'CORR_NAME='))
                    corr_type.append(_get_kwd(tmp_string, 'DATATYPE='))
                    corr_props.append(_get_kwd(tmp_string, 'PROP0='))
                    if d0 == 0:
                        d0 = int(_get_kwd(tmp_string, 'D0='))
                    elif d0 != int(_get_kwd(tmp_string, 'D0=')):
                        print('Error: Varying number of time values')

                if ruinfo == 2:
                    prop_kappa.append(_get_kwd(tmp_string, 'KAPPA='))

                if ruinfo == 4:
                    # Start of a new configuration.
                    if 'stop' in kwargs:
                        if cnfg_no >= kwargs.get('stop') - 1:
                            break
                    cnfg_no += 1
                    print('\r%s %i' % ('Reading configuration', cnfg_no + 1), end='\r')
                    if cnfg_no == 0:
                        no_corrs = len(corr_name)
                        data = [[] for _ in range(no_corrs)]

                    # The data records of a configuration start with the first entry.
                    corr_no = 0
    finally:
        # Close the file even if a record could not be processed.
        bdio_close(fbdio)

    print('\nCreator: ', creator)
    print('Ensemble: ', ensemble_name)
    print('Lattice volume: ', volume)
    print('Boundary conditions: ', boundary_conditions)
    print('Number of random sources: ', d0)
    print('Number of corrs: ', len(corr_name))
    print('Number of configurations: ', cnfg_no + 1)

    corr_kappa = []  # Contains the kappa value of the propagator of each entry
    for item in corr_props:
        corr_kappa.append(float(prop_kappa[int(item)]))

    if no_corrs is None:
        raise Exception('No configuration records found in bdio file.')

    result = {}
    for c in range(no_corrs):
        result[(corr_name[c], str(corr_kappa[c]))] = Obs([np.asarray(data[c])], [ensemble_name])

    # Check that all data entries have the same number of configurations
    if len(set([o.N for o in list(result.values())])) != 1:
        raise Exception('Error: Not all correlators have the same number of configurations. bdio file is possibly corrupted.')

    return result

660
pyerrors/input/input.py Normal file
View file

@ -0,0 +1,660 @@
#!/usr/bin/env python
# coding: utf-8
import sys
import os
import fnmatch
import re
import struct
import autograd.numpy as np # Thinly-wrapped numpy
from ..pyerrors import Obs
from ..fits import fit_lin
def read_sfcf(path, prefix, name, **kwargs):
    """Read sfcf format from given folder structure.

    Every folder in path whose name starts with prefix is treated as one replicum. Inside a
    replicum every folder cfg<number> is expected to contain a file called name.

    Parameters
    ----------
    path -- folder which contains the replica folders
    prefix -- beginning of the names of the replica folders
    name -- name of the file to be read in every configuration folder

    Keyword arguments
    -----------------
    im -- if True, read imaginary instead of real part of the correlation function.
    single -- if True, read a boundary-to-boundary correlation function with a single value
    b2b -- if True, read a time-dependent boundary-to-boundary correlation function
    names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length

    Returns
    -------
    List of Obs, one for every time slice found in the first file.
    """
    if kwargs.get('im'):
        im = 1
        part = 'imaginary'
    else:
        im = 0
        part = 'real'

    if kwargs.get('single'):
        b2b = 1
        single = 1
    else:
        b2b = 0
        single = 0

    if kwargs.get('b2b'):
        b2b = 1

    read = 0
    T = 0
    start = 0

    # Only the folders directly below path are considered.
    ls = list(next(os.walk(path), (None, [], []))[1])
    if not ls:
        print('Error, directory not found')
        sys.exit()

    # Exclude folders with different names
    ls = [folder for folder in ls if fnmatch.fnmatch(folder, prefix + '*')]
    if not ls:
        raise Exception('No folders matching ' + prefix + '* found in ' + path)
    if len(ls) > 1:
        # Sort the replica by the first number which follows the prefix.
        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
    replica = len(ls)
    print('Read', part, 'part of', name, 'from', prefix, ',', replica, 'replica')
    if 'names' in kwargs:
        new_names = kwargs.get('names')
        if len(new_names) != replica:
            raise Exception('Names does not have the required length', replica)
    else:
        new_names = ls

    print(replica, 'replica')
    for i, item in enumerate(ls):
        print(item)
        rep_path = path + '/' + item
        sub_ls = list(next(os.walk(rep_path), (None, [], []))[1])
        # Exclude folders which are not configuration folders
        sub_ls = [folder for folder in sub_ls if fnmatch.fnmatch(folder, 'cfg*')]
        sub_ls.sort(key=lambda x: int(x[3:]))
        no_cfg = len(sub_ls)
        print(no_cfg, 'configurations')

        if i == 0:
            # Determine the first data line and the number of time slices T
            # from the first configuration of the first replicum.
            with open(rep_path + '/' + sub_ls[0] + '/' + name) as fp:
                for k, line in enumerate(fp):
                    if read == 1 and not line.strip() and k > start + 1:
                        break
                    if read == 1 and k >= start:
                        T += 1
                    if '[correlator]' in line:
                        read = 1
                        start = k + 7 + b2b
                        T -= b2b

            deltas = [[] for _ in range(T)]

        # One array of length no_cfg per time slice for the current replicum.
        for j in range(T):
            deltas[j].append(np.zeros(no_cfg))

        for cnfg, subitem in enumerate(sub_ls):
            with open(rep_path + '/' + subitem + '/' + name) as fp:
                for k, line in enumerate(fp):
                    if start <= k < start + T:
                        floats = list(map(float, line.split()))
                        deltas[k - start][i][cnfg] = floats[1 + im - single]

    result = []
    for t in range(T):
        result.append(Obs(deltas[t], new_names))

    return result
def read_sfcf_c(path, prefix, name, **kwargs):
    """Read sfcf c format from given folder structure.

    Every folder in path whose name starts with prefix is treated as one replicum. Inside a
    replicum every file whose name starts with prefix is treated as one configuration.

    Parameters
    ----------
    path -- folder which contains the replica folders
    prefix -- beginning of the names of the replica folders and of the configuration files
    name -- name of the correlation function to be read

    Keyword arguments
    -----------------
    im -- if True, read imaginary instead of real part of the correlation function.
    single -- if True, read a boundary-to-boundary correlation function with a single value
    b2b -- if True, read a time-dependent boundary-to-boundary correlation function
    names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
    quarks -- if given, only a correlation function whose quarks line matches is read

    Returns
    -------
    List of Obs, one for every time slice found in the first file.

    Raises
    ------
    Exception -- if names has the wrong length, if the correlation function or the requested
                 quarks are not found in the first file or if a later file has a different layout.
    """
    if kwargs.get('im'):
        im = 1
        part = 'imaginary'
    else:
        im = 0
        part = 'real'

    if kwargs.get('single'):
        b2b = 1
        single = 1
    else:
        b2b = 0
        single = 0

    if kwargs.get('b2b'):
        b2b = 1

    T = 0

    # Only the folders directly below path are considered.
    ls = list(next(os.walk(path), (None, [], []))[1])
    if not ls:
        print('Error, directory not found')
        sys.exit()
    # Exclude folders with different names
    ls = [folder for folder in ls if fnmatch.fnmatch(folder, prefix + '*')]
    if len(ls) > 1:
        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))  # New version, to cope with ids, etc.
    replica = len(ls)
    if 'names' in kwargs:
        new_names = kwargs.get('names')
        if len(new_names) != replica:
            raise Exception('Names does not have the required length', replica)
    else:
        new_names = ls
    print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica')
    for i, item in enumerate(ls):
        rep_path = path + '/' + item
        sub_ls = list(next(os.walk(rep_path), (None, [], []))[2])
        # Exclude files with different names
        sub_ls = [fname for fname in sub_ls if fnmatch.fnmatch(fname, prefix + '*')]
        # Sort the files by the last number in their name.
        sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))

        first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1])

        last_cfg = len(sub_ls) + first_cfg - 1

        # Only the leading run of consecutively numbered configurations is used.
        for cfg in range(1, len(sub_ls)):
            if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg:
                last_cfg = cfg + first_cfg - 1
                break

        no_cfg = last_cfg - first_cfg + 1
        print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n')

        if i == 0:
            # Determine the first data line and the number of time slices T
            # from the first configuration of the first replicum.
            read = 0
            found = 0
            start_read = None
            with open(rep_path + '/' + sub_ls[0]) as fp:
                for k, line in enumerate(fp):
                    if 'quarks' in kwargs:
                        # The line after the name has to carry the requested quarks,
                        # otherwise the search for the name continues.
                        if found == 0 and read == 1:
                            if line.strip() == 'quarks ' + kwargs.get('quarks'):
                                found = 1
                                print('found', kwargs.get('quarks'))
                            else:
                                read = 0
                    if read == 1 and not line.strip():
                        break
                    if read == 1 and k >= start_read:
                        T += 1
                    if line.strip() == 'name '+name:
                        read = 1
                        start_read = k + 5 + b2b
            if start_read is None:
                raise Exception('Correlator ' + name + ' not found in ' + sub_ls[0])
            print('T =', T, ', starting to read in line', start_read)

            if 'quarks' in kwargs:
                if found == 0:
                    raise Exception(kwargs.get('quarks') + ' not found')

            deltas = [[] for _ in range(T)]

        # One array of length no_cfg per time slice for the current replicum.
        for j in range(T):
            deltas[j].append(np.zeros(no_cfg))

        for cfg in range(no_cfg):
            with open(rep_path + '/' + sub_ls[cfg]) as fp:
                for k, line in enumerate(fp):
                    # Every file has to carry the name in the same line as the first one.
                    if k == start_read - 5 - b2b:
                        if line.strip() != 'name ' + name:
                            raise Exception('Wrong format', sub_ls[cfg])
                    if start_read <= k < start_read + T:
                        floats = list(map(float, line.split()))
                        deltas[k - start_read][i][cfg] = floats[1 + im - single]

    result = []
    for t in range(T):
        result.append(Obs(deltas[t], new_names))

    return result
def read_qtop(path, prefix, **kwargs):
    """Read qtop format from given folder structure.

    Every file in path whose name starts with prefix is treated as one replicum. The second
    column of every line is read.

    Parameters
    ----------
    path -- folder which contains the files
    prefix -- beginning of the names of the files

    Keyword arguments
    -----------------
    target -- specifies the topological sector to be reweighted to (default 0)
    full -- if true read the charge instead of the reweighting factor.

    Returns
    -------
    Obs with one replicum per file, named after the file name up to the first dot.
    """
    target = kwargs.get('target') if 'target' in kwargs else 0
    full = 1 if kwargs.get('full') else 0

    # Only the files directly below path are considered.
    ls = list(next(os.walk(path), (None, [], []))[2])
    if not ls:
        print('Error, directory not found')
        sys.exit()

    # Exclude files with different names
    ls = [fname for fname in ls if fnmatch.fnmatch(fname, prefix + '*')]
    if len(ls) > 1:
        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))  # New version, to cope with ids, etc.
    replica = len(ls)
    print('Read Q_top from', prefix[:-1], ',', replica, 'replica')

    deltas = []
    for rep in range(replica):
        tmp = []
        with open(path + '/' + ls[rep]) as fp:
            for line in fp:
                floats = list(map(float, line.split()))
                if full == 1:
                    tmp.append(floats[1])
                elif int(floats[1]) == target:
                    # Indicator which is 1 in the target sector and 0 elsewhere
                    tmp.append(1.0)
                else:
                    tmp.append(0.0)

        deltas.append(np.array(tmp))

    result = Obs(deltas, [(w.split('.'))[0] for w in ls])

    return result
def read_rwms(path, prefix, **kwargs):
    """Read rwms format from given folder structure. Returns a list of length nrw

    Every file in path which matches prefix*.dat is treated as one replicum.

    Parameters
    ----------
    path -- folder which contains the files
    prefix -- beginning of the names of the files

    Keyword arguments
    -----------------
    new_format -- if True, the array of the associated numbers of Hasenbusch factors is extracted (v>=openQCD1.6)
    r_start -- list which contains the first config to be read for each replicum
    r_stop -- list which contains the last config to be read for each replicum
    print_err -- if present, the values of every source are printed while reading

    Raises
    ------
    Exception -- if no file matches prefix*.dat or if r_start or r_stop have the wrong length.
    """
    extract_nfct = 1 if kwargs.get('new_format') else 0

    # Only the files directly below path are considered.
    ls = list(next(os.walk(path), (None, [], []))[2])
    if not ls:
        print('Error, directory not found')
        sys.exit()

    # Exclude files with different names
    ls = [fname for fname in ls if fnmatch.fnmatch(fname, prefix + '*.dat')]
    if not ls:
        raise Exception('No files matching ' + prefix + '*.dat found in ' + path)
    if len(ls) > 1:
        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
    replica = len(ls)

    if 'r_start' in kwargs:
        r_start = kwargs.get('r_start')
        if len(r_start) != replica:
            raise Exception('r_start does not match number of replicas')
        # Adjust Configuration numbering to python index
        r_start = [o - 1 if o else None for o in r_start]
    else:
        r_start = [None] * replica

    if 'r_stop' in kwargs:
        r_stop = kwargs.get('r_stop')
        if len(r_stop) != replica:
            raise Exception('r_stop does not match number of replicas')
    else:
        r_stop = [None] * replica

    print('Read reweighting factors from', prefix[:-1], ',', replica, 'replica', end='')

    print_err = 0
    if 'print_err' in kwargs:
        print_err = 1
        print()

    deltas = []

    for rep in range(replica):
        tmp_array = []
        with open(path + '/' + ls[rep], 'rb') as fp:

            # header
            t = fp.read(4)  # number of reweighting factors
            if rep == 0:
                nrw = struct.unpack('i', t)[0]
                for k in range(nrw):
                    deltas.append([])
            elif nrw != struct.unpack('i', t)[0]:
                print('Error: different number of reweighting factors for replicum', rep)
                sys.exit()

            for k in range(nrw):
                tmp_array.append([])

            # This block is necessary for openQCD1.6 ms1 files
            nfct = []
            if extract_nfct == 1:
                for i in range(nrw):
                    t = fp.read(4)
                    nfct.append(struct.unpack('i', t)[0])
                print('nfct: ', nfct)  # Hasenbusch factor, 1 for rat reweighting
            else:
                nfct = [1] * nrw

            nsrc = []
            for i in range(nrw):
                t = fp.read(4)
                nsrc.append(struct.unpack('i', t)[0])

            # body
            while True:
                t = fp.read(4)
                if len(t) < 4:
                    break
                if print_err:
                    config_no = struct.unpack('i', t)
                for i in range(nrw):
                    tmp_nfct = 1.0
                    for j in range(nfct[i]):
                        # Two blocks of nsrc[i] doubles are stored per factor,
                        # only the second one enters the result.
                        fp.read(8 * nsrc[i])
                        t = fp.read(8 * nsrc[i])
                        tmp_rw = struct.unpack('d' * nsrc[i], t)
                        weights = np.exp(-np.asarray(tmp_rw))
                        tmp_nfct *= np.mean(weights)
                        if print_err:
                            print(config_no, i, j, np.mean(weights), np.std(weights))
                            print('Sources:', weights)
                            print('Partial factor:', tmp_nfct)
                    tmp_array[i].append(tmp_nfct)

            for k in range(nrw):
                deltas[k].append(tmp_array[k][r_start[rep]:r_stop[rep]])

    print(',', nrw, 'reweighting factors with', nsrc, 'sources')
    rep_names = [(w.split('.'))[0] for w in ls]
    result = []
    for t in range(nrw):
        result.append(Obs(deltas[t], rep_names))

    return result
def read_pbp(path, prefix, **kwargs):
    """Read pbp format from given folder structure. Returns a list of length nrw

    Every file in path which matches prefix*.dat is treated as one replicum.

    Parameters
    ----------
    path -- folder which contains the files
    prefix -- beginning of the names of the files

    Keyword arguments
    -----------------
    r_start -- list which contains the first config to be read for each replicum
    r_stop -- list which contains the last config to be read for each replicum
    print_err -- if present, the values of every source are printed while reading

    Raises
    ------
    Exception -- if no file matches prefix*.dat or if r_start or r_stop have the wrong length.
    """
    # Only the files directly below path are considered.
    ls = list(next(os.walk(path), (None, [], []))[2])
    if not ls:
        print('Error, directory not found')
        sys.exit()

    # Exclude files with different names
    ls = [fname for fname in ls if fnmatch.fnmatch(fname, prefix + '*.dat')]
    if not ls:
        raise Exception('No files matching ' + prefix + '*.dat found in ' + path)
    if len(ls) > 1:
        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
    replica = len(ls)

    if 'r_start' in kwargs:
        r_start = kwargs.get('r_start')
        if len(r_start) != replica:
            raise Exception('r_start does not match number of replicas')
        # Adjust Configuration numbering to python index
        r_start = [o - 1 if o else None for o in r_start]
    else:
        r_start = [None] * replica

    if 'r_stop' in kwargs:
        r_stop = kwargs.get('r_stop')
        if len(r_stop) != replica:
            raise Exception('r_stop does not match number of replicas')
    else:
        r_stop = [None] * replica

    print('Read <bar{psi}\\psi> from', prefix[:-1], ',', replica, 'replica', end='')

    print_err = 0
    if 'print_err' in kwargs:
        print_err = 1
        print()

    deltas = []

    for rep in range(replica):
        tmp_array = []
        with open(path + '/' + ls[rep], 'rb') as fp:

            # header
            t = fp.read(4)  # number of reweighting factors
            if rep == 0:
                nrw = struct.unpack('i', t)[0]
                for k in range(nrw):
                    deltas.append([])
            elif nrw != struct.unpack('i', t)[0]:
                print('Error: different number of reweighting factors for replicum', rep)
                sys.exit()

            for k in range(nrw):
                tmp_array.append([])

            # This block is necessary for openQCD1.6 ms1 files
            nfct = []
            for i in range(nrw):
                t = fp.read(4)
                nfct.append(struct.unpack('i', t)[0])
            print('nfct: ', nfct)  # Hasenbusch factor, 1 for rat reweighting

            nsrc = []
            for i in range(nrw):
                t = fp.read(4)
                nsrc.append(struct.unpack('i', t)[0])

            # body
            while True:
                t = fp.read(4)
                if len(t) < 4:
                    break
                if print_err:
                    config_no = struct.unpack('i', t)
                for i in range(nrw):
                    tmp_nfct = 1.0
                    for j in range(nfct[i]):
                        # Two blocks of nsrc[i] doubles are stored per factor,
                        # only the second one enters the result.
                        fp.read(8 * nsrc[i])
                        t = fp.read(8 * nsrc[i])
                        tmp_rw = struct.unpack('d' * nsrc[i], t)
                        values = np.asarray(tmp_rw)
                        tmp_nfct *= np.mean(values)
                        if print_err:
                            print(config_no, i, j, np.mean(values), np.std(values))
                            print('Sources:', values)
                            print('Partial factor:', tmp_nfct)
                    tmp_array[i].append(tmp_nfct)

            for k in range(nrw):
                deltas[k].append(tmp_array[k][r_start[rep]:r_stop[rep]])

    print(',', nrw, '<bar{psi}\\psi> with', nsrc, 'sources')
    rep_names = [(w.split('.'))[0] for w in ls]
    result = []
    for t in range(nrw):
        result.append(Obs(deltas[t], rep_names))

    return result
def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwargs):
    """Extract t0 from given .ms.dat files. Returns t0 as Obs.

    It is assumed that all boundary effects have sufficiently decayed at x0=xmin.
    The data around the zero crossing of t^2<E> - 0.3 is fitted with a linear function
    from which the exact root is extracted.
    Only works with openQCD v 1.2.

    Parameters
    ----------
    path -- Path to .ms.dat files
    prefix -- Ensemble prefix
    dtr_read -- Determines how many trajectories should be skipped when reading the ms.dat files.
                Corresponds to dtr_cnfg / dtr_ms in the openQCD input file.
    xmin -- First timeslice where the boundary effects have sufficiently decayed.
    spatial_extent -- spatial extent of the lattice, required for normalization.
    fit_range -- Number of data points left and right of the zero crossing to be included in the linear fit. (Default: 5)

    Keyword arguments
    -----------------
    r_start -- list which contains the first config to be read for each replicum.
    r_stop -- list which contains the last config to be read for each replicum.
    plaquette -- If true extract the plaquette estimate of t0 instead.

    Raises
    ------
    Exception -- if no file matches prefix*.ms.dat, if r_start or r_stop have the wrong length,
                 if the headers of the replica differ or if t^2<E> - 0.3 never becomes positive.
    """

    # Only the files directly below path are considered.
    ls = list(next(os.walk(path), (None, [], []))[2])
    if not ls:
        print('Error, directory not found')
        sys.exit()

    # Exclude files with different names
    ls = [fname for fname in ls if fnmatch.fnmatch(fname, prefix + '*.ms.dat')]
    if not ls:
        raise Exception('No files matching ' + prefix + '*.ms.dat found in ' + path)
    if len(ls) > 1:
        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
    replica = len(ls)

    if 'r_start' in kwargs:
        r_start = kwargs.get('r_start')
        if len(r_start) != replica:
            raise Exception('r_start does not match number of replicas')
        # Adjust Configuration numbering to python index
        r_start = [o - 1 if o else None for o in r_start]
    else:
        r_start = [None] * replica

    if 'r_stop' in kwargs:
        r_stop = kwargs.get('r_stop')
        if len(r_stop) != replica:
            raise Exception('r_stop does not match number of replicas')
    else:
        r_stop = [None] * replica

    print('Extract t0 from', prefix, ',', replica, 'replica')

    use_plaquette = kwargs.get('plaquette')

    Ysum = []

    for rep in range(replica):

        with open(path + '/' + ls[rep], 'rb') as fp:
            # Read header
            t = fp.read(12)
            header = struct.unpack('iii', t)
            if rep == 0:
                dn = header[0]
                nn = header[1]
                tmax = header[2]
            elif dn != header[0] or nn != header[1] or tmax != header[2]:
                raise Exception('Replica parameters do not match.')

            t = fp.read(8)
            if rep == 0:
                eps = struct.unpack('d', t)[0]
                print('Step size:', eps, ', Maximal t value:', dn * (nn) * eps)
            elif eps != struct.unpack('d', t)[0]:
                raise Exception('Values for eps do not match among replica.')

            Ysl = []

            block_len = tmax * (nn + 1)  # number of doubles in one data block

            # Read body
            while True:
                t = fp.read(4)
                if len(t) < 4:
                    break
                nc = struct.unpack('i', t)[0]

                # Every measurement consists of three blocks. The first one is
                # used if plaquette is set, the second one otherwise and the
                # third one is skipped.
                first = fp.read(8 * block_len)
                second = fp.read(8 * block_len)
                fp.read(8 * block_len)
                if nc % dtr_read == 0:
                    Ysl.append(struct.unpack('d' * block_len, first if use_plaquette else second))

        # Average every block of tmax entries over the timeslices between xmin and tmax - xmin.
        Ysum.append([])
        for item in Ysl:
            Ysum[-1].append([np.mean(item[current + xmin:current + tmax - xmin]) for current in range(0, len(item), tmax)])

    rep_names = [(w.split('.'))[0] for w in ls]
    t2E_dict = {}
    for n in range(nn + 1):
        samples = []
        for nrep, rep in enumerate(Ysum):
            samples.append([cnfg[n] for cnfg in rep][r_start[nrep]:r_stop[nrep]])
        new_obs = Obs(samples, rep_names)
        t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs / (spatial_extent ** 3) - 0.3

    t_values = list(t2E_dict.keys())
    t2E_values = list(t2E_dict.values())

    positive = np.array([o.value for o in t2E_values]) > 0.0
    if not positive.any():
        # argmax would silently return 0 in this case.
        raise Exception('t^2<E> - 0.3 has no zero crossing in the available range of t.')
    zero_crossing = np.argmax(positive)

    # A negative start index would be counted from the end of the list.
    lower = max(0, zero_crossing - fit_range)
    x = t_values[lower: zero_crossing + fit_range]
    y = t2E_values[lower: zero_crossing + fit_range]
    for o in y:
        o.gamma_method()

    fit_result = fit_lin(x, y)
    return -fit_result[0] / fit_result[1]