pyerrors.obs
1import warnings 2import hashlib 3import pickle 4from math import gcd 5from functools import reduce 6import numpy as np 7import autograd.numpy as anp # Thinly-wrapped numpy 8from autograd import jacobian 9import matplotlib.pyplot as plt 10from scipy.stats import skew, skewtest, kurtosis, kurtosistest 11import numdifftools as nd 12from itertools import groupby 13from .covobs import Covobs 14 15# Improve print output of numpy.ndarrays containing Obs objects. 16np.set_printoptions(formatter={'object': lambda x: str(x)}) 17 18 19class Obs: 20 """Class for a general observable. 21 22 Instances of Obs are the basic objects of a pyerrors error analysis. 23 They are initialized with a list which contains arrays of samples for 24 different ensembles/replica and another list of same length which contains 25 the names of the ensembles/replica. Mathematical operations can be 26 performed on instances. The result is another instance of Obs. The error of 27 an instance can be computed with the gamma_method. Also contains additional 28 methods for output and visualization of the error calculation. 29 30 Attributes 31 ---------- 32 S_global : float 33 Standard value for S (default 2.0) 34 S_dict : dict 35 Dictionary for S values. If an entry for a given ensemble 36 exists this overwrites the standard value for that ensemble. 37 tau_exp_global : float 38 Standard value for tau_exp (default 0.0) 39 tau_exp_dict : dict 40 Dictionary for tau_exp values. If an entry for a given ensemble exists 41 this overwrites the standard value for that ensemble. 42 N_sigma_global : float 43 Standard value for N_sigma (default 1.0) 44 N_sigma_dict : dict 45 Dictionary for N_sigma values. If an entry for a given ensemble exists 46 this overwrites the standard value for that ensemble. 47 """ 48 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 49 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 50 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 51 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 52 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 53 54 S_global = 2.0 55 S_dict = {} 56 tau_exp_global = 0.0 57 tau_exp_dict = {} 58 N_sigma_global = 1.0 59 N_sigma_dict = {} 60 filter_eps = 1e-10 61 62 def __init__(self, samples, names, idl=None, **kwargs): 63 """ Initialize Obs object. 64 65 Parameters 66 ---------- 67 samples : list 68 list of numpy arrays containing the Monte Carlo samples 69 names : list 70 list of strings labeling the individual samples 71 idl : list, optional 72 list of ranges or lists on which the samples are defined 73 """ 74 75 if kwargs.get("means") is None and len(samples): 76 if len(samples) != len(names): 77 raise Exception('Length of samples and names incompatible.') 78 if idl is not None: 79 if len(idl) != len(names): 80 raise Exception('Length of idl incompatible with samples and names.') 81 name_length = len(names) 82 if name_length > 1: 83 if name_length != len(set(names)): 84 raise Exception('names are not unique.') 85 if not all(isinstance(x, str) for x in names): 86 raise TypeError('All names have to be strings.') 87 else: 88 if not isinstance(names[0], str): 89 raise TypeError('All names have to be strings.') 90 if min(len(x) for x in samples) <= 4: 91 raise Exception('Samples have to have at least 5 entries.') 92 93 self.names = sorted(names) 94 self.shape = {} 95 self.r_values = {} 96 self.deltas = {} 97 self._covobs = {} 98 99 self._value = 0 100 self.N = 0 101 self.is_merged = {} 102 self.idl = {} 103 if idl is not None: 104 for name, idx in sorted(zip(names, idl)): 105 if isinstance(idx, range): 106 self.idl[name] = idx 107 elif isinstance(idx, (list, np.ndarray)): 108 dc = np.unique(np.diff(idx)) 109 if np.any(dc < 0): 110 raise Exception("Unsorted idx for idl[%s]" % (name)) 111 if len(dc) == 1: 112 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 113 else: 114 self.idl[name] = list(idx) 115 else: 116 raise Exception('incompatible type for idl[%s].' % (name)) 117 else: 118 for name, sample in sorted(zip(names, samples)): 119 self.idl[name] = range(1, len(sample) + 1) 120 121 if kwargs.get("means") is not None: 122 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 123 self.shape[name] = len(self.idl[name]) 124 self.N += self.shape[name] 125 self.r_values[name] = mean 126 self.deltas[name] = sample 127 else: 128 for name, sample in sorted(zip(names, samples)): 129 self.shape[name] = len(self.idl[name]) 130 self.N += self.shape[name] 131 if len(sample) != self.shape[name]: 132 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 133 self.r_values[name] = np.mean(sample) 134 self.deltas[name] = sample - self.r_values[name] 135 self._value += self.shape[name] * self.r_values[name] 136 self._value /= self.N 137 138 self._dvalue = 0.0 139 self.ddvalue = 0.0 140 self.reweighted = False 141 142 self.tag = None 143 144 @property 145 def value(self): 146 return self._value 147 148 @property 149 def dvalue(self): 150 return self._dvalue 151 152 @property 153 def e_names(self): 154 return sorted(set([o.split('|')[0] for o in self.names])) 155 156 @property 157 def cov_names(self): 158 return sorted(set([o for o in self.covobs.keys()])) 159 160 @property 161 def mc_names(self): 162 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 163 164 @property 165 def e_content(self): 166 res = {} 167 for e, e_name in enumerate(self.e_names): 168 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 169 if e_name in self.names: 170 res[e_name].append(e_name) 171 return res 172 173 @property 174 def covobs(self): 175 return self._covobs 176 177 def gamma_method(self, **kwargs): 178 """Estimate the error and related properties of the Obs. 179 180 Parameters 181 ---------- 182 S : float 183 specifies a custom value for the parameter S (default 2.0). 184 If set to 0 it is assumed that the data exhibits no 185 autocorrelation. In this case the error estimates coincides 186 with the sample standard error. 187 tau_exp : float 188 positive value triggers the critical slowing down analysis 189 (default 0.0). 190 N_sigma : float 191 number of standard deviations from zero until the tail is 192 attached to the autocorrelation function (default 1). 193 fft : bool 194 determines whether the fft algorithm is used for the computation 195 of the autocorrelation function (default True) 196 """ 197 198 e_content = self.e_content 199 self.e_dvalue = {} 200 self.e_ddvalue = {} 201 self.e_tauint = {} 202 self.e_dtauint = {} 203 self.e_windowsize = {} 204 self.e_n_tauint = {} 205 self.e_n_dtauint = {} 206 e_gamma = {} 207 self.e_rho = {} 208 self.e_drho = {} 209 self._dvalue = 0 210 self.ddvalue = 0 211 212 self.S = {} 213 self.tau_exp = {} 214 self.N_sigma = {} 215 216 if kwargs.get('fft') is False: 217 fft = False 218 else: 219 fft = True 220 221 def _parse_kwarg(kwarg_name): 222 if kwarg_name in kwargs: 223 tmp = kwargs.get(kwarg_name) 224 if isinstance(tmp, (int, float)): 225 if tmp < 0: 226 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 227 for e, e_name in enumerate(self.e_names): 228 getattr(self, kwarg_name)[e_name] = tmp 229 else: 230 raise TypeError(kwarg_name + ' is not in proper format.') 231 else: 232 for e, e_name in enumerate(self.e_names): 233 if e_name in getattr(Obs, kwarg_name + '_dict'): 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 235 else: 236 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 237 238 _parse_kwarg('S') 239 _parse_kwarg('tau_exp') 240 _parse_kwarg('N_sigma') 241 242 for e, e_name in enumerate(self.mc_names): 243 r_length = [] 244 for r_name in e_content[e_name]: 245 if isinstance(self.idl[r_name], range): 246 r_length.append(len(self.idl[r_name])) 247 else: 248 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 249 250 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 251 w_max = max(r_length) // 2 252 e_gamma[e_name] = np.zeros(w_max) 253 self.e_rho[e_name] = np.zeros(w_max) 254 self.e_drho[e_name] = np.zeros(w_max) 255 256 for r_name in e_content[e_name]: 257 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 258 259 gamma_div = np.zeros(w_max) 260 for r_name in e_content[e_name]: 261 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 262 gamma_div[gamma_div < 1] = 1.0 263 e_gamma[e_name] /= gamma_div[:w_max] 264 265 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 266 self.e_tauint[e_name] = 0.5 267 self.e_dtauint[e_name] = 0.0 268 self.e_dvalue[e_name] = 0.0 269 self.e_ddvalue[e_name] = 0.0 270 self.e_windowsize[e_name] = 0 271 continue 272 273 gaps = [] 274 for r_name in e_content[e_name]: 275 if isinstance(self.idl[r_name], range): 276 gaps.append(1) 277 else: 278 gaps.append(np.min(np.diff(self.idl[r_name]))) 279 280 if not np.all([gi == gaps[0] for gi in gaps]): 281 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 282 else: 283 gapsize = gaps[0] 284 285 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 286 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 287 # Make sure no entry of tauint is smaller than 0.5 288 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 289 # hep-lat/0306017 eq. (42) 290 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 291 self.e_n_dtauint[e_name][0] = 0.0 292 293 def _compute_drho(i): 294 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 295 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 296 297 _compute_drho(gapsize) 298 if self.tau_exp[e_name] > 0: 299 texp = self.tau_exp[e_name] 300 # Critical slowing down analysis 301 if w_max // 2 <= 1: 302 raise Exception("Need at least 8 samples for tau_exp error analysis") 303 for n in range(gapsize, w_max // 2, gapsize): 304 _compute_drho(n + gapsize) 305 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 306 # Bias correction hep-lat/0306017 eq. (49) included 307 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 308 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 309 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 310 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 311 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 312 self.e_windowsize[e_name] = n 313 break 314 else: 315 if self.S[e_name] == 0.0: 316 self.e_tauint[e_name] = 0.5 317 self.e_dtauint[e_name] = 0.0 318 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 319 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 320 self.e_windowsize[e_name] = 0 321 else: 322 # Standard automatic windowing procedure 323 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 324 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 325 for n in range(1, w_max): 326 if n < w_max // 2 - 2: 327 _compute_drho(gapsize * n + gapsize) 328 if g_w[n - 1] < 0 or n >= w_max - 1: 329 n *= gapsize 330 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 331 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 332 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 333 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 334 self.e_windowsize[e_name] = n 335 break 336 337 self._dvalue += self.e_dvalue[e_name] ** 2 338 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 339 340 for e_name in self.cov_names: 341 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 342 self.e_ddvalue[e_name] = 0 343 self._dvalue += self.e_dvalue[e_name]**2 344 345 self._dvalue = np.sqrt(self._dvalue) 346 if self._dvalue == 0.0: 347 self.ddvalue = 0.0 348 else: 349 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 350 return 351 352 gm = gamma_method 353 354 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 355 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 356 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 357 358 Parameters 359 ---------- 360 deltas : list 361 List of fluctuations 362 idx : list 363 List or range of configurations on which the deltas are defined. 364 shape : int 365 Number of configurations in idx. 366 w_max : int 367 Upper bound for the summation window. 368 fft : bool 369 determines whether the fft algorithm is used for the computation 370 of the autocorrelation function. 371 """ 372 gamma = np.zeros(w_max) 373 deltas = _expand_deltas(deltas, idx, shape) 374 new_shape = len(deltas) 375 if fft: 376 max_gamma = min(new_shape, w_max) 377 # The padding for the fft has to be even 378 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 379 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 380 else: 381 for n in range(w_max): 382 if new_shape - n >= 0: 383 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 384 385 return gamma 386 387 def details(self, ens_content=True): 388 """Output detailed properties of the Obs. 389 390 Parameters 391 ---------- 392 ens_content : bool 393 print details about the ensembles and replica if true. 394 """ 395 if self.tag is not None: 396 print("Description:", self.tag) 397 if not hasattr(self, 'e_dvalue'): 398 print('Result\t %3.8e' % (self.value)) 399 else: 400 if self.value == 0.0: 401 percentage = np.nan 402 else: 403 percentage = np.abs(self._dvalue / self.value) * 100 404 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 405 if len(self.e_names) > 1: 406 print(' Ensemble errors:') 407 e_content = self.e_content 408 for e_name in self.mc_names: 409 if isinstance(self.idl[e_content[e_name][0]], range): 410 gap = self.idl[e_content[e_name][0]].step 411 else: 412 gap = np.min(np.diff(self.idl[e_content[e_name][0]])) 413 414 if len(self.e_names) > 1: 415 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 416 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 417 tau_string += f" in units of {gap} config" 418 if gap > 1: 419 tau_string += "s" 420 if self.tau_exp[e_name] > 0: 421 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 422 else: 423 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 424 print(tau_string) 425 for e_name in self.cov_names: 426 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 427 if ens_content is True: 428 if len(self.e_names) == 1: 429 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 430 else: 431 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 432 my_string_list = [] 433 for key, value in sorted(self.e_content.items()): 434 if key not in self.covobs: 435 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 436 if len(value) == 1: 437 my_string += f': {self.shape[value[0]]} configurations' 438 if isinstance(self.idl[value[0]], range): 439 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 440 else: 441 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 442 else: 443 sublist = [] 444 for v in value: 445 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 446 my_substring += f': {self.shape[v]} configurations' 447 if isinstance(self.idl[v], range): 448 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 449 else: 450 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 451 sublist.append(my_substring) 452 453 my_string += '\n' + '\n'.join(sublist) 454 else: 455 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 456 my_string_list.append(my_string) 457 print('\n'.join(my_string_list)) 458 459 def reweight(self, weight): 460 """Reweight the obs with given rewighting factors. 461 462 Parameters 463 ---------- 464 weight : Obs 465 Reweighting factor. An Observable that has to be defined on a superset of the 466 configurations in obs[i].idl for all i. 467 all_configs : bool 468 if True, the reweighted observables are normalized by the average of 469 the reweighting factor on all configurations in weight.idl and not 470 on the configurations in obs[i].idl. Default False. 471 """ 472 return reweight(weight, [self])[0] 473 474 def is_zero_within_error(self, sigma=1): 475 """Checks whether the observable is zero within 'sigma' standard errors. 476 477 Parameters 478 ---------- 479 sigma : int 480 Number of standard errors used for the check. 481 482 Works only properly when the gamma method was run. 483 """ 484 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 485 486 def is_zero(self, atol=1e-10): 487 """Checks whether the observable is zero within a given tolerance. 488 489 Parameters 490 ---------- 491 atol : float 492 Absolute tolerance (for details see numpy documentation). 493 """ 494 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 495 496 def plot_tauint(self, save=None): 497 """Plot integrated autocorrelation time for each ensemble. 498 499 Parameters 500 ---------- 501 save : str 502 saves the figure to a file named 'save' if. 503 """ 504 if not hasattr(self, 'e_dvalue'): 505 raise Exception('Run the gamma method first.') 506 507 for e, e_name in enumerate(self.mc_names): 508 fig = plt.figure() 509 plt.xlabel(r'$W$') 510 plt.ylabel(r'$\tau_\mathrm{int}$') 511 length = int(len(self.e_n_tauint[e_name])) 512 if self.tau_exp[e_name] > 0: 513 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 514 x_help = np.arange(2 * self.tau_exp[e_name]) 515 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 516 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 517 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 518 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 519 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 520 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 521 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 522 else: 523 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 524 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 525 526 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 527 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 528 plt.legend() 529 plt.xlim(-0.5, xmax) 530 ylim = plt.ylim() 531 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 532 plt.draw() 533 if save: 534 fig.savefig(save + "_" + str(e)) 535 536 def plot_rho(self, save=None): 537 """Plot normalized autocorrelation function time for each ensemble. 538 539 Parameters 540 ---------- 541 save : str 542 saves the figure to a file named 'save' if. 543 """ 544 if not hasattr(self, 'e_dvalue'): 545 raise Exception('Run the gamma method first.') 546 for e, e_name in enumerate(self.mc_names): 547 fig = plt.figure() 548 plt.xlabel('W') 549 plt.ylabel('rho') 550 length = int(len(self.e_drho[e_name])) 551 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 552 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 553 if self.tau_exp[e_name] > 0: 554 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 555 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 556 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 557 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 558 else: 559 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 560 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 561 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 562 plt.xlim(-0.5, xmax) 563 plt.draw() 564 if save: 565 fig.savefig(save + "_" + str(e)) 566 567 def plot_rep_dist(self): 568 """Plot replica distribution for each ensemble with more than one replicum.""" 569 if not hasattr(self, 'e_dvalue'): 570 raise Exception('Run the gamma method first.') 571 for e, e_name in enumerate(self.mc_names): 572 if len(self.e_content[e_name]) == 1: 573 print('No replica distribution for a single replicum (', e_name, ')') 574 continue 575 r_length = [] 576 sub_r_mean = 0 577 for r, r_name in enumerate(self.e_content[e_name]): 578 r_length.append(len(self.deltas[r_name])) 579 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 580 e_N = np.sum(r_length) 581 sub_r_mean /= e_N 582 arr = np.zeros(len(self.e_content[e_name])) 583 for r, r_name in enumerate(self.e_content[e_name]): 584 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 585 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 586 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 587 plt.draw() 588 589 def plot_history(self, expand=True): 590 """Plot derived Monte Carlo history for each ensemble 591 592 Parameters 593 ---------- 594 expand : bool 595 show expanded history for irregular Monte Carlo chains (default: True). 596 """ 597 for e, e_name in enumerate(self.mc_names): 598 plt.figure() 599 r_length = [] 600 tmp = [] 601 tmp_expanded = [] 602 for r, r_name in enumerate(self.e_content[e_name]): 603 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 604 if expand: 605 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 606 r_length.append(len(tmp_expanded[-1])) 607 else: 608 r_length.append(len(tmp[-1])) 609 e_N = np.sum(r_length) 610 x = np.arange(e_N) 611 y_test = np.concatenate(tmp, axis=0) 612 if expand: 613 y = np.concatenate(tmp_expanded, axis=0) 614 else: 615 y = y_test 616 plt.errorbar(x, y, fmt='.', markersize=3) 617 plt.xlim(-0.5, e_N - 0.5) 618 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 619 plt.draw() 620 621 def plot_piechart(self, save=None): 622 """Plot piechart which shows the fractional contribution of each 623 ensemble to the error and returns a dictionary containing the fractions. 624 625 Parameters 626 ---------- 627 save : str 628 saves the figure to a file named 'save' if. 629 """ 630 if not hasattr(self, 'e_dvalue'): 631 raise Exception('Run the gamma method first.') 632 if np.isclose(0.0, self._dvalue, atol=1e-15): 633 raise Exception('Error is 0.0') 634 labels = self.e_names 635 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 636 fig1, ax1 = plt.subplots() 637 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 638 ax1.axis('equal') 639 plt.draw() 640 if save: 641 fig1.savefig(save) 642 643 return dict(zip(self.e_names, sizes)) 644 645 def dump(self, filename, datatype="json.gz", description="", **kwargs): 646 """Dump the Obs to a file 'name' of chosen format. 647 648 Parameters 649 ---------- 650 filename : str 651 name of the file to be saved. 652 datatype : str 653 Format of the exported file. Supported formats include 654 "json.gz" and "pickle" 655 description : str 656 Description for output file, only relevant for json.gz format. 657 path : str 658 specifies a custom path for the file (default '.') 659 """ 660 if 'path' in kwargs: 661 file_name = kwargs.get('path') + '/' + filename 662 else: 663 file_name = filename 664 665 if datatype == "json.gz": 666 from .input.json import dump_to_json 667 dump_to_json([self], file_name, description=description) 668 elif datatype == "pickle": 669 with open(file_name + '.p', 'wb') as fb: 670 pickle.dump(self, fb) 671 else: 672 raise Exception("Unknown datatype " + str(datatype)) 673 674 def export_jackknife(self): 675 """Export jackknife samples from the Obs 676 677 Returns 678 ------- 679 numpy.ndarray 680 Returns a numpy array of length N + 1 where N is the number of samples 681 for the given ensemble and replicum. The zeroth entry of the array contains 682 the mean value of the Obs, entries 1 to N contain the N jackknife samples 683 derived from the Obs. The current implementation only works for observables 684 defined on exactly one ensemble and replicum. The derived jackknife samples 685 should agree with samples from a full jackknife analysis up to O(1/N). 686 """ 687 688 if len(self.names) != 1: 689 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 690 691 name = self.names[0] 692 full_data = self.deltas[name] + self.r_values[name] 693 n = full_data.size 694 mean = self.value 695 tmp_jacks = np.zeros(n + 1) 696 tmp_jacks[0] = mean 697 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 698 return tmp_jacks 699 700 def __float__(self): 701 return float(self.value) 702 703 def __repr__(self): 704 return 'Obs[' + str(self) + ']' 705 706 def __str__(self): 707 return _format_uncertainty(self.value, self._dvalue) 708 709 def __hash__(self): 710 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 711 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 712 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 713 hash_tuple += tuple([o.encode() for o in self.names]) 714 m = hashlib.md5() 715 [m.update(o) for o in hash_tuple] 716 return int(m.hexdigest(), 16) & 0xFFFFFFFF 717 718 # Overload comparisons 719 def __lt__(self, other): 720 return self.value < other 721 722 def __le__(self, other): 723 return self.value <= other 724 725 def __gt__(self, other): 726 return self.value > other 727 728 def __ge__(self, other): 729 return self.value >= other 730 731 def __eq__(self, other): 732 return (self - other).is_zero() 733 734 def __ne__(self, other): 735 return not (self - other).is_zero() 736 737 # Overload math operations 738 def __add__(self, y): 739 if isinstance(y, Obs): 740 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 741 else: 742 if isinstance(y, np.ndarray): 743 return np.array([self + o for o in y]) 744 elif y.__class__.__name__ in ['Corr', 'CObs']: 745 return NotImplemented 746 else: 747 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 748 749 def __radd__(self, y): 750 return self + y 751 752 def __mul__(self, y): 753 if isinstance(y, Obs): 754 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 755 else: 756 if isinstance(y, np.ndarray): 757 return np.array([self * o for o in y]) 758 elif isinstance(y, complex): 759 return CObs(self * y.real, self * y.imag) 760 elif y.__class__.__name__ in ['Corr', 'CObs']: 761 return NotImplemented 762 else: 763 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 764 765 def __rmul__(self, y): 766 return self * y 767 768 def __sub__(self, y): 769 if isinstance(y, Obs): 770 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 771 else: 772 if isinstance(y, np.ndarray): 773 return np.array([self - o for o in y]) 774 elif y.__class__.__name__ in ['Corr', 'CObs']: 775 return NotImplemented 776 else: 777 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 778 779 def __rsub__(self, y): 780 return -1 * (self - y) 781 782 def __pos__(self): 783 return self 784 785 def __neg__(self): 786 return -1 * self 787 788 def __truediv__(self, y): 789 if isinstance(y, Obs): 790 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 791 else: 792 if isinstance(y, np.ndarray): 793 return np.array([self / o for o in y]) 794 elif y.__class__.__name__ in ['Corr', 'CObs']: 795 return NotImplemented 796 else: 797 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 798 799 def __rtruediv__(self, y): 800 if isinstance(y, Obs): 801 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 802 else: 803 if isinstance(y, np.ndarray): 804 return np.array([o / self for o in y]) 805 elif y.__class__.__name__ in ['Corr', 'CObs']: 806 return NotImplemented 807 else: 808 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 809 810 def __pow__(self, y): 811 if isinstance(y, Obs): 812 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 813 else: 814 return derived_observable(lambda x: x[0] ** y, [self]) 815 816 def __rpow__(self, y): 817 if isinstance(y, Obs): 818 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 819 else: 820 return derived_observable(lambda x: y ** x[0], [self]) 821 822 def __abs__(self): 823 return derived_observable(lambda x: anp.abs(x[0]), [self]) 824 825 # Overload numpy functions 826 def sqrt(self): 827 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 828 829 def log(self): 830 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 831 832 def exp(self): 833 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 834 835 def sin(self): 836 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 837 838 def cos(self): 839 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 840 841 def tan(self): 842 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 843 844 def arcsin(self): 845 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 846 847 def arccos(self): 848 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 849 850 def arctan(self): 851 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 852 853 def sinh(self): 854 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 855 856 def cosh(self): 857 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 858 859 def tanh(self): 860 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 861 862 def arcsinh(self): 863 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 864 865 def arccosh(self): 866 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 867 868 def arctanh(self): 869 return derived_observable(lambda x: anp.arctanh(x[0]), [self]) 870 871 872class CObs: 873 """Class for a complex valued observable.""" 874 __slots__ = ['_real', '_imag', 'tag'] 875 876 def __init__(self, real, imag=0.0): 877 self._real = real 878 self._imag = imag 879 self.tag = None 880 881 @property 882 def real(self): 883 return self._real 884 885 @property 886 def imag(self): 887 return self._imag 888 889 def gamma_method(self, **kwargs): 890 """Executes the gamma_method for the real and the imaginary part.""" 891 if isinstance(self.real, Obs): 892 self.real.gamma_method(**kwargs) 893 if isinstance(self.imag, Obs): 894 self.imag.gamma_method(**kwargs) 895 896 def is_zero(self): 897 """Checks whether both real and imaginary part are zero within machine precision.""" 898 return self.real == 0.0 and self.imag == 0.0 899 900 def conjugate(self): 901 return CObs(self.real, -self.imag) 902 903 def __add__(self, other): 904 if isinstance(other, np.ndarray): 905 return other + self 906 elif hasattr(other, 'real') and hasattr(other, 'imag'): 907 return CObs(self.real + other.real, 908 self.imag + other.imag) 909 else: 910 return CObs(self.real + other, self.imag) 911 912 def __radd__(self, y): 913 return self + y 914 915 def __sub__(self, other): 916 if isinstance(other, np.ndarray): 917 return -1 * (other - self) 918 elif hasattr(other, 'real') and hasattr(other, 'imag'): 919 return CObs(self.real - other.real, self.imag - other.imag) 920 else: 921 return CObs(self.real - other, self.imag) 922 923 def __rsub__(self, other): 924 return -1 * (self - other) 925 926 def __mul__(self, other): 927 if isinstance(other, np.ndarray): 928 return other * self 929 elif hasattr(other, 'real') and hasattr(other, 'imag'): 930 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 931 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 932 [self.real, other.real, self.imag, other.imag], 933 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 934 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 935 [self.real, other.real, self.imag, other.imag], 936 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 937 elif getattr(other, 'imag', 0) != 0: 938 return CObs(self.real * other.real - self.imag * other.imag, 939 self.imag * other.real + self.real * other.imag) 940 else: 941 return CObs(self.real * other.real, self.imag * other.real) 942 else: 943 return CObs(self.real * other, self.imag * other) 944 945 def __rmul__(self, other): 946 return self * other 947 948 def __truediv__(self, other): 949 if isinstance(other, np.ndarray): 950 return 1 / (other / self) 951 elif hasattr(other, 'real') and hasattr(other, 'imag'): 952 r = other.real ** 2 + other.imag ** 2 953 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 954 else: 955 return CObs(self.real / other, self.imag / other) 956 957 def __rtruediv__(self, other): 958 r = self.real ** 2 + self.imag ** 2 959 if hasattr(other, 'real') and hasattr(other, 'imag'): 960 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 961 else: 962 return CObs(self.real * other / r, -self.imag * other / r) 963 964 def __abs__(self): 965 return np.sqrt(self.real**2 + self.imag**2) 966 967 def __pos__(self): 968 return self 969 970 def __neg__(self): 971 return -1 * self 972 973 def __eq__(self, other): 974 return self.real == other.real and self.imag == other.imag 975 976 def __str__(self): 977 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 978 979 def __repr__(self): 980 return 'CObs[' + str(self) + ']' 981 982 983def _format_uncertainty(value, dvalue): 984 """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)""" 985 if dvalue == 0.0: 986 return str(value) 987 fexp = np.floor(np.log10(dvalue)) 988 if fexp < 0.0: 989 return '{:{form}}({:2.0f})'.format(value, dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 990 elif fexp == 0.0: 991 return '{:.1f}({:1.1f})'.format(value, dvalue) 992 else: 993 return '{:.0f}({:2.0f})'.format(value, dvalue) 994 995 996def _expand_deltas(deltas, idx, shape): 997 """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0. 998 If idx is of type range, the deltas are not changed 999 1000 Parameters 1001 ---------- 1002 deltas : list 1003 List of fluctuations 1004 idx : list 1005 List or range of configs on which the deltas are defined, has to be sorted in ascending order. 1006 shape : int 1007 Number of configs in idx. 1008 """ 1009 if isinstance(idx, range): 1010 return deltas 1011 else: 1012 ret = np.zeros(idx[-1] - idx[0] + 1) 1013 for i in range(shape): 1014 ret[idx[i] - idx[0]] = deltas[i] 1015 return ret 1016 1017 1018def _merge_idx(idl): 1019 """Returns the union of all lists in idl as sorted list 1020 1021 Parameters 1022 ---------- 1023 idl : list 1024 List of lists or ranges. 1025 """ 1026 1027 # Use groupby to efficiently check whether all elements of idl are identical 1028 try: 1029 g = groupby(idl) 1030 if next(g, True) and not next(g, False): 1031 return idl[0] 1032 except Exception: 1033 pass 1034 1035 if np.all([type(idx) is range for idx in idl]): 1036 if len(set([idx[0] for idx in idl])) == 1: 1037 idstart = min([idx.start for idx in idl]) 1038 idstop = max([idx.stop for idx in idl]) 1039 idstep = min([idx.step for idx in idl]) 1040 return range(idstart, idstop, idstep) 1041 1042 return sorted(set().union(*idl)) 1043 1044 1045def _intersection_idx(idl): 1046 """Returns the intersection of all lists in idl as sorted list 1047 1048 Parameters 1049 ---------- 1050 idl : list 1051 List of lists or ranges. 1052 """ 1053 1054 def _lcm(*args): 1055 """Returns the lowest common multiple of args. 1056 1057 From python 3.9 onwards the math library contains an lcm function.""" 1058 return reduce(lambda a, b: a * b // gcd(a, b), args) 1059 1060 # Use groupby to efficiently check whether all elements of idl are identical 1061 try: 1062 g = groupby(idl) 1063 if next(g, True) and not next(g, False): 1064 return idl[0] 1065 except Exception: 1066 pass 1067 1068 if np.all([type(idx) is range for idx in idl]): 1069 if len(set([idx[0] for idx in idl])) == 1: 1070 idstart = max([idx.start for idx in idl]) 1071 idstop = min([idx.stop for idx in idl]) 1072 idstep = _lcm(*[idx.step for idx in idl]) 1073 return range(idstart, idstop, idstep) 1074 1075 return sorted(set.intersection(*[set(o) for o in idl])) 1076 1077 1078def _expand_deltas_for_merge(deltas, idx, shape, new_idx): 1079 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 1080 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 1081 common divisor of the step sizes is used as new step size. 1082 1083 Parameters 1084 ---------- 1085 deltas : list 1086 List of fluctuations 1087 idx : list 1088 List or range of configs on which the deltas are defined. 1089 Has to be a subset of new_idx and has to be sorted in ascending order. 1090 shape : list 1091 Number of configs in idx. 1092 new_idx : list 1093 List of configs that defines the new range, has to be sorted in ascending order. 1094 """ 1095 1096 if type(idx) is range and type(new_idx) is range: 1097 if idx == new_idx: 1098 return deltas 1099 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1100 for i in range(shape): 1101 ret[idx[i] - new_idx[0]] = deltas[i] 1102 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1103 1104 1105def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): 1106 """Filter out all configurations with vanishing fluctuation such that they do not 1107 contribute to the error estimate anymore. Returns the new deltas and 1108 idx according to the filtering. 1109 A fluctuation is considered to be vanishing, if it is smaller than eps times 1110 the mean of the absolute values of all deltas in one list. 1111 1112 Parameters 1113 ---------- 1114 deltas : list 1115 List of fluctuations 1116 idx : list 1117 List or ranges of configs on which the deltas are defined. 1118 eps : float 1119 Prefactor that enters the filter criterion. 1120 """ 1121 new_deltas = [] 1122 new_idx = [] 1123 maxd = np.mean(np.fabs(deltas)) 1124 for i in range(len(deltas)): 1125 if abs(deltas[i]) > eps * maxd: 1126 new_deltas.append(deltas[i]) 1127 new_idx.append(idx[i]) 1128 if new_idx: 1129 return np.array(new_deltas), new_idx 1130 else: 1131 return deltas, idx 1132 1133 1134def derived_observable(func, data, array_mode=False, **kwargs): 1135 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1136 1137 Parameters 1138 ---------- 1139 func : object 1140 arbitrary function of the form func(data, **kwargs). For the 1141 automatic differentiation to work, all numpy functions have to have 1142 the autograd wrapper (use 'import autograd.numpy as anp'). 1143 data : list 1144 list of Obs, e.g. [obs1, obs2, obs3]. 1145 num_grad : bool 1146 if True, numerical derivatives are used instead of autograd 1147 (default False). To control the numerical differentiation the 1148 kwargs of numdifftools.step_generators.MaxStepGenerator 1149 can be used. 1150 man_grad : list 1151 manually supply a list or an array which contains the jacobian 1152 of func. Use cautiously, supplying the wrong derivative will 1153 not be intercepted. 1154 1155 Notes 1156 ----- 1157 For simple mathematical operations it can be practical to use anonymous 1158 functions. For the ratio of two observables one can e.g. use 1159 1160 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1161 """ 1162 1163 data = np.asarray(data) 1164 raveled_data = data.ravel() 1165 1166 # Workaround for matrix operations containing non Obs data 1167 if not all(isinstance(x, Obs) for x in raveled_data): 1168 for i in range(len(raveled_data)): 1169 if isinstance(raveled_data[i], (int, float)): 1170 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1171 1172 allcov = {} 1173 for o in raveled_data: 1174 for name in o.cov_names: 1175 if name in allcov: 1176 if not np.allclose(allcov[name], o.covobs[name].cov): 1177 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1178 else: 1179 allcov[name] = o.covobs[name].cov 1180 1181 n_obs = len(raveled_data) 1182 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1183 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1184 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1185 1186 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1187 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1188 1189 if data.ndim == 1: 1190 values = np.array([o.value for o in data]) 1191 else: 1192 values = np.vectorize(lambda x: x.value)(data) 1193 1194 new_values = func(values, **kwargs) 1195 1196 multi = int(isinstance(new_values, np.ndarray)) 1197 1198 new_r_values = {} 1199 new_idl_d = {} 1200 for name in new_sample_names: 1201 idl = [] 1202 tmp_values = np.zeros(n_obs) 1203 for i, item in enumerate(raveled_data): 1204 tmp_values[i] = item.r_values.get(name, item.value) 1205 tmp_idl = item.idl.get(name) 1206 if tmp_idl is not None: 1207 idl.append(tmp_idl) 1208 if multi > 0: 1209 tmp_values = np.array(tmp_values).reshape(data.shape) 1210 new_r_values[name] = func(tmp_values, **kwargs) 1211 new_idl_d[name] = _merge_idx(idl) 1212 if not is_merged[name]: 1213 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1214 1215 if 'man_grad' in kwargs: 1216 deriv = np.asarray(kwargs.get('man_grad')) 1217 if new_values.shape + data.shape != deriv.shape: 1218 raise Exception('Manual derivative does not have correct shape.') 1219 elif kwargs.get('num_grad') is True: 1220 if multi > 0: 1221 raise Exception('Multi mode currently not supported for numerical derivative') 1222 options = { 1223 'base_step': 0.1, 1224 'step_ratio': 2.5} 1225 for key in options.keys(): 1226 kwarg = kwargs.get(key) 1227 if kwarg is not None: 1228 options[key] = kwarg 1229 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1230 if tmp_df.size == 1: 1231 deriv = np.array([tmp_df.real]) 1232 else: 1233 deriv = tmp_df.real 1234 else: 1235 deriv = jacobian(func)(values, **kwargs) 1236 1237 final_result = np.zeros(new_values.shape, dtype=object) 1238 1239 if array_mode is True: 1240 1241 class _Zero_grad(): 1242 def __init__(self, N): 1243 self.grad = np.zeros((N, 1)) 1244 1245 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1246 d_extracted = {} 1247 g_extracted = {} 1248 for name in new_sample_names: 1249 d_extracted[name] = [] 1250 ens_length = len(new_idl_d[name]) 1251 for i_dat, dat in enumerate(data): 1252 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1253 for name in new_cov_names: 1254 g_extracted[name] = [] 1255 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1256 for i_dat, dat in enumerate(data): 1257 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1258 1259 for i_val, new_val in np.ndenumerate(new_values): 1260 new_deltas = {} 1261 new_grad = {} 1262 if array_mode is True: 1263 for name in new_sample_names: 1264 ens_length = d_extracted[name][0].shape[-1] 1265 new_deltas[name] = np.zeros(ens_length) 1266 for i_dat, dat in enumerate(d_extracted[name]): 1267 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1268 for name in new_cov_names: 1269 new_grad[name] = 0 1270 for i_dat, dat in enumerate(g_extracted[name]): 1271 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1272 else: 1273 for j_obs, obs in np.ndenumerate(data): 1274 for name in obs.names: 1275 if name in obs.cov_names: 1276 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1277 else: 1278 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1279 1280 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1281 1282 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1283 raise Exception('The same name has been used for deltas and covobs!') 1284 new_samples = [] 1285 new_means = [] 1286 new_idl = [] 1287 new_names_obs = [] 1288 for name in new_names: 1289 if name not in new_covobs: 1290 if is_merged[name]: 1291 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1292 else: 1293 filtered_deltas = new_deltas[name] 1294 filtered_idl_d = new_idl_d[name] 1295 1296 new_samples.append(filtered_deltas) 1297 new_idl.append(filtered_idl_d) 1298 new_means.append(new_r_values[name][i_val]) 1299 new_names_obs.append(name) 1300 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1301 for name in new_covobs: 1302 final_result[i_val].names.append(name) 1303 final_result[i_val]._covobs = new_covobs 1304 final_result[i_val]._value = new_val 1305 final_result[i_val].is_merged = is_merged 1306 final_result[i_val].reweighted = reweighted 1307 1308 if multi == 0: 1309 final_result = final_result.item() 1310 1311 return final_result 1312 1313 1314def _reduce_deltas(deltas, idx_old, idx_new): 1315 """Extract deltas defined on idx_old on all configs of idx_new. 1316 1317 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they 1318 are ordered in an ascending order. 1319 1320 Parameters 1321 ---------- 1322 deltas : list 1323 List of fluctuations 1324 idx_old : list 1325 List or range of configs on which the deltas are defined 1326 idx_new : list 1327 List of configs for which we want to extract the deltas. 1328 Has to be a subset of idx_old. 1329 """ 1330 if not len(deltas) == len(idx_old): 1331 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) 1332 if type(idx_old) is range and type(idx_new) is range: 1333 if idx_old == idx_new: 1334 return deltas 1335 # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical 1336 try: 1337 g = groupby([idx_old, idx_new]) 1338 if next(g, True) and not next(g, False): 1339 return deltas 1340 except Exception: 1341 pass 1342 indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1] 1343 if len(indices) < len(idx_new): 1344 raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old') 1345 return np.array(deltas)[indices] 1346 1347 1348def reweight(weight, obs, **kwargs): 1349 """Reweight a list of observables. 1350 1351 Parameters 1352 ---------- 1353 weight : Obs 1354 Reweighting factor. An Observable that has to be defined on a superset of the 1355 configurations in obs[i].idl for all i. 1356 obs : list 1357 list of Obs, e.g. [obs1, obs2, obs3]. 1358 all_configs : bool 1359 if True, the reweighted observables are normalized by the average of 1360 the reweighting factor on all configurations in weight.idl and not 1361 on the configurations in obs[i].idl. Default False. 1362 """ 1363 result = [] 1364 for i in range(len(obs)): 1365 if len(obs[i].cov_names): 1366 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1367 if not set(obs[i].names).issubset(weight.names): 1368 raise Exception('Error: Ensembles do not fit') 1369 for name in obs[i].names: 1370 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1371 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1372 new_samples = [] 1373 w_deltas = {} 1374 for name in sorted(obs[i].names): 1375 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1376 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1377 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1378 1379 if kwargs.get('all_configs'): 1380 new_weight = weight 1381 else: 1382 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1383 1384 result.append(tmp_obs / new_weight) 1385 result[-1].reweighted = True 1386 result[-1].is_merged = obs[i].is_merged 1387 1388 return result 1389 1390 1391def correlate(obs_a, obs_b): 1392 """Correlate two observables. 1393 1394 Parameters 1395 ---------- 1396 obs_a : Obs 1397 First observable 1398 obs_b : Obs 1399 Second observable 1400 1401 Notes 1402 ----- 1403 Keep in mind to only correlate primary observables which have not been reweighted 1404 yet. The reweighting has to be applied after correlating the observables. 1405 Currently only works if ensembles are identical (this is not strictly necessary). 1406 """ 1407 1408 if sorted(obs_a.names) != sorted(obs_b.names): 1409 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") 1410 if len(obs_a.cov_names) or len(obs_b.cov_names): 1411 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1412 for name in obs_a.names: 1413 if obs_a.shape[name] != obs_b.shape[name]: 1414 raise Exception('Shapes of ensemble', name, 'do not fit') 1415 if obs_a.idl[name] != obs_b.idl[name]: 1416 raise Exception('idl of ensemble', name, 'do not fit') 1417 1418 if obs_a.reweighted is True: 1419 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1420 if obs_b.reweighted is True: 1421 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1422 1423 new_samples = [] 1424 new_idl = [] 1425 for name in sorted(obs_a.names): 1426 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1427 new_idl.append(obs_a.idl[name]) 1428 1429 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1430 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1431 o.reweighted = obs_a.reweighted or obs_b.reweighted 1432 return o 1433 1434 1435def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1436 r'''Calculates the error covariance matrix of a set of observables. 1437 1438 WARNING: This function should be used with care, especially for observables with support on multiple 1439 ensembles with differing autocorrelations. See the notes below for details. 1440 1441 The gamma method has to be applied first to all observables. 1442 1443 Parameters 1444 ---------- 1445 obs : list or numpy.ndarray 1446 List or one dimensional array of Obs 1447 visualize : bool 1448 If True plots the corresponding normalized correlation matrix (default False). 1449 correlation : bool 1450 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1451 smooth : None or int 1452 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1453 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1454 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1455 small ones. 1456 1457 Notes 1458 ----- 1459 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1460 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1461 in the absence of autocorrelation. 1462 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1463 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1464 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1465 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1466 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1467 ''' 1468 1469 length = len(obs) 1470 1471 max_samples = np.max([o.N for o in obs]) 1472 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1473 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1474 1475 cov = np.zeros((length, length)) 1476 for i in range(length): 1477 for j in range(i, length): 1478 cov[i, j] = _covariance_element(obs[i], obs[j]) 1479 cov = cov + cov.T - np.diag(np.diag(cov)) 1480 1481 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1482 1483 if isinstance(smooth, int): 1484 corr = _smooth_eigenvalues(corr, smooth) 1485 1486 if visualize: 1487 plt.matshow(corr, vmin=-1, vmax=1) 1488 plt.set_cmap('RdBu') 1489 plt.colorbar() 1490 plt.draw() 1491 1492 if correlation is True: 1493 return corr 1494 1495 errors = [o.dvalue for o in obs] 1496 cov = np.diag(errors) @ corr @ np.diag(errors) 1497 1498 eigenvalues = np.linalg.eigh(cov)[0] 1499 if not np.all(eigenvalues >= 0): 1500 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1501 1502 return cov 1503 1504 1505def _smooth_eigenvalues(corr, E): 1506 """Eigenvalue smoothing as described in hep-lat/9412087 1507 1508 corr : np.ndarray 1509 correlation matrix 1510 E : integer 1511 Number of eigenvalues to be left substantially unchanged 1512 """ 1513 if not (2 < E < corr.shape[0] - 1): 1514 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") 1515 vals, vec = np.linalg.eigh(corr) 1516 lambda_min = np.mean(vals[:-E]) 1517 vals[vals < lambda_min] = lambda_min 1518 vals /= np.mean(vals) 1519 return vec @ np.diag(vals) @ vec.T 1520 1521 1522def _covariance_element(obs1, obs2): 1523 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" 1524 1525 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): 1526 deltas1 = _reduce_deltas(deltas1, idx1, new_idx) 1527 deltas2 = _reduce_deltas(deltas2, idx2, new_idx) 1528 return np.sum(deltas1 * deltas2) 1529 1530 if set(obs1.names).isdisjoint(set(obs2.names)): 1531 return 0.0 1532 1533 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): 1534 raise Exception('The gamma method has to be applied to both Obs first.') 1535 1536 dvalue = 0.0 1537 1538 for e_name in obs1.mc_names: 1539 1540 if e_name not in obs2.mc_names: 1541 continue 1542 1543 idl_d = {} 1544 for r_name in obs1.e_content[e_name]: 1545 if r_name not in obs2.e_content[e_name]: 1546 continue 1547 idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) 1548 1549 gamma = 0.0 1550 1551 for r_name in obs1.e_content[e_name]: 1552 if r_name not in obs2.e_content[e_name]: 1553 continue 1554 if len(idl_d[r_name]) == 0: 1555 continue 1556 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) 1557 1558 if gamma == 0.0: 1559 continue 1560 1561 gamma_div = 0.0 1562 for r_name in obs1.e_content[e_name]: 1563 if r_name not in obs2.e_content[e_name]: 1564 continue 1565 if len(idl_d[r_name]) == 0: 1566 continue 1567 gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) 1568 gamma /= gamma_div 1569 1570 dvalue += gamma 1571 1572 for e_name in obs1.cov_names: 1573 1574 if e_name not in obs2.cov_names: 1575 continue 1576 1577 dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) 1578 1579 return dvalue 1580 1581 1582def import_jackknife(jacks, name, idl=None): 1583 """Imports jackknife samples and returns an Obs 1584 1585 Parameters 1586 ---------- 1587 jacks : numpy.ndarray 1588 numpy array containing the mean value as zeroth entry and 1589 the N jackknife samples as first to Nth entry. 1590 name : str 1591 name of the ensemble the samples are defined on. 1592 """ 1593 length = len(jacks) - 1 1594 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1595 samples = jacks[1:] @ prj 1596 mean = np.mean(samples) 1597 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1598 new_obs._value = jacks[0] 1599 return new_obs 1600 1601 1602def merge_obs(list_of_obs): 1603 """Combine all observables in list_of_obs into one new observable 1604 1605 Parameters 1606 ---------- 1607 list_of_obs : list 1608 list of the Obs object to be combined 1609 1610 Notes 1611 ----- 1612 It is not possible to combine obs which are based on the same replicum 1613 """ 1614 replist = [item for obs in list_of_obs for item in obs.names] 1615 if (len(replist) == len(set(replist))) is False: 1616 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1617 if any([len(o.cov_names) for o in list_of_obs]): 1618 raise Exception('Not possible to merge data that contains covobs!') 1619 new_dict = {} 1620 idl_dict = {} 1621 for o in list_of_obs: 1622 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1623 for key in set(o.deltas) | set(o.r_values)}) 1624 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1625 1626 names = sorted(new_dict.keys()) 1627 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1628 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1629 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1630 return o 1631 1632 1633def cov_Obs(means, cov, name, grad=None): 1634 """Create an Obs based on mean(s) and a covariance matrix 1635 1636 Parameters 1637 ---------- 1638 mean : list of floats or float 1639 N mean value(s) of the new Obs 1640 cov : list or array 1641 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1642 name : str 1643 identifier for the covariance matrix 1644 grad : list or array 1645 Gradient of the Covobs wrt. the means belonging to cov. 1646 """ 1647 1648 def covobs_to_obs(co): 1649 """Make an Obs out of a Covobs 1650 1651 Parameters 1652 ---------- 1653 co : Covobs 1654 Covobs to be embedded into the Obs 1655 """ 1656 o = Obs([], [], means=[]) 1657 o._value = co.value 1658 o.names.append(co.name) 1659 o._covobs[co.name] = co 1660 o._dvalue = np.sqrt(co.errsq()) 1661 return o 1662 1663 ol = [] 1664 if isinstance(means, (float, int)): 1665 means = [means] 1666 1667 for i in range(len(means)): 1668 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1669 if ol[0].covobs[name].N != len(means): 1670 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1671 if len(ol) == 1: 1672 return ol[0] 1673 return ol
20class Obs: 21 """Class for a general observable. 22 23 Instances of Obs are the basic objects of a pyerrors error analysis. 24 They are initialized with a list which contains arrays of samples for 25 different ensembles/replica and another list of same length which contains 26 the names of the ensembles/replica. Mathematical operations can be 27 performed on instances. The result is another instance of Obs. The error of 28 an instance can be computed with the gamma_method. Also contains additional 29 methods for output and visualization of the error calculation. 30 31 Attributes 32 ---------- 33 S_global : float 34 Standard value for S (default 2.0) 35 S_dict : dict 36 Dictionary for S values. If an entry for a given ensemble 37 exists this overwrites the standard value for that ensemble. 38 tau_exp_global : float 39 Standard value for tau_exp (default 0.0) 40 tau_exp_dict : dict 41 Dictionary for tau_exp values. If an entry for a given ensemble exists 42 this overwrites the standard value for that ensemble. 43 N_sigma_global : float 44 Standard value for N_sigma (default 1.0) 45 N_sigma_dict : dict 46 Dictionary for N_sigma values. If an entry for a given ensemble exists 47 this overwrites the standard value for that ensemble. 48 """ 49 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 50 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 51 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 52 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 53 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 54 55 S_global = 2.0 56 S_dict = {} 57 tau_exp_global = 0.0 58 tau_exp_dict = {} 59 N_sigma_global = 1.0 60 N_sigma_dict = {} 61 filter_eps = 1e-10 62 63 def __init__(self, samples, names, idl=None, **kwargs): 64 """ Initialize Obs object. 65 66 Parameters 67 ---------- 68 samples : list 69 list of numpy arrays containing the Monte Carlo samples 70 names : list 71 list of strings labeling the individual samples 72 idl : list, optional 73 list of ranges or lists on which the samples are defined 74 """ 75 76 if kwargs.get("means") is None and len(samples): 77 if len(samples) != len(names): 78 raise Exception('Length of samples and names incompatible.') 79 if idl is not None: 80 if len(idl) != len(names): 81 raise Exception('Length of idl incompatible with samples and names.') 82 name_length = len(names) 83 if name_length > 1: 84 if name_length != len(set(names)): 85 raise Exception('names are not unique.') 86 if not all(isinstance(x, str) for x in names): 87 raise TypeError('All names have to be strings.') 88 else: 89 if not isinstance(names[0], str): 90 raise TypeError('All names have to be strings.') 91 if min(len(x) for x in samples) <= 4: 92 raise Exception('Samples have to have at least 5 entries.') 93 94 self.names = sorted(names) 95 self.shape = {} 96 self.r_values = {} 97 self.deltas = {} 98 self._covobs = {} 99 100 self._value = 0 101 self.N = 0 102 self.is_merged = {} 103 self.idl = {} 104 if idl is not None: 105 for name, idx in sorted(zip(names, idl)): 106 if isinstance(idx, range): 107 self.idl[name] = idx 108 elif isinstance(idx, (list, np.ndarray)): 109 dc = np.unique(np.diff(idx)) 110 if np.any(dc < 0): 111 raise Exception("Unsorted idx for idl[%s]" % (name)) 112 if len(dc) == 1: 113 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 114 else: 115 self.idl[name] = list(idx) 116 else: 117 raise Exception('incompatible type for idl[%s].' % (name)) 118 else: 119 for name, sample in sorted(zip(names, samples)): 120 self.idl[name] = range(1, len(sample) + 1) 121 122 if kwargs.get("means") is not None: 123 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 124 self.shape[name] = len(self.idl[name]) 125 self.N += self.shape[name] 126 self.r_values[name] = mean 127 self.deltas[name] = sample 128 else: 129 for name, sample in sorted(zip(names, samples)): 130 self.shape[name] = len(self.idl[name]) 131 self.N += self.shape[name] 132 if len(sample) != self.shape[name]: 133 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 134 self.r_values[name] = np.mean(sample) 135 self.deltas[name] = sample - self.r_values[name] 136 self._value += self.shape[name] * self.r_values[name] 137 self._value /= self.N 138 139 self._dvalue = 0.0 140 self.ddvalue = 0.0 141 self.reweighted = False 142 143 self.tag = None 144 145 @property 146 def value(self): 147 return self._value 148 149 @property 150 def dvalue(self): 151 return self._dvalue 152 153 @property 154 def e_names(self): 155 return sorted(set([o.split('|')[0] for o in self.names])) 156 157 @property 158 def cov_names(self): 159 return sorted(set([o for o in self.covobs.keys()])) 160 161 @property 162 def mc_names(self): 163 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 164 165 @property 166 def e_content(self): 167 res = {} 168 for e, e_name in enumerate(self.e_names): 169 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 170 if e_name in self.names: 171 res[e_name].append(e_name) 172 return res 173 174 @property 175 def covobs(self): 176 return self._covobs 177 178 def gamma_method(self, **kwargs): 179 """Estimate the error and related properties of the Obs. 180 181 Parameters 182 ---------- 183 S : float 184 specifies a custom value for the parameter S (default 2.0). 185 If set to 0 it is assumed that the data exhibits no 186 autocorrelation. In this case the error estimates coincides 187 with the sample standard error. 188 tau_exp : float 189 positive value triggers the critical slowing down analysis 190 (default 0.0). 191 N_sigma : float 192 number of standard deviations from zero until the tail is 193 attached to the autocorrelation function (default 1). 194 fft : bool 195 determines whether the fft algorithm is used for the computation 196 of the autocorrelation function (default True) 197 """ 198 199 e_content = self.e_content 200 self.e_dvalue = {} 201 self.e_ddvalue = {} 202 self.e_tauint = {} 203 self.e_dtauint = {} 204 self.e_windowsize = {} 205 self.e_n_tauint = {} 206 self.e_n_dtauint = {} 207 e_gamma = {} 208 self.e_rho = {} 209 self.e_drho = {} 210 self._dvalue = 0 211 self.ddvalue = 0 212 213 self.S = {} 214 self.tau_exp = {} 215 self.N_sigma = {} 216 217 if kwargs.get('fft') is False: 218 fft = False 219 else: 220 fft = True 221 222 def _parse_kwarg(kwarg_name): 223 if kwarg_name in kwargs: 224 tmp = kwargs.get(kwarg_name) 225 if isinstance(tmp, (int, float)): 226 if tmp < 0: 227 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 228 for e, e_name in enumerate(self.e_names): 229 getattr(self, kwarg_name)[e_name] = tmp 230 else: 231 raise TypeError(kwarg_name + ' is not in proper format.') 232 else: 233 for e, e_name in enumerate(self.e_names): 234 if e_name in getattr(Obs, kwarg_name + '_dict'): 235 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 236 else: 237 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 238 239 _parse_kwarg('S') 240 _parse_kwarg('tau_exp') 241 _parse_kwarg('N_sigma') 242 243 for e, e_name in enumerate(self.mc_names): 244 r_length = [] 245 for r_name in e_content[e_name]: 246 if isinstance(self.idl[r_name], range): 247 r_length.append(len(self.idl[r_name])) 248 else: 249 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 250 251 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 252 w_max = max(r_length) // 2 253 e_gamma[e_name] = np.zeros(w_max) 254 self.e_rho[e_name] = np.zeros(w_max) 255 self.e_drho[e_name] = np.zeros(w_max) 256 257 for r_name in e_content[e_name]: 258 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 259 260 gamma_div = np.zeros(w_max) 261 for r_name in e_content[e_name]: 262 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 263 gamma_div[gamma_div < 1] = 1.0 264 e_gamma[e_name] /= gamma_div[:w_max] 265 266 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 267 self.e_tauint[e_name] = 0.5 268 self.e_dtauint[e_name] = 0.0 269 self.e_dvalue[e_name] = 0.0 270 self.e_ddvalue[e_name] = 0.0 271 self.e_windowsize[e_name] = 0 272 continue 273 274 gaps = [] 275 for r_name in e_content[e_name]: 276 if isinstance(self.idl[r_name], range): 277 gaps.append(1) 278 else: 279 gaps.append(np.min(np.diff(self.idl[r_name]))) 280 281 if not np.all([gi == gaps[0] for gi in gaps]): 282 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 283 else: 284 gapsize = gaps[0] 285 286 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 287 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 288 # Make sure no entry of tauint is smaller than 0.5 289 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 290 # hep-lat/0306017 eq. (42) 291 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 292 self.e_n_dtauint[e_name][0] = 0.0 293 294 def _compute_drho(i): 295 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 296 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 297 298 _compute_drho(gapsize) 299 if self.tau_exp[e_name] > 0: 300 texp = self.tau_exp[e_name] 301 # Critical slowing down analysis 302 if w_max // 2 <= 1: 303 raise Exception("Need at least 8 samples for tau_exp error analysis") 304 for n in range(gapsize, w_max // 2, gapsize): 305 _compute_drho(n + gapsize) 306 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 307 # Bias correction hep-lat/0306017 eq. (49) included 308 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 309 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 310 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 311 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 312 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 313 self.e_windowsize[e_name] = n 314 break 315 else: 316 if self.S[e_name] == 0.0: 317 self.e_tauint[e_name] = 0.5 318 self.e_dtauint[e_name] = 0.0 319 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 320 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 321 self.e_windowsize[e_name] = 0 322 else: 323 # Standard automatic windowing procedure 324 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 325 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 326 for n in range(1, w_max): 327 if n < w_max // 2 - 2: 328 _compute_drho(gapsize * n + gapsize) 329 if g_w[n - 1] < 0 or n >= w_max - 1: 330 n *= gapsize 331 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 332 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 333 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 334 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 335 self.e_windowsize[e_name] = n 336 break 337 338 self._dvalue += self.e_dvalue[e_name] ** 2 339 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 340 341 for e_name in self.cov_names: 342 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 343 self.e_ddvalue[e_name] = 0 344 self._dvalue += self.e_dvalue[e_name]**2 345 346 self._dvalue = np.sqrt(self._dvalue) 347 if self._dvalue == 0.0: 348 self.ddvalue = 0.0 349 else: 350 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 351 return 352 353 gm = gamma_method 354 355 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 356 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 357 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 358 359 Parameters 360 ---------- 361 deltas : list 362 List of fluctuations 363 idx : list 364 List or range of configurations on which the deltas are defined. 365 shape : int 366 Number of configurations in idx. 367 w_max : int 368 Upper bound for the summation window. 369 fft : bool 370 determines whether the fft algorithm is used for the computation 371 of the autocorrelation function. 372 """ 373 gamma = np.zeros(w_max) 374 deltas = _expand_deltas(deltas, idx, shape) 375 new_shape = len(deltas) 376 if fft: 377 max_gamma = min(new_shape, w_max) 378 # The padding for the fft has to be even 379 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 380 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 381 else: 382 for n in range(w_max): 383 if new_shape - n >= 0: 384 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 385 386 return gamma 387 388 def details(self, ens_content=True): 389 """Output detailed properties of the Obs. 390 391 Parameters 392 ---------- 393 ens_content : bool 394 print details about the ensembles and replica if true. 395 """ 396 if self.tag is not None: 397 print("Description:", self.tag) 398 if not hasattr(self, 'e_dvalue'): 399 print('Result\t %3.8e' % (self.value)) 400 else: 401 if self.value == 0.0: 402 percentage = np.nan 403 else: 404 percentage = np.abs(self._dvalue / self.value) * 100 405 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 406 if len(self.e_names) > 1: 407 print(' Ensemble errors:') 408 e_content = self.e_content 409 for e_name in self.mc_names: 410 if isinstance(self.idl[e_content[e_name][0]], range): 411 gap = self.idl[e_content[e_name][0]].step 412 else: 413 gap = np.min(np.diff(self.idl[e_content[e_name][0]])) 414 415 if len(self.e_names) > 1: 416 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 417 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 418 tau_string += f" in units of {gap} config" 419 if gap > 1: 420 tau_string += "s" 421 if self.tau_exp[e_name] > 0: 422 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 423 else: 424 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 425 print(tau_string) 426 for e_name in self.cov_names: 427 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 428 if ens_content is True: 429 if len(self.e_names) == 1: 430 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 431 else: 432 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 433 my_string_list = [] 434 for key, value in sorted(self.e_content.items()): 435 if key not in self.covobs: 436 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 437 if len(value) == 1: 438 my_string += f': {self.shape[value[0]]} configurations' 439 if isinstance(self.idl[value[0]], range): 440 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 441 else: 442 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 443 else: 444 sublist = [] 445 for v in value: 446 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 447 my_substring += f': {self.shape[v]} configurations' 448 if isinstance(self.idl[v], range): 449 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 450 else: 451 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 452 sublist.append(my_substring) 453 454 my_string += '\n' + '\n'.join(sublist) 455 else: 456 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 457 my_string_list.append(my_string) 458 print('\n'.join(my_string_list)) 459 460 def reweight(self, weight): 461 """Reweight the obs with given rewighting factors. 462 463 Parameters 464 ---------- 465 weight : Obs 466 Reweighting factor. An Observable that has to be defined on a superset of the 467 configurations in obs[i].idl for all i. 468 all_configs : bool 469 if True, the reweighted observables are normalized by the average of 470 the reweighting factor on all configurations in weight.idl and not 471 on the configurations in obs[i].idl. Default False. 472 """ 473 return reweight(weight, [self])[0] 474 475 def is_zero_within_error(self, sigma=1): 476 """Checks whether the observable is zero within 'sigma' standard errors. 477 478 Parameters 479 ---------- 480 sigma : int 481 Number of standard errors used for the check. 482 483 Works only properly when the gamma method was run. 484 """ 485 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 486 487 def is_zero(self, atol=1e-10): 488 """Checks whether the observable is zero within a given tolerance. 489 490 Parameters 491 ---------- 492 atol : float 493 Absolute tolerance (for details see numpy documentation). 494 """ 495 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 496 497 def plot_tauint(self, save=None): 498 """Plot integrated autocorrelation time for each ensemble. 499 500 Parameters 501 ---------- 502 save : str 503 saves the figure to a file named 'save' if. 504 """ 505 if not hasattr(self, 'e_dvalue'): 506 raise Exception('Run the gamma method first.') 507 508 for e, e_name in enumerate(self.mc_names): 509 fig = plt.figure() 510 plt.xlabel(r'$W$') 511 plt.ylabel(r'$\tau_\mathrm{int}$') 512 length = int(len(self.e_n_tauint[e_name])) 513 if self.tau_exp[e_name] > 0: 514 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 515 x_help = np.arange(2 * self.tau_exp[e_name]) 516 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 517 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 518 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 519 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 520 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 521 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 522 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 523 else: 524 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 525 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 526 527 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 528 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 529 plt.legend() 530 plt.xlim(-0.5, xmax) 531 ylim = plt.ylim() 532 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 533 plt.draw() 534 if save: 535 fig.savefig(save + "_" + str(e)) 536 537 def plot_rho(self, save=None): 538 """Plot normalized autocorrelation function time for each ensemble. 539 540 Parameters 541 ---------- 542 save : str 543 saves the figure to a file named 'save' if. 544 """ 545 if not hasattr(self, 'e_dvalue'): 546 raise Exception('Run the gamma method first.') 547 for e, e_name in enumerate(self.mc_names): 548 fig = plt.figure() 549 plt.xlabel('W') 550 plt.ylabel('rho') 551 length = int(len(self.e_drho[e_name])) 552 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 553 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 554 if self.tau_exp[e_name] > 0: 555 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 556 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 557 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 558 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 559 else: 560 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 561 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 562 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 563 plt.xlim(-0.5, xmax) 564 plt.draw() 565 if save: 566 fig.savefig(save + "_" + str(e)) 567 568 def plot_rep_dist(self): 569 """Plot replica distribution for each ensemble with more than one replicum.""" 570 if not hasattr(self, 'e_dvalue'): 571 raise Exception('Run the gamma method first.') 572 for e, e_name in enumerate(self.mc_names): 573 if len(self.e_content[e_name]) == 1: 574 print('No replica distribution for a single replicum (', e_name, ')') 575 continue 576 r_length = [] 577 sub_r_mean = 0 578 for r, r_name in enumerate(self.e_content[e_name]): 579 r_length.append(len(self.deltas[r_name])) 580 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 581 e_N = np.sum(r_length) 582 sub_r_mean /= e_N 583 arr = np.zeros(len(self.e_content[e_name])) 584 for r, r_name in enumerate(self.e_content[e_name]): 585 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 586 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 587 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 588 plt.draw() 589 590 def plot_history(self, expand=True): 591 """Plot derived Monte Carlo history for each ensemble 592 593 Parameters 594 ---------- 595 expand : bool 596 show expanded history for irregular Monte Carlo chains (default: True). 597 """ 598 for e, e_name in enumerate(self.mc_names): 599 plt.figure() 600 r_length = [] 601 tmp = [] 602 tmp_expanded = [] 603 for r, r_name in enumerate(self.e_content[e_name]): 604 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 605 if expand: 606 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 607 r_length.append(len(tmp_expanded[-1])) 608 else: 609 r_length.append(len(tmp[-1])) 610 e_N = np.sum(r_length) 611 x = np.arange(e_N) 612 y_test = np.concatenate(tmp, axis=0) 613 if expand: 614 y = np.concatenate(tmp_expanded, axis=0) 615 else: 616 y = y_test 617 plt.errorbar(x, y, fmt='.', markersize=3) 618 plt.xlim(-0.5, e_N - 0.5) 619 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 620 plt.draw() 621 622 def plot_piechart(self, save=None): 623 """Plot piechart which shows the fractional contribution of each 624 ensemble to the error and returns a dictionary containing the fractions. 625 626 Parameters 627 ---------- 628 save : str 629 saves the figure to a file named 'save' if. 630 """ 631 if not hasattr(self, 'e_dvalue'): 632 raise Exception('Run the gamma method first.') 633 if np.isclose(0.0, self._dvalue, atol=1e-15): 634 raise Exception('Error is 0.0') 635 labels = self.e_names 636 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 637 fig1, ax1 = plt.subplots() 638 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 639 ax1.axis('equal') 640 plt.draw() 641 if save: 642 fig1.savefig(save) 643 644 return dict(zip(self.e_names, sizes)) 645 646 def dump(self, filename, datatype="json.gz", description="", **kwargs): 647 """Dump the Obs to a file 'name' of chosen format. 648 649 Parameters 650 ---------- 651 filename : str 652 name of the file to be saved. 653 datatype : str 654 Format of the exported file. Supported formats include 655 "json.gz" and "pickle" 656 description : str 657 Description for output file, only relevant for json.gz format. 658 path : str 659 specifies a custom path for the file (default '.') 660 """ 661 if 'path' in kwargs: 662 file_name = kwargs.get('path') + '/' + filename 663 else: 664 file_name = filename 665 666 if datatype == "json.gz": 667 from .input.json import dump_to_json 668 dump_to_json([self], file_name, description=description) 669 elif datatype == "pickle": 670 with open(file_name + '.p', 'wb') as fb: 671 pickle.dump(self, fb) 672 else: 673 raise Exception("Unknown datatype " + str(datatype)) 674 675 def export_jackknife(self): 676 """Export jackknife samples from the Obs 677 678 Returns 679 ------- 680 numpy.ndarray 681 Returns a numpy array of length N + 1 where N is the number of samples 682 for the given ensemble and replicum. The zeroth entry of the array contains 683 the mean value of the Obs, entries 1 to N contain the N jackknife samples 684 derived from the Obs. The current implementation only works for observables 685 defined on exactly one ensemble and replicum. The derived jackknife samples 686 should agree with samples from a full jackknife analysis up to O(1/N). 687 """ 688 689 if len(self.names) != 1: 690 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 691 692 name = self.names[0] 693 full_data = self.deltas[name] + self.r_values[name] 694 n = full_data.size 695 mean = self.value 696 tmp_jacks = np.zeros(n + 1) 697 tmp_jacks[0] = mean 698 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 699 return tmp_jacks 700 701 def __float__(self): 702 return float(self.value) 703 704 def __repr__(self): 705 return 'Obs[' + str(self) + ']' 706 707 def __str__(self): 708 return _format_uncertainty(self.value, self._dvalue) 709 710 def __hash__(self): 711 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 712 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 713 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 714 hash_tuple += tuple([o.encode() for o in self.names]) 715 m = hashlib.md5() 716 [m.update(o) for o in hash_tuple] 717 return int(m.hexdigest(), 16) & 0xFFFFFFFF 718 719 # Overload comparisons 720 def __lt__(self, other): 721 return self.value < other 722 723 def __le__(self, other): 724 return self.value <= other 725 726 def __gt__(self, other): 727 return self.value > other 728 729 def __ge__(self, other): 730 return self.value >= other 731 732 def __eq__(self, other): 733 return (self - other).is_zero() 734 735 def __ne__(self, other): 736 return not (self - other).is_zero() 737 738 # Overload math operations 739 def __add__(self, y): 740 if isinstance(y, Obs): 741 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 742 else: 743 if isinstance(y, np.ndarray): 744 return np.array([self + o for o in y]) 745 elif y.__class__.__name__ in ['Corr', 'CObs']: 746 return NotImplemented 747 else: 748 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 749 750 def __radd__(self, y): 751 return self + y 752 753 def __mul__(self, y): 754 if isinstance(y, Obs): 755 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 756 else: 757 if isinstance(y, np.ndarray): 758 return np.array([self * o for o in y]) 759 elif isinstance(y, complex): 760 return CObs(self * y.real, self * y.imag) 761 elif y.__class__.__name__ in ['Corr', 'CObs']: 762 return NotImplemented 763 else: 764 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 765 766 def __rmul__(self, y): 767 return self * y 768 769 def __sub__(self, y): 770 if isinstance(y, Obs): 771 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 772 else: 773 if isinstance(y, np.ndarray): 774 return np.array([self - o for o in y]) 775 elif y.__class__.__name__ in ['Corr', 'CObs']: 776 return NotImplemented 777 else: 778 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 779 780 def __rsub__(self, y): 781 return -1 * (self - y) 782 783 def __pos__(self): 784 return self 785 786 def __neg__(self): 787 return -1 * self 788 789 def __truediv__(self, y): 790 if isinstance(y, Obs): 791 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 792 else: 793 if isinstance(y, np.ndarray): 794 return np.array([self / o for o in y]) 795 elif y.__class__.__name__ in ['Corr', 'CObs']: 796 return NotImplemented 797 else: 798 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 799 800 def __rtruediv__(self, y): 801 if isinstance(y, Obs): 802 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 803 else: 804 if isinstance(y, np.ndarray): 805 return np.array([o / self for o in y]) 806 elif y.__class__.__name__ in ['Corr', 'CObs']: 807 return NotImplemented 808 else: 809 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 810 811 def __pow__(self, y): 812 if isinstance(y, Obs): 813 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 814 else: 815 return derived_observable(lambda x: x[0] ** y, [self]) 816 817 def __rpow__(self, y): 818 if isinstance(y, Obs): 819 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 820 else: 821 return derived_observable(lambda x: y ** x[0], [self]) 822 823 def __abs__(self): 824 return derived_observable(lambda x: anp.abs(x[0]), [self]) 825 826 # Overload numpy functions 827 def sqrt(self): 828 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 829 830 def log(self): 831 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 832 833 def exp(self): 834 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 835 836 def sin(self): 837 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 838 839 def cos(self): 840 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 841 842 def tan(self): 843 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 844 845 def arcsin(self): 846 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 847 848 def arccos(self): 849 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 850 851 def arctan(self): 852 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 853 854 def sinh(self): 855 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 856 857 def cosh(self): 858 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 859 860 def tanh(self): 861 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 862 863 def arcsinh(self): 864 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 865 866 def arccosh(self): 867 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 868 869 def arctanh(self): 870 return derived_observable(lambda x: anp.arctanh(x[0]), [self])
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
63 def __init__(self, samples, names, idl=None, **kwargs): 64 """ Initialize Obs object. 65 66 Parameters 67 ---------- 68 samples : list 69 list of numpy arrays containing the Monte Carlo samples 70 names : list 71 list of strings labeling the individual samples 72 idl : list, optional 73 list of ranges or lists on which the samples are defined 74 """ 75 76 if kwargs.get("means") is None and len(samples): 77 if len(samples) != len(names): 78 raise Exception('Length of samples and names incompatible.') 79 if idl is not None: 80 if len(idl) != len(names): 81 raise Exception('Length of idl incompatible with samples and names.') 82 name_length = len(names) 83 if name_length > 1: 84 if name_length != len(set(names)): 85 raise Exception('names are not unique.') 86 if not all(isinstance(x, str) for x in names): 87 raise TypeError('All names have to be strings.') 88 else: 89 if not isinstance(names[0], str): 90 raise TypeError('All names have to be strings.') 91 if min(len(x) for x in samples) <= 4: 92 raise Exception('Samples have to have at least 5 entries.') 93 94 self.names = sorted(names) 95 self.shape = {} 96 self.r_values = {} 97 self.deltas = {} 98 self._covobs = {} 99 100 self._value = 0 101 self.N = 0 102 self.is_merged = {} 103 self.idl = {} 104 if idl is not None: 105 for name, idx in sorted(zip(names, idl)): 106 if isinstance(idx, range): 107 self.idl[name] = idx 108 elif isinstance(idx, (list, np.ndarray)): 109 dc = np.unique(np.diff(idx)) 110 if np.any(dc < 0): 111 raise Exception("Unsorted idx for idl[%s]" % (name)) 112 if len(dc) == 1: 113 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 114 else: 115 self.idl[name] = list(idx) 116 else: 117 raise Exception('incompatible type for idl[%s].' % (name)) 118 else: 119 for name, sample in sorted(zip(names, samples)): 120 self.idl[name] = range(1, len(sample) + 1) 121 122 if kwargs.get("means") is not None: 123 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 124 self.shape[name] = len(self.idl[name]) 125 self.N += self.shape[name] 126 self.r_values[name] = mean 127 self.deltas[name] = sample 128 else: 129 for name, sample in sorted(zip(names, samples)): 130 self.shape[name] = len(self.idl[name]) 131 self.N += self.shape[name] 132 if len(sample) != self.shape[name]: 133 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 134 self.r_values[name] = np.mean(sample) 135 self.deltas[name] = sample - self.r_values[name] 136 self._value += self.shape[name] * self.r_values[name] 137 self._value /= self.N 138 139 self._dvalue = 0.0 140 self.ddvalue = 0.0 141 self.reweighted = False 142 143 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
178 def gamma_method(self, **kwargs): 179 """Estimate the error and related properties of the Obs. 180 181 Parameters 182 ---------- 183 S : float 184 specifies a custom value for the parameter S (default 2.0). 185 If set to 0 it is assumed that the data exhibits no 186 autocorrelation. In this case the error estimates coincides 187 with the sample standard error. 188 tau_exp : float 189 positive value triggers the critical slowing down analysis 190 (default 0.0). 191 N_sigma : float 192 number of standard deviations from zero until the tail is 193 attached to the autocorrelation function (default 1). 194 fft : bool 195 determines whether the fft algorithm is used for the computation 196 of the autocorrelation function (default True) 197 """ 198 199 e_content = self.e_content 200 self.e_dvalue = {} 201 self.e_ddvalue = {} 202 self.e_tauint = {} 203 self.e_dtauint = {} 204 self.e_windowsize = {} 205 self.e_n_tauint = {} 206 self.e_n_dtauint = {} 207 e_gamma = {} 208 self.e_rho = {} 209 self.e_drho = {} 210 self._dvalue = 0 211 self.ddvalue = 0 212 213 self.S = {} 214 self.tau_exp = {} 215 self.N_sigma = {} 216 217 if kwargs.get('fft') is False: 218 fft = False 219 else: 220 fft = True 221 222 def _parse_kwarg(kwarg_name): 223 if kwarg_name in kwargs: 224 tmp = kwargs.get(kwarg_name) 225 if isinstance(tmp, (int, float)): 226 if tmp < 0: 227 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 228 for e, e_name in enumerate(self.e_names): 229 getattr(self, kwarg_name)[e_name] = tmp 230 else: 231 raise TypeError(kwarg_name + ' is not in proper format.') 232 else: 233 for e, e_name in enumerate(self.e_names): 234 if e_name in getattr(Obs, kwarg_name + '_dict'): 235 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 236 else: 237 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 238 239 _parse_kwarg('S') 240 _parse_kwarg('tau_exp') 241 _parse_kwarg('N_sigma') 242 243 for e, e_name in enumerate(self.mc_names): 244 r_length = [] 245 for r_name in e_content[e_name]: 246 if isinstance(self.idl[r_name], range): 247 r_length.append(len(self.idl[r_name])) 248 else: 249 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 250 251 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 252 w_max = max(r_length) // 2 253 e_gamma[e_name] = np.zeros(w_max) 254 self.e_rho[e_name] = np.zeros(w_max) 255 self.e_drho[e_name] = np.zeros(w_max) 256 257 for r_name in e_content[e_name]: 258 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 259 260 gamma_div = np.zeros(w_max) 261 for r_name in e_content[e_name]: 262 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 263 gamma_div[gamma_div < 1] = 1.0 264 e_gamma[e_name] /= gamma_div[:w_max] 265 266 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 267 self.e_tauint[e_name] = 0.5 268 self.e_dtauint[e_name] = 0.0 269 self.e_dvalue[e_name] = 0.0 270 self.e_ddvalue[e_name] = 0.0 271 self.e_windowsize[e_name] = 0 272 continue 273 274 gaps = [] 275 for r_name in e_content[e_name]: 276 if isinstance(self.idl[r_name], range): 277 gaps.append(1) 278 else: 279 gaps.append(np.min(np.diff(self.idl[r_name]))) 280 281 if not np.all([gi == gaps[0] for gi in gaps]): 282 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 283 else: 284 gapsize = gaps[0] 285 286 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 287 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 288 # Make sure no entry of tauint is smaller than 0.5 289 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 290 # hep-lat/0306017 eq. (42) 291 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 292 self.e_n_dtauint[e_name][0] = 0.0 293 294 def _compute_drho(i): 295 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 296 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 297 298 _compute_drho(gapsize) 299 if self.tau_exp[e_name] > 0: 300 texp = self.tau_exp[e_name] 301 # Critical slowing down analysis 302 if w_max // 2 <= 1: 303 raise Exception("Need at least 8 samples for tau_exp error analysis") 304 for n in range(gapsize, w_max // 2, gapsize): 305 _compute_drho(n + gapsize) 306 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 307 # Bias correction hep-lat/0306017 eq. (49) included 308 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 309 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 310 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 311 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 312 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 313 self.e_windowsize[e_name] = n 314 break 315 else: 316 if self.S[e_name] == 0.0: 317 self.e_tauint[e_name] = 0.5 318 self.e_dtauint[e_name] = 0.0 319 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 320 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 321 self.e_windowsize[e_name] = 0 322 else: 323 # Standard automatic windowing procedure 324 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 325 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 326 for n in range(1, w_max): 327 if n < w_max // 2 - 2: 328 _compute_drho(gapsize * n + gapsize) 329 if g_w[n - 1] < 0 or n >= w_max - 1: 330 n *= gapsize 331 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 332 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 333 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 334 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 335 self.e_windowsize[e_name] = n 336 break 337 338 self._dvalue += self.e_dvalue[e_name] ** 2 339 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 340 341 for e_name in self.cov_names: 342 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 343 self.e_ddvalue[e_name] = 0 344 self._dvalue += self.e_dvalue[e_name]**2 345 346 self._dvalue = np.sqrt(self._dvalue) 347 if self._dvalue == 0.0: 348 self.ddvalue = 0.0 349 else: 350 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 351 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
178 def gamma_method(self, **kwargs): 179 """Estimate the error and related properties of the Obs. 180 181 Parameters 182 ---------- 183 S : float 184 specifies a custom value for the parameter S (default 2.0). 185 If set to 0 it is assumed that the data exhibits no 186 autocorrelation. In this case the error estimates coincides 187 with the sample standard error. 188 tau_exp : float 189 positive value triggers the critical slowing down analysis 190 (default 0.0). 191 N_sigma : float 192 number of standard deviations from zero until the tail is 193 attached to the autocorrelation function (default 1). 194 fft : bool 195 determines whether the fft algorithm is used for the computation 196 of the autocorrelation function (default True) 197 """ 198 199 e_content = self.e_content 200 self.e_dvalue = {} 201 self.e_ddvalue = {} 202 self.e_tauint = {} 203 self.e_dtauint = {} 204 self.e_windowsize = {} 205 self.e_n_tauint = {} 206 self.e_n_dtauint = {} 207 e_gamma = {} 208 self.e_rho = {} 209 self.e_drho = {} 210 self._dvalue = 0 211 self.ddvalue = 0 212 213 self.S = {} 214 self.tau_exp = {} 215 self.N_sigma = {} 216 217 if kwargs.get('fft') is False: 218 fft = False 219 else: 220 fft = True 221 222 def _parse_kwarg(kwarg_name): 223 if kwarg_name in kwargs: 224 tmp = kwargs.get(kwarg_name) 225 if isinstance(tmp, (int, float)): 226 if tmp < 0: 227 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 228 for e, e_name in enumerate(self.e_names): 229 getattr(self, kwarg_name)[e_name] = tmp 230 else: 231 raise TypeError(kwarg_name + ' is not in proper format.') 232 else: 233 for e, e_name in enumerate(self.e_names): 234 if e_name in getattr(Obs, kwarg_name + '_dict'): 235 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 236 else: 237 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 238 239 _parse_kwarg('S') 240 _parse_kwarg('tau_exp') 241 _parse_kwarg('N_sigma') 242 243 for e, e_name in enumerate(self.mc_names): 244 r_length = [] 245 for r_name in e_content[e_name]: 246 if isinstance(self.idl[r_name], range): 247 r_length.append(len(self.idl[r_name])) 248 else: 249 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 250 251 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 252 w_max = max(r_length) // 2 253 e_gamma[e_name] = np.zeros(w_max) 254 self.e_rho[e_name] = np.zeros(w_max) 255 self.e_drho[e_name] = np.zeros(w_max) 256 257 for r_name in e_content[e_name]: 258 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 259 260 gamma_div = np.zeros(w_max) 261 for r_name in e_content[e_name]: 262 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 263 gamma_div[gamma_div < 1] = 1.0 264 e_gamma[e_name] /= gamma_div[:w_max] 265 266 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 267 self.e_tauint[e_name] = 0.5 268 self.e_dtauint[e_name] = 0.0 269 self.e_dvalue[e_name] = 0.0 270 self.e_ddvalue[e_name] = 0.0 271 self.e_windowsize[e_name] = 0 272 continue 273 274 gaps = [] 275 for r_name in e_content[e_name]: 276 if isinstance(self.idl[r_name], range): 277 gaps.append(1) 278 else: 279 gaps.append(np.min(np.diff(self.idl[r_name]))) 280 281 if not np.all([gi == gaps[0] for gi in gaps]): 282 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 283 else: 284 gapsize = gaps[0] 285 286 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 287 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 288 # Make sure no entry of tauint is smaller than 0.5 289 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 290 # hep-lat/0306017 eq. (42) 291 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 292 self.e_n_dtauint[e_name][0] = 0.0 293 294 def _compute_drho(i): 295 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 296 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 297 298 _compute_drho(gapsize) 299 if self.tau_exp[e_name] > 0: 300 texp = self.tau_exp[e_name] 301 # Critical slowing down analysis 302 if w_max // 2 <= 1: 303 raise Exception("Need at least 8 samples for tau_exp error analysis") 304 for n in range(gapsize, w_max // 2, gapsize): 305 _compute_drho(n + gapsize) 306 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 307 # Bias correction hep-lat/0306017 eq. (49) included 308 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 309 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 310 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 311 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 312 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 313 self.e_windowsize[e_name] = n 314 break 315 else: 316 if self.S[e_name] == 0.0: 317 self.e_tauint[e_name] = 0.5 318 self.e_dtauint[e_name] = 0.0 319 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 320 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 321 self.e_windowsize[e_name] = 0 322 else: 323 # Standard automatic windowing procedure 324 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 325 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 326 for n in range(1, w_max): 327 if n < w_max // 2 - 2: 328 _compute_drho(gapsize * n + gapsize) 329 if g_w[n - 1] < 0 or n >= w_max - 1: 330 n *= gapsize 331 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 332 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 333 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 334 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 335 self.e_windowsize[e_name] = n 336 break 337 338 self._dvalue += self.e_dvalue[e_name] ** 2 339 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 340 341 for e_name in self.cov_names: 342 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 343 self.e_ddvalue[e_name] = 0 344 self._dvalue += self.e_dvalue[e_name]**2 345 346 self._dvalue = np.sqrt(self._dvalue) 347 if self._dvalue == 0.0: 348 self.ddvalue = 0.0 349 else: 350 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 351 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
388 def details(self, ens_content=True): 389 """Output detailed properties of the Obs. 390 391 Parameters 392 ---------- 393 ens_content : bool 394 print details about the ensembles and replica if true. 395 """ 396 if self.tag is not None: 397 print("Description:", self.tag) 398 if not hasattr(self, 'e_dvalue'): 399 print('Result\t %3.8e' % (self.value)) 400 else: 401 if self.value == 0.0: 402 percentage = np.nan 403 else: 404 percentage = np.abs(self._dvalue / self.value) * 100 405 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 406 if len(self.e_names) > 1: 407 print(' Ensemble errors:') 408 e_content = self.e_content 409 for e_name in self.mc_names: 410 if isinstance(self.idl[e_content[e_name][0]], range): 411 gap = self.idl[e_content[e_name][0]].step 412 else: 413 gap = np.min(np.diff(self.idl[e_content[e_name][0]])) 414 415 if len(self.e_names) > 1: 416 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 417 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 418 tau_string += f" in units of {gap} config" 419 if gap > 1: 420 tau_string += "s" 421 if self.tau_exp[e_name] > 0: 422 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 423 else: 424 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 425 print(tau_string) 426 for e_name in self.cov_names: 427 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 428 if ens_content is True: 429 if len(self.e_names) == 1: 430 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 431 else: 432 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 433 my_string_list = [] 434 for key, value in sorted(self.e_content.items()): 435 if key not in self.covobs: 436 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 437 if len(value) == 1: 438 my_string += f': {self.shape[value[0]]} configurations' 439 if isinstance(self.idl[value[0]], range): 440 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 441 else: 442 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 443 else: 444 sublist = [] 445 for v in value: 446 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 447 my_substring += f': {self.shape[v]} configurations' 448 if isinstance(self.idl[v], range): 449 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 450 else: 451 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 452 sublist.append(my_substring) 453 454 my_string += '\n' + '\n'.join(sublist) 455 else: 456 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 457 my_string_list.append(my_string) 458 print('\n'.join(my_string_list))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
460 def reweight(self, weight): 461 """Reweight the obs with given rewighting factors. 462 463 Parameters 464 ---------- 465 weight : Obs 466 Reweighting factor. An Observable that has to be defined on a superset of the 467 configurations in obs[i].idl for all i. 468 all_configs : bool 469 if True, the reweighted observables are normalized by the average of 470 the reweighting factor on all configurations in weight.idl and not 471 on the configurations in obs[i].idl. Default False. 472 """ 473 return reweight(weight, [self])[0]
Reweight the obs with given rewighting factors.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
475 def is_zero_within_error(self, sigma=1): 476 """Checks whether the observable is zero within 'sigma' standard errors. 477 478 Parameters 479 ---------- 480 sigma : int 481 Number of standard errors used for the check. 482 483 Works only properly when the gamma method was run. 484 """ 485 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
- Works only properly when the gamma method was run.
487 def is_zero(self, atol=1e-10): 488 """Checks whether the observable is zero within a given tolerance. 489 490 Parameters 491 ---------- 492 atol : float 493 Absolute tolerance (for details see numpy documentation). 494 """ 495 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
497 def plot_tauint(self, save=None): 498 """Plot integrated autocorrelation time for each ensemble. 499 500 Parameters 501 ---------- 502 save : str 503 saves the figure to a file named 'save' if. 504 """ 505 if not hasattr(self, 'e_dvalue'): 506 raise Exception('Run the gamma method first.') 507 508 for e, e_name in enumerate(self.mc_names): 509 fig = plt.figure() 510 plt.xlabel(r'$W$') 511 plt.ylabel(r'$\tau_\mathrm{int}$') 512 length = int(len(self.e_n_tauint[e_name])) 513 if self.tau_exp[e_name] > 0: 514 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 515 x_help = np.arange(2 * self.tau_exp[e_name]) 516 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 517 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 518 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 519 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 520 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 521 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 522 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 523 else: 524 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 525 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 526 527 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 528 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 529 plt.legend() 530 plt.xlim(-0.5, xmax) 531 ylim = plt.ylim() 532 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 533 plt.draw() 534 if save: 535 fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
537 def plot_rho(self, save=None): 538 """Plot normalized autocorrelation function time for each ensemble. 539 540 Parameters 541 ---------- 542 save : str 543 saves the figure to a file named 'save' if. 544 """ 545 if not hasattr(self, 'e_dvalue'): 546 raise Exception('Run the gamma method first.') 547 for e, e_name in enumerate(self.mc_names): 548 fig = plt.figure() 549 plt.xlabel('W') 550 plt.ylabel('rho') 551 length = int(len(self.e_drho[e_name])) 552 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 553 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 554 if self.tau_exp[e_name] > 0: 555 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 556 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 557 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 558 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 559 else: 560 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 561 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 562 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 563 plt.xlim(-0.5, xmax) 564 plt.draw() 565 if save: 566 fig.savefig(save + "_" + str(e))
Plot normalized autocorrelation function time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
568 def plot_rep_dist(self): 569 """Plot replica distribution for each ensemble with more than one replicum.""" 570 if not hasattr(self, 'e_dvalue'): 571 raise Exception('Run the gamma method first.') 572 for e, e_name in enumerate(self.mc_names): 573 if len(self.e_content[e_name]) == 1: 574 print('No replica distribution for a single replicum (', e_name, ')') 575 continue 576 r_length = [] 577 sub_r_mean = 0 578 for r, r_name in enumerate(self.e_content[e_name]): 579 r_length.append(len(self.deltas[r_name])) 580 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 581 e_N = np.sum(r_length) 582 sub_r_mean /= e_N 583 arr = np.zeros(len(self.e_content[e_name])) 584 for r, r_name in enumerate(self.e_content[e_name]): 585 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 586 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 587 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 588 plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
590 def plot_history(self, expand=True): 591 """Plot derived Monte Carlo history for each ensemble 592 593 Parameters 594 ---------- 595 expand : bool 596 show expanded history for irregular Monte Carlo chains (default: True). 597 """ 598 for e, e_name in enumerate(self.mc_names): 599 plt.figure() 600 r_length = [] 601 tmp = [] 602 tmp_expanded = [] 603 for r, r_name in enumerate(self.e_content[e_name]): 604 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 605 if expand: 606 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 607 r_length.append(len(tmp_expanded[-1])) 608 else: 609 r_length.append(len(tmp[-1])) 610 e_N = np.sum(r_length) 611 x = np.arange(e_N) 612 y_test = np.concatenate(tmp, axis=0) 613 if expand: 614 y = np.concatenate(tmp_expanded, axis=0) 615 else: 616 y = y_test 617 plt.errorbar(x, y, fmt='.', markersize=3) 618 plt.xlim(-0.5, e_N - 0.5) 619 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 620 plt.draw()
Plot derived Monte Carlo history for each ensemble
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
622 def plot_piechart(self, save=None): 623 """Plot piechart which shows the fractional contribution of each 624 ensemble to the error and returns a dictionary containing the fractions. 625 626 Parameters 627 ---------- 628 save : str 629 saves the figure to a file named 'save' if. 630 """ 631 if not hasattr(self, 'e_dvalue'): 632 raise Exception('Run the gamma method first.') 633 if np.isclose(0.0, self._dvalue, atol=1e-15): 634 raise Exception('Error is 0.0') 635 labels = self.e_names 636 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 637 fig1, ax1 = plt.subplots() 638 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 639 ax1.axis('equal') 640 plt.draw() 641 if save: 642 fig1.savefig(save) 643 644 return dict(zip(self.e_names, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
Parameters
- save (str): saves the figure to a file named 'save' if.
646 def dump(self, filename, datatype="json.gz", description="", **kwargs): 647 """Dump the Obs to a file 'name' of chosen format. 648 649 Parameters 650 ---------- 651 filename : str 652 name of the file to be saved. 653 datatype : str 654 Format of the exported file. Supported formats include 655 "json.gz" and "pickle" 656 description : str 657 Description for output file, only relevant for json.gz format. 658 path : str 659 specifies a custom path for the file (default '.') 660 """ 661 if 'path' in kwargs: 662 file_name = kwargs.get('path') + '/' + filename 663 else: 664 file_name = filename 665 666 if datatype == "json.gz": 667 from .input.json import dump_to_json 668 dump_to_json([self], file_name, description=description) 669 elif datatype == "pickle": 670 with open(file_name + '.p', 'wb') as fb: 671 pickle.dump(self, fb) 672 else: 673 raise Exception("Unknown datatype " + str(datatype))
Dump the Obs to a file 'name' of chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
675 def export_jackknife(self): 676 """Export jackknife samples from the Obs 677 678 Returns 679 ------- 680 numpy.ndarray 681 Returns a numpy array of length N + 1 where N is the number of samples 682 for the given ensemble and replicum. The zeroth entry of the array contains 683 the mean value of the Obs, entries 1 to N contain the N jackknife samples 684 derived from the Obs. The current implementation only works for observables 685 defined on exactly one ensemble and replicum. The derived jackknife samples 686 should agree with samples from a full jackknife analysis up to O(1/N). 687 """ 688 689 if len(self.names) != 1: 690 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 691 692 name = self.names[0] 693 full_data = self.deltas[name] + self.r_values[name] 694 n = full_data.size 695 mean = self.value 696 tmp_jacks = np.zeros(n + 1) 697 tmp_jacks[0] = mean 698 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 699 return tmp_jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
873class CObs: 874 """Class for a complex valued observable.""" 875 __slots__ = ['_real', '_imag', 'tag'] 876 877 def __init__(self, real, imag=0.0): 878 self._real = real 879 self._imag = imag 880 self.tag = None 881 882 @property 883 def real(self): 884 return self._real 885 886 @property 887 def imag(self): 888 return self._imag 889 890 def gamma_method(self, **kwargs): 891 """Executes the gamma_method for the real and the imaginary part.""" 892 if isinstance(self.real, Obs): 893 self.real.gamma_method(**kwargs) 894 if isinstance(self.imag, Obs): 895 self.imag.gamma_method(**kwargs) 896 897 def is_zero(self): 898 """Checks whether both real and imaginary part are zero within machine precision.""" 899 return self.real == 0.0 and self.imag == 0.0 900 901 def conjugate(self): 902 return CObs(self.real, -self.imag) 903 904 def __add__(self, other): 905 if isinstance(other, np.ndarray): 906 return other + self 907 elif hasattr(other, 'real') and hasattr(other, 'imag'): 908 return CObs(self.real + other.real, 909 self.imag + other.imag) 910 else: 911 return CObs(self.real + other, self.imag) 912 913 def __radd__(self, y): 914 return self + y 915 916 def __sub__(self, other): 917 if isinstance(other, np.ndarray): 918 return -1 * (other - self) 919 elif hasattr(other, 'real') and hasattr(other, 'imag'): 920 return CObs(self.real - other.real, self.imag - other.imag) 921 else: 922 return CObs(self.real - other, self.imag) 923 924 def __rsub__(self, other): 925 return -1 * (self - other) 926 927 def __mul__(self, other): 928 if isinstance(other, np.ndarray): 929 return other * self 930 elif hasattr(other, 'real') and hasattr(other, 'imag'): 931 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 932 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 933 [self.real, other.real, self.imag, other.imag], 934 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 935 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 936 [self.real, other.real, self.imag, other.imag], 937 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 938 elif getattr(other, 'imag', 0) != 0: 939 return CObs(self.real * other.real - self.imag * other.imag, 940 self.imag * other.real + self.real * other.imag) 941 else: 942 return CObs(self.real * other.real, self.imag * other.real) 943 else: 944 return CObs(self.real * other, self.imag * other) 945 946 def __rmul__(self, other): 947 return self * other 948 949 def __truediv__(self, other): 950 if isinstance(other, np.ndarray): 951 return 1 / (other / self) 952 elif hasattr(other, 'real') and hasattr(other, 'imag'): 953 r = other.real ** 2 + other.imag ** 2 954 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 955 else: 956 return CObs(self.real / other, self.imag / other) 957 958 def __rtruediv__(self, other): 959 r = self.real ** 2 + self.imag ** 2 960 if hasattr(other, 'real') and hasattr(other, 'imag'): 961 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 962 else: 963 return CObs(self.real * other / r, -self.imag * other / r) 964 965 def __abs__(self): 966 return np.sqrt(self.real**2 + self.imag**2) 967 968 def __pos__(self): 969 return self 970 971 def __neg__(self): 972 return -1 * self 973 974 def __eq__(self, other): 975 return self.real == other.real and self.imag == other.imag 976 977 def __str__(self): 978 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 979 980 def __repr__(self): 981 return 'CObs[' + str(self) + ']'
Class for a complex valued observable.
890 def gamma_method(self, **kwargs): 891 """Executes the gamma_method for the real and the imaginary part.""" 892 if isinstance(self.real, Obs): 893 self.real.gamma_method(**kwargs) 894 if isinstance(self.imag, Obs): 895 self.imag.gamma_method(**kwargs)
Executes the gamma_method for the real and the imaginary part.
1135def derived_observable(func, data, array_mode=False, **kwargs): 1136 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1137 1138 Parameters 1139 ---------- 1140 func : object 1141 arbitrary function of the form func(data, **kwargs). For the 1142 automatic differentiation to work, all numpy functions have to have 1143 the autograd wrapper (use 'import autograd.numpy as anp'). 1144 data : list 1145 list of Obs, e.g. [obs1, obs2, obs3]. 1146 num_grad : bool 1147 if True, numerical derivatives are used instead of autograd 1148 (default False). To control the numerical differentiation the 1149 kwargs of numdifftools.step_generators.MaxStepGenerator 1150 can be used. 1151 man_grad : list 1152 manually supply a list or an array which contains the jacobian 1153 of func. Use cautiously, supplying the wrong derivative will 1154 not be intercepted. 1155 1156 Notes 1157 ----- 1158 For simple mathematical operations it can be practical to use anonymous 1159 functions. For the ratio of two observables one can e.g. use 1160 1161 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1162 """ 1163 1164 data = np.asarray(data) 1165 raveled_data = data.ravel() 1166 1167 # Workaround for matrix operations containing non Obs data 1168 if not all(isinstance(x, Obs) for x in raveled_data): 1169 for i in range(len(raveled_data)): 1170 if isinstance(raveled_data[i], (int, float)): 1171 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1172 1173 allcov = {} 1174 for o in raveled_data: 1175 for name in o.cov_names: 1176 if name in allcov: 1177 if not np.allclose(allcov[name], o.covobs[name].cov): 1178 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1179 else: 1180 allcov[name] = o.covobs[name].cov 1181 1182 n_obs = len(raveled_data) 1183 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1184 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1185 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1186 1187 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1188 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1189 1190 if data.ndim == 1: 1191 values = np.array([o.value for o in data]) 1192 else: 1193 values = np.vectorize(lambda x: x.value)(data) 1194 1195 new_values = func(values, **kwargs) 1196 1197 multi = int(isinstance(new_values, np.ndarray)) 1198 1199 new_r_values = {} 1200 new_idl_d = {} 1201 for name in new_sample_names: 1202 idl = [] 1203 tmp_values = np.zeros(n_obs) 1204 for i, item in enumerate(raveled_data): 1205 tmp_values[i] = item.r_values.get(name, item.value) 1206 tmp_idl = item.idl.get(name) 1207 if tmp_idl is not None: 1208 idl.append(tmp_idl) 1209 if multi > 0: 1210 tmp_values = np.array(tmp_values).reshape(data.shape) 1211 new_r_values[name] = func(tmp_values, **kwargs) 1212 new_idl_d[name] = _merge_idx(idl) 1213 if not is_merged[name]: 1214 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1215 1216 if 'man_grad' in kwargs: 1217 deriv = np.asarray(kwargs.get('man_grad')) 1218 if new_values.shape + data.shape != deriv.shape: 1219 raise Exception('Manual derivative does not have correct shape.') 1220 elif kwargs.get('num_grad') is True: 1221 if multi > 0: 1222 raise Exception('Multi mode currently not supported for numerical derivative') 1223 options = { 1224 'base_step': 0.1, 1225 'step_ratio': 2.5} 1226 for key in options.keys(): 1227 kwarg = kwargs.get(key) 1228 if kwarg is not None: 1229 options[key] = kwarg 1230 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1231 if tmp_df.size == 1: 1232 deriv = np.array([tmp_df.real]) 1233 else: 1234 deriv = tmp_df.real 1235 else: 1236 deriv = jacobian(func)(values, **kwargs) 1237 1238 final_result = np.zeros(new_values.shape, dtype=object) 1239 1240 if array_mode is True: 1241 1242 class _Zero_grad(): 1243 def __init__(self, N): 1244 self.grad = np.zeros((N, 1)) 1245 1246 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1247 d_extracted = {} 1248 g_extracted = {} 1249 for name in new_sample_names: 1250 d_extracted[name] = [] 1251 ens_length = len(new_idl_d[name]) 1252 for i_dat, dat in enumerate(data): 1253 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1254 for name in new_cov_names: 1255 g_extracted[name] = [] 1256 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1257 for i_dat, dat in enumerate(data): 1258 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1259 1260 for i_val, new_val in np.ndenumerate(new_values): 1261 new_deltas = {} 1262 new_grad = {} 1263 if array_mode is True: 1264 for name in new_sample_names: 1265 ens_length = d_extracted[name][0].shape[-1] 1266 new_deltas[name] = np.zeros(ens_length) 1267 for i_dat, dat in enumerate(d_extracted[name]): 1268 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1269 for name in new_cov_names: 1270 new_grad[name] = 0 1271 for i_dat, dat in enumerate(g_extracted[name]): 1272 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1273 else: 1274 for j_obs, obs in np.ndenumerate(data): 1275 for name in obs.names: 1276 if name in obs.cov_names: 1277 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1278 else: 1279 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1280 1281 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1282 1283 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1284 raise Exception('The same name has been used for deltas and covobs!') 1285 new_samples = [] 1286 new_means = [] 1287 new_idl = [] 1288 new_names_obs = [] 1289 for name in new_names: 1290 if name not in new_covobs: 1291 if is_merged[name]: 1292 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1293 else: 1294 filtered_deltas = new_deltas[name] 1295 filtered_idl_d = new_idl_d[name] 1296 1297 new_samples.append(filtered_deltas) 1298 new_idl.append(filtered_idl_d) 1299 new_means.append(new_r_values[name][i_val]) 1300 new_names_obs.append(name) 1301 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1302 for name in new_covobs: 1303 final_result[i_val].names.append(name) 1304 final_result[i_val]._covobs = new_covobs 1305 final_result[i_val]._value = new_val 1306 final_result[i_val].is_merged = is_merged 1307 final_result[i_val].reweighted = reweighted 1308 1309 if multi == 0: 1310 final_result = final_result.item() 1311 1312 return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1349def reweight(weight, obs, **kwargs): 1350 """Reweight a list of observables. 1351 1352 Parameters 1353 ---------- 1354 weight : Obs 1355 Reweighting factor. An Observable that has to be defined on a superset of the 1356 configurations in obs[i].idl for all i. 1357 obs : list 1358 list of Obs, e.g. [obs1, obs2, obs3]. 1359 all_configs : bool 1360 if True, the reweighted observables are normalized by the average of 1361 the reweighting factor on all configurations in weight.idl and not 1362 on the configurations in obs[i].idl. Default False. 1363 """ 1364 result = [] 1365 for i in range(len(obs)): 1366 if len(obs[i].cov_names): 1367 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1368 if not set(obs[i].names).issubset(weight.names): 1369 raise Exception('Error: Ensembles do not fit') 1370 for name in obs[i].names: 1371 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1372 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1373 new_samples = [] 1374 w_deltas = {} 1375 for name in sorted(obs[i].names): 1376 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1377 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1378 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1379 1380 if kwargs.get('all_configs'): 1381 new_weight = weight 1382 else: 1383 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1384 1385 result.append(tmp_obs / new_weight) 1386 result[-1].reweighted = True 1387 result[-1].is_merged = obs[i].is_merged 1388 1389 return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
1392def correlate(obs_a, obs_b): 1393 """Correlate two observables. 1394 1395 Parameters 1396 ---------- 1397 obs_a : Obs 1398 First observable 1399 obs_b : Obs 1400 Second observable 1401 1402 Notes 1403 ----- 1404 Keep in mind to only correlate primary observables which have not been reweighted 1405 yet. The reweighting has to be applied after correlating the observables. 1406 Currently only works if ensembles are identical (this is not strictly necessary). 1407 """ 1408 1409 if sorted(obs_a.names) != sorted(obs_b.names): 1410 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") 1411 if len(obs_a.cov_names) or len(obs_b.cov_names): 1412 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1413 for name in obs_a.names: 1414 if obs_a.shape[name] != obs_b.shape[name]: 1415 raise Exception('Shapes of ensemble', name, 'do not fit') 1416 if obs_a.idl[name] != obs_b.idl[name]: 1417 raise Exception('idl of ensemble', name, 'do not fit') 1418 1419 if obs_a.reweighted is True: 1420 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1421 if obs_b.reweighted is True: 1422 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1423 1424 new_samples = [] 1425 new_idl = [] 1426 for name in sorted(obs_a.names): 1427 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1428 new_idl.append(obs_a.idl[name]) 1429 1430 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1431 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1432 o.reweighted = obs_a.reweighted or obs_b.reweighted 1433 return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
1436def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1437 r'''Calculates the error covariance matrix of a set of observables. 1438 1439 WARNING: This function should be used with care, especially for observables with support on multiple 1440 ensembles with differing autocorrelations. See the notes below for details. 1441 1442 The gamma method has to be applied first to all observables. 1443 1444 Parameters 1445 ---------- 1446 obs : list or numpy.ndarray 1447 List or one dimensional array of Obs 1448 visualize : bool 1449 If True plots the corresponding normalized correlation matrix (default False). 1450 correlation : bool 1451 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1452 smooth : None or int 1453 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1454 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1455 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1456 small ones. 1457 1458 Notes 1459 ----- 1460 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1461 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1462 in the absence of autocorrelation. 1463 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1464 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1465 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1466 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1467 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1468 ''' 1469 1470 length = len(obs) 1471 1472 max_samples = np.max([o.N for o in obs]) 1473 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1474 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1475 1476 cov = np.zeros((length, length)) 1477 for i in range(length): 1478 for j in range(i, length): 1479 cov[i, j] = _covariance_element(obs[i], obs[j]) 1480 cov = cov + cov.T - np.diag(np.diag(cov)) 1481 1482 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1483 1484 if isinstance(smooth, int): 1485 corr = _smooth_eigenvalues(corr, smooth) 1486 1487 if visualize: 1488 plt.matshow(corr, vmin=-1, vmax=1) 1489 plt.set_cmap('RdBu') 1490 plt.colorbar() 1491 plt.draw() 1492 1493 if correlation is True: 1494 return corr 1495 1496 errors = [o.dvalue for o in obs] 1497 cov = np.diag(errors) @ corr @ np.diag(errors) 1498 1499 eigenvalues = np.linalg.eigh(cov)[0] 1500 if not np.all(eigenvalues >= 0): 1501 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1502 1503 return cov
Calculates the error covariance matrix of a set of observables.
WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1583def import_jackknife(jacks, name, idl=None): 1584 """Imports jackknife samples and returns an Obs 1585 1586 Parameters 1587 ---------- 1588 jacks : numpy.ndarray 1589 numpy array containing the mean value as zeroth entry and 1590 the N jackknife samples as first to Nth entry. 1591 name : str 1592 name of the ensemble the samples are defined on. 1593 """ 1594 length = len(jacks) - 1 1595 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1596 samples = jacks[1:] @ prj 1597 mean = np.mean(samples) 1598 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1599 new_obs._value = jacks[0] 1600 return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
1603def merge_obs(list_of_obs): 1604 """Combine all observables in list_of_obs into one new observable 1605 1606 Parameters 1607 ---------- 1608 list_of_obs : list 1609 list of the Obs object to be combined 1610 1611 Notes 1612 ----- 1613 It is not possible to combine obs which are based on the same replicum 1614 """ 1615 replist = [item for obs in list_of_obs for item in obs.names] 1616 if (len(replist) == len(set(replist))) is False: 1617 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1618 if any([len(o.cov_names) for o in list_of_obs]): 1619 raise Exception('Not possible to merge data that contains covobs!') 1620 new_dict = {} 1621 idl_dict = {} 1622 for o in list_of_obs: 1623 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1624 for key in set(o.deltas) | set(o.r_values)}) 1625 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1626 1627 names = sorted(new_dict.keys()) 1628 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1629 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1630 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1631 return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs object to be combined
Notes
It is not possible to combine obs which are based on the same replicum
1634def cov_Obs(means, cov, name, grad=None): 1635 """Create an Obs based on mean(s) and a covariance matrix 1636 1637 Parameters 1638 ---------- 1639 mean : list of floats or float 1640 N mean value(s) of the new Obs 1641 cov : list or array 1642 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1643 name : str 1644 identifier for the covariance matrix 1645 grad : list or array 1646 Gradient of the Covobs wrt. the means belonging to cov. 1647 """ 1648 1649 def covobs_to_obs(co): 1650 """Make an Obs out of a Covobs 1651 1652 Parameters 1653 ---------- 1654 co : Covobs 1655 Covobs to be embedded into the Obs 1656 """ 1657 o = Obs([], [], means=[]) 1658 o._value = co.value 1659 o.names.append(co.name) 1660 o._covobs[co.name] = co 1661 o._dvalue = np.sqrt(co.errsq()) 1662 return o 1663 1664 ol = [] 1665 if isinstance(means, (float, int)): 1666 means = [means] 1667 1668 for i in range(len(means)): 1669 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1670 if ol[0].covobs[name].N != len(means): 1671 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1672 if len(ol) == 1: 1673 return ol[0] 1674 return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- mean (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.