pyerrors.obs
1import warnings 2import hashlib 3import pickle 4from math import gcd 5from functools import reduce 6import numpy as np 7import autograd.numpy as anp # Thinly-wrapped numpy 8from autograd import jacobian 9import matplotlib.pyplot as plt 10from scipy.stats import skew, skewtest, kurtosis, kurtosistest 11import numdifftools as nd 12from itertools import groupby 13from .covobs import Covobs 14 15# Improve print output of numpy.ndarrays containing Obs objects. 16np.set_printoptions(formatter={'object': lambda x: str(x)}) 17 18 19class Obs: 20 """Class for a general observable. 21 22 Instances of Obs are the basic objects of a pyerrors error analysis. 23 They are initialized with a list which contains arrays of samples for 24 different ensembles/replica and another list of same length which contains 25 the names of the ensembles/replica. Mathematical operations can be 26 performed on instances. The result is another instance of Obs. The error of 27 an instance can be computed with the gamma_method. Also contains additional 28 methods for output and visualization of the error calculation. 29 30 Attributes 31 ---------- 32 S_global : float 33 Standard value for S (default 2.0) 34 S_dict : dict 35 Dictionary for S values. If an entry for a given ensemble 36 exists this overwrites the standard value for that ensemble. 37 tau_exp_global : float 38 Standard value for tau_exp (default 0.0) 39 tau_exp_dict : dict 40 Dictionary for tau_exp values. If an entry for a given ensemble exists 41 this overwrites the standard value for that ensemble. 42 N_sigma_global : float 43 Standard value for N_sigma (default 1.0) 44 N_sigma_dict : dict 45 Dictionary for N_sigma values. If an entry for a given ensemble exists 46 this overwrites the standard value for that ensemble. 
47 """ 48 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 49 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 50 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 51 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 52 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 53 54 S_global = 2.0 55 S_dict = {} 56 tau_exp_global = 0.0 57 tau_exp_dict = {} 58 N_sigma_global = 1.0 59 N_sigma_dict = {} 60 61 def __init__(self, samples, names, idl=None, **kwargs): 62 """ Initialize Obs object. 63 64 Parameters 65 ---------- 66 samples : list 67 list of numpy arrays containing the Monte Carlo samples 68 names : list 69 list of strings labeling the individual samples 70 idl : list, optional 71 list of ranges or lists on which the samples are defined 72 """ 73 74 if kwargs.get("means") is None and len(samples): 75 if len(samples) != len(names): 76 raise Exception('Length of samples and names incompatible.') 77 if idl is not None: 78 if len(idl) != len(names): 79 raise Exception('Length of idl incompatible with samples and names.') 80 name_length = len(names) 81 if name_length > 1: 82 if name_length != len(set(names)): 83 raise Exception('names are not unique.') 84 if not all(isinstance(x, str) for x in names): 85 raise TypeError('All names have to be strings.') 86 else: 87 if not isinstance(names[0], str): 88 raise TypeError('All names have to be strings.') 89 if min(len(x) for x in samples) <= 4: 90 raise Exception('Samples have to have at least 5 entries.') 91 92 self.names = sorted(names) 93 self.shape = {} 94 self.r_values = {} 95 self.deltas = {} 96 self._covobs = {} 97 98 self._value = 0 99 self.N = 0 100 self.is_merged = {} 101 self.idl = {} 102 if idl is not None: 103 for name, idx in sorted(zip(names, idl)): 104 if isinstance(idx, range): 105 self.idl[name] = idx 106 elif isinstance(idx, (list, np.ndarray)): 107 dc = np.unique(np.diff(idx)) 108 if np.any(dc < 0): 109 raise Exception("Unsorted idx for idl[%s]" % (name)) 110 
if len(dc) == 1: 111 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 112 else: 113 self.idl[name] = list(idx) 114 else: 115 raise Exception('incompatible type for idl[%s].' % (name)) 116 else: 117 for name, sample in sorted(zip(names, samples)): 118 self.idl[name] = range(1, len(sample) + 1) 119 120 if kwargs.get("means") is not None: 121 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 122 self.shape[name] = len(self.idl[name]) 123 self.N += self.shape[name] 124 self.r_values[name] = mean 125 self.deltas[name] = sample 126 else: 127 for name, sample in sorted(zip(names, samples)): 128 self.shape[name] = len(self.idl[name]) 129 self.N += self.shape[name] 130 if len(sample) != self.shape[name]: 131 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 132 self.r_values[name] = np.mean(sample) 133 self.deltas[name] = sample - self.r_values[name] 134 self._value += self.shape[name] * self.r_values[name] 135 self._value /= self.N 136 137 self._dvalue = 0.0 138 self.ddvalue = 0.0 139 self.reweighted = False 140 141 self.tag = None 142 143 @property 144 def value(self): 145 return self._value 146 147 @property 148 def dvalue(self): 149 return self._dvalue 150 151 @property 152 def e_names(self): 153 return sorted(set([o.split('|')[0] for o in self.names])) 154 155 @property 156 def cov_names(self): 157 return sorted(set([o for o in self.covobs.keys()])) 158 159 @property 160 def mc_names(self): 161 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 162 163 @property 164 def e_content(self): 165 res = {} 166 for e, e_name in enumerate(self.e_names): 167 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 168 if e_name in self.names: 169 res[e_name].append(e_name) 170 return res 171 172 @property 173 def covobs(self): 174 return self._covobs 175 176 def gamma_method(self, **kwargs): 177 """Estimate the error and related 
properties of the Obs. 178 179 Parameters 180 ---------- 181 S : float 182 specifies a custom value for the parameter S (default 2.0). 183 If set to 0 it is assumed that the data exhibits no 184 autocorrelation. In this case the error estimates coincides 185 with the sample standard error. 186 tau_exp : float 187 positive value triggers the critical slowing down analysis 188 (default 0.0). 189 N_sigma : float 190 number of standard deviations from zero until the tail is 191 attached to the autocorrelation function (default 1). 192 fft : bool 193 determines whether the fft algorithm is used for the computation 194 of the autocorrelation function (default True) 195 """ 196 197 e_content = self.e_content 198 self.e_dvalue = {} 199 self.e_ddvalue = {} 200 self.e_tauint = {} 201 self.e_dtauint = {} 202 self.e_windowsize = {} 203 self.e_n_tauint = {} 204 self.e_n_dtauint = {} 205 e_gamma = {} 206 self.e_rho = {} 207 self.e_drho = {} 208 self._dvalue = 0 209 self.ddvalue = 0 210 211 self.S = {} 212 self.tau_exp = {} 213 self.N_sigma = {} 214 215 if kwargs.get('fft') is False: 216 fft = False 217 else: 218 fft = True 219 220 def _parse_kwarg(kwarg_name): 221 if kwarg_name in kwargs: 222 tmp = kwargs.get(kwarg_name) 223 if isinstance(tmp, (int, float)): 224 if tmp < 0: 225 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 226 for e, e_name in enumerate(self.e_names): 227 getattr(self, kwarg_name)[e_name] = tmp 228 else: 229 raise TypeError(kwarg_name + ' is not in proper format.') 230 else: 231 for e, e_name in enumerate(self.e_names): 232 if e_name in getattr(Obs, kwarg_name + '_dict'): 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 234 else: 235 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 236 237 _parse_kwarg('S') 238 _parse_kwarg('tau_exp') 239 _parse_kwarg('N_sigma') 240 241 for e, e_name in enumerate(self.mc_names): 242 r_length = [] 243 for r_name in e_content[e_name]: 244 if 
isinstance(self.idl[r_name], range): 245 r_length.append(len(self.idl[r_name])) 246 else: 247 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 248 249 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 250 w_max = max(r_length) // 2 251 e_gamma[e_name] = np.zeros(w_max) 252 self.e_rho[e_name] = np.zeros(w_max) 253 self.e_drho[e_name] = np.zeros(w_max) 254 255 for r_name in e_content[e_name]: 256 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 257 258 gamma_div = np.zeros(w_max) 259 for r_name in e_content[e_name]: 260 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 261 gamma_div[gamma_div < 1] = 1.0 262 e_gamma[e_name] /= gamma_div[:w_max] 263 264 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 265 self.e_tauint[e_name] = 0.5 266 self.e_dtauint[e_name] = 0.0 267 self.e_dvalue[e_name] = 0.0 268 self.e_ddvalue[e_name] = 0.0 269 self.e_windowsize[e_name] = 0 270 continue 271 272 gaps = [] 273 for r_name in e_content[e_name]: 274 if isinstance(self.idl[r_name], range): 275 gaps.append(1) 276 else: 277 gaps.append(np.min(np.diff(self.idl[r_name]))) 278 279 if not np.all([gi == gaps[0] for gi in gaps]): 280 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 281 else: 282 gapsize = gaps[0] 283 284 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 285 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 286 # Make sure no entry of tauint is smaller than 0.5 287 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 288 # hep-lat/0306017 eq. 
(42) 289 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 290 self.e_n_dtauint[e_name][0] = 0.0 291 292 def _compute_drho(i): 293 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 294 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 295 296 _compute_drho(gapsize) 297 if self.tau_exp[e_name] > 0: 298 texp = self.tau_exp[e_name] 299 # Critical slowing down analysis 300 if w_max // 2 <= 1: 301 raise Exception("Need at least 8 samples for tau_exp error analysis") 302 for n in range(gapsize, w_max // 2, gapsize): 303 _compute_drho(n + gapsize) 304 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 305 # Bias correction hep-lat/0306017 eq. (49) included 306 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 307 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 308 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 309 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 310 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 311 self.e_windowsize[e_name] = n 312 break 313 else: 314 if self.S[e_name] == 0.0: 315 self.e_tauint[e_name] = 0.5 316 self.e_dtauint[e_name] = 0.0 317 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 318 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 319 self.e_windowsize[e_name] = 0 320 else: 321 # Standard automatic windowing procedure 322 tau = self.S[e_name] / np.log((2 * 
def _calc_gamma(self, deltas, idx, shape, w_max, fft):
    """Calculate Gamma_{AA} from the deltas, which are defined on idx.

    The deltas are first passed through `_expand_deltas`; the result is the
    sum of lagged products of the expanded series with itself for the lags
    0, ..., w_max - 1. No normalization is applied here.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configurations on which the deltas are defined.
    shape : int
        Number of configurations in idx.
    w_max : int
        Upper bound for the summation window.
    fft : bool
        determines whether the fft algorithm is used for the computation
        of the autocorrelation function.

    Returns
    -------
    numpy.ndarray
        Array of length w_max; entries for lags that exceed the length of
        the expanded series stay zero.
    """
    expanded = _expand_deltas(deltas, idx, shape)
    n_expanded = len(expanded)
    gamma = np.zeros(w_max)

    if not fft:
        # Direct evaluation: scalar product of the series with its shifted copy.
        for lag in range(w_max):
            if lag > n_expanded:
                continue
            gamma[lag] += expanded[:n_expanded - lag].dot(expanded[lag:n_expanded])
        return gamma

    n_lags = min(n_expanded, w_max)
    # The padding for the fft has to be even
    unpadded = n_expanded + n_lags
    padding = unpadded + unpadded % 2
    power_spectrum = np.abs(np.fft.rfft(expanded, padding)) ** 2
    gamma[:n_lags] += np.fft.irfft(power_spectrum)[:n_lags]
    return gamma
def reweight(self, weight):
    """Reweight the obs with given reweighting factors.

    Calls the function `reweight` found in module scope (not the method
    itself) with this observable as the only entry of the observable list
    and returns the first element of its result.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in self.idl.

    Notes
    -----
    This method takes no `all_configs` argument: only `weight` and `[self]`
    are handed to the module-level `reweight`, so whatever default that
    function defines for further options applies.
    """
    return reweight(weight, [self])[0]
502 """ 503 if not hasattr(self, 'e_dvalue'): 504 raise Exception('Run the gamma method first.') 505 506 for e, e_name in enumerate(self.mc_names): 507 fig = plt.figure() 508 plt.xlabel(r'$W$') 509 plt.ylabel(r'$\tau_\mathrm{int}$') 510 length = int(len(self.e_n_tauint[e_name])) 511 if self.tau_exp[e_name] > 0: 512 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 513 x_help = np.arange(2 * self.tau_exp[e_name]) 514 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 515 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 516 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 517 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 518 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 519 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 520 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 521 else: 522 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 523 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 524 525 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 526 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 527 plt.legend() 528 plt.xlim(-0.5, xmax) 529 ylim = plt.ylim() 530 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 531 plt.draw() 532 if save: 533 fig.savefig(save + "_" + str(e)) 534 535 def plot_rho(self, save=None): 536 """Plot normalized autocorrelation function time for each ensemble. 537 538 Parameters 539 ---------- 540 save : str 541 saves the figure to a file named 'save' if. 
542 """ 543 if not hasattr(self, 'e_dvalue'): 544 raise Exception('Run the gamma method first.') 545 for e, e_name in enumerate(self.mc_names): 546 fig = plt.figure() 547 plt.xlabel('W') 548 plt.ylabel('rho') 549 length = int(len(self.e_drho[e_name])) 550 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 551 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 552 if self.tau_exp[e_name] > 0: 553 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 554 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 555 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 556 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 557 else: 558 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 559 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 560 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 561 plt.xlim(-0.5, xmax) 562 plt.draw() 563 if save: 564 fig.savefig(save + "_" + str(e)) 565 566 def plot_rep_dist(self): 567 """Plot replica distribution for each ensemble with more than one replicum.""" 568 if not hasattr(self, 'e_dvalue'): 569 raise Exception('Run the gamma method first.') 570 for e, e_name in enumerate(self.mc_names): 571 if len(self.e_content[e_name]) == 1: 572 print('No replica distribution for a single replicum (', e_name, ')') 573 continue 574 r_length = [] 575 sub_r_mean = 0 576 for r, r_name in enumerate(self.e_content[e_name]): 577 r_length.append(len(self.deltas[r_name])) 578 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 579 e_N = np.sum(r_length) 580 sub_r_mean /= e_N 581 arr = np.zeros(len(self.e_content[e_name])) 582 for r, r_name in enumerate(self.e_content[e_name]): 583 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 584 plt.hist(arr, 
rwidth=0.8, bins=len(self.e_content[e_name])) 585 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 586 plt.draw() 587 588 def plot_history(self, expand=True): 589 """Plot derived Monte Carlo history for each ensemble 590 591 Parameters 592 ---------- 593 expand : bool 594 show expanded history for irregular Monte Carlo chains (default: True). 595 """ 596 for e, e_name in enumerate(self.mc_names): 597 plt.figure() 598 r_length = [] 599 tmp = [] 600 tmp_expanded = [] 601 for r, r_name in enumerate(self.e_content[e_name]): 602 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 603 if expand: 604 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 605 r_length.append(len(tmp_expanded[-1])) 606 else: 607 r_length.append(len(tmp[-1])) 608 e_N = np.sum(r_length) 609 x = np.arange(e_N) 610 y_test = np.concatenate(tmp, axis=0) 611 if expand: 612 y = np.concatenate(tmp_expanded, axis=0) 613 else: 614 y = y_test 615 plt.errorbar(x, y, fmt='.', markersize=3) 616 plt.xlim(-0.5, e_N - 0.5) 617 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 618 plt.draw() 619 620 def plot_piechart(self, save=None): 621 """Plot piechart which shows the fractional contribution of each 622 ensemble to the error and returns a dictionary containing the fractions. 623 624 Parameters 625 ---------- 626 save : str 627 saves the figure to a file named 'save' if. 
628 """ 629 if not hasattr(self, 'e_dvalue'): 630 raise Exception('Run the gamma method first.') 631 if np.isclose(0.0, self._dvalue, atol=1e-15): 632 raise Exception('Error is 0.0') 633 labels = self.e_names 634 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 635 fig1, ax1 = plt.subplots() 636 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 637 ax1.axis('equal') 638 plt.draw() 639 if save: 640 fig1.savefig(save) 641 642 return dict(zip(self.e_names, sizes)) 643 644 def dump(self, filename, datatype="json.gz", description="", **kwargs): 645 """Dump the Obs to a file 'name' of chosen format. 646 647 Parameters 648 ---------- 649 filename : str 650 name of the file to be saved. 651 datatype : str 652 Format of the exported file. Supported formats include 653 "json.gz" and "pickle" 654 description : str 655 Description for output file, only relevant for json.gz format. 656 path : str 657 specifies a custom path for the file (default '.') 658 """ 659 if 'path' in kwargs: 660 file_name = kwargs.get('path') + '/' + filename 661 else: 662 file_name = filename 663 664 if datatype == "json.gz": 665 from .input.json import dump_to_json 666 dump_to_json([self], file_name, description=description) 667 elif datatype == "pickle": 668 with open(file_name + '.p', 'wb') as fb: 669 pickle.dump(self, fb) 670 else: 671 raise Exception("Unknown datatype " + str(datatype)) 672 673 def export_jackknife(self): 674 """Export jackknife samples from the Obs 675 676 Returns 677 ------- 678 numpy.ndarray 679 Returns a numpy array of length N + 1 where N is the number of samples 680 for the given ensemble and replicum. The zeroth entry of the array contains 681 the mean value of the Obs, entries 1 to N contain the N jackknife samples 682 derived from the Obs. The current implementation only works for observables 683 defined on exactly one ensemble and replicum. 
The derived jackknife samples 684 should agree with samples from a full jackknife analysis up to O(1/N). 685 """ 686 687 if len(self.names) != 1: 688 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 689 690 name = self.names[0] 691 full_data = self.deltas[name] + self.r_values[name] 692 n = full_data.size 693 mean = self.value 694 tmp_jacks = np.zeros(n + 1) 695 tmp_jacks[0] = mean 696 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 697 return tmp_jacks 698 699 def __float__(self): 700 return float(self.value) 701 702 def __repr__(self): 703 return 'Obs[' + str(self) + ']' 704 705 def __str__(self): 706 return _format_uncertainty(self.value, self._dvalue) 707 708 def __hash__(self): 709 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 710 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 711 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 712 hash_tuple += tuple([o.encode() for o in self.names]) 713 m = hashlib.md5() 714 [m.update(o) for o in hash_tuple] 715 return int(m.hexdigest(), 16) & 0xFFFFFFFF 716 717 # Overload comparisons 718 def __lt__(self, other): 719 return self.value < other 720 721 def __le__(self, other): 722 return self.value <= other 723 724 def __gt__(self, other): 725 return self.value > other 726 727 def __ge__(self, other): 728 return self.value >= other 729 730 def __eq__(self, other): 731 return (self - other).is_zero() 732 733 def __ne__(self, other): 734 return not (self - other).is_zero() 735 736 # Overload math operations 737 def __add__(self, y): 738 if isinstance(y, Obs): 739 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 740 else: 741 if isinstance(y, np.ndarray): 742 return np.array([self + o for o in y]) 743 elif y.__class__.__name__ in ['Corr', 'CObs']: 744 return NotImplemented 745 else: 746 return 
derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 747 748 def __radd__(self, y): 749 return self + y 750 751 def __mul__(self, y): 752 if isinstance(y, Obs): 753 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 754 else: 755 if isinstance(y, np.ndarray): 756 return np.array([self * o for o in y]) 757 elif isinstance(y, complex): 758 return CObs(self * y.real, self * y.imag) 759 elif y.__class__.__name__ in ['Corr', 'CObs']: 760 return NotImplemented 761 else: 762 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 763 764 def __rmul__(self, y): 765 return self * y 766 767 def __sub__(self, y): 768 if isinstance(y, Obs): 769 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 770 else: 771 if isinstance(y, np.ndarray): 772 return np.array([self - o for o in y]) 773 elif y.__class__.__name__ in ['Corr', 'CObs']: 774 return NotImplemented 775 else: 776 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 777 778 def __rsub__(self, y): 779 return -1 * (self - y) 780 781 def __pos__(self): 782 return self 783 784 def __neg__(self): 785 return -1 * self 786 787 def __truediv__(self, y): 788 if isinstance(y, Obs): 789 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 790 else: 791 if isinstance(y, np.ndarray): 792 return np.array([self / o for o in y]) 793 elif y.__class__.__name__ in ['Corr', 'CObs']: 794 return NotImplemented 795 else: 796 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 797 798 def __rtruediv__(self, y): 799 if isinstance(y, Obs): 800 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 801 else: 802 if isinstance(y, np.ndarray): 803 return np.array([o / self for o in y]) 804 elif y.__class__.__name__ in ['Corr', 
def __pow__(self, y):
    """Raise the Obs to the power y.

    If y is an Obs both base and exponent enter `derived_observable` as
    observables, otherwise y is used as a constant exponent. No manual
    gradient is passed in either case.
    """
    if not isinstance(y, Obs):
        return derived_observable(lambda x: x[0] ** y, [self])
    return derived_observable(lambda x: x[0] ** x[1], [self, y])
class CObs:
    """Class for a complex valued observable.

    The real and the imaginary part are stored separately; each of them can
    be an Obs or a plain number.
    """
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        """Returns the complex conjugate as a new CObs."""
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            # Let numpy broadcast the operation over the array elements.
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                # All four components are Obs: build real and imaginary part with
                # one derived_observable call each and supply the (trivial)
                # derivatives by hand.
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                # Purely real factor: no cross terms needed.
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            # self / other = self * conj(other) / |other|^2
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        # other / self = other * conj(self) / |self|^2
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        # Objects without real/imag parts (None, strings, ...) cannot be compared
        # component-wise. Returning NotImplemented lets Python fall back to its
        # default handling instead of raising an AttributeError.
        if not (hasattr(other, 'real') and hasattr(other, 'imag')):
            return NotImplemented
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        # The explicit '+' is only inserted for a non-negative imaginary part,
        # a negative one brings its own sign.
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'


def _format_uncertainty(value, dvalue):
    """Creates a string of a value and its error in parenthesis notation, e.g., 13.02(45)

    Parameters
    ----------
    value : float
        Central value.
    dvalue : float
        Error of the value. If it is exactly zero only str(value) is returned.
    """
    if dvalue == 0.0:
        return str(value)
    # Decimal exponent of the leading digit of the error.
    fexp = np.floor(np.log10(dvalue))
    if fexp < 0.0:
        # Error smaller than one: show two significant digits of the error and
        # the matching number of decimals of the value.
        return '{:{form}}({:2.0f})'.format(value, dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
    elif fexp == 0.0:
        return '{:.1f}({:1.1f})'.format(value, dvalue)
    else:
        return '{:.0f}({:2.0f})'.format(value, dvalue)


def _expand_deltas(deltas, idx, shape):
    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
    If idx is of type range, the deltas are not changed

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
    shape : int
        Number of configs in idx.
    """
    if isinstance(idx, range):
        return deltas
    first = idx[0]
    ret = np.zeros(idx[-1] - first + 1)
    # Place every fluctuation at the offset of its config relative to the first one.
    ret[np.asarray(idx[:shape], dtype=int) - first] = np.asarray(deltas)[:shape]
    return ret
def _merge_idx(idl):
    """Returns the union of all lists in idl as sorted list

    If all entries of idl are identical the first entry is returned as is.
    If all entries are non-empty ranges with a common first element and their
    union is again a regular range, this range is returned. In all other
    cases a sorted list of the union is returned.

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """

    # Use groupby to efficiently check whether all elements of idl are identical
    try:
        g = groupby(idl)
        if next(g, True) and not next(g, False):
            return idl[0]
    except Exception:
        # Best effort shortcut only: the comparison can fail (e.g. for numpy
        # arrays or an empty idl). In that case use the general code below.
        pass

    union = set().union(*idl)

    if all(type(idx) is range for idx in idl) and all(len(idx) > 0 for idx in idl):
        if len({idx[0] for idx in idl}) == 1:
            idstart = min(idx.start for idx in idl)
            idstop = max(idx.stop for idx in idl)
            idstep = min(idx.step for idx in idl)
            candidate = range(idstart, idstop, idstep)
            # The candidate is only the union if all step sizes and stops are
            # compatible, e.g. range(1, 10, 2) and range(1, 10, 3) do not merge
            # into range(1, 10, 2). Verify before returning it.
            if len(candidate) == len(union) and union.issuperset(candidate):
                return candidate

    return sorted(union)


def _intersection_idx(idl):
    """Returns the intersection of all lists in idl as sorted list

    If all entries of idl are identical the first entry is returned as is.
    For non-empty ranges with a common first element the intersection is
    returned as range.

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """

    def _lcm(*args):
        """Returns the lowest common multiple of args.

        From python 3.9 onwards the math library contains an lcm function."""
        return reduce(lambda a, b: a * b // gcd(a, b), args)

    # Use groupby to efficiently check whether all elements of idl are identical
    try:
        g = groupby(idl)
        if next(g, True) and not next(g, False):
            return idl[0]
    except Exception:
        # Best effort shortcut only, see _merge_idx.
        pass

    if all(type(idx) is range for idx in idl) and all(len(idx) > 0 for idx in idl):
        if len({idx[0] for idx in idl}) == 1:
            # Progressions with a common start share exactly the elements of
            # the progression with the lowest common multiple as step size.
            idstart = max(idx.start for idx in idl)
            idstop = min(idx.stop for idx in idl)
            idstep = _lcm(*[idx.step for idx in idl])
            return range(idstart, idstop, idstep)

    return sorted(set.intersection(*[set(o) for o in idl]))
def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
    New, empty entries are filled by 0. If idx and new_idx are identical ranges
    the deltas are returned unchanged.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined.
        Has to be a subset of new_idx and has to be sorted in ascending order.
    shape : int
        Number of configs in idx.
    new_idx : list
        List of configs that defines the new range, has to be sorted in ascending order.
    """

    if type(idx) is range and type(new_idx) is range:
        if idx == new_idx:
            return deltas
    first = new_idx[0]
    # Scatter the fluctuations onto a contiguous grid that spans new_idx ...
    ret = np.zeros(new_idx[-1] - first + 1)
    ret[np.asarray(idx[:shape], dtype=int) - first] = np.asarray(deltas)[:shape]
    # ... and pick the entries that belong to the configs of new_idx.
    return ret[np.asarray(new_idx, dtype=int) - first]
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
        All kwargs of this function (including num_grad and man_grad) are
        forwarded to func, so func has to accept them.
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    array_mode : bool
        if True, the deltas and the covobs gradients of all entries are first
        collected into arrays (one per entry along the first axis of data)
        and contracted with the derivative via np.tensordot instead of being
        accumulated observable by observable (default False).
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Returns
    -------
    Obs or numpy.ndarray
        A single Obs if func does not return a numpy.ndarray, otherwise an
        object array of Obs with the shape of the return value of func.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                # Plain numbers are wrapped into an Obs with vanishing covariance.
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    # Collect the covariance matrix of every covobs name and make sure that
    # all observables agree on it.
    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    # Names that carry Monte Carlo samples, i.e. all names that are not covobs.
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    # The flags are inherited if at least one input carries them (identity check against True).
    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    # multi is 1 if func returns an array and 0 for a scalar return value.
    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            # Observables that are not defined on this replicum enter with their mean value.
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            # Differing lengths of the individual idl and the merged idl flag a merged replicum.
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    # Derivative of func with respect to data: manual, numerical or via autograd.
    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        class _Zero_grad():
            # Stand-in with a vanishing gradient for observables that do not
            # depend on a given covobs.
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                # Deltas of all entries of dat, expanded to the merged idl, with the configs as last axis.
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    # Build one Obs for every entry of the return value of func.
    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            # Linear error propagation: sum of derivative times fluctuation (or covobs gradient).
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                new_samples.append(new_deltas[name])
                new_idl.append(new_idl_d[name])
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        # The covobs names are attached after construction, the Obs itself is
        # only initialized with the Monte Carlo samples.
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        # Scalar return value of func: unwrap the 0-dimensional object array.
        final_result = final_result.item()

    return final_result
def _reduce_deltas(deltas, idx_old, idx_new):
    """Extract deltas defined on idx_old on all configs of idx_new.

    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
    are ordered in an ascending order. If both are identical the deltas are
    returned unchanged.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx_old : list
        List or range of configs on which the deltas are defined
    idx_new : list
        List of configs for which we want to extract the deltas.
        Has to be a subset of idx_old.
    """
    n_deltas = len(deltas)
    n_old = len(idx_old)
    if n_deltas != n_old:
        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (n_deltas, n_old))

    both_ranges = type(idx_old) is range and type(idx_new) is range
    if both_ranges and idx_old == idx_new:
        return deltas

    # groupby collapses equal neighbours, a single group therefore means that
    # idx_old and idx_new are identical. The comparison may fail for some
    # container types, in that case continue with the general code.
    try:
        if sum(1 for _ in groupby([idx_old, idx_new])) == 1:
            return deltas
    except Exception:
        pass

    # Positions in idx_old of all configs that are also contained in idx_new.
    _, positions, _ = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)
    if len(positions) < len(idx_new):
        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
    return np.array(deltas)[positions]
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.

    Returns
    -------
    list
        One reweighted Obs for every entry of obs, flagged as reweighted.
    """
    use_all_configs = kwargs.get('all_configs')
    reweighted_obs = []
    for position, current in enumerate(obs):
        if len(current.cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(current.names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in current.names:
            if not set(current.idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (position, name))

        ordered_names = sorted(current.names)
        ordered_idl = [current.idl[name] for name in ordered_names]

        # Samples of the weight, restricted to the configs of the current observable.
        weight_samples = {}
        product_samples = []
        for name in ordered_names:
            reduced = _reduce_deltas(weight.deltas[name], weight.idl[name], current.idl[name])
            weight_samples[name] = reduced + weight.r_values[name]
            product_samples.append(weight_samples[name] * (current.deltas[name] + current.r_values[name]))
        numerator = Obs(product_samples, ordered_names, idl=ordered_idl)

        if use_all_configs:
            denominator = weight
        else:
            denominator = Obs([weight_samples[name] for name in ordered_names], ordered_names, idl=ordered_idl)

        ratio = numerator / denominator
        ratio.reweighted = True
        ratio.is_merged = current.is_merged
        reweighted_obs.append(ratio)

    return reweighted_obs
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Returns
    -------
    Obs
        Observable built from the configuration-wise product of the samples
        of obs_a and obs_b.

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        # The messages are built as one string; passing the pieces as separate
        # arguments to Exception would be rendered as a tuple.
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception(f'Shapes of ensemble {name} do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception(f'idl of ensemble {name} do not fit')

    # Truthiness instead of an identity check against True, so that numpy
    # booleans are recognized as well.
    if obs_a.reweighted:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    names = sorted(obs_a.names)
    new_samples = []
    new_idl = []
    for name in names:
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, names, idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    # Store a plain Python bool, other parts of the module test the flag with "is True".
    o.reweighted = bool(obs_a.reweighted or obs_b.reweighted)
    return o
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
             ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    # Covobs do not contribute Monte Carlo samples, the rank warning is
    # therefore only issued if no observable contains any.
    all_cov_names = [item for o in obs for item in o.cov_names]
    if max_samples <= length and not all_cov_names:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Only the upper triangle is computed, the rest follows from symmetry.
    upper = np.zeros((length, length))
    for row in range(length):
        for col in range(row, length):
            upper[row, col] = _covariance_element(obs[row], obs[col])
    cov = upper + upper.T - np.diag(np.diag(upper))

    # Normalize with the square roots of the diagonal to get the correlation matrix.
    inverse_sigma = np.diag(1 / np.sqrt(np.diag(cov)))
    corr = inverse_sigma @ cov @ inverse_sigma

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    # Rescale the correlation matrix with the full errors of the observables.
    error_matrix = np.diag([o.dvalue for o in obs])
    cov = error_matrix @ corr @ error_matrix

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov


def _smooth_eigenvalues(corr, E):
    """Eigenvalue smoothing as described in hep-lat/9412087

    Eigenvalues below the mean of all but the E largest ones are raised to
    this mean, afterwards the spectrum is normalized to a mean of one.

    corr : np.ndarray
        correlation matrix
    E : integer
        Number of eigenvalues to be left substantially unchanged
    """
    upper_bound = corr.shape[0] - 1
    if not (2 < E < upper_bound):
        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({upper_bound}).")
    # eigh returns the eigenvalues in ascending order.
    eigenvalues, eigenvectors = np.linalg.eigh(corr)
    floor = np.mean(eigenvalues[:-E])
    eigenvalues = np.where(eigenvalues < floor, floor, eigenvalues)
    eigenvalues = eigenvalues / np.mean(eigenvalues)
    return eigenvectors @ np.diag(eigenvalues) @ eigenvectors.T
def _covariance_element(obs1, obs2):
    """Estimates the covariance of two Obs objects, neglecting autocorrelations.

    Returns 0.0 if the two observables do not share any name. Requires that
    the gamma method has been applied to both observables.
    """

    def _projected_sum(deltas_a, deltas_b, idx_a, idx_b, common_idx):
        # Sum over the configs of common_idx of the product of both fluctuations.
        reduced_a = _reduce_deltas(deltas_a, idx_a, common_idx)
        reduced_b = _reduce_deltas(deltas_b, idx_b, common_idx)
        return np.sum(reduced_a * reduced_b)

    if set(obs1.names).isdisjoint(set(obs2.names)):
        return 0.0

    gamma_applied = hasattr(obs1, 'e_dvalue') and hasattr(obs2, 'e_dvalue')
    if not gamma_applied:
        raise Exception('The gamma method has to be applied to both Obs first.')

    dvalue = 0.0

    # Contribution of the Monte Carlo ensembles that both observables share.
    for e_name in obs1.mc_names:
        if e_name not in obs2.mc_names:
            continue

        # Replica known to both observables together with their common configs.
        shared = []
        for r_name in obs1.e_content[e_name]:
            if r_name in obs2.e_content[e_name]:
                shared.append((r_name, _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])))
        overlapping = [(r_name, idx) for r_name, idx in shared if len(idx) != 0]

        gamma = 0.0
        for r_name, idx in overlapping:
            gamma += _projected_sum(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idx)

        if gamma == 0.0:
            continue

        # Normalization with the variances of both observables on the common configs.
        gamma_div = 0.0
        for r_name, idx in overlapping:
            variance1 = _projected_sum(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idx)
            variance2 = _projected_sum(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idx)
            gamma_div += np.sqrt(variance1 * variance2)

        dvalue += gamma / gamma_div

    # Contribution of the covobs that both observables share: grad1^T cov grad2.
    for e_name in obs1.cov_names:
        if e_name not in obs2.cov_names:
            continue
        covobs1 = obs1.covobs[e_name]
        covobs2 = obs2.covobs[e_name]
        dvalue += float(np.dot(np.transpose(covobs1.grad), np.dot(covobs1.cov, covobs2.grad)))

    return dvalue
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list, optional
        passed on unchanged to the Obs constructor as its idl argument.
    """
    length = len(jacks) - 1
    jack_samples = np.asarray(jacks[1:])
    # Invert the jackknife construction: sample_i = sum_j jack_j - (N - 1) * jack_i.
    # This equals the product with the projector ones - (N - 1) * identity
    # but does not require to build the N x N matrix.
    samples = np.sum(jack_samples) - (length - 1) * jack_samples
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    new_obs._value = jacks[0]
    return new_obs


def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if len(replist) != len(set(replist)):
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any(len(o.cov_names) for o in list_of_obs):
        raise Exception('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        # Full samples (fluctuations plus mean) of every replicum.
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                         for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    # The flags are stored as plain Python bools. numpy booleans would not
    # pass the "is True" checks that are applied to these flags elsewhere.
    o.is_merged = {name: any(bool(oi.is_merged.get(name, False)) for oi in list_of_obs) for name in o.names}
    o.reweighted = any(bool(oi.reweighted) for oi in list_of_obs)
    return o
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.

    Returns
    -------
    Obs or list
        A single Obs if one mean value is given, otherwise a list with one
        Obs per mean value.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    # A single number (Python or numpy scalar) is treated as a list of length one.
    if isinstance(means, (float, int, np.floating, np.integer)):
        means = [means]

    ol = [covobs_to_obs(Covobs(mean, cov, name, pos=i, grad=grad)) for i, mean in enumerate(means)]

    # The number of mean values has to match the dimension of the covariance
    # matrix. The dimension is stored in the Covobs; Obs.N only counts Monte
    # Carlo samples and must not be used for the message.
    expected = ol[0].covobs[name].N
    if expected != len(means):
        raise Exception('You have to provide %d mean values!' % (expected))
    if len(ol) == 1:
        return ol[0]
    return ol
20class Obs: 21 """Class for a general observable. 22 23 Instances of Obs are the basic objects of a pyerrors error analysis. 24 They are initialized with a list which contains arrays of samples for 25 different ensembles/replica and another list of same length which contains 26 the names of the ensembles/replica. Mathematical operations can be 27 performed on instances. The result is another instance of Obs. The error of 28 an instance can be computed with the gamma_method. Also contains additional 29 methods for output and visualization of the error calculation. 30 31 Attributes 32 ---------- 33 S_global : float 34 Standard value for S (default 2.0) 35 S_dict : dict 36 Dictionary for S values. If an entry for a given ensemble 37 exists this overwrites the standard value for that ensemble. 38 tau_exp_global : float 39 Standard value for tau_exp (default 0.0) 40 tau_exp_dict : dict 41 Dictionary for tau_exp values. If an entry for a given ensemble exists 42 this overwrites the standard value for that ensemble. 43 N_sigma_global : float 44 Standard value for N_sigma (default 1.0) 45 N_sigma_dict : dict 46 Dictionary for N_sigma values. If an entry for a given ensemble exists 47 this overwrites the standard value for that ensemble. 48 """ 49 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 50 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 51 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 52 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 53 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 54 55 S_global = 2.0 56 S_dict = {} 57 tau_exp_global = 0.0 58 tau_exp_dict = {} 59 N_sigma_global = 1.0 60 N_sigma_dict = {} 61 62 def __init__(self, samples, names, idl=None, **kwargs): 63 """ Initialize Obs object. 
64 65 Parameters 66 ---------- 67 samples : list 68 list of numpy arrays containing the Monte Carlo samples 69 names : list 70 list of strings labeling the individual samples 71 idl : list, optional 72 list of ranges or lists on which the samples are defined 73 """ 74 75 if kwargs.get("means") is None and len(samples): 76 if len(samples) != len(names): 77 raise Exception('Length of samples and names incompatible.') 78 if idl is not None: 79 if len(idl) != len(names): 80 raise Exception('Length of idl incompatible with samples and names.') 81 name_length = len(names) 82 if name_length > 1: 83 if name_length != len(set(names)): 84 raise Exception('names are not unique.') 85 if not all(isinstance(x, str) for x in names): 86 raise TypeError('All names have to be strings.') 87 else: 88 if not isinstance(names[0], str): 89 raise TypeError('All names have to be strings.') 90 if min(len(x) for x in samples) <= 4: 91 raise Exception('Samples have to have at least 5 entries.') 92 93 self.names = sorted(names) 94 self.shape = {} 95 self.r_values = {} 96 self.deltas = {} 97 self._covobs = {} 98 99 self._value = 0 100 self.N = 0 101 self.is_merged = {} 102 self.idl = {} 103 if idl is not None: 104 for name, idx in sorted(zip(names, idl)): 105 if isinstance(idx, range): 106 self.idl[name] = idx 107 elif isinstance(idx, (list, np.ndarray)): 108 dc = np.unique(np.diff(idx)) 109 if np.any(dc < 0): 110 raise Exception("Unsorted idx for idl[%s]" % (name)) 111 if len(dc) == 1: 112 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 113 else: 114 self.idl[name] = list(idx) 115 else: 116 raise Exception('incompatible type for idl[%s].' 
% (name)) 117 else: 118 for name, sample in sorted(zip(names, samples)): 119 self.idl[name] = range(1, len(sample) + 1) 120 121 if kwargs.get("means") is not None: 122 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 123 self.shape[name] = len(self.idl[name]) 124 self.N += self.shape[name] 125 self.r_values[name] = mean 126 self.deltas[name] = sample 127 else: 128 for name, sample in sorted(zip(names, samples)): 129 self.shape[name] = len(self.idl[name]) 130 self.N += self.shape[name] 131 if len(sample) != self.shape[name]: 132 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 133 self.r_values[name] = np.mean(sample) 134 self.deltas[name] = sample - self.r_values[name] 135 self._value += self.shape[name] * self.r_values[name] 136 self._value /= self.N 137 138 self._dvalue = 0.0 139 self.ddvalue = 0.0 140 self.reweighted = False 141 142 self.tag = None 143 144 @property 145 def value(self): 146 return self._value 147 148 @property 149 def dvalue(self): 150 return self._dvalue 151 152 @property 153 def e_names(self): 154 return sorted(set([o.split('|')[0] for o in self.names])) 155 156 @property 157 def cov_names(self): 158 return sorted(set([o for o in self.covobs.keys()])) 159 160 @property 161 def mc_names(self): 162 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 163 164 @property 165 def e_content(self): 166 res = {} 167 for e, e_name in enumerate(self.e_names): 168 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 169 if e_name in self.names: 170 res[e_name].append(e_name) 171 return res 172 173 @property 174 def covobs(self): 175 return self._covobs 176 177 def gamma_method(self, **kwargs): 178 """Estimate the error and related properties of the Obs. 179 180 Parameters 181 ---------- 182 S : float 183 specifies a custom value for the parameter S (default 2.0). 
184 If set to 0 it is assumed that the data exhibits no 185 autocorrelation. In this case the error estimates coincides 186 with the sample standard error. 187 tau_exp : float 188 positive value triggers the critical slowing down analysis 189 (default 0.0). 190 N_sigma : float 191 number of standard deviations from zero until the tail is 192 attached to the autocorrelation function (default 1). 193 fft : bool 194 determines whether the fft algorithm is used for the computation 195 of the autocorrelation function (default True) 196 """ 197 198 e_content = self.e_content 199 self.e_dvalue = {} 200 self.e_ddvalue = {} 201 self.e_tauint = {} 202 self.e_dtauint = {} 203 self.e_windowsize = {} 204 self.e_n_tauint = {} 205 self.e_n_dtauint = {} 206 e_gamma = {} 207 self.e_rho = {} 208 self.e_drho = {} 209 self._dvalue = 0 210 self.ddvalue = 0 211 212 self.S = {} 213 self.tau_exp = {} 214 self.N_sigma = {} 215 216 if kwargs.get('fft') is False: 217 fft = False 218 else: 219 fft = True 220 221 def _parse_kwarg(kwarg_name): 222 if kwarg_name in kwargs: 223 tmp = kwargs.get(kwarg_name) 224 if isinstance(tmp, (int, float)): 225 if tmp < 0: 226 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 227 for e, e_name in enumerate(self.e_names): 228 getattr(self, kwarg_name)[e_name] = tmp 229 else: 230 raise TypeError(kwarg_name + ' is not in proper format.') 231 else: 232 for e, e_name in enumerate(self.e_names): 233 if e_name in getattr(Obs, kwarg_name + '_dict'): 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 235 else: 236 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 237 238 _parse_kwarg('S') 239 _parse_kwarg('tau_exp') 240 _parse_kwarg('N_sigma') 241 242 for e, e_name in enumerate(self.mc_names): 243 r_length = [] 244 for r_name in e_content[e_name]: 245 if isinstance(self.idl[r_name], range): 246 r_length.append(len(self.idl[r_name])) 247 else: 248 r_length.append((self.idl[r_name][-1] - 
self.idl[r_name][0] + 1)) 249 250 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 251 w_max = max(r_length) // 2 252 e_gamma[e_name] = np.zeros(w_max) 253 self.e_rho[e_name] = np.zeros(w_max) 254 self.e_drho[e_name] = np.zeros(w_max) 255 256 for r_name in e_content[e_name]: 257 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 258 259 gamma_div = np.zeros(w_max) 260 for r_name in e_content[e_name]: 261 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 262 gamma_div[gamma_div < 1] = 1.0 263 e_gamma[e_name] /= gamma_div[:w_max] 264 265 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 266 self.e_tauint[e_name] = 0.5 267 self.e_dtauint[e_name] = 0.0 268 self.e_dvalue[e_name] = 0.0 269 self.e_ddvalue[e_name] = 0.0 270 self.e_windowsize[e_name] = 0 271 continue 272 273 gaps = [] 274 for r_name in e_content[e_name]: 275 if isinstance(self.idl[r_name], range): 276 gaps.append(1) 277 else: 278 gaps.append(np.min(np.diff(self.idl[r_name]))) 279 280 if not np.all([gi == gaps[0] for gi in gaps]): 281 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 282 else: 283 gapsize = gaps[0] 284 285 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 286 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 287 # Make sure no entry of tauint is smaller than 0.5 288 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 289 # hep-lat/0306017 eq. 
(42) 290 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 291 self.e_n_dtauint[e_name][0] = 0.0 292 293 def _compute_drho(i): 294 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 295 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 296 297 _compute_drho(gapsize) 298 if self.tau_exp[e_name] > 0: 299 texp = self.tau_exp[e_name] 300 # Critical slowing down analysis 301 if w_max // 2 <= 1: 302 raise Exception("Need at least 8 samples for tau_exp error analysis") 303 for n in range(gapsize, w_max // 2, gapsize): 304 _compute_drho(n + gapsize) 305 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 306 # Bias correction hep-lat/0306017 eq. (49) included 307 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 308 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 309 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 310 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 311 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 312 self.e_windowsize[e_name] = n 313 break 314 else: 315 if self.S[e_name] == 0.0: 316 self.e_tauint[e_name] = 0.5 317 self.e_dtauint[e_name] = 0.0 318 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 319 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 320 self.e_windowsize[e_name] = 0 321 else: 322 # Standard automatic windowing procedure 323 tau = self.S[e_name] / np.log((2 * 
self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 324 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 325 for n in range(1, w_max): 326 if n < w_max // 2 - 2: 327 _compute_drho(gapsize * n + gapsize) 328 if g_w[n - 1] < 0 or n >= w_max - 1: 329 n *= gapsize 330 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 331 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 332 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 333 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 334 self.e_windowsize[e_name] = n 335 break 336 337 self._dvalue += self.e_dvalue[e_name] ** 2 338 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 339 340 for e_name in self.cov_names: 341 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 342 self.e_ddvalue[e_name] = 0 343 self._dvalue += self.e_dvalue[e_name]**2 344 345 self._dvalue = np.sqrt(self._dvalue) 346 if self._dvalue == 0.0: 347 self.ddvalue = 0.0 348 else: 349 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 350 return 351 352 gm = gamma_method 353 354 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 355 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 356 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 357 358 Parameters 359 ---------- 360 deltas : list 361 List of fluctuations 362 idx : list 363 List or range of configurations on which the deltas are defined. 364 shape : int 365 Number of configurations in idx. 366 w_max : int 367 Upper bound for the summation window. 368 fft : bool 369 determines whether the fft algorithm is used for the computation 370 of the autocorrelation function. 
371 """ 372 gamma = np.zeros(w_max) 373 deltas = _expand_deltas(deltas, idx, shape) 374 new_shape = len(deltas) 375 if fft: 376 max_gamma = min(new_shape, w_max) 377 # The padding for the fft has to be even 378 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 379 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 380 else: 381 for n in range(w_max): 382 if new_shape - n >= 0: 383 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 384 385 return gamma 386 387 def details(self, ens_content=True): 388 """Output detailed properties of the Obs. 389 390 Parameters 391 ---------- 392 ens_content : bool 393 print details about the ensembles and replica if true. 394 """ 395 if self.tag is not None: 396 print("Description:", self.tag) 397 if not hasattr(self, 'e_dvalue'): 398 print('Result\t %3.8e' % (self.value)) 399 else: 400 if self.value == 0.0: 401 percentage = np.nan 402 else: 403 percentage = np.abs(self._dvalue / self.value) * 100 404 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 405 if len(self.e_names) > 1: 406 print(' Ensemble errors:') 407 e_content = self.e_content 408 for e_name in self.mc_names: 409 if isinstance(self.idl[e_content[e_name][0]], range): 410 gap = self.idl[e_content[e_name][0]].step 411 else: 412 gap = np.min(np.diff(self.idl[e_content[e_name][0]])) 413 414 if len(self.e_names) > 1: 415 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 416 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 417 tau_string += f" in units of {gap} config" 418 if gap > 1: 419 tau_string += "s" 420 if self.tau_exp[e_name] > 0: 421 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 422 else: 423 tau_string = f"{tau_string: <45}" 
+ '\t(S=%3.2f)' % (self.S[e_name]) 424 print(tau_string) 425 for e_name in self.cov_names: 426 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 427 if ens_content is True: 428 if len(self.e_names) == 1: 429 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 430 else: 431 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 432 my_string_list = [] 433 for key, value in sorted(self.e_content.items()): 434 if key not in self.covobs: 435 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 436 if len(value) == 1: 437 my_string += f': {self.shape[value[0]]} configurations' 438 if isinstance(self.idl[value[0]], range): 439 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 440 else: 441 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 442 else: 443 sublist = [] 444 for v in value: 445 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 446 my_substring += f': {self.shape[v]} configurations' 447 if isinstance(self.idl[v], range): 448 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 449 else: 450 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 451 sublist.append(my_substring) 452 453 my_string += '\n' + '\n'.join(sublist) 454 else: 455 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 456 my_string_list.append(my_string) 457 print('\n'.join(my_string_list)) 458 459 def reweight(self, weight): 460 """Reweight the obs with given rewighting factors. 461 462 Parameters 463 ---------- 464 weight : Obs 465 Reweighting factor. An Observable that has to be defined on a superset of the 466 configurations in obs[i].idl for all i. 
467 all_configs : bool 468 if True, the reweighted observables are normalized by the average of 469 the reweighting factor on all configurations in weight.idl and not 470 on the configurations in obs[i].idl. Default False. 471 """ 472 return reweight(weight, [self])[0] 473 474 def is_zero_within_error(self, sigma=1): 475 """Checks whether the observable is zero within 'sigma' standard errors. 476 477 Parameters 478 ---------- 479 sigma : int 480 Number of standard errors used for the check. 481 482 Works only properly when the gamma method was run. 483 """ 484 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 485 486 def is_zero(self, atol=1e-10): 487 """Checks whether the observable is zero within a given tolerance. 488 489 Parameters 490 ---------- 491 atol : float 492 Absolute tolerance (for details see numpy documentation). 493 """ 494 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 495 496 def plot_tauint(self, save=None): 497 """Plot integrated autocorrelation time for each ensemble. 498 499 Parameters 500 ---------- 501 save : str 502 saves the figure to a file named 'save' if. 
503 """ 504 if not hasattr(self, 'e_dvalue'): 505 raise Exception('Run the gamma method first.') 506 507 for e, e_name in enumerate(self.mc_names): 508 fig = plt.figure() 509 plt.xlabel(r'$W$') 510 plt.ylabel(r'$\tau_\mathrm{int}$') 511 length = int(len(self.e_n_tauint[e_name])) 512 if self.tau_exp[e_name] > 0: 513 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 514 x_help = np.arange(2 * self.tau_exp[e_name]) 515 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 516 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 517 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 518 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 519 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 520 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 521 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 522 else: 523 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 524 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 525 526 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 527 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 528 plt.legend() 529 plt.xlim(-0.5, xmax) 530 ylim = plt.ylim() 531 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 532 plt.draw() 533 if save: 534 fig.savefig(save + "_" + str(e)) 535 536 def plot_rho(self, save=None): 537 """Plot normalized autocorrelation function time for each ensemble. 538 539 Parameters 540 ---------- 541 save : str 542 saves the figure to a file named 'save' if. 
543 """ 544 if not hasattr(self, 'e_dvalue'): 545 raise Exception('Run the gamma method first.') 546 for e, e_name in enumerate(self.mc_names): 547 fig = plt.figure() 548 plt.xlabel('W') 549 plt.ylabel('rho') 550 length = int(len(self.e_drho[e_name])) 551 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 552 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 553 if self.tau_exp[e_name] > 0: 554 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 555 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 556 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 557 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 558 else: 559 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 560 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 561 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 562 plt.xlim(-0.5, xmax) 563 plt.draw() 564 if save: 565 fig.savefig(save + "_" + str(e)) 566 567 def plot_rep_dist(self): 568 """Plot replica distribution for each ensemble with more than one replicum.""" 569 if not hasattr(self, 'e_dvalue'): 570 raise Exception('Run the gamma method first.') 571 for e, e_name in enumerate(self.mc_names): 572 if len(self.e_content[e_name]) == 1: 573 print('No replica distribution for a single replicum (', e_name, ')') 574 continue 575 r_length = [] 576 sub_r_mean = 0 577 for r, r_name in enumerate(self.e_content[e_name]): 578 r_length.append(len(self.deltas[r_name])) 579 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 580 e_N = np.sum(r_length) 581 sub_r_mean /= e_N 582 arr = np.zeros(len(self.e_content[e_name])) 583 for r, r_name in enumerate(self.e_content[e_name]): 584 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 585 plt.hist(arr, 
rwidth=0.8, bins=len(self.e_content[e_name])) 586 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 587 plt.draw() 588 589 def plot_history(self, expand=True): 590 """Plot derived Monte Carlo history for each ensemble 591 592 Parameters 593 ---------- 594 expand : bool 595 show expanded history for irregular Monte Carlo chains (default: True). 596 """ 597 for e, e_name in enumerate(self.mc_names): 598 plt.figure() 599 r_length = [] 600 tmp = [] 601 tmp_expanded = [] 602 for r, r_name in enumerate(self.e_content[e_name]): 603 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 604 if expand: 605 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 606 r_length.append(len(tmp_expanded[-1])) 607 else: 608 r_length.append(len(tmp[-1])) 609 e_N = np.sum(r_length) 610 x = np.arange(e_N) 611 y_test = np.concatenate(tmp, axis=0) 612 if expand: 613 y = np.concatenate(tmp_expanded, axis=0) 614 else: 615 y = y_test 616 plt.errorbar(x, y, fmt='.', markersize=3) 617 plt.xlim(-0.5, e_N - 0.5) 618 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 619 plt.draw() 620 621 def plot_piechart(self, save=None): 622 """Plot piechart which shows the fractional contribution of each 623 ensemble to the error and returns a dictionary containing the fractions. 624 625 Parameters 626 ---------- 627 save : str 628 saves the figure to a file named 'save' if. 
629 """ 630 if not hasattr(self, 'e_dvalue'): 631 raise Exception('Run the gamma method first.') 632 if np.isclose(0.0, self._dvalue, atol=1e-15): 633 raise Exception('Error is 0.0') 634 labels = self.e_names 635 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 636 fig1, ax1 = plt.subplots() 637 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 638 ax1.axis('equal') 639 plt.draw() 640 if save: 641 fig1.savefig(save) 642 643 return dict(zip(self.e_names, sizes)) 644 645 def dump(self, filename, datatype="json.gz", description="", **kwargs): 646 """Dump the Obs to a file 'name' of chosen format. 647 648 Parameters 649 ---------- 650 filename : str 651 name of the file to be saved. 652 datatype : str 653 Format of the exported file. Supported formats include 654 "json.gz" and "pickle" 655 description : str 656 Description for output file, only relevant for json.gz format. 657 path : str 658 specifies a custom path for the file (default '.') 659 """ 660 if 'path' in kwargs: 661 file_name = kwargs.get('path') + '/' + filename 662 else: 663 file_name = filename 664 665 if datatype == "json.gz": 666 from .input.json import dump_to_json 667 dump_to_json([self], file_name, description=description) 668 elif datatype == "pickle": 669 with open(file_name + '.p', 'wb') as fb: 670 pickle.dump(self, fb) 671 else: 672 raise Exception("Unknown datatype " + str(datatype)) 673 674 def export_jackknife(self): 675 """Export jackknife samples from the Obs 676 677 Returns 678 ------- 679 numpy.ndarray 680 Returns a numpy array of length N + 1 where N is the number of samples 681 for the given ensemble and replicum. The zeroth entry of the array contains 682 the mean value of the Obs, entries 1 to N contain the N jackknife samples 683 derived from the Obs. The current implementation only works for observables 684 defined on exactly one ensemble and replicum. 
The derived jackknife samples 685 should agree with samples from a full jackknife analysis up to O(1/N). 686 """ 687 688 if len(self.names) != 1: 689 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 690 691 name = self.names[0] 692 full_data = self.deltas[name] + self.r_values[name] 693 n = full_data.size 694 mean = self.value 695 tmp_jacks = np.zeros(n + 1) 696 tmp_jacks[0] = mean 697 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 698 return tmp_jacks 699 700 def __float__(self): 701 return float(self.value) 702 703 def __repr__(self): 704 return 'Obs[' + str(self) + ']' 705 706 def __str__(self): 707 return _format_uncertainty(self.value, self._dvalue) 708 709 def __hash__(self): 710 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 711 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 712 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 713 hash_tuple += tuple([o.encode() for o in self.names]) 714 m = hashlib.md5() 715 [m.update(o) for o in hash_tuple] 716 return int(m.hexdigest(), 16) & 0xFFFFFFFF 717 718 # Overload comparisons 719 def __lt__(self, other): 720 return self.value < other 721 722 def __le__(self, other): 723 return self.value <= other 724 725 def __gt__(self, other): 726 return self.value > other 727 728 def __ge__(self, other): 729 return self.value >= other 730 731 def __eq__(self, other): 732 return (self - other).is_zero() 733 734 def __ne__(self, other): 735 return not (self - other).is_zero() 736 737 # Overload math operations 738 def __add__(self, y): 739 if isinstance(y, Obs): 740 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 741 else: 742 if isinstance(y, np.ndarray): 743 return np.array([self + o for o in y]) 744 elif y.__class__.__name__ in ['Corr', 'CObs']: 745 return NotImplemented 746 else: 747 return 
derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 748 749 def __radd__(self, y): 750 return self + y 751 752 def __mul__(self, y): 753 if isinstance(y, Obs): 754 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 755 else: 756 if isinstance(y, np.ndarray): 757 return np.array([self * o for o in y]) 758 elif isinstance(y, complex): 759 return CObs(self * y.real, self * y.imag) 760 elif y.__class__.__name__ in ['Corr', 'CObs']: 761 return NotImplemented 762 else: 763 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 764 765 def __rmul__(self, y): 766 return self * y 767 768 def __sub__(self, y): 769 if isinstance(y, Obs): 770 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 771 else: 772 if isinstance(y, np.ndarray): 773 return np.array([self - o for o in y]) 774 elif y.__class__.__name__ in ['Corr', 'CObs']: 775 return NotImplemented 776 else: 777 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 778 779 def __rsub__(self, y): 780 return -1 * (self - y) 781 782 def __pos__(self): 783 return self 784 785 def __neg__(self): 786 return -1 * self 787 788 def __truediv__(self, y): 789 if isinstance(y, Obs): 790 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 791 else: 792 if isinstance(y, np.ndarray): 793 return np.array([self / o for o in y]) 794 elif y.__class__.__name__ in ['Corr', 'CObs']: 795 return NotImplemented 796 else: 797 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 798 799 def __rtruediv__(self, y): 800 if isinstance(y, Obs): 801 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 802 else: 803 if isinstance(y, np.ndarray): 804 return np.array([o / self for o in y]) 805 elif y.__class__.__name__ in ['Corr', 
'CObs']: 806 return NotImplemented 807 else: 808 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 809 810 def __pow__(self, y): 811 if isinstance(y, Obs): 812 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 813 else: 814 return derived_observable(lambda x: x[0] ** y, [self]) 815 816 def __rpow__(self, y): 817 if isinstance(y, Obs): 818 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 819 else: 820 return derived_observable(lambda x: y ** x[0], [self]) 821 822 def __abs__(self): 823 return derived_observable(lambda x: anp.abs(x[0]), [self]) 824 825 # Overload numpy functions 826 def sqrt(self): 827 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 828 829 def log(self): 830 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 831 832 def exp(self): 833 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 834 835 def sin(self): 836 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 837 838 def cos(self): 839 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 840 841 def tan(self): 842 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 843 844 def arcsin(self): 845 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 846 847 def arccos(self): 848 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 849 850 def arctan(self): 851 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 852 853 def sinh(self): 854 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 855 856 def cosh(self): 857 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 858 859 def tanh(self): 860 return 
derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 861 862 def arcsinh(self): 863 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 864 865 def arccosh(self): 866 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 867 868 def arctanh(self): 869 return derived_observable(lambda x: anp.arctanh(x[0]), [self])
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary of per-ensemble S values. If an entry exists for a given ensemble, it takes precedence over S_global for that ensemble (unless S is passed explicitly to gamma_method).
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary of per-ensemble tau_exp values. If an entry exists for a given ensemble, it takes precedence over tau_exp_global for that ensemble (unless tau_exp is passed explicitly to gamma_method).
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary of per-ensemble N_sigma values. If an entry exists for a given ensemble, it takes precedence over N_sigma_global for that ensemble (unless N_sigma is passed explicitly to gamma_method).
62 def __init__(self, samples, names, idl=None, **kwargs): 63 """ Initialize Obs object. 64 65 Parameters 66 ---------- 67 samples : list 68 list of numpy arrays containing the Monte Carlo samples 69 names : list 70 list of strings labeling the individual samples 71 idl : list, optional 72 list of ranges or lists on which the samples are defined 73 """ 74 75 if kwargs.get("means") is None and len(samples): 76 if len(samples) != len(names): 77 raise Exception('Length of samples and names incompatible.') 78 if idl is not None: 79 if len(idl) != len(names): 80 raise Exception('Length of idl incompatible with samples and names.') 81 name_length = len(names) 82 if name_length > 1: 83 if name_length != len(set(names)): 84 raise Exception('names are not unique.') 85 if not all(isinstance(x, str) for x in names): 86 raise TypeError('All names have to be strings.') 87 else: 88 if not isinstance(names[0], str): 89 raise TypeError('All names have to be strings.') 90 if min(len(x) for x in samples) <= 4: 91 raise Exception('Samples have to have at least 5 entries.') 92 93 self.names = sorted(names) 94 self.shape = {} 95 self.r_values = {} 96 self.deltas = {} 97 self._covobs = {} 98 99 self._value = 0 100 self.N = 0 101 self.is_merged = {} 102 self.idl = {} 103 if idl is not None: 104 for name, idx in sorted(zip(names, idl)): 105 if isinstance(idx, range): 106 self.idl[name] = idx 107 elif isinstance(idx, (list, np.ndarray)): 108 dc = np.unique(np.diff(idx)) 109 if np.any(dc < 0): 110 raise Exception("Unsorted idx for idl[%s]" % (name)) 111 if len(dc) == 1: 112 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 113 else: 114 self.idl[name] = list(idx) 115 else: 116 raise Exception('incompatible type for idl[%s].' 
% (name)) 117 else: 118 for name, sample in sorted(zip(names, samples)): 119 self.idl[name] = range(1, len(sample) + 1) 120 121 if kwargs.get("means") is not None: 122 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 123 self.shape[name] = len(self.idl[name]) 124 self.N += self.shape[name] 125 self.r_values[name] = mean 126 self.deltas[name] = sample 127 else: 128 for name, sample in sorted(zip(names, samples)): 129 self.shape[name] = len(self.idl[name]) 130 self.N += self.shape[name] 131 if len(sample) != self.shape[name]: 132 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 133 self.r_values[name] = np.mean(sample) 134 self.deltas[name] = sample - self.r_values[name] 135 self._value += self.shape[name] * self.r_values[name] 136 self._value /= self.N 137 138 self._dvalue = 0.0 139 self.ddvalue = 0.0 140 self.reweighted = False 141 142 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined. If omitted, each sample array is taken to be defined on range(1, len(sample) + 1).
def gamma_method(self, **kwargs):
    """Estimate the error and related properties of the Obs.

    The results are stored on the instance (``_dvalue``, ``ddvalue`` and the
    per-ensemble ``e_*`` dictionaries); nothing is returned.

    Parameters
    ----------
    S : float
        specifies a custom value for the parameter S (default 2.0).
        If set to 0 it is assumed that the data exhibits no
        autocorrelation. In this case the error estimate coincides
        with the sample standard error.
    tau_exp : float
        positive value triggers the critical slowing down analysis
        (default 0.0).
    N_sigma : float
        number of standard deviations from zero until the tail is
        attached to the autocorrelation function (default 1).
    fft : bool
        determines whether the fft algorithm is used for the computation
        of the autocorrelation function (default True)
    """
    e_content = self.e_content

    # Discard everything a previous run may have stored.
    self.e_dvalue = {}
    self.e_ddvalue = {}
    self.e_tauint = {}
    self.e_dtauint = {}
    self.e_windowsize = {}
    self.e_n_tauint = {}
    self.e_n_dtauint = {}
    self.e_rho = {}
    self.e_drho = {}
    self._dvalue = 0
    self.ddvalue = 0

    self.S = {}
    self.tau_exp = {}
    self.N_sigma = {}

    # Only an explicit ``fft=False`` switches the fft algorithm off.
    fft = kwargs.get('fft') is not False

    def _parse_kwarg(kwarg_name):
        # Fill self.<kwarg_name> for every ensemble, either from the keyword
        # argument or from the class-level dictionary / global default.
        target = getattr(self, kwarg_name)
        if kwarg_name in kwargs:
            supplied = kwargs.get(kwarg_name)
            if not isinstance(supplied, (int, float)):
                raise TypeError(kwarg_name + ' is not in proper format.')
            if supplied < 0:
                raise Exception(kwarg_name + ' has to be larger or equal to 0.')
            for e_name in self.e_names:
                target[e_name] = supplied
        else:
            for e_name in self.e_names:
                if e_name in getattr(Obs, kwarg_name + '_dict'):
                    target[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                else:
                    target[e_name] = getattr(Obs, kwarg_name + '_global')

    for parameter in ('S', 'tau_exp', 'N_sigma'):
        _parse_kwarg(parameter)

    for e_name in self.mc_names:
        replica = e_content[e_name]

        # Length of every replicum, measured on its (possibly gapped) index range.
        r_length = [len(self.idl[r_name]) if isinstance(self.idl[r_name], range)
                    else (self.idl[r_name][-1] - self.idl[r_name][0] + 1)
                    for r_name in replica]

        e_N = np.sum([self.shape[r_name] for r_name in replica])
        w_max = max(r_length) // 2
        gamma = np.zeros(w_max)
        self.e_rho[e_name] = np.zeros(w_max)
        self.e_drho[e_name] = np.zeros(w_max)

        for r_name in replica:
            gamma += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

        # Normalisation: the same estimator applied to arrays of ones.
        gamma_div = np.zeros(w_max)
        for r_name in replica:
            gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
        gamma_div[gamma_div < 1] = 1.0
        gamma /= gamma_div[:w_max]

        if np.abs(gamma[0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
            self.e_tauint[e_name] = 0.5
            self.e_dtauint[e_name] = 0.0
            self.e_dvalue[e_name] = 0.0
            self.e_ddvalue[e_name] = 0.0
            self.e_windowsize[e_name] = 0
            continue

        gaps = [1 if isinstance(self.idl[r_name], range)
                else np.min(np.diff(self.idl[r_name]))
                for r_name in replica]

        if not np.all([gi == gaps[0] for gi in gaps]):
            raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
        gapsize = gaps[0]

        rho = gamma[:w_max] / gamma[0]
        self.e_rho[e_name] = rho
        n_tauint = np.cumsum(np.concatenate(([0.5], rho[1:])))
        self.e_n_tauint[e_name] = n_tauint
        # Make sure no entry of tauint is smaller than 0.5
        n_tauint[n_tauint <= 0.5] = 0.5 + np.finfo(np.float64).eps
        # hep-lat/0306017 eq. (42)
        n_dtauint = n_tauint * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - n_tauint) / e_N)
        self.e_n_dtauint[e_name] = n_dtauint
        n_dtauint[0] = 0.0

        drho = self.e_drho[e_name]

        def _compute_drho(i):
            tmp = rho[i + 1:w_max] + np.concatenate([rho[i - 1::-1], rho[1:w_max - 2 * i]]) - 2 * rho[i] * rho[1:w_max - i]
            drho[i] = np.sqrt(np.sum(tmp ** 2) / e_N)

        _compute_drho(gapsize)
        if self.tau_exp[e_name] > 0:
            texp = self.tau_exp[e_name]
            # Critical slowing down analysis
            if w_max // 2 <= 1:
                raise Exception("Need at least 8 samples for tau_exp error analysis")
            for n in range(gapsize, w_max // 2, gapsize):
                _compute_drho(n + gapsize)
                if (rho[n] - self.N_sigma[e_name] * drho[n]) < 0 or n >= w_max // 2 - 2:
                    # Bias correction hep-lat/0306017 eq. (49) included.
                    # The absolute value makes sure that the tail contribution is always positive.
                    self.e_tauint[e_name] = n_tauint[n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(rho[n + 1])
                    self.e_dtauint[e_name] = np.sqrt(n_dtauint[n] ** 2 + texp ** 2 * drho[n + 1] ** 2)
                    # Error of tau_exp neglected so far, missing term: rho[n + 1] ** 2 * d_tau_exp ** 2
                    self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * gamma[0] * (1 + 1 / e_N) / e_N)
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                    self.e_windowsize[e_name] = n
                    break
        elif self.S[e_name] == 0.0:
            # No autocorrelation assumed: plain standard error of the mean.
            self.e_tauint[e_name] = 0.5
            self.e_dtauint[e_name] = 0.0
            self.e_dvalue[e_name] = np.sqrt(gamma[0] / (e_N - 1))
            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
            self.e_windowsize[e_name] = 0
        else:
            # Standard automatic windowing procedure
            tau = self.S[e_name] / np.log((2 * n_tauint[gapsize::gapsize] + 1) / (2 * n_tauint[gapsize::gapsize] - 1))
            g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
            for n in range(1, w_max):
                if n < w_max // 2 - 2:
                    _compute_drho(gapsize * n + gapsize)
                if g_w[n - 1] < 0 or n >= w_max - 1:
                    window = n * gapsize
                    # Bias correction hep-lat/0306017 eq. (49)
                    self.e_tauint[e_name] = n_tauint[window] * (1 + (2 * window / gapsize + 1) / e_N) / (1 + 1 / e_N)
                    self.e_dtauint[e_name] = n_dtauint[window]
                    self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * gamma[0] * (1 + 1 / e_N) / e_N)
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((window / gapsize + 0.5) / e_N)
                    self.e_windowsize[e_name] = window
                    break

        # Ensemble contributions are added in quadrature.
        self._dvalue += self.e_dvalue[e_name] ** 2
        self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

    for e_name in self.cov_names:
        self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
        self.e_ddvalue[e_name] = 0
        self._dvalue += self.e_dvalue[e_name]**2

    self._dvalue = np.sqrt(self._dvalue)
    if self._dvalue == 0.0:
        self.ddvalue = 0.0
    else:
        self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def gamma_method(self, **kwargs):
    """Estimate the error and related properties of the Obs.

    The results are stored on the instance (``_dvalue``, ``ddvalue`` and the
    per-ensemble ``e_*`` dictionaries); nothing is returned.

    Parameters
    ----------
    S : float
        specifies a custom value for the parameter S (default 2.0).
        If set to 0 it is assumed that the data exhibits no
        autocorrelation. In this case the error estimate coincides
        with the sample standard error.
    tau_exp : float
        positive value triggers the critical slowing down analysis
        (default 0.0).
    N_sigma : float
        number of standard deviations from zero until the tail is
        attached to the autocorrelation function (default 1).
    fft : bool
        determines whether the fft algorithm is used for the computation
        of the autocorrelation function (default True)
    """
    e_content = self.e_content

    # Discard everything a previous run may have stored.
    self.e_dvalue = {}
    self.e_ddvalue = {}
    self.e_tauint = {}
    self.e_dtauint = {}
    self.e_windowsize = {}
    self.e_n_tauint = {}
    self.e_n_dtauint = {}
    self.e_rho = {}
    self.e_drho = {}
    self._dvalue = 0
    self.ddvalue = 0

    self.S = {}
    self.tau_exp = {}
    self.N_sigma = {}

    # Only an explicit ``fft=False`` switches the fft algorithm off.
    fft = kwargs.get('fft') is not False

    def _parse_kwarg(kwarg_name):
        # Fill self.<kwarg_name> for every ensemble, either from the keyword
        # argument or from the class-level dictionary / global default.
        target = getattr(self, kwarg_name)
        if kwarg_name in kwargs:
            supplied = kwargs.get(kwarg_name)
            if not isinstance(supplied, (int, float)):
                raise TypeError(kwarg_name + ' is not in proper format.')
            if supplied < 0:
                raise Exception(kwarg_name + ' has to be larger or equal to 0.')
            for e_name in self.e_names:
                target[e_name] = supplied
        else:
            for e_name in self.e_names:
                if e_name in getattr(Obs, kwarg_name + '_dict'):
                    target[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                else:
                    target[e_name] = getattr(Obs, kwarg_name + '_global')

    for parameter in ('S', 'tau_exp', 'N_sigma'):
        _parse_kwarg(parameter)

    for e_name in self.mc_names:
        replica = e_content[e_name]

        # Length of every replicum, measured on its (possibly gapped) index range.
        r_length = [len(self.idl[r_name]) if isinstance(self.idl[r_name], range)
                    else (self.idl[r_name][-1] - self.idl[r_name][0] + 1)
                    for r_name in replica]

        e_N = np.sum([self.shape[r_name] for r_name in replica])
        w_max = max(r_length) // 2
        gamma = np.zeros(w_max)
        self.e_rho[e_name] = np.zeros(w_max)
        self.e_drho[e_name] = np.zeros(w_max)

        for r_name in replica:
            gamma += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

        # Normalisation: the same estimator applied to arrays of ones.
        gamma_div = np.zeros(w_max)
        for r_name in replica:
            gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
        gamma_div[gamma_div < 1] = 1.0
        gamma /= gamma_div[:w_max]

        if np.abs(gamma[0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
            self.e_tauint[e_name] = 0.5
            self.e_dtauint[e_name] = 0.0
            self.e_dvalue[e_name] = 0.0
            self.e_ddvalue[e_name] = 0.0
            self.e_windowsize[e_name] = 0
            continue

        gaps = [1 if isinstance(self.idl[r_name], range)
                else np.min(np.diff(self.idl[r_name]))
                for r_name in replica]

        if not np.all([gi == gaps[0] for gi in gaps]):
            raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
        gapsize = gaps[0]

        rho = gamma[:w_max] / gamma[0]
        self.e_rho[e_name] = rho
        n_tauint = np.cumsum(np.concatenate(([0.5], rho[1:])))
        self.e_n_tauint[e_name] = n_tauint
        # Make sure no entry of tauint is smaller than 0.5
        n_tauint[n_tauint <= 0.5] = 0.5 + np.finfo(np.float64).eps
        # hep-lat/0306017 eq. (42)
        n_dtauint = n_tauint * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - n_tauint) / e_N)
        self.e_n_dtauint[e_name] = n_dtauint
        n_dtauint[0] = 0.0

        drho = self.e_drho[e_name]

        def _compute_drho(i):
            tmp = rho[i + 1:w_max] + np.concatenate([rho[i - 1::-1], rho[1:w_max - 2 * i]]) - 2 * rho[i] * rho[1:w_max - i]
            drho[i] = np.sqrt(np.sum(tmp ** 2) / e_N)

        _compute_drho(gapsize)
        if self.tau_exp[e_name] > 0:
            texp = self.tau_exp[e_name]
            # Critical slowing down analysis
            if w_max // 2 <= 1:
                raise Exception("Need at least 8 samples for tau_exp error analysis")
            for n in range(gapsize, w_max // 2, gapsize):
                _compute_drho(n + gapsize)
                if (rho[n] - self.N_sigma[e_name] * drho[n]) < 0 or n >= w_max // 2 - 2:
                    # Bias correction hep-lat/0306017 eq. (49) included.
                    # The absolute value makes sure that the tail contribution is always positive.
                    self.e_tauint[e_name] = n_tauint[n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(rho[n + 1])
                    self.e_dtauint[e_name] = np.sqrt(n_dtauint[n] ** 2 + texp ** 2 * drho[n + 1] ** 2)
                    # Error of tau_exp neglected so far, missing term: rho[n + 1] ** 2 * d_tau_exp ** 2
                    self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * gamma[0] * (1 + 1 / e_N) / e_N)
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                    self.e_windowsize[e_name] = n
                    break
        elif self.S[e_name] == 0.0:
            # No autocorrelation assumed: plain standard error of the mean.
            self.e_tauint[e_name] = 0.5
            self.e_dtauint[e_name] = 0.0
            self.e_dvalue[e_name] = np.sqrt(gamma[0] / (e_N - 1))
            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
            self.e_windowsize[e_name] = 0
        else:
            # Standard automatic windowing procedure
            tau = self.S[e_name] / np.log((2 * n_tauint[gapsize::gapsize] + 1) / (2 * n_tauint[gapsize::gapsize] - 1))
            g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
            for n in range(1, w_max):
                if n < w_max // 2 - 2:
                    _compute_drho(gapsize * n + gapsize)
                if g_w[n - 1] < 0 or n >= w_max - 1:
                    window = n * gapsize
                    # Bias correction hep-lat/0306017 eq. (49)
                    self.e_tauint[e_name] = n_tauint[window] * (1 + (2 * window / gapsize + 1) / e_N) / (1 + 1 / e_N)
                    self.e_dtauint[e_name] = n_dtauint[window]
                    self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * gamma[0] * (1 + 1 / e_N) / e_N)
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((window / gapsize + 0.5) / e_N)
                    self.e_windowsize[e_name] = window
                    break

        # Ensemble contributions are added in quadrature.
        self._dvalue += self.e_dvalue[e_name] ** 2
        self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

    for e_name in self.cov_names:
        self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
        self.e_ddvalue[e_name] = 0
        self._dvalue += self.e_dvalue[e_name]**2

    self._dvalue = np.sqrt(self._dvalue)
    if self._dvalue == 0.0:
        self.ddvalue = 0.0
    else:
        self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True):
    """Print detailed properties of the Obs.

    Parameters
    ----------
    ens_content : bool
        print details about the ensembles and replica if true.
    """

    def _describe_idl(idx):
        # Text describing the configuration range an idl entry covers.
        if isinstance(idx, range):
            return f' (from {idx.start} to {idx[-1]}' + int(idx.step != 1) * f' in steps of {idx.step}' + ')'
        return f' (irregular range from {idx[0]} to {idx[-1]})'

    if self.tag is not None:
        print("Description:", self.tag)

    if hasattr(self, 'e_dvalue'):
        # The gamma method has been run: report errors as well.
        if self.value == 0.0:
            percentage = np.nan
        else:
            percentage = np.abs(self._dvalue / self.value) * 100
        print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
        several_ensembles = len(self.e_names) > 1
        if several_ensembles:
            print(' Ensemble errors:')
        e_content = self.e_content
        for e_name in self.mc_names:
            # The spacing is taken from the first replicum of the ensemble.
            first_idl = self.idl[e_content[e_name][0]]
            if isinstance(first_idl, range):
                gap = first_idl.step
            else:
                gap = np.min(np.diff(first_idl))

            if several_ensembles:
                print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
            tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
            tau_string += f" in units of {gap} config"
            if gap > 1:
                tau_string += "s"
            if self.tau_exp[e_name] > 0:
                suffix = '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
            else:
                suffix = '\t(S=%3.2f)' % (self.S[e_name])
            print(f"{tau_string: <45}" + suffix)
        for e_name in self.cov_names:
            print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
    else:
        print('Result\t %3.8e' % (self.value))

    if ens_content is True:
        noun = 'ensemble:' if len(self.e_names) == 1 else 'ensembles:'
        print(self.N, 'samples in', len(self.e_names), noun)
        entries = []
        for key, value in sorted(self.e_content.items()):
            if key in self.covobs:
                entries.append(' ' + "\u00B7 Covobs '" + key + "' ")
                continue
            entry = ' ' + "\u00B7 Ensemble '" + key + "' "
            if len(value) == 1:
                entry += f': {self.shape[value[0]]} configurations'
                entry += _describe_idl(self.idl[value[0]])
            else:
                # One indented line per replicum; the replicum label is the
                # name with the ensemble prefix and its separator removed.
                sublist = [' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                           + f': {self.shape[v]} configurations'
                           + _describe_idl(self.idl[v])
                           for v in value]
                entry += '\n' + '\n'.join(sublist)
            entries.append(entry)
        print('\n'.join(entries))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight, **kwargs):
    """Reweight the obs with given reweighting factors.

    Thin wrapper around the module-level ``reweight`` function, called with
    this Obs as the only observable. Keyword arguments are forwarded
    unchanged, so the documented ``all_configs`` option can actually be
    supplied; calls without keyword arguments behave exactly as before.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.

    Returns
    -------
    The first (and only) element of the list returned by the module-level
    ``reweight`` function.
    """
    return reweight(weight, [self], **kwargs)[0]
Reweight the obs with the given reweighting factors.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def is_zero_within_error(self, sigma=1):
    """Check whether the observable is zero within 'sigma' standard errors.

    Parameters
    ----------
    sigma : int
        Number of standard errors used for the check.

    Only works properly after the gamma method has been run, because the
    comparison uses the stored error ``_dvalue``.
    """
    exactly_zero = self.is_zero()
    if exactly_zero:
        return exactly_zero
    return np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
- Note: this only works properly after the gamma method has been run.
def is_zero(self, atol=1e-10):
    """Check whether the observable is zero within a given tolerance.

    The value, all fluctuations in ``deltas`` and the squared errors of all
    entries in ``covobs`` have to be compatible with zero.

    Parameters
    ----------
    atol : float
        Absolute tolerance (for details see numpy documentation).
    """
    rtol = 1e-14
    value_is_zero = np.isclose(0.0, self.value, rtol, atol)
    if not value_is_zero:
        return value_is_zero
    for delta in self.deltas.values():
        if not np.allclose(0.0, delta, rtol, atol):
            return False
    return all(np.allclose(0.0, cov.errsq(), rtol, atol) for cov in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
    """Plot integrated autocorrelation time for each ensemble.

    One figure is created per Monte Carlo ensemble.

    Parameters
    ----------
    save : str
        if given, the figure of ensemble number ``e`` is saved to a file
        named ``save + "_" + str(e)``.

    Raises
    ------
    Exception
        if the gamma method has not been run yet.
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')

    for e, e_name in enumerate(self.mc_names):
        fig = plt.figure()
        plt.xlabel(r'$W$')
        plt.ylabel(r'$\tau_\mathrm{int}$')
        n_tauint = self.e_n_tauint[e_name]
        length = int(len(n_tauint))
        texp = self.tau_exp[e_name]
        window = self.e_windowsize[e_name]
        color = 'C' + str(e)
        if texp > 0:
            # Draw the attached tail and the resulting tau_int estimate.
            base = n_tauint[window]
            x_help = np.arange(2 * texp)
            y_help = (x_help + 1) * np.abs(self.e_rho[e_name][window + 1]) * (1 - x_help / (2 * (2 * texp - 1))) + base
            x_arr = np.arange(window + 1, window + 1 + 2 * texp)
            plt.plot(x_arr, y_help, color, linewidth=1, ls='--', marker=',')
            plt.errorbar([window + 2 * texp], [self.e_tauint[e_name]],
                         yerr=[self.e_dtauint[e_name]], fmt=color, linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
            xmax = window + 2 * texp + 1.5
            label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(texp, decimals=2))
        else:
            label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
            xmax = max(10.5, 2 * window - 0.5)

        stop = int(xmax) + 1
        plt.errorbar(np.arange(length)[:stop], n_tauint[:stop], yerr=self.e_n_dtauint[e_name][:stop], linewidth=1, capsize=2, label=label)
        plt.axvline(x=window, color=color, alpha=0.5, marker=',', ls='--')
        plt.legend()
        plt.xlim(-0.5, xmax)
        ylim = plt.ylim()
        plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
        plt.draw()
        if save:
            fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): if given, the figure of ensemble number e is saved to a file named save + "_" + str(e).
def plot_rho(self, save=None):
    """Plot the normalized autocorrelation function for each ensemble.

    One figure is created per Monte Carlo ensemble.

    Parameters
    ----------
    save : str
        if given, the figure of ensemble number ``e`` is saved to a file
        named ``save + "_" + str(e)``.

    Raises
    ------
    Exception
        if the gamma method has not been run yet.
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')
    for e, e_name in enumerate(self.mc_names):
        fig = plt.figure()
        plt.xlabel('W')
        plt.ylabel('rho')
        drho = self.e_drho[e_name]
        rho = self.e_rho[e_name]
        length = int(len(drho))
        plt.errorbar(np.arange(length), rho[:length], yerr=drho[:], linewidth=1, capsize=2)
        window = self.e_windowsize[e_name]
        plt.axvline(x=window, color='r', alpha=0.25, ls='--', marker=',')
        texp = self.tau_exp[e_name]
        if texp > 0:
            # Straight line from rho just past the window down to zero.
            plt.plot([window + 1, window + 1 + 2 * texp],
                     [rho[window + 1], 0], 'k-', lw=1)
            xmax = window + 2 * texp + 1.5
            plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(texp, decimals=2)))
        else:
            xmax = max(10.5, 2 * window - 0.5)
            plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
        plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
        plt.xlim(-0.5, xmax)
        plt.draw()
        if save:
            fig.savefig(save + "_" + str(e))
Plot the normalized autocorrelation function for each ensemble.
Parameters
- save (str): if given, the figure of ensemble number e is saved to a file named save + "_" + str(e).
def plot_rep_dist(self):
    """Plot replica distribution for each ensemble with more than one replicum.

    For every Monte Carlo ensemble with several replica a histogram of the
    deviations of the replica means from the ensemble mean is drawn, each
    deviation divided by ``e_dvalue * sqrt(e_N / shape - 1)``. Ensembles with
    a single replicum are skipped with a printed notice.

    Each ensemble is drawn into its own new figure, as in the other plot
    methods, so that histograms of different ensembles no longer overlap.

    Raises
    ------
    Exception
        if the gamma method has not been run yet.
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')
    for e_name in self.mc_names:
        replica = self.e_content[e_name]
        if len(replica) == 1:
            print('No replica distribution for a single replicum (', e_name, ')')
            continue
        # Total number of samples in the ensemble and sample-weighted mean of
        # the replica means.
        e_N = np.sum([len(self.deltas[r_name]) for r_name in replica])
        sub_r_mean = sum(self.shape[r_name] * self.r_values[r_name] for r_name in replica) / e_N
        arr = np.zeros(len(replica))
        for r, r_name in enumerate(replica):
            arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
        plt.figure()
        plt.hist(arr, rwidth=0.8, bins=len(replica))
        plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
        plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
def plot_history(self, expand=True):
    """Plot derived Monte Carlo history for each ensemble.

    One figure is created per Monte Carlo ensemble; its title shows skewness
    and kurtosis of the unexpanded history together with the p-values of the
    corresponding tests.

    Parameters
    ----------
    expand : bool
        show expanded history for irregular Monte Carlo chains (default: True).
    """
    for e_name in self.mc_names:
        plt.figure()
        replica = self.e_content[e_name]
        # History as stored: fluctuations shifted by the replicum mean.
        stored = [self.deltas[r_name] + self.r_values[r_name] for r_name in replica]
        if expand:
            shown = [_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]
                     for r_name in replica]
        else:
            shown = stored
        e_N = np.sum([len(chunk) for chunk in shown])
        x = np.arange(e_N)
        # The statistics in the title always use the unexpanded history.
        y_test = np.concatenate(stored, axis=0)
        y = np.concatenate(shown, axis=0) if expand else y_test
        plt.errorbar(x, y, fmt='.', markersize=3)
        plt.xlim(-0.5, e_N - 0.5)
        plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
        plt.draw()
Plot derived Monte Carlo history for each ensemble
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
    """Plot piechart which shows the fractional contribution of each
    ensemble to the error and returns a dictionary containing the fractions.

    Parameters
    ----------
    save : str
        if given, the figure is saved to a file named 'save'.

    Returns
    -------
    dict
        maps every name in ``e_names`` to ``e_dvalue[name] ** 2 / _dvalue ** 2``.

    Raises
    ------
    Exception
        if the gamma method has not been run yet or if the total error
        is zero.
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')
    if np.isclose(0.0, self._dvalue, atol=1e-15):
        raise Exception('Error is 0.0')
    labels = self.e_names
    # Explicit array: dividing a plain list by a scalar only works when the
    # scalar happens to be a numpy type.
    sizes = np.array([self.e_dvalue[name] ** 2 for name in labels]) / self._dvalue ** 2
    fig1, ax1 = plt.subplots()
    ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
    ax1.axis('equal')
    plt.draw()
    if save:
        fig1.savefig(save)

    return dict(zip(self.e_names, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
Parameters
- save (str): if given, the figure is saved to a file named 'save'.
def dump(self, filename, datatype="json.gz", description="", **kwargs):
    """Dump the Obs to a file 'filename' of the chosen format.

    Parameters
    ----------
    filename : str
        name of the file to be saved.
    datatype : str
        Format of the exported file. Supported formats include
        "json.gz" and "pickle"
    description : str
        Description for output file, only relevant for json.gz format.
    path : str
        specifies a custom path for the file (default '.')

    Raises
    ------
    Exception
        if `datatype` is neither "json.gz" nor "pickle".
    """
    # An optional directory is prepended with a plain '/' separator.
    file_name = kwargs.get('path') + '/' + filename if 'path' in kwargs else filename

    if datatype == "pickle":
        # The pickle file always gets the '.p' suffix appended.
        with open(file_name + '.p', 'wb') as fb:
            pickle.dump(self, fb)
        return

    if datatype != "json.gz":
        raise Exception("Unknown datatype " + str(datatype))

    from .input.json import dump_to_json
    dump_to_json([self], file_name, description=description)
Dump the Obs to a file 'filename' of the chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
def export_jackknife(self):
    """Export jackknife samples from the Obs

    Returns
    -------
    numpy.ndarray
        Returns a numpy array of length N + 1 where N is the number of samples
        for the given ensemble and replicum. The zeroth entry of the array contains
        the mean value of the Obs, entries 1 to N contain the N jackknife samples
        derived from the Obs. The current implementation only works for observables
        defined on exactly one ensemble and replicum. The derived jackknife samples
        should agree with samples from a full jackknife analysis up to O(1/N).

    Raises
    ------
    Exception
        if the Obs is defined on more (or fewer) than one name.
    """
    if len(self.names) != 1:
        raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")

    name = self.names[0]
    samples = self.deltas[name] + self.r_values[name]
    n = samples.size
    mean = self.value

    jacks = np.empty(n + 1)
    jacks[0] = mean
    # Leave-one-out means: remove one sample from the total n * mean.
    jacks[1:] = (n * mean - samples) / (n - 1)
    return jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
class CObs:
    """Class for a complex valued observable.

    Real and imaginary part are stored separately. Each of them may be an
    Obs or anything else that supports the arithmetic used below (for
    example a plain number).
    """
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        """Store real and imaginary part; the tag starts out as None."""
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        """Real part (read-only)."""
        return self._real

    @property
    def imag(self):
        """Imaginary part (read-only)."""
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        """Return the complex conjugate as a new CObs."""
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            # Let numpy apply the addition element by element.
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                # All four components are Obs: build both parts of the product
                # with derived_observable and hand-written gradients.
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                # Purely real factor.
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            # self * conj(other) / |other|^2
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        # other * conj(self) / |self|^2
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        # Objects without real/imag (None, strings, ...) cannot be compared
        # component-wise; defer to Python's default handling instead of
        # failing with an AttributeError.
        if not (hasattr(other, 'real') and hasattr(other, 'imag')):
            return NotImplemented
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        # A '+' is inserted only for a non-negative imaginary part.
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'
Class for a complex valued observable.
def gamma_method(self, **kwargs):
    """Executes the gamma_method for the real and the imaginary part."""
    # Only components that are Obs carry an error estimate; others are skipped.
    for part in (self.real, self.imag):
        if isinstance(part, Obs):
            part.gamma_method(**kwargs)
Executes the gamma_method for the real and the imaginary part.
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    array_mode : bool
        if True, the deltas and covobs gradients are first collected for every
        entry of data and then contracted with the jacobian via np.tensordot,
        instead of being accumulated Obs by Obs (default False).
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i, entry in enumerate(raveled_data):
            if isinstance(entry, (int, float)):
                raveled_data[i] = cov_Obs(entry, 0.0, "###dummy_covobs###")

    # Collect the covariance matrices and make sure that equal names refer to equal matrices.
    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name not in allcov:
                allcov[name] = o.covobs[name].cov
            elif not np.allclose(allcov[name], o.covobs[name].cov):
                raise Exception('Inconsistent covariance matrices for %s!' % (name))

    n_obs = len(raveled_data)
    new_names = sorted({name for o in raveled_data for name in o.names})
    new_cov_names = sorted({name for o in raveled_data for name in o.cov_names})
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    is_merged = {name: any(o.is_merged.get(name, False) is True for o in raveled_data) for name in new_sample_names}
    reweighted = any(o.reweighted is True for o in raveled_data)

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    multi = int(isinstance(new_values, np.ndarray))

    # Evaluate func on the replica means and merge the index lists ensemble by ensemble.
    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            # Differing lengths mean that at least one input had to be expanded.
            idl_lengths = {len(idx) for idx in (*idl, new_idl_d[name])}
            is_merged[name] = (len(idl_lengths) != 1)

    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {'base_step': 0.1, 'step_ratio': 2.5}
        # User supplied step options override the defaults, None is ignored.
        options.update({key: kwargs[key] for key in options if kwargs.get(key) is not None})
        tmp_df = nd.Gradient(func, order=4, **options)(values, **kwargs)
        deriv = np.array([tmp_df.real]) if tmp_df.size == 1 else tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict({(n, o.covobs[n].N) for o in raveled_data for n in o.cov_names})
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            merged_idl = new_idl_d[name]
            ens_length = len(merged_idl)
            d_extracted[name] = [
                np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, merged_idl), o.shape.get(name, ens_length), merged_idl)
                          for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))
                for dat in data]
        for name in new_cov_names:
            cov_length = new_covobs_lengths[name]
            zero_grad = _Zero_grad(cov_length)
            g_extracted[name] = [
                np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (cov_length, 1))
                for dat in data]

    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')

        # Only names backed by Monte Carlo samples enter the Obs constructor,
        # the covobs names are attached afterwards.
        new_names_obs = [name for name in new_names if name not in new_covobs]
        new_samples = [new_deltas[name] for name in new_names_obs]
        new_idl = [new_idl_d[name] for name in new_names_obs]
        new_means = [new_r_values[name][i_val] for name in new_names_obs]

        new_obs = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        new_obs.names.extend(new_covobs)
        new_obs._covobs = new_covobs
        new_obs._value = new_val
        new_obs.is_merged = is_merged
        new_obs.reweighted = reweighted
        final_result[i_val] = new_obs

    if multi == 0:
        final_result = final_result.item()

    return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.
    """
    result = []
    for i, ob in enumerate(obs):
        if len(ob.cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(ob.names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in ob.names:
            if not set(ob.idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))

        ens_names = sorted(ob.names)
        ens_idl = [ob.idl[name] for name in ens_names]

        # Restrict the fluctuations of the weight to the configurations of the observable.
        w_deltas = {name: _reduce_deltas(weight.deltas[name], weight.idl[name], ob.idl[name]) for name in ens_names}
        new_samples = [(w_deltas[name] + weight.r_values[name]) * (ob.deltas[name] + ob.r_values[name]) for name in ens_names]
        tmp_obs = Obs(new_samples, ens_names, idl=ens_idl)

        if kwargs.get('all_configs'):
            new_weight = weight
        else:
            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in ens_names], ens_names, idl=ens_idl)

        reweighted_obs = tmp_obs / new_weight
        reweighted_obs.reweighted = True
        reweighted_obs.is_merged = ob.is_merged
        result.append(reweighted_obs)

    return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Returns
    -------
    Obs
        Observable built from the configuration-wise product of the samples
        of obs_a and obs_b.

    Raises
    ------
    Exception
        If the ensembles, the shapes or the idl of the two observables differ
        or if one of them contains covobs.

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        # The messages are formatted into one string; passing the pieces as
        # separate arguments would render the error as a tuple.
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception(f'Shapes of ensemble {name} do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception(f'idl of ensemble {name} do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        # deltas + r_values restores the original samples on this replicum.
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
             ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smooths the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    # The rank warning only applies if no observable carries covobs.
    if max_samples <= length and not any(len(o.cov_names) > 0 for o in obs):
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Fill the upper triangle only and symmetrize afterwards.
    cov = np.zeros((length, length))
    for row in range(length):
        for col in range(row, length):
            cov[row, col] = _covariance_element(obs[row], obs[col])
    cov = cov + cov.T - np.diag(np.diag(cov))

    inv_sigma = np.diag(1 / np.sqrt(np.diag(cov)))
    corr = inv_sigma @ cov @ inv_sigma

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    # Rescale the correlation matrix with the full errors from the gamma method.
    sigma = np.diag([o.dvalue for o in obs])
    cov = sigma @ corr @ sigma

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov
Calculates the error covariance matrix of a set of observables.
WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smooths the smaller eigenvalues to avoid extremely small ones.
Notes
The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list, optional
        passed on unchanged to the Obs constructor.
    """
    n_jack = len(jacks) - 1
    # Linear map which turns the N jackknife samples back into N samples.
    projector = np.ones((n_jack, n_jack)) - (n_jack - 1) * np.identity(n_jack)
    samples = jacks[1:] @ projector
    sample_mean = np.mean(samples)
    result = Obs([samples - sample_mean], [name], idl=idl, means=[sample_mean])
    # The central value is taken from the zeroth entry of jacks.
    result._value = jacks[0]
    return result
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Returns
    -------
    Obs
        Observable defined on the union of the replica of all inputs.

    Raises
    ------
    Exception
        If a replicum appears more than once or if an input contains covobs.

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if len(replist) != len(set(replist)):
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any(len(o.cov_names) for o in list_of_obs):
        raise Exception('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        # deltas + r_values restores the original samples of each replicum.
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    # Use the builtin any() so that the flags are genuine Python bools. numpy
    # reductions return numpy.bool_, which fails the `is True` checks applied
    # to these flags elsewhere in this module.
    o.is_merged = {name: any(oi.is_merged.get(name, False) for oi in list_of_obs) for name in o.names}
    o.reweighted = any(oi.reweighted for oi in list_of_obs)
    return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs object to be combined
Notes
It is not possible to combine obs which are based on the same replicum
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.

    Returns
    -------
    Obs or list of Obs
        A single Obs if one mean value is given, otherwise a list with one
        Obs per mean value.

    Raises
    ------
    Exception
        If the number of mean values does not match the dimension of the Covobs.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        # Start from an empty Obs and attach the Covobs by hand.
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    ol = []
    if isinstance(means, (float, int)):
        means = [means]

    for i in range(len(means)):
        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
    expected = ol[0].covobs[name].N
    if expected != len(means):
        # Report the dimension of the Covobs; the N attribute of the Obs itself
        # counts Monte Carlo samples and is not the required number of means.
        raise Exception('You have to provide %d mean values!' % (expected))
    if len(ol) == 1:
        return ol[0]
    return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- means (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.