pyerrors.obs
import warnings
import hashlib
import pickle
from math import gcd
from functools import reduce
import numpy as np
import autograd.numpy as anp  # Thinly-wrapped numpy
from autograd import jacobian
import matplotlib.pyplot as plt
from scipy.stats import skew, skewtest, kurtosis, kurtosistest
import numdifftools as nd
from itertools import groupby
from .covobs import Covobs


class Obs:
    """Class for a general observable.

    Instances of Obs are the basic objects of a pyerrors error analysis.
    They are initialized with a list which contains arrays of samples for
    different ensembles/replica and another list of same length which contains
    the names of the ensembles/replica. Mathematical operations can be
    performed on instances. The result is another instance of Obs. The error of
    an instance can be computed with the gamma_method. Also contains additional
    methods for output and visualization of the error calculation.

    Attributes
    ----------
    S_global : float
        Standard value for S (default 2.0)
    S_dict : dict
        Dictionary for S values. If an entry for a given ensemble
        exists this overwrites the standard value for that ensemble.
    tau_exp_global : float
        Standard value for tau_exp (default 0.0)
    tau_exp_dict : dict
        Dictionary for tau_exp values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    N_sigma_global : float
        Standard value for N_sigma (default 1.0)
    N_sigma_dict : dict
        Dictionary for N_sigma values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    """
    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']

    S_global = 2.0
    S_dict = {}
    tau_exp_global = 0.0
    tau_exp_dict = {}
    N_sigma_global = 1.0
    N_sigma_dict = {}
    filter_eps = 1e-10

    def __init__(self, samples, names, idl=None, **kwargs):
        """ Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined
        """

        if kwargs.get("means") is None and len(samples):
            if len(samples) != len(names):
                raise Exception('Length of samples and names incompatible.')
            if idl is not None:
                if len(idl) != len(names):
                    raise Exception('Length of idl incompatible with samples and names.')
            name_length = len(names)
            if name_length > 1:
                if name_length != len(set(names)):
                    raise Exception('names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
            if min(len(x) for x in samples) <= 4:
                raise Exception('Samples have to have at least 5 entries.')

        self.names = sorted(names)
        self.shape = {}
        self.r_values = {}
        self.deltas = {}
        self._covobs = {}

        self._value = 0
        self.N = 0
        self.is_merged = {}
        self.idl = {}
        if idl is not None:
            for name, idx in sorted(zip(names, idl)):
                if isinstance(idx, range):
                    self.idl[name] = idx
                elif isinstance(idx, (list, np.ndarray)):
                    dc = np.unique(np.diff(idx))
                    if np.any(dc < 0):
                        raise Exception("Unsorted idx for idl[%s]" % (name))
                    if len(dc) == 1:
                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
                    else:
                        self.idl[name] = list(idx)
                else:
                    raise Exception('incompatible type for idl[%s].' % (name))
        else:
            for name, sample in sorted(zip(names, samples)):
                self.idl[name] = range(1, len(sample) + 1)

        if kwargs.get("means") is not None:
            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                self.r_values[name] = mean
                self.deltas[name] = sample
        else:
            for name, sample in sorted(zip(names, samples)):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                if len(sample) != self.shape[name]:
                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
                self.r_values[name] = np.mean(sample)
                self.deltas[name] = sample - self.r_values[name]
                self._value += self.shape[name] * self.r_values[name]
            self._value /= self.N

        self._dvalue = 0.0
        self.ddvalue = 0.0
        self.reweighted = False

        self.tag = None

    @property
    def value(self):
        return self._value

    @property
    def dvalue(self):
        return self._dvalue

    @property
    def e_names(self):
        return sorted(set([o.split('|')[0] for o in self.names]))

    @property
    def cov_names(self):
        return sorted(set([o for o in self.covobs.keys()]))

    @property
    def mc_names(self):
        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))

    @property
    def e_content(self):
        res = {}
        for e, e_name in enumerate(self.e_names):
            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
            if e_name in self.names:
                res[e_name].append(e_name)
        return res

    @property
    def covobs(self):
        return self._covobs
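
    # Illustrative sketch (not part of the class): constructing an Obs from a
    # Monte Carlo chain. The sample values and ensemble/replica names below
    # are made up for the example.
    #
    #     rng = np.random.default_rng(seed=1)
    #     my_obs = Obs([rng.normal(1.0, 0.1, 500)], ['ens1'])
    #     # Samples on non-contiguous configurations can be declared via idl;
    #     # here a replicum 'r02' of 'ens1' measured on every second config:
    #     gapped = Obs([rng.normal(1.0, 0.1, 250)], ['ens1|r02'],
    #                  idl=[range(1, 500, 2)])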

    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
        """

        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            gaps = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    gaps.append(1)
                else:
                    gaps.append(np.min(np.diff(self.idl[r_name])))

            if not np.all([gi == gaps[0] for gi in gaps]):
                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
            else:
                gapsize = gaps[0]

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(gapsize)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(gapsize, w_max // 2, gapsize):
                    _compute_drho(n + gapsize)
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(gapsize * n + gapsize)
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            n *= gapsize
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return
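
    # Illustrative sketch (not part of the class): typical error analysis
    # workflow. 'samples' is a 1d numpy array of measurements; the ensemble
    # name and parameter values are placeholders.
    #
    #     my_obs = Obs([samples], ['ens1'])
    #     my_obs.gamma_method(S=2.0)        # automatic windowing procedure
    #     print(my_obs.value, my_obs.dvalue)
    #     my_obs.gamma_method(tau_exp=10)   # attach an exponential tail instead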

    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
        idx is assumed to be a contiguous range (possibly with a stepsize != 1)

        Parameters
        ----------
        deltas : list
            List of fluctuations
        idx : list
            List or range of configurations on which the deltas are defined.
        shape : int
            Number of configurations in idx.
        w_max : int
            Upper bound for the summation window.
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function.
        """
        gamma = np.zeros(w_max)
        deltas = _expand_deltas(deltas, idx, shape)
        new_shape = len(deltas)
        if fft:
            max_gamma = min(new_shape, w_max)
            # The padding for the fft has to be even
            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
        else:
            for n in range(w_max):
                if new_shape - n >= 0:
                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])

        return gamma
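
    # Both branches above compute the same windowed autocorrelation
    # Gamma(n) = sum_i delta_i * delta_{i+n}; the fft route evaluates it via
    # the Wiener-Khinchin theorem with zero padding. A minimal self-check
    # sketch (illustrative only, exercises the private method):
    #
    #     deltas = np.random.normal(0, 1, 100)
    #     obs = Obs([deltas + 1.0], ['ens1'])
    #     g_fft = obs._calc_gamma(deltas, range(1, 101), 100, 50, True)
    #     g_direct = obs._calc_gamma(deltas, range(1, 101), 100, 50, False)
    #     assert np.allclose(g_fft, g_direct)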
389 """ 390 if self.tag is not None: 391 print("Description:", self.tag) 392 if not hasattr(self, 'e_dvalue'): 393 print('Result\t %3.8e' % (self.value)) 394 else: 395 if self.value == 0.0: 396 percentage = np.nan 397 else: 398 percentage = np.abs(self._dvalue / self.value) * 100 399 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 400 if len(self.e_names) > 1: 401 print(' Ensemble errors:') 402 for e_name in self.mc_names: 403 if len(self.e_names) > 1: 404 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 405 if self.tau_exp[e_name] > 0: 406 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 407 else: 408 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 409 for e_name in self.cov_names: 410 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 411 if ens_content is True: 412 if len(self.e_names) == 1: 413 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 414 else: 415 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 416 my_string_list = [] 417 for key, value in sorted(self.e_content.items()): 418 if key not in self.covobs: 419 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 420 if len(value) == 1: 421 my_string += f': {self.shape[value[0]]} configurations' 422 if isinstance(self.idl[value[0]], range): 423 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 424 else: 425 my_string += ' (irregular range)' 426 else: 427 sublist = [] 428 for v in value: 429 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 430 my_substring += f': {self.shape[v]} configurations' 431 if isinstance(self.idl[v], range): 432 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 433 else: 434 my_substring += ' (irregular range)' 435 sublist.append(my_substring) 436 437 my_string += '\n' + '\n'.join(sublist) 438 else: 439 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 440 my_string_list.append(my_string) 441 print('\n'.join(my_string_list)) 442 443 def reweight(self, weight): 444 """Reweight the obs with given rewighting factors. 445 446 Parameters 447 ---------- 448 weight : Obs 449 Reweighting factor. An Observable that has to be defined on a superset of the 450 configurations in obs[i].idl for all i. 451 all_configs : bool 452 if True, the reweighted observables are normalized by the average of 453 the reweighting factor on all configurations in weight.idl and not 454 on the configurations in obs[i].idl. Default False. 455 """ 456 return reweight(weight, [self])[0] 457 458 def is_zero_within_error(self, sigma=1): 459 """Checks whether the observable is zero within 'sigma' standard errors. 460 461 Parameters 462 ---------- 463 sigma : int 464 Number of standard errors used for the check. 465 466 Works only properly when the gamma method was run. 467 """ 468 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 469 470 def is_zero(self, atol=1e-10): 471 """Checks whether the observable is zero within a given tolerance. 472 473 Parameters 474 ---------- 475 atol : float 476 Absolute tolerance (for details see numpy documentation). 
477 """ 478 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 479 480 def plot_tauint(self, save=None): 481 """Plot integrated autocorrelation time for each ensemble. 482 483 Parameters 484 ---------- 485 save : str 486 saves the figure to a file named 'save' if. 487 """ 488 if not hasattr(self, 'e_dvalue'): 489 raise Exception('Run the gamma method first.') 490 491 for e, e_name in enumerate(self.mc_names): 492 fig = plt.figure() 493 plt.xlabel(r'$W$') 494 plt.ylabel(r'$\tau_\mathrm{int}$') 495 length = int(len(self.e_n_tauint[e_name])) 496 if self.tau_exp[e_name] > 0: 497 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 498 x_help = np.arange(2 * self.tau_exp[e_name]) 499 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 500 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 501 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 502 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 503 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 504 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 505 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 506 else: 507 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 508 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 509 510 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 511 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 512 plt.legend() 513 plt.xlim(-0.5, xmax) 514 ylim = plt.ylim() 515 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 516 plt.draw() 517 if save: 518 fig.savefig(save + "_" + str(e)) 519 520 def plot_rho(self, save=None): 521 """Plot normalized autocorrelation function time for each ensemble. 522 523 Parameters 524 ---------- 525 save : str 526 saves the figure to a file named 'save' if. 
527 """ 528 if not hasattr(self, 'e_dvalue'): 529 raise Exception('Run the gamma method first.') 530 for e, e_name in enumerate(self.mc_names): 531 fig = plt.figure() 532 plt.xlabel('W') 533 plt.ylabel('rho') 534 length = int(len(self.e_drho[e_name])) 535 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 536 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 537 if self.tau_exp[e_name] > 0: 538 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 539 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 540 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 541 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 542 else: 543 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 544 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 545 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 546 plt.xlim(-0.5, xmax) 547 plt.draw() 548 if save: 549 fig.savefig(save + "_" + str(e)) 550 551 def plot_rep_dist(self): 552 """Plot replica distribution for each ensemble with more than one replicum.""" 553 if not hasattr(self, 'e_dvalue'): 554 raise Exception('Run the gamma method first.') 555 for e, e_name in enumerate(self.mc_names): 556 if len(self.e_content[e_name]) == 1: 557 print('No replica distribution for a single replicum (', e_name, ')') 558 continue 559 r_length = [] 560 sub_r_mean = 0 561 for r, r_name in enumerate(self.e_content[e_name]): 562 r_length.append(len(self.deltas[r_name])) 563 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 564 e_N = np.sum(r_length) 565 sub_r_mean /= e_N 566 arr = np.zeros(len(self.e_content[e_name])) 567 for r, r_name in enumerate(self.e_content[e_name]): 568 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 569 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 570 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 571 plt.draw() 572 573 def plot_history(self, expand=True): 574 """Plot derived Monte Carlo history for each ensemble 575 576 Parameters 577 ---------- 578 expand : bool 579 show expanded history for irregular Monte Carlo chains (default: True). 580 """ 581 for e, e_name in enumerate(self.mc_names): 582 plt.figure() 583 r_length = [] 584 tmp = [] 585 tmp_expanded = [] 586 for r, r_name in enumerate(self.e_content[e_name]): 587 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 588 if expand: 589 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 590 r_length.append(len(tmp_expanded[-1])) 591 else: 592 r_length.append(len(tmp[-1])) 593 e_N = np.sum(r_length) 594 x = np.arange(e_N) 595 y_test = np.concatenate(tmp, axis=0) 596 if expand: 597 y = np.concatenate(tmp_expanded, axis=0) 598 else: 599 y = y_test 600 plt.errorbar(x, y, fmt='.', markersize=3) 601 plt.xlim(-0.5, e_N - 0.5) 602 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 603 plt.draw() 604 605 def plot_piechart(self, save=None): 606 """Plot piechart which shows the fractional contribution of each 607 ensemble to the error and returns a dictionary containing the fractions. 608 609 Parameters 610 ---------- 611 save : str 612 saves the figure to a file named 'save' if. 
613 """ 614 if not hasattr(self, 'e_dvalue'): 615 raise Exception('Run the gamma method first.') 616 if np.isclose(0.0, self._dvalue, atol=1e-15): 617 raise Exception('Error is 0.0') 618 labels = self.e_names 619 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 620 fig1, ax1 = plt.subplots() 621 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 622 ax1.axis('equal') 623 plt.draw() 624 if save: 625 fig1.savefig(save) 626 627 return dict(zip(self.e_names, sizes)) 628 629 def dump(self, filename, datatype="json.gz", description="", **kwargs): 630 """Dump the Obs to a file 'name' of chosen format. 631 632 Parameters 633 ---------- 634 filename : str 635 name of the file to be saved. 636 datatype : str 637 Format of the exported file. Supported formats include 638 "json.gz" and "pickle" 639 description : str 640 Description for output file, only relevant for json.gz format. 641 path : str 642 specifies a custom path for the file (default '.') 643 """ 644 if 'path' in kwargs: 645 file_name = kwargs.get('path') + '/' + filename 646 else: 647 file_name = filename 648 649 if datatype == "json.gz": 650 from .input.json import dump_to_json 651 dump_to_json([self], file_name, description=description) 652 elif datatype == "pickle": 653 with open(file_name + '.p', 'wb') as fb: 654 pickle.dump(self, fb) 655 else: 656 raise Exception("Unknown datatype " + str(datatype)) 657 658 def export_jackknife(self): 659 """Export jackknife samples from the Obs 660 661 Returns 662 ------- 663 numpy.ndarray 664 Returns a numpy array of length N + 1 where N is the number of samples 665 for the given ensemble and replicum. The zeroth entry of the array contains 666 the mean value of the Obs, entries 1 to N contain the N jackknife samples 667 derived from the Obs. The current implementation only works for observables 668 defined on exactly one ensemble and replicum. The derived jackknife samples 669 should agree with samples from a full jackknife analysis up to O(1/N). 670 """ 671 672 if len(self.names) != 1: 673 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 674 675 name = self.names[0] 676 full_data = self.deltas[name] + self.r_values[name] 677 n = full_data.size 678 mean = self.value 679 tmp_jacks = np.zeros(n + 1) 680 tmp_jacks[0] = mean 681 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 682 return tmp_jacks 683 684 def __float__(self): 685 return float(self.value) 686 687 def __repr__(self): 688 return 'Obs[' + str(self) + ']' 689 690 def __str__(self): 691 if self._dvalue == 0.0: 692 return str(self.value) 693 fexp = np.floor(np.log10(self._dvalue)) 694 if fexp < 0.0: 695 return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' 

    def __float__(self):
        return float(self.value)

    def __repr__(self):
        return 'Obs[' + str(self) + ']'

    def __str__(self):
        if self._dvalue == 0.0:
            return str(self.value)
        fexp = np.floor(np.log10(self._dvalue))
        if fexp < 0.0:
            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
        elif fexp == 0.0:
            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
        else:
            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)

    def __hash__(self):
        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
        hash_tuple += tuple([o.encode() for o in self.names])
        m = hashlib.md5()
        [m.update(o) for o in hash_tuple]
        return int(m.hexdigest(), 16) & 0xFFFFFFFF

    # Overload comparisons
    def __lt__(self, other):
        return self.value < other

    def __le__(self, other):
        return self.value <= other

    def __gt__(self, other):
        return self.value > other

    def __ge__(self, other):
        return self.value >= other

    def __eq__(self, other):
        return (self - other).is_zero()

    def __ne__(self, other):
        return not (self - other).is_zero()

    # Overload math operations
    def __add__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self + o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])

    def __radd__(self, y):
        return self + y

    def __mul__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self * o for o in y])
            elif isinstance(y, complex):
                return CObs(self * y.real, self * y.imag)
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])

    def __rmul__(self, y):
        return self * y

    def __sub__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self - o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])

    def __rsub__(self, y):
        return -1 * (self - y)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __truediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self / o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])

    def __rtruediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([o / self for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])

    def __pow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [self, y])
        else:
            return derived_observable(lambda x: x[0] ** y, [self])

    def __rpow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [y, self])
        else:
            return derived_observable(lambda x: y ** x[0], [self])

    def __abs__(self):
        return derived_observable(lambda x: anp.abs(x[0]), [self])

    # Overload numpy functions
    def sqrt(self):
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])

    def log(self):
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])

    def exp(self):
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])

    def sin(self):
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])

    def cos(self):
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])

    def tan(self):
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])

    def arcsin(self):
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])

    def arccos(self):
        return derived_observable(lambda x: anp.arccos(x[0]), [self])

    def arctan(self):
        return derived_observable(lambda x: anp.arctan(x[0]), [self])

    def sinh(self):
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])

    def cosh(self):
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])

    def tanh(self):
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])

    def arcsinh(self):
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])

    def arccosh(self):
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])

    def arctanh(self):
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
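

# Illustrative sketch (not part of the module): all overloaded operations
# propagate fluctuations via derived_observable, so the error analysis of a
# derived quantity is deferred until gamma_method is called on the result.
# obs_a and obs_b are hypothetical Obs instances.
#
#     ratio = obs_a / obs_b
#     log_ratio = np.log(ratio)      # numpy ufuncs dispatch to Obs.log
#     log_ratio.gamma_method()
#     print(log_ratio)               # value(error) formatting via __str__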


class CObs:
    """Class for a complex valued observable."""
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'
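

# Illustrative sketch (not part of the module): a CObs wraps two Obs as real
# and imaginary part; the multiplication above keeps the correlations between
# all four components. obs_re and obs_im are hypothetical Obs instances.
#
#     c1 = CObs(obs_re, obs_im)
#     c2 = c1 * c1.conjugate()       # |c1|^2 as a CObs
#     c2.gamma_method()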


def _expand_deltas(deltas, idx, shape):
    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
    If idx is of type range, the deltas are not changed

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
    shape : int
        Number of configs in idx.
    """
    if isinstance(idx, range):
        return deltas
    else:
        ret = np.zeros(idx[-1] - idx[0] + 1)
        for i in range(shape):
            ret[idx[i] - idx[0]] = deltas[i]
        return ret


def _merge_idx(idl):
    """Returns the union of all lists in idl as sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """

    # Use groupby to efficiently check whether all elements of idl are identical
    try:
        g = groupby(idl)
        if next(g, True) and not next(g, False):
            return idl[0]
    except Exception:
        pass

    if np.all([type(idx) is range for idx in idl]):
        if len(set([idx[0] for idx in idl])) == 1:
            idstart = min([idx.start for idx in idl])
            idstop = max([idx.stop for idx in idl])
            idstep = min([idx.step for idx in idl])
            return range(idstart, idstop, idstep)

    return sorted(set().union(*idl))
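

# Illustrative worked examples (not part of the module):
#
#     _expand_deltas(np.array([1., 2., 3.]), [1, 3, 4], 3)
#     # -> array([1., 0., 2., 3.])  (hole at config 2 filled with 0)
#
#     _merge_idx([range(1, 51), range(1, 101, 2)])
#     # -> range(1, 101)  (common start, widest stop, finest step)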
1004 """ 1005 1006 # Use groupby to efficiently check whether all elements of idl are identical 1007 try: 1008 g = groupby(idl) 1009 if next(g, True) and not next(g, False): 1010 return idl[0] 1011 except Exception: 1012 pass 1013 1014 if np.all([type(idx) is range for idx in idl]): 1015 if len(set([idx[0] for idx in idl])) == 1: 1016 idstart = min([idx.start for idx in idl]) 1017 idstop = max([idx.stop for idx in idl]) 1018 idstep = min([idx.step for idx in idl]) 1019 return range(idstart, idstop, idstep) 1020 1021 return sorted(set().union(*idl)) 1022 1023 1024def _intersection_idx(idl): 1025 """Returns the intersection of all lists in idl as sorted list 1026 1027 Parameters 1028 ---------- 1029 idl : list 1030 List of lists or ranges. 1031 """ 1032 1033 def _lcm(*args): 1034 """Returns the lowest common multiple of args. 1035 1036 From python 3.9 onwards the math library contains an lcm function.""" 1037 return reduce(lambda a, b: a * b // gcd(a, b), args) 1038 1039 # Use groupby to efficiently check whether all elements of idl are identical 1040 try: 1041 g = groupby(idl) 1042 if next(g, True) and not next(g, False): 1043 return idl[0] 1044 except Exception: 1045 pass 1046 1047 if np.all([type(idx) is range for idx in idl]): 1048 if len(set([idx[0] for idx in idl])) == 1: 1049 idstart = max([idx.start for idx in idl]) 1050 idstop = min([idx.stop for idx in idl]) 1051 idstep = _lcm(*[idx.step for idx in idl]) 1052 return range(idstart, idstop, idstep) 1053 1054 return sorted(set.intersection(*[set(o) for o in idl])) 1055 1056 1057def _expand_deltas_for_merge(deltas, idx, shape, new_idx): 1058 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 1059 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 1060 common divisor of the step sizes is used as new step size. 1061 1062 Parameters 1063 ---------- 1064 deltas : list 1065 List of fluctuations 1066 idx : list 1067 List or range of configs on which the deltas are defined. 1068 Has to be a subset of new_idx and has to be sorted in ascending order. 1069 shape : list 1070 Number of configs in idx. 1071 new_idx : list 1072 List of configs that defines the new range, has to be sorted in ascending order. 1073 """ 1074 1075 if type(idx) is range and type(new_idx) is range: 1076 if idx == new_idx: 1077 return deltas 1078 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1079 for i in range(shape): 1080 ret[idx[i] - new_idx[0]] = deltas[i] 1081 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1082 1083 1084def _collapse_deltas_for_merge(deltas, idx, shape, new_idx): 1085 """Collapse deltas defined on idx to the list of configs that is defined by new_idx. 1086 If idx and new_idx are of type range, the smallest 1087 common divisor of the step sizes is used as new step size. 1088 1089 Parameters 1090 ---------- 1091 deltas : list 1092 List of fluctuations 1093 idx : list 1094 List or range of configs on which the deltas are defined. 1095 Has to be a subset of new_idx and has to be sorted in ascending order. 1096 shape : list 1097 Number of configs in idx. 1098 new_idx : list 1099 List of configs that defines the new range, has to be sorted in ascending order. 
1100 """ 1101 1102 if type(idx) is range and type(new_idx) is range: 1103 if idx == new_idx: 1104 return deltas 1105 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1106 for i in range(shape): 1107 if idx[i] in new_idx: 1108 ret[idx[i] - new_idx[0]] = deltas[i] 1109 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1110 1111 1112def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): 1113 """Filter out all configurations with vanishing fluctuation such that they do not 1114 contribute to the error estimate anymore. Returns the new deltas and 1115 idx according to the filtering. 1116 A fluctuation is considered to be vanishing, if it is smaller than eps times 1117 the mean of the absolute values of all deltas in one list. 1118 1119 Parameters 1120 ---------- 1121 deltas : list 1122 List of fluctuations 1123 idx : list 1124 List or ranges of configs on which the deltas are defined. 1125 eps : float 1126 Prefactor that enters the filter criterion. 1127 """ 1128 new_deltas = [] 1129 new_idx = [] 1130 maxd = np.mean(np.fabs(deltas)) 1131 for i in range(len(deltas)): 1132 if abs(deltas[i]) > eps * maxd: 1133 new_deltas.append(deltas[i]) 1134 new_idx.append(idx[i]) 1135 if new_idx: 1136 return np.array(new_deltas), new_idx 1137 else: 1138 return deltas, idx 1139 1140 1141def derived_observable(func, data, array_mode=False, **kwargs): 1142 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1143 1144 Parameters 1145 ---------- 1146 func : object 1147 arbitrary function of the form func(data, **kwargs). For the 1148 automatic differentiation to work, all numpy functions have to have 1149 the autograd wrapper (use 'import autograd.numpy as anp'). 1150 data : list 1151 list of Obs, e.g. [obs1, obs2, obs3]. 1152 num_grad : bool 1153 if True, numerical derivatives are used instead of autograd 1154 (default False). To control the numerical differentiation the 1155 kwargs of numdifftools.step_generators.MaxStepGenerator 1156 can be used. 1157 man_grad : list 1158 manually supply a list or an array which contains the jacobian 1159 of func. Use cautiously, supplying the wrong derivative will 1160 not be intercepted. 1161 1162 Notes 1163 ----- 1164 For simple mathematical operations it can be practical to use anonymous 1165 functions. For the ratio of two observables one can e.g. use 1166 1167 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1168 """ 1169 1170 data = np.asarray(data) 1171 raveled_data = data.ravel() 1172 1173 # Workaround for matrix operations containing non Obs data 1174 if not all(isinstance(x, Obs) for x in raveled_data): 1175 for i in range(len(raveled_data)): 1176 if isinstance(raveled_data[i], (int, float)): 1177 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1178 1179 allcov = {} 1180 for o in raveled_data: 1181 for name in o.cov_names: 1182 if name in allcov: 1183 if not np.allclose(allcov[name], o.covobs[name].cov): 1184 raise Exception('Inconsistent covariance matrices for %s!' 


def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                if is_merged[name]:
                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
                else:
                    filtered_deltas = new_deltas[name]
                    filtered_idl_d = new_idl_d[name]

                new_samples.append(filtered_deltas)
                new_idl.append(filtered_idl_d)
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        final_result = final_result.item()

    return final_result
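

# Illustrative sketch (not part of the module): besides the man_grad route
# used by the overloaded operators, arbitrary functions can be differentiated
# automatically, provided they are written with autograd's numpy wrapper.
# obs1 and obs2 are hypothetical Obs instances.
#
#     func = lambda x: anp.log(x[0] ** 2 / x[1])
#     new_obs = derived_observable(func, [obs1, obs2])
#     # or with numerical differentiation via numdifftools:
#     new_obs = derived_observable(func, [obs1, obs2], num_grad=True)
#     new_obs.gamma_method()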


def _reduce_deltas(deltas, idx_old, idx_new):
    """Extract deltas defined on idx_old on all configs of idx_new.

    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
    are ordered in an ascending order.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx_old : list
        List or range of configs on which the deltas are defined
    idx_new : list
        List of configs for which we want to extract the deltas.
        Has to be a subset of idx_old.
    """
    if not len(deltas) == len(idx_old):
        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
    if type(idx_old) is range and type(idx_new) is range:
        if idx_old == idx_new:
            return deltas
    shape = len(idx_new)
    ret = np.zeros(shape)
    oldpos = 0
    for i in range(shape):
        pos = -1
        for j in range(oldpos, len(idx_old)):
            if idx_old[j] == idx_new[i]:
                pos = j
                break
        if pos < 0:
            raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i]))
        ret[i] = deltas[pos]
        oldpos = pos
    return np.array(ret)


def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.
    """
    result = []
    for i in range(len(obs)):
        if len(obs[i].cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(obs[i].names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in obs[i].names:
            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
        new_samples = []
        w_deltas = {}
        for name in sorted(obs[i].names):
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        if kwargs.get('all_configs'):
            new_weight = weight
        else:
            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        result.append(tmp_obs / new_weight)
        result[-1].reweighted = True
        result[-1].is_merged = obs[i].is_merged

    return result


def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception('Ensembles do not fit')
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception('Shapes of ensemble', name, 'do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception('idl of ensemble', name, 'do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o
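

# Illustrative sketch (not part of the module): reweight computes
# <w * O> / <w> per replica, while correlate builds the per-config product of
# two observables. rw_factor, obs_a and obs_b are hypothetical Obs defined on
# identical configurations.
#
#     reweighted_obs = reweight(rw_factor, [obs_a])[0]
#     product = correlate(obs_a, obs_b)   # correlate first, reweight after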


def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    cov = np.zeros((length, length))
    for i in range(length):
        for j in range(i, length):
            cov[i, j] = _covariance_element(obs[i], obs[j])
    cov = cov + cov.T - np.diag(np.diag(cov))

    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    errors = [o.dvalue for o in obs]
    cov = np.diag(errors) @ corr @ np.diag(errors)

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov
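

# Illustrative sketch (not part of the module): obs_a, obs_b and obs_c are
# hypothetical Obs on which gamma_method has already been run.
#
#     cov = covariance([obs_a, obs_b, obs_c])          # 3x3 error covariance
#     corr = covariance([obs_a, obs_b, obs_c], correlation=True)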


def _smooth_eigenvalues(corr, E):
    """Eigenvalue smoothing as described in hep-lat/9412087

    corr : np.ndarray
        correlation matrix
    E : integer
        Number of eigenvalues to be left substantially unchanged
    """
    if not (2 < E < corr.shape[0] - 1):
        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
    vals, vec = np.linalg.eigh(corr)
    lambda_min = np.mean(vals[:-E])
    vals[vals < lambda_min] = lambda_min
    vals /= np.mean(vals)
    return vec @ np.diag(vals) @ vec.T


def _covariance_element(obs1, obs2):
    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""

    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
        deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
        deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
        return np.sum(deltas1 * deltas2)

    if set(obs1.names).isdisjoint(set(obs2.names)):
        return 0.0

    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
        raise Exception('The gamma method has to be applied to both Obs first.')

    dvalue = 0.0

    for e_name in obs1.mc_names:

        if e_name not in obs2.mc_names:
            continue

        idl_d = {}
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])

        gamma = 0.0

        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            if len(idl_d[r_name]) == 0:
                continue
            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])

        if gamma == 0.0:
            continue

        gamma_div = 0.0
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            if len(idl_d[r_name]) == 0:
                continue
            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
        gamma /= gamma_div

        dvalue += gamma

    for e_name in obs1.cov_names:

        if e_name not in obs2.cov_names:
            continue

        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))

    return dvalue
    """
    length = len(jacks) - 1
    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
    samples = jacks[1:] @ prj
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    new_obs._value = jacks[0]
    return new_obs


def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs objects to be combined

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if (len(replist) == len(set(replist))) is False:
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any([len(o.cov_names) for o in list_of_obs]):
        raise Exception('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
    return o


def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    ol = []
    if isinstance(means, (float, int)):
        means = [means]

    for i in range(len(means)):
        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
    if ol[0].covobs[name].N != len(means):
        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
    if len(ol) == 1:
        return ol[0]
    return ol
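

# Usage sketch (not part of the library API): how merge_obs and cov_Obs are
# typically combined with Monte Carlo based Obs. All numerical values and
# ensemble/replica names below are purely illustrative.
if __name__ == '__main__':
    rng = np.random.default_rng(0)

    # Two replica of the same ensemble, combined into a single Obs.
    obs_r1 = Obs([rng.normal(1.0, 0.1, 500)], ['ens1|r01'])
    obs_r2 = Obs([rng.normal(1.0, 0.1, 500)], ['ens1|r02'])
    merged = merge_obs([obs_r1, obs_r2])
    merged.gamma_method()
    print(merged)

    # An external input with a known 2x2 covariance matrix, embedded as Covobs.
    zfac = cov_Obs([1.11, 0.95], [[1e-4, 2e-5], [2e-5, 4e-4]], 'Z_factors')
    combined = merged * zfac[0] / zfac[1]
    combined.gamma_method()
    print(combined)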
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
60 def __init__(self, samples, names, idl=None, **kwargs): 61 """ Initialize Obs object. 62 63 Parameters 64 ---------- 65 samples : list 66 list of numpy arrays containing the Monte Carlo samples 67 names : list 68 list of strings labeling the individual samples 69 idl : list, optional 70 list of ranges or lists on which the samples are defined 71 """ 72 73 if kwargs.get("means") is None and len(samples): 74 if len(samples) != len(names): 75 raise Exception('Length of samples and names incompatible.') 76 if idl is not None: 77 if len(idl) != len(names): 78 raise Exception('Length of idl incompatible with samples and names.') 79 name_length = len(names) 80 if name_length > 1: 81 if name_length != len(set(names)): 82 raise Exception('names are not unique.') 83 if not all(isinstance(x, str) for x in names): 84 raise TypeError('All names have to be strings.') 85 else: 86 if not isinstance(names[0], str): 87 raise TypeError('All names have to be strings.') 88 if min(len(x) for x in samples) <= 4: 89 raise Exception('Samples have to have at least 5 entries.') 90 91 self.names = sorted(names) 92 self.shape = {} 93 self.r_values = {} 94 self.deltas = {} 95 self._covobs = {} 96 97 self._value = 0 98 self.N = 0 99 self.is_merged = {} 100 self.idl = {} 101 if idl is not None: 102 for name, idx in sorted(zip(names, idl)): 103 if isinstance(idx, range): 104 self.idl[name] = idx 105 elif isinstance(idx, (list, np.ndarray)): 106 dc = np.unique(np.diff(idx)) 107 if np.any(dc < 0): 108 raise Exception("Unsorted idx for idl[%s]" % (name)) 109 if len(dc) == 1: 110 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 111 else: 112 self.idl[name] = list(idx) 113 else: 114 raise Exception('incompatible type for idl[%s].' % (name)) 115 else: 116 for name, sample in sorted(zip(names, samples)): 117 self.idl[name] = range(1, len(sample) + 1) 118 119 if kwargs.get("means") is not None: 120 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 121 self.shape[name] = len(self.idl[name]) 122 self.N += self.shape[name] 123 self.r_values[name] = mean 124 self.deltas[name] = sample 125 else: 126 for name, sample in sorted(zip(names, samples)): 127 self.shape[name] = len(self.idl[name]) 128 self.N += self.shape[name] 129 if len(sample) != self.shape[name]: 130 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 131 self.r_values[name] = np.mean(sample) 132 self.deltas[name] = sample - self.r_values[name] 133 self._value += self.shape[name] * self.r_values[name] 134 self._value /= self.N 135 136 self._dvalue = 0.0 137 self.ddvalue = 0.0 138 self.reweighted = False 139 140 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
175 def gamma_method(self, **kwargs): 176 """Estimate the error and related properties of the Obs. 177 178 Parameters 179 ---------- 180 S : float 181 specifies a custom value for the parameter S (default 2.0). 182 If set to 0 it is assumed that the data exhibits no 183 autocorrelation. In this case the error estimates coincides 184 with the sample standard error. 185 tau_exp : float 186 positive value triggers the critical slowing down analysis 187 (default 0.0). 188 N_sigma : float 189 number of standard deviations from zero until the tail is 190 attached to the autocorrelation function (default 1). 191 fft : bool 192 determines whether the fft algorithm is used for the computation 193 of the autocorrelation function (default True) 194 """ 195 196 e_content = self.e_content 197 self.e_dvalue = {} 198 self.e_ddvalue = {} 199 self.e_tauint = {} 200 self.e_dtauint = {} 201 self.e_windowsize = {} 202 self.e_n_tauint = {} 203 self.e_n_dtauint = {} 204 e_gamma = {} 205 self.e_rho = {} 206 self.e_drho = {} 207 self._dvalue = 0 208 self.ddvalue = 0 209 210 self.S = {} 211 self.tau_exp = {} 212 self.N_sigma = {} 213 214 if kwargs.get('fft') is False: 215 fft = False 216 else: 217 fft = True 218 219 def _parse_kwarg(kwarg_name): 220 if kwarg_name in kwargs: 221 tmp = kwargs.get(kwarg_name) 222 if isinstance(tmp, (int, float)): 223 if tmp < 0: 224 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 225 for e, e_name in enumerate(self.e_names): 226 getattr(self, kwarg_name)[e_name] = tmp 227 else: 228 raise TypeError(kwarg_name + ' is not in proper format.') 229 else: 230 for e, e_name in enumerate(self.e_names): 231 if e_name in getattr(Obs, kwarg_name + '_dict'): 232 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 233 else: 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 235 236 _parse_kwarg('S') 237 _parse_kwarg('tau_exp') 238 _parse_kwarg('N_sigma') 239 240 for e, e_name in enumerate(self.mc_names): 241 r_length = [] 242 for r_name in e_content[e_name]: 243 if isinstance(self.idl[r_name], range): 244 r_length.append(len(self.idl[r_name])) 245 else: 246 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 247 248 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 249 w_max = max(r_length) // 2 250 e_gamma[e_name] = np.zeros(w_max) 251 self.e_rho[e_name] = np.zeros(w_max) 252 self.e_drho[e_name] = np.zeros(w_max) 253 254 for r_name in e_content[e_name]: 255 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 256 257 gamma_div = np.zeros(w_max) 258 for r_name in e_content[e_name]: 259 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 260 gamma_div[gamma_div < 1] = 1.0 261 e_gamma[e_name] /= gamma_div[:w_max] 262 263 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 264 self.e_tauint[e_name] = 0.5 265 self.e_dtauint[e_name] = 0.0 266 self.e_dvalue[e_name] = 0.0 267 self.e_ddvalue[e_name] = 0.0 268 self.e_windowsize[e_name] = 0 269 continue 270 271 gaps = [] 272 for r_name in e_content[e_name]: 273 if isinstance(self.idl[r_name], range): 274 gaps.append(1) 275 else: 276 gaps.append(np.min(np.diff(self.idl[r_name]))) 277 278 if not np.all([gi == gaps[0] for gi in gaps]): 279 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 280 else: 281 gapsize = gaps[0] 282 283 self.e_rho[e_name] = e_gamma[e_name][:w_max] / 
e_gamma[e_name][0] 284 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 285 # Make sure no entry of tauint is smaller than 0.5 286 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 287 # hep-lat/0306017 eq. (42) 288 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 289 self.e_n_dtauint[e_name][0] = 0.0 290 291 def _compute_drho(i): 292 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 293 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 294 295 _compute_drho(gapsize) 296 if self.tau_exp[e_name] > 0: 297 texp = self.tau_exp[e_name] 298 # Critical slowing down analysis 299 if w_max // 2 <= 1: 300 raise Exception("Need at least 8 samples for tau_exp error analysis") 301 for n in range(gapsize, w_max // 2, gapsize): 302 _compute_drho(n + gapsize) 303 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 304 # Bias correction hep-lat/0306017 eq. (49) included 305 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 306 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 307 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 308 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 309 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 310 self.e_windowsize[e_name] = n 311 break 312 else: 313 if self.S[e_name] == 0.0: 314 self.e_tauint[e_name] = 0.5 315 self.e_dtauint[e_name] = 0.0 316 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 317 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 318 self.e_windowsize[e_name] = 0 319 else: 320 # Standard automatic windowing procedure 321 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 322 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 323 for n in range(1, w_max): 324 if n < w_max // 2 - 2: 325 _compute_drho(gapsize * n + gapsize) 326 if g_w[n - 1] < 0 or n >= w_max - 1: 327 n *= gapsize 328 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. 
(49) 329 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 330 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 331 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 332 self.e_windowsize[e_name] = n 333 break 334 335 self._dvalue += self.e_dvalue[e_name] ** 2 336 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 337 338 for e_name in self.cov_names: 339 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 340 self.e_ddvalue[e_name] = 0 341 self._dvalue += self.e_dvalue[e_name]**2 342 343 self._dvalue = np.sqrt(self._dvalue) 344 if self._dvalue == 0.0: 345 self.ddvalue = 0.0 346 else: 347 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 348 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
383 def details(self, ens_content=True): 384 """Output detailed properties of the Obs. 385 386 Parameters 387 ---------- 388 ens_content : bool 389 print details about the ensembles and replica if true. 390 """ 391 if self.tag is not None: 392 print("Description:", self.tag) 393 if not hasattr(self, 'e_dvalue'): 394 print('Result\t %3.8e' % (self.value)) 395 else: 396 if self.value == 0.0: 397 percentage = np.nan 398 else: 399 percentage = np.abs(self._dvalue / self.value) * 100 400 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 401 if len(self.e_names) > 1: 402 print(' Ensemble errors:') 403 for e_name in self.mc_names: 404 if len(self.e_names) > 1: 405 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 406 if self.tau_exp[e_name] > 0: 407 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 408 else: 409 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 410 for e_name in self.cov_names: 411 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 412 if ens_content is True: 413 if len(self.e_names) == 1: 414 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 415 else: 416 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 417 my_string_list = [] 418 for key, value in sorted(self.e_content.items()): 419 if key not in self.covobs: 420 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 421 if len(value) == 1: 422 my_string += f': {self.shape[value[0]]} configurations' 423 if isinstance(self.idl[value[0]], range): 424 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 425 else: 426 my_string += ' (irregular range)' 427 else: 428 sublist = [] 429 for v in value: 430 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 431 my_substring += f': {self.shape[v]} configurations' 432 if isinstance(self.idl[v], range): 433 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 434 else: 435 my_substring += ' (irregular range)' 436 sublist.append(my_substring) 437 438 my_string += '\n' + '\n'.join(sublist) 439 else: 440 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 441 my_string_list.append(my_string) 442 print('\n'.join(my_string_list))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
444 def reweight(self, weight): 445 """Reweight the obs with given rewighting factors. 446 447 Parameters 448 ---------- 449 weight : Obs 450 Reweighting factor. An Observable that has to be defined on a superset of the 451 configurations in obs[i].idl for all i. 452 all_configs : bool 453 if True, the reweighted observables are normalized by the average of 454 the reweighting factor on all configurations in weight.idl and not 455 on the configurations in obs[i].idl. Default False. 456 """ 457 return reweight(weight, [self])[0]
Reweight the obs with given rewighting factors.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
459 def is_zero_within_error(self, sigma=1): 460 """Checks whether the observable is zero within 'sigma' standard errors. 461 462 Parameters 463 ---------- 464 sigma : int 465 Number of standard errors used for the check. 466 467 Works only properly when the gamma method was run. 468 """ 469 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
- Works only properly when the gamma method was run.
471 def is_zero(self, atol=1e-10): 472 """Checks whether the observable is zero within a given tolerance. 473 474 Parameters 475 ---------- 476 atol : float 477 Absolute tolerance (for details see numpy documentation). 478 """ 479 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
481 def plot_tauint(self, save=None): 482 """Plot integrated autocorrelation time for each ensemble. 483 484 Parameters 485 ---------- 486 save : str 487 saves the figure to a file named 'save' if. 488 """ 489 if not hasattr(self, 'e_dvalue'): 490 raise Exception('Run the gamma method first.') 491 492 for e, e_name in enumerate(self.mc_names): 493 fig = plt.figure() 494 plt.xlabel(r'$W$') 495 plt.ylabel(r'$\tau_\mathrm{int}$') 496 length = int(len(self.e_n_tauint[e_name])) 497 if self.tau_exp[e_name] > 0: 498 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 499 x_help = np.arange(2 * self.tau_exp[e_name]) 500 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 501 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 502 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 503 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 504 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 505 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 506 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 507 else: 508 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 509 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 510 511 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 512 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 513 plt.legend() 514 plt.xlim(-0.5, xmax) 515 ylim = plt.ylim() 516 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 517 plt.draw() 518 if save: 519 fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
521 def plot_rho(self, save=None): 522 """Plot normalized autocorrelation function time for each ensemble. 523 524 Parameters 525 ---------- 526 save : str 527 saves the figure to a file named 'save' if. 528 """ 529 if not hasattr(self, 'e_dvalue'): 530 raise Exception('Run the gamma method first.') 531 for e, e_name in enumerate(self.mc_names): 532 fig = plt.figure() 533 plt.xlabel('W') 534 plt.ylabel('rho') 535 length = int(len(self.e_drho[e_name])) 536 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 537 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 538 if self.tau_exp[e_name] > 0: 539 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 540 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 541 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 542 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 543 else: 544 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 545 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 546 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 547 plt.xlim(-0.5, xmax) 548 plt.draw() 549 if save: 550 fig.savefig(save + "_" + str(e))
Plot normalized autocorrelation function time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
552 def plot_rep_dist(self): 553 """Plot replica distribution for each ensemble with more than one replicum.""" 554 if not hasattr(self, 'e_dvalue'): 555 raise Exception('Run the gamma method first.') 556 for e, e_name in enumerate(self.mc_names): 557 if len(self.e_content[e_name]) == 1: 558 print('No replica distribution for a single replicum (', e_name, ')') 559 continue 560 r_length = [] 561 sub_r_mean = 0 562 for r, r_name in enumerate(self.e_content[e_name]): 563 r_length.append(len(self.deltas[r_name])) 564 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 565 e_N = np.sum(r_length) 566 sub_r_mean /= e_N 567 arr = np.zeros(len(self.e_content[e_name])) 568 for r, r_name in enumerate(self.e_content[e_name]): 569 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 570 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 571 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 572 plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
574 def plot_history(self, expand=True): 575 """Plot derived Monte Carlo history for each ensemble 576 577 Parameters 578 ---------- 579 expand : bool 580 show expanded history for irregular Monte Carlo chains (default: True). 581 """ 582 for e, e_name in enumerate(self.mc_names): 583 plt.figure() 584 r_length = [] 585 tmp = [] 586 tmp_expanded = [] 587 for r, r_name in enumerate(self.e_content[e_name]): 588 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 589 if expand: 590 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 591 r_length.append(len(tmp_expanded[-1])) 592 else: 593 r_length.append(len(tmp[-1])) 594 e_N = np.sum(r_length) 595 x = np.arange(e_N) 596 y_test = np.concatenate(tmp, axis=0) 597 if expand: 598 y = np.concatenate(tmp_expanded, axis=0) 599 else: 600 y = y_test 601 plt.errorbar(x, y, fmt='.', markersize=3) 602 plt.xlim(-0.5, e_N - 0.5) 603 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 604 plt.draw()
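    # Sketch for an observable measured on every second configuration only
    # (hypothetical data); plot_history does not require a prior
    # gamma_method call:
    #
    #   my_obs = Obs([samples], ['ensemble1'], idl=[range(1, 2000, 2)])
    #   my_obs.plot_history()              # gaps in the chain are shown
    #   my_obs.plot_history(expand=False)  # samples are plotted back to back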
    def plot_piechart(self, save=None):
        """Plot piechart which shows the fractional contribution of each
        ensemble to the error and returns a dictionary containing the fractions.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if set.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        if np.isclose(0.0, self._dvalue, atol=1e-15):
            raise Exception('Error is 0.0')
        labels = self.e_names
        sizes = np.array([self.e_dvalue[name] ** 2 for name in labels]) / self._dvalue ** 2
        fig1, ax1 = plt.subplots()
        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
        ax1.axis('equal')
        plt.draw()
        if save:
            fig1.savefig(save)

        return dict(zip(self.e_names, sizes))
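    # Sketch for an observable defined on two ensembles (hypothetical data);
    # the returned dictionary contains the fractional contributions to the
    # squared error:
    #
    #   my_obs = Obs([samples_a, samples_b], ['ensemble_A', 'ensemble_B'])
    #   my_obs.gamma_method()
    #   fractions = my_obs.plot_piechart()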
    def dump(self, filename, datatype="json.gz", description="", **kwargs):
        """Dump the Obs to a file 'filename' of chosen format.

        Parameters
        ----------
        filename : str
            name of the file to be saved.
        datatype : str
            Format of the exported file. Supported formats include
            "json.gz" and "pickle".
        description : str
            Description for output file, only relevant for json.gz format.
        path : str
            specifies a custom path for the file (default '.')
        """
        if 'path' in kwargs:
            file_name = kwargs.get('path') + '/' + filename
        else:
            file_name = filename

        if datatype == "json.gz":
            from .input.json import dump_to_json
            dump_to_json([self], file_name, description=description)
        elif datatype == "pickle":
            with open(file_name + '.p', 'wb') as fb:
                pickle.dump(self, fb)
        else:
            raise Exception("Unknown datatype " + str(datatype))
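    # Usage sketch (hypothetical file names):
    #
    #   my_obs.dump('my_obs')                    # json.gz format (default)
    #   my_obs.dump('my_obs', datatype='pickle',
    #               path='/tmp')                 # writes /tmp/my_obs.p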
    def export_jackknife(self):
        """Export jackknife samples from the Obs

        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length N + 1 where N is the number of samples
            for the given ensemble and replicum. The zeroth entry of the array contains
            the mean value of the Obs, entries 1 to N contain the N jackknife samples
            derived from the Obs. The current implementation only works for observables
            defined on exactly one ensemble and replicum. The derived jackknife samples
            should agree with samples from a full jackknife analysis up to O(1/N).
        """

        if len(self.names) != 1:
            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")

        name = self.names[0]
        full_data = self.deltas[name] + self.r_values[name]
        n = full_data.size
        mean = self.value
        tmp_jacks = np.zeros(n + 1)
        tmp_jacks[0] = mean
        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
        return tmp_jacks
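    # Sketch: the exported samples can be handed to code that cannot deal
    # with Obs objects. The naive jackknife error below should roughly agree
    # with my_obs.dvalue in the absence of autocorrelation (data hypothetical):
    #
    #   jacks = my_obs.export_jackknife()
    #   n = len(jacks) - 1
    #   jack_err = np.sqrt((n - 1) * np.mean((jacks[1:] - jacks[0]) ** 2))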
class CObs:
    """Class for a complex valued observable."""
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'
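# Sketch of complex arithmetic with CObs (obs_re and obs_im are hypothetical
# Obs instances for the real and imaginary part):
#
#   c1 = CObs(obs_re, obs_im)
#   c2 = c1 * c1.conjugate()  # |c1|^2, the imaginary part vanishes
#   c2.gamma_method()
#   print(c2.real.value, c2.real.dvalue)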
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                if is_merged[name]:
                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
                else:
                    filtered_deltas = new_deltas[name]
                    filtered_idl_d = new_idl_d[name]

                new_samples.append(filtered_deltas)
                new_idl.append(filtered_idl_d)
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        final_result = final_result.item()

    return final_result
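# Further usage sketches (obs1 is a hypothetical Obs; anp is the
# autograd-wrapped numpy imported at the top of this module):
#
#   log_obs = derived_observable(lambda x: anp.log(x[0]), [obs1])
#   # numerical instead of automatic differentiation:
#   log_obs_num = derived_observable(lambda x, **kwargs: np.log(x[0]),
#                                    [obs1], num_grad=True)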
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.
    """
    result = []
    for i in range(len(obs)):
        if len(obs[i].cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(obs[i].names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in obs[i].names:
            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
        new_samples = []
        w_deltas = {}
        for name in sorted(obs[i].names):
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        if kwargs.get('all_configs'):
            new_weight = weight
        else:
            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        result.append(tmp_obs / new_weight)
        result[-1].reweighted = True
        result[-1].is_merged = obs[i].is_merged

    return result
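# Usage sketch (w_samples is a hypothetical reweighting factor measured on a
# superset of the configurations of my_obs):
#
#   weight = Obs([w_samples], ['ensemble1'])
#   my_obs_rw = reweight(weight, [my_obs])[0]
#   my_obs_rw.gamma_method()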
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception('Ensembles do not fit')
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception('Shapes of ensemble', name, 'do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception('idl of ensemble', name, 'do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o
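# Usage sketch (obs_a and obs_b are hypothetical primary observables defined
# on identical configurations):
#
#   ab = correlate(obs_a, obs_b)    # per-configuration product <ab> as an Obs
#   connected = ab - obs_a * obs_b  # naive connected part <ab> - <a><b>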
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$
    for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    cov = np.zeros((length, length))
    for i in range(length):
        for j in range(i, length):
            cov[i, j] = _covariance_element(obs[i], obs[j])
    cov = cov + cov.T - np.diag(np.diag(cov))

    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    errors = [o.dvalue for o in obs]
    cov = np.diag(errors) @ corr @ np.diag(errors)

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov
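# Usage sketch (obs1 and obs2 are hypothetical observables on which the
# gamma method has already been run):
#
#   cov = covariance([obs1, obs2])                     # 2x2 error covariance
#   corr = covariance([obs1, obs2], correlation=True)  # normalized correlation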
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list, optional
        list containing the range or list on which the samples are defined
        (passed on to the Obs constructor).
    """
    length = len(jacks) - 1
    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
    samples = jacks[1:] @ prj
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    new_obs._value = jacks[0]
    return new_obs
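# Sketch of a round trip via jackknife samples (my_obs is a hypothetical Obs
# defined on a single ensemble and replicum):
#
#   jacks = my_obs.export_jackknife()
#   reconstructed = import_jackknife(jacks, 'ensemble1')
#   # reconstructed reproduces the value and fluctuations of my_obs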
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs objects to be combined

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if (len(replist) == len(set(replist))) is False:
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any([len(o.cov_names) for o in list_of_obs]):
        raise Exception('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
    return o
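# Usage sketch: combine measurements from two disjoint replica of the same
# ensemble (hypothetical data):
#
#   obs_r1 = Obs([samples_r1], ['ens|r1'])
#   obs_r2 = Obs([samples_r2], ['ens|r2'])
#   combined = merge_obs([obs_r1, obs_r2])
#   combined.gamma_method()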
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    ol = []
    if isinstance(means, (float, int)):
        means = [means]

    for i in range(len(means)):
        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
    if ol[0].covobs[name].N != len(means):
        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
    if len(ol) == 1:
        return ol[0]
    return ol
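# Usage sketch: external input, e.g. a renormalization constant with a known
# uncertainty, entering the analysis as a Covobs (numbers are hypothetical):
#
#   Z = cov_Obs(1.23, 0.02 ** 2, 'Z')  # 0d covariance, i.e. the squared error
#   z1, z2 = cov_Obs([1.1, 0.96], [[4e-4, 1e-4], [1e-4, 9e-4]], 'pair')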