pyerrors.obs
import warnings
import pickle
import numpy as np
import autograd.numpy as anp  # Thinly-wrapped numpy
from autograd import jacobian
import matplotlib.pyplot as plt
from scipy.stats import skew, skewtest, kurtosis, kurtosistest
import numdifftools as nd
from itertools import groupby
from .covobs import Covobs


class Obs:
    """Class for a general observable.

    Instances of Obs are the basic objects of a pyerrors error analysis.
    They are initialized with a list which contains arrays of samples for
    different ensembles/replica and another list of same length which contains
    the names of the ensembles/replica. Mathematical operations can be
    performed on instances. The result is another instance of Obs. The error of
    an instance can be computed with the gamma_method. Also contains additional
    methods for output and visualization of the error calculation.

    Attributes
    ----------
    S_global : float
        Standard value for S (default 2.0)
    S_dict : dict
        Dictionary for S values. If an entry for a given ensemble
        exists this overwrites the standard value for that ensemble.
    tau_exp_global : float
        Standard value for tau_exp (default 0.0)
    tau_exp_dict : dict
        Dictionary for tau_exp values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    N_sigma_global : float
        Standard value for N_sigma (default 1.0)
    N_sigma_dict : dict
        Dictionary for N_sigma values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    """
    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']

    S_global = 2.0
    S_dict = {}
    tau_exp_global = 0.0
    tau_exp_dict = {}
    N_sigma_global = 1.0
    N_sigma_dict = {}
    filter_eps = 1e-10

    def __init__(self, samples, names, idl=None, **kwargs):
        """ Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined
        """

        if kwargs.get("means") is None and len(samples):
            if len(samples) != len(names):
                raise Exception('Length of samples and names incompatible.')
            if idl is not None:
                if len(idl) != len(names):
                    raise Exception('Length of idl incompatible with samples and names.')
            name_length = len(names)
            if name_length > 1:
                if name_length != len(set(names)):
                    raise Exception('names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
            if min(len(x) for x in samples) <= 4:
                raise Exception('Samples have to have at least 5 entries.')

        self.names = sorted(names)
        self.shape = {}
        self.r_values = {}
        self.deltas = {}
        self._covobs = {}

        self._value = 0
        self.N = 0
        self.is_merged = {}
        self.idl = {}
        if idl is not None:
            for name, idx in sorted(zip(names, idl)):
                if isinstance(idx, range):
                    self.idl[name] = idx
                elif isinstance(idx, (list, np.ndarray)):
                    dc = np.unique(np.diff(idx))
                    if np.any(dc < 0):
                        raise Exception("Unsorted idx for idl[%s]" % (name))
                    if len(dc) == 1:
                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
                    else:
                        self.idl[name] = list(idx)
                else:
                    raise Exception('incompatible type for idl[%s].' % (name))
        else:
            for name, sample in sorted(zip(names, samples)):
                self.idl[name] = range(1, len(sample) + 1)

        if kwargs.get("means") is not None:
            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                self.r_values[name] = mean
                self.deltas[name] = sample
        else:
            for name, sample in sorted(zip(names, samples)):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                if len(sample) != self.shape[name]:
                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
                self.r_values[name] = np.mean(sample)
                self.deltas[name] = sample - self.r_values[name]
                self._value += self.shape[name] * self.r_values[name]
            self._value /= self.N

        self._dvalue = 0.0
        self.ddvalue = 0.0
        self.reweighted = False

        self.tag = None

    @property
    def value(self):
        return self._value

    @property
    def dvalue(self):
        return self._dvalue

    @property
    def e_names(self):
        return sorted(set([o.split('|')[0] for o in self.names]))

    @property
    def cov_names(self):
        return sorted(set([o for o in self.covobs.keys()]))

    @property
    def mc_names(self):
        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))

    @property
    def e_content(self):
        res = {}
        for e, e_name in enumerate(self.e_names):
            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
            if e_name in self.names:
                res[e_name].append(e_name)
        return res

    @property
    def covobs(self):
        return self._covobs

    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
        """

        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(1)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(n + 1)
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
        idx is assumed to be a contiguous range (possibly with a stepsize != 1)

        Parameters
        ----------
        deltas : list
            List of fluctuations
        idx : list
            List or range of configurations on which the deltas are defined.
        shape : int
            Number of configurations in idx.
        w_max : int
            Upper bound for the summation window.
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function.
        """
        gamma = np.zeros(w_max)
        deltas = _expand_deltas(deltas, idx, shape)
        new_shape = len(deltas)
        if fft:
            max_gamma = min(new_shape, w_max)
            # The padding for the fft has to be even
            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
        else:
            for n in range(w_max):
                if new_shape - n >= 0:
                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])

        return gamma

    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if true.
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            for e_name in self.mc_names:
                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                if self.tau_exp[e_name] > 0:
                    print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
                else:
                    print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += ' (irregular range)'
                    else:
                        sublist = []
                        for v in value:
                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += ' (irregular range)'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = '  ' + "\u00B7 Covobs '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))

    def is_zero_within_error(self, sigma=1):
        """Checks whether the observable is zero within 'sigma' standard errors.

        Parameters
        ----------
        sigma : int
            Number of standard errors used for the check.

        Only works properly when the gamma method has been run.
        """
        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

    def is_zero(self, atol=1e-10):
        """Checks whether the observable is zero within a given tolerance.

        Parameters
        ----------
        atol : float
            Absolute tolerance (for details see numpy documentation).
        """
        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

    def plot_tauint(self, save=None):
        """Plot integrated autocorrelation time for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if specified.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')

        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel(r'$W$')
            plt.ylabel(r'$\tau_\mathrm{int}$')
            length = int(len(self.e_n_tauint[e_name]))
            if self.tau_exp[e_name] > 0:
                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
                x_help = np.arange(2 * self.tau_exp[e_name])
                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
            else:
                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
            plt.legend()
            plt.xlim(-0.5, xmax)
            ylim = plt.ylim()
            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rho(self, save=None):
        """Plot normalized autocorrelation function for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if specified.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel('W')
            plt.ylabel('rho')
            length = int(len(self.e_drho[e_name]))
            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
            if self.tau_exp[e_name] > 0:
                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
            else:
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
            plt.xlim(-0.5, xmax)
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rep_dist(self):
        """Plot replica distribution for each ensemble with more than one replicum."""
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            if len(self.e_content[e_name]) == 1:
                print('No replica distribution for a single replicum (', e_name, ')')
                continue
            r_length = []
            sub_r_mean = 0
            for r, r_name in enumerate(self.e_content[e_name]):
                r_length.append(len(self.deltas[r_name]))
                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
            e_N = np.sum(r_length)
            sub_r_mean /= e_N
            arr = np.zeros(len(self.e_content[e_name]))
            for r, r_name in enumerate(self.e_content[e_name]):
                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
            plt.draw()

    def plot_history(self, expand=True):
        """Plot derived Monte Carlo history for each ensemble

        Parameters
        ----------
        expand : bool
            show expanded history for irregular Monte Carlo chains (default: True).
        """
        for e, e_name in enumerate(self.mc_names):
            plt.figure()
            r_length = []
            tmp = []
            tmp_expanded = []
            for r, r_name in enumerate(self.e_content[e_name]):
                tmp.append(self.deltas[r_name] + self.r_values[r_name])
                if expand:
                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
                    r_length.append(len(tmp_expanded[-1]))
                else:
                    r_length.append(len(tmp[-1]))
            e_N = np.sum(r_length)
            x = np.arange(e_N)
            y_test = np.concatenate(tmp, axis=0)
            if expand:
                y = np.concatenate(tmp_expanded, axis=0)
            else:
                y = y_test
            plt.errorbar(x, y, fmt='.', markersize=3)
            plt.xlim(-0.5, e_N - 0.5)
            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
            plt.draw()

    def plot_piechart(self, save=None):
        """Plot piechart which shows the fractional contribution of each
        ensemble to the error and returns a dictionary containing the fractions.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if specified.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        if np.isclose(0.0, self._dvalue, atol=1e-15):
            raise Exception('Error is 0.0')
        labels = self.e_names
        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
        fig1, ax1 = plt.subplots()
        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
        ax1.axis('equal')
        plt.draw()
        if save:
            fig1.savefig(save)

        return dict(zip(self.e_names, sizes))

    def dump(self, filename, datatype="json.gz", description="", **kwargs):
        """Dump the Obs to a file 'filename' of chosen format.

        Parameters
        ----------
        filename : str
            name of the file to be saved.
        datatype : str
            Format of the exported file. Supported formats include
            "json.gz" and "pickle"
        description : str
            Description for output file, only relevant for json.gz format.
        path : str
            specifies a custom path for the file (default '.')
        """
        if 'path' in kwargs:
            file_name = kwargs.get('path') + '/' + filename
        else:
            file_name = filename

        if datatype == "json.gz":
            from .input.json import dump_to_json
            dump_to_json([self], file_name, description=description)
        elif datatype == "pickle":
            with open(file_name + '.p', 'wb') as fb:
                pickle.dump(self, fb)
        else:
            raise Exception("Unknown datatype " + str(datatype))

    def export_jackknife(self):
        """Export jackknife samples from the Obs

        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length N + 1 where N is the number of samples
            for the given ensemble and replicum. The zeroth entry of the array contains
            the mean value of the Obs, entries 1 to N contain the N jackknife samples
            derived from the Obs. The current implementation only works for observables
            defined on exactly one ensemble and replicum. The derived jackknife samples
            should agree with samples from a full jackknife analysis up to O(1/N).
        """

        if len(self.names) != 1:
            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")

        name = self.names[0]
        full_data = self.deltas[name] + self.r_values[name]
        n = full_data.size
        mean = self.value
        tmp_jacks = np.zeros(n + 1)
        tmp_jacks[0] = mean
        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
        return tmp_jacks

    def __float__(self):
        return float(self.value)

    def __repr__(self):
        return 'Obs[' + str(self) + ']'

    def __str__(self):
        if self._dvalue == 0.0:
            return str(self.value)
        fexp = np.floor(np.log10(self._dvalue))
        if fexp < 0.0:
            return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
        elif fexp == 0.0:
            return '{:.1f}({:1.1f})'.format(self.value, self._dvalue)
        else:
            return '{:.0f}({:2.0f})'.format(self.value, self._dvalue)

    # Overload comparisons
    def __lt__(self, other):
        return self.value < other

    def __le__(self, other):
        return self.value <= other

    def __gt__(self, other):
        return self.value > other

    def __ge__(self, other):
        return self.value >= other

    def __eq__(self, other):
        return (self - other).is_zero()

    def __ne__(self, other):
        return not (self - other).is_zero()

    # Overload math operations
    def __add__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self + o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])

    def __radd__(self, y):
        return self + y

    def __mul__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self * o for o in y])
            elif isinstance(y, complex):
                return CObs(self * y.real, self * y.imag)
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])

    def __rmul__(self, y):
        return self * y

    def __sub__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self - o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])

    def __rsub__(self, y):
        return -1 * (self - y)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __truediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self / o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])

    def __rtruediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([o / self for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])

    def __pow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [self, y])
        else:
            return derived_observable(lambda x: x[0] ** y, [self])

    def __rpow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [y, self])
        else:
            return derived_observable(lambda x: y ** x[0], [self])

    def __abs__(self):
        return derived_observable(lambda x: anp.abs(x[0]), [self])

    # Overload numpy functions
    def sqrt(self):
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])

    def log(self):
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])

    def exp(self):
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])

    def sin(self):
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])

    def cos(self):
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])

    def tan(self):
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])

    def arcsin(self):
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])

    def arccos(self):
        return derived_observable(lambda x: anp.arccos(x[0]), [self])

    def arctan(self):
        return derived_observable(lambda x: anp.arctan(x[0]), [self])

    def sinh(self):
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])

    def cosh(self):
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])

    def tanh(self):
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])

    def arcsinh(self):
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])

    def arccosh(self):
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])

    def arctanh(self):
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])


class CObs:
    """Class for a complex valued observable."""
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'


def _expand_deltas(deltas, idx, shape):
    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
    If idx is of type range, the deltas are not changed

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
    shape : int
        Number of configs in idx.
    """
    if isinstance(idx, range):
        return deltas
    else:
        ret = np.zeros(idx[-1] - idx[0] + 1)
        for i in range(shape):
            ret[idx[i] - idx[0]] = deltas[i]
        return ret


def _merge_idx(idl):
    """Returns the union of all lists in idl as sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """

    # Use groupby to efficiently check whether all elements of idl are identical
    try:
        g = groupby(idl)
        if next(g, True) and not next(g, False):
            return idl[0]
    except Exception:
        pass

    if np.all([type(idx) is range for idx in idl]):
        if len(set([idx[0] for idx in idl])) == 1:
            idstart = min([idx.start for idx in idl])
            idstop = max([idx.stop for idx in idl])
            idstep = min([idx.step for idx in idl])
            return range(idstart, idstop, idstep)

    return sorted(set().union(*idl))


def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
    New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
    common divisor of the step sizes is used as new step size.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined.
        Has to be a subset of new_idx and has to be sorted in ascending order.
    shape : list
        Number of configs in idx.
    new_idx : list
        List of configs that defines the new range, has to be sorted in ascending order.
    """

    if type(idx) is range and type(new_idx) is range:
        if idx == new_idx:
            return deltas
    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
    for i in range(shape):
        ret[idx[i] - new_idx[0]] = deltas[i]
    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])


def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
    """Filter out all configurations with vanishing fluctuation such that they do not
    contribute to the error estimate anymore. Returns the new deltas and
    idx according to the filtering.
    A fluctuation is considered to be vanishing, if it is smaller than eps times
    the mean of the absolute values of all deltas in one list.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or ranges of configs on which the deltas are defined.
    eps : float
        Prefactor that enters the filter criterion.
    """
    new_deltas = []
    new_idx = []
    maxd = np.mean(np.fabs(deltas))
    for i in range(len(deltas)):
        if abs(deltas[i]) > eps * maxd:
            new_deltas.append(deltas[i])
            new_idx.append(idx[i])
    if new_idx:
        return np.array(new_deltas), new_idx
    else:
        return deltas, idx


def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                if is_merged[name]:
                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
                else:
                    filtered_deltas = new_deltas[name]
                    filtered_idl_d = new_idl_d[name]

                new_samples.append(filtered_deltas)
                new_idl.append(filtered_idl_d)
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        final_result = final_result.item()

    return final_result


def _reduce_deltas(deltas, idx_old, idx_new):
    """Extract deltas defined on idx_old on all configs of idx_new.

    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
    are ordered in an ascending order.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx_old : list
        List or range of configs on which the deltas are defined
    idx_new : list
        List of configs for which we want to extract the deltas.
        Has to be a subset of idx_old.
    """
    if not len(deltas) == len(idx_old):
        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
    if type(idx_old) is range and type(idx_new) is range:
        if idx_old == idx_new:
            return deltas
    shape = len(idx_new)
    ret = np.zeros(shape)
    oldpos = 0
    for i in range(shape):
        pos = -1
        for j in range(oldpos, len(idx_old)):
            if idx_old[j] == idx_new[i]:
                pos = j
                break
        if pos < 0:
            raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i]))
        ret[i] = deltas[pos]
        oldpos = pos
    return np.array(ret)


def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl.
    """
    result = []
    for i in range(len(obs)):
        if len(obs[i].cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(obs[i].names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in obs[i].names:
            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
        new_samples = []
        w_deltas = {}
        for name in sorted(obs[i].names):
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        if kwargs.get('all_configs'):
            new_weight = weight
        else:
            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs))
        result[-1].reweighted = True
        result[-1].is_merged = obs[i].is_merged

    return result


def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception('Ensembles do not fit')
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception('Shapes of ensemble', name, 'do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception('idl of ensemble', name, 'do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o


def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the covariance matrix of a set of observables.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation instead of the covariance is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The covariance is estimated by calculating the correlation matrix assuming no autocorrelation
    and then rescaling the correlation matrix by the full errors including the previous gamma method
    estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed
    to be positive semi-definite
    $$v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$
    for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that
    the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of
    the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite
    (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    cov = np.zeros((length, length))
    for i in range(length):
        for j in range(i, length):
            cov[i, j] = _covariance_element(obs[i], obs[j])
    cov = cov + cov.T - np.diag(np.diag(cov))

    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    errors = [o.dvalue for o in obs]
    cov = np.diag(errors) @ corr @ np.diag(errors)

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr
    else:
        return cov


def _smooth_eigenvalues(corr, E):
    """Eigenvalue smoothing as described in hep-lat/9412087

    corr : np.ndarray
        correlation matrix
    E : integer
        Number of eigenvalues to be left substantially unchanged
    """
    if not (2 < E < corr.shape[0] - 1):
        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
    vals, vec = np.linalg.eigh(corr)
    lambda_min = np.mean(vals[:-E])
    vals[vals < lambda_min] = lambda_min
    vals /= np.mean(vals)
    return vec @ np.diag(vals) @ vec.T


def _covariance_element(obs1, obs2):
    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""

    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
        deltas1 = _expand_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
        deltas2 = _expand_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
        return np.sum(deltas1 * deltas2)

    if set(obs1.names).isdisjoint(set(obs2.names)):
        return 0.0

    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
        raise Exception('The gamma method has to be applied to both Obs first.')

    dvalue = 0.0

    for e_name in obs1.mc_names:

        if e_name not in obs2.mc_names:
            continue

        idl_d = {}
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]])

        gamma = 0.0

        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])

        if gamma == 0.0:
            continue

        gamma_div = 0.0
        e_N = 0
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
            e_N += len(idl_d[r_name])
        gamma /= max(gamma_div, 1.0)

        # Bias correction hep-lat/0306017 eq. (49)
        dvalue += (1 + 1 / e_N) * gamma / e_N

    for e_name in obs1.cov_names:

        if e_name not in obs2.cov_names:
            continue

        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))

    return dvalue


def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    """
    length = len(jacks) - 1
    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
    samples = jacks[1:] @ prj
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    new_obs._value = jacks[0]
    return new_obs


def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs objects to be combined

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if (len(replist) == len(set(replist))) is False:
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any([len(o.cov_names) for o in list_of_obs]):
        raise Exception('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
    return o


def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    ol = []
    if isinstance(means, (float, int)):
        means = [means]

    for i in range(len(means)):
        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
    if ol[0].covobs[name].N != len(means):
        raise Exception('You have to provide %d mean values!' % (ol[0].N))
    if len(ol) == 1:
        return ol[0]
    return ol
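A minimal usage sketch (not part of the module source; the ensemble label and the synthetic data are placeholders) showing how an Obs is constructed from samples and how the gamma method estimates its error, as described in the docstrings above:

import numpy as np
from pyerrors.obs import Obs

# 1000 synthetic measurements on a single ensemble; the label
# 'ensemble1' is an arbitrary placeholder.
samples = np.random.default_rng(seed=1).normal(1.0, 0.1, 1000)
my_obs = Obs([samples], ['ensemble1'])

my_obs.gamma_method()               # automatic windowing with the default S=2.0
print(my_obs.value, my_obs.dvalue)  # mean value and statistical error
my_obs.details()                    # prints result, tau_int and ensemble content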
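Error propagation can be written two equivalent ways: the overloaded operators above call derived_observable internally with manual gradients, and the explicit lambda form is shown in the Notes section of derived_observable. A sketch, reusing `my_obs` from the previous snippet:

from pyerrors.obs import derived_observable

# Propagate the error through a ratio; both lines produce the same Obs.
ratio_implicit = my_obs / (1 + my_obs)  # operator overloads supply man_grad
ratio_explicit = derived_observable(lambda x: x[0] / (1 + x[1]), [my_obs, my_obs])

ratio_implicit.gamma_method()
ratio_explicit.gamma_method()
print(ratio_implicit.value, ratio_implicit.dvalue)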
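Finally, a sketch of the covariance and jackknife interfaces defined above (again with synthetic placeholder data; note that covariance requires the gamma method to have been applied to all observables first):

import numpy as np
from pyerrors.obs import Obs, covariance, import_jackknife

# Two correlated observables defined on the same ensemble.
rng = np.random.default_rng(seed=2)
raw = rng.normal(1.0, 0.1, (2, 1000))
obs_a = Obs([raw[0]], ['ensemble1'])
obs_b = Obs([0.5 * raw[0] + 0.5 * raw[1]], ['ensemble1'])
obs_a.gamma_method()
obs_b.gamma_method()

cov = covariance([obs_a, obs_b])                     # 2x2 covariance matrix
corr = covariance([obs_a, obs_b], correlation=True)  # correlation matrix instead

jacks = obs_a.export_jackknife()                     # [mean, jack_1, ..., jack_N]
reconstructed = import_jackknife(jacks, 'ensemble1')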
class Obs:
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble. A sketch of these per-ensemble defaults follows below.
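The class-level dictionaries make it possible to set analysis parameters once per ensemble instead of passing them to every gamma_method call. A minimal sketch, assuming a hypothetical ensemble name 'ensemble_A' and synthetic data:

import numpy as np
from pyerrors.obs import Obs

Obs.S_dict['ensemble_A'] = 3.0        # overrides S_global for this ensemble
Obs.tau_exp_dict['ensemble_A'] = 5.0  # enables the tail attachment for it

rng = np.random.default_rng(0)
obs = Obs([rng.normal(0.0, 1.0, 1000)], ['ensemble_A'])
obs.gamma_method()  # picks up S=3.0 and tau_exp=5.0 from the dictionaries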
def __init__(self, samples, names, idl=None, **kwargs):
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined. A construction sketch follows below.
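A minimal construction sketch with synthetic data; the ensemble name and the idl range are illustrative:

import numpy as np
from pyerrors.obs import Obs

rng = np.random.default_rng(1)
samples = rng.normal(0.5, 0.05, 200)

# The observable was measured on every 4th configuration of the chain
obs = Obs([samples], ['test_ensemble'], idl=[range(1, 801, 4)])
print(obs.value)  # plain mean of the samples; errors require gamma_method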
def gamma_method(self, **kwargs):
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero at which the tail is attached to the autocorrelation function (default 1.0).
- fft (bool): determines whether the FFT algorithm is used for the computation of the autocorrelation function (default True). A usage sketch follows below.
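A short usage sketch with synthetic data (the ensemble name is illustrative):

import numpy as np
from pyerrors.obs import Obs

rng = np.random.default_rng(2)
obs = Obs([rng.normal(1.2, 0.1, 1000)], ['ensemble_A'])

obs.gamma_method()             # automatic windowing with the default S=2.0
print(obs.value, obs.dvalue)

obs.gamma_method(S=0)                    # no autocorrelation: plain standard error
obs.gamma_method(tau_exp=10, N_sigma=2)  # critical slowing down analysis
obs.details()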
def details(self, ens_content=True):
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
def is_zero_within_error(self, sigma=1):
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
This check only works properly after the gamma method has been run.
def is_zero(self, atol=1e-10):
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation). A sketch of both zero checks follows below.
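A sketch of both zero checks on synthetic data:

import numpy as np
from pyerrors.obs import Obs

rng = np.random.default_rng(3)
obs1 = Obs([rng.normal(0.7, 0.05, 1000)], ['ensemble_A'])
obs2 = Obs([rng.normal(0.7, 0.05, 1000)], ['ensemble_B'])

diff = obs1 - obs2
diff.gamma_method()  # required before the sigma-based check
print(diff.is_zero_within_error(sigma=2))  # True if |value| <= 2 * dvalue
print((obs1 - obs1).is_zero())             # exact cancellation, True up to atol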
def plot_tauint(self, save=None):
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): if specified, saves the figure to a file named 'save'. See the sketch below.
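A minimal sketch with synthetic data and an illustrative file prefix; plot_rho below accepts the same argument:

import numpy as np
from pyerrors.obs import Obs

rng = np.random.default_rng(4)
obs = Obs([rng.normal(0.3, 0.02, 1000)], ['ensemble_A'])
obs.gamma_method()              # the gamma method has to be run before plotting
obs.plot_tauint(save='tauint')  # one file per ensemble, suffixed '_0', '_1', ...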
def plot_rho(self, save=None):
Plot the normalized autocorrelation function for each ensemble.
Parameters
- save (str): if specified, saves the figure to a file named 'save'.
def plot_rep_dist(self):
Plot replica distribution for each ensemble with more than one replicum.
def plot_history(self, expand=True):
Plot derived Monte Carlo history for each ensemble.
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True). A usage sketch follows below.
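A sketch with a synthetic, irregular Monte Carlo chain:

import numpy as np
from pyerrors.obs import Obs

rng = np.random.default_rng(5)
idx = sorted(rng.choice(np.arange(1, 2001), size=500, replace=False))
obs = Obs([rng.normal(0.0, 1.0, 500)], ['ensemble_A'], idl=[idx])

obs.plot_history()              # gaps in the chain are padded (expand=True)
obs.plot_history(expand=False)  # raw samples plotted back to back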
def plot_piechart(self, save=None):
Plot a pie chart showing the fractional contribution of each ensemble to the total error; returns a dictionary containing the fractions.
Parameters
- save (str): if specified, saves the figure to a file named 'save'. A usage sketch follows below.
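A sketch with two synthetic ensembles contributing to the error:

import numpy as np
from pyerrors.obs import Obs

rng = np.random.default_rng(6)
obs = (Obs([rng.normal(1.0, 0.1, 1000)], ['ensemble_A'])
       + Obs([rng.normal(0.5, 0.2, 1000)], ['ensemble_B']))
obs.gamma_method()
fractions = obs.plot_piechart()
print(fractions)  # e.g. {'ensemble_A': ..., 'ensemble_B': ...}, summing to 1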
def dump(self, filename, datatype="json.gz", description="", **kwargs):
Dump the Obs to a file 'filename' in the chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.'). A usage sketch follows below.
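A minimal sketch; the file names are illustrative and the custom directory is assumed to exist:

import numpy as np
from pyerrors.obs import Obs

rng = np.random.default_rng(7)
obs = Obs([rng.normal(0.0, 1.0, 1000)], ['ensemble_A'])

obs.dump('my_obs')                                  # json.gz format (default)
obs.dump('my_obs', datatype='pickle', path='data')  # writes data/my_obs.p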
def export_jackknife(self):
Export jackknife samples from the Obs.
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N). A round-trip sketch follows below.
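A round-trip sketch with synthetic data, using the module-level import_jackknife from this module:

import numpy as np
from pyerrors.obs import Obs, import_jackknife

rng = np.random.default_rng(8)
obs = Obs([rng.normal(1.0, 0.1, 500)], ['ensemble_A'])

jacks = obs.export_jackknife()  # [mean, jack_1, ..., jack_N]
reconstructed = import_jackknife(jacks, 'ensemble_A')
reconstructed.gamma_method()
# reconstructed.value equals obs.value; errors agree up to O(1/N)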
class CObs:
Class for a complex valued observable.
View Source
Executes the gamma_method for the real and the imaginary part.
View Source
Checks whether both real and imaginary part are zero within machine precision.
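As an illustration, a minimal sketch of complex arithmetic with CObs; the ensemble name and the toy sample data are made up for this example:

import numpy as np
from pyerrors.obs import Obs, CObs

rng = np.random.default_rng(1)
re1 = Obs([rng.normal(1.0, 0.1, 500)], ['ensemble1'])   # toy real part
im1 = Obs([rng.normal(0.5, 0.1, 500)], ['ensemble1'])   # toy imaginary part
re2 = Obs([rng.normal(0.8, 0.1, 500)], ['ensemble1'])
im2 = Obs([rng.normal(-0.3, 0.1, 500)], ['ensemble1'])

prod = CObs(re1, im1) * CObs(re2, im2)   # complex product of two observables
prod.gamma_method()                      # error estimation for real and imaginary part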
View Source
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                if is_merged[name]:
                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
                else:
                    filtered_deltas = new_deltas[name]
                    filtered_idl_d = new_idl_d[name]

                new_samples.append(filtered_deltas)
                new_idl.append(filtered_idl_d)
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        final_result = final_result.item()

    return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
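As a sketch of typical usage (toy sample data, made-up ensemble name), two invocations: the ratio from the Notes above, and a nonlinear function where the anp wrapper is needed so autograd can differentiate it:

import numpy as np
import autograd.numpy as anp  # autograd-wrapped numpy
from pyerrors.obs import Obs, derived_observable

rng = np.random.default_rng(7)
obs1 = Obs([rng.normal(2.0, 0.2, 1000)], ['ensemble1'])
obs2 = Obs([rng.normal(1.0, 0.1, 1000)], ['ensemble1'])

ratio = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
log_obs = derived_observable(lambda x: anp.log(x[0]), [obs1])  # anp, not np
log_obs.gamma_method()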
View Source
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl.
    """
    result = []
    for i in range(len(obs)):
        if len(obs[i].cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(obs[i].names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in obs[i].names:
            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
        new_samples = []
        w_deltas = {}
        for name in sorted(obs[i].names):
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        if kwargs.get('all_configs'):
            new_weight = weight
        else:
            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs))
        result[-1].reweighted = True
        result[-1].is_merged = obs[i].is_merged

    return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl.
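A minimal usage sketch with made-up data; here the weight is defined on the same configurations as the observable, so the subset condition is trivially satisfied:

import numpy as np
from pyerrors.obs import Obs, reweight

rng = np.random.default_rng(3)
w = Obs([rng.normal(1.0, 0.05, 1000)], ['ensemble1'])     # toy reweighting factor
my_obs = Obs([rng.normal(0.7, 0.1, 1000)], ['ensemble1'])

rw_obs = reweight(w, [my_obs])[0]  # reweight returns a list, one entry per input Obs
rw_obs.gamma_method()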
View Source
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception('Ensembles do not fit')
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception('Shapes of ensemble', name, 'do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception('idl of ensemble', name, 'do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently this only works if the ensembles are identical (this restriction is not strictly necessary).
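A short sketch with toy data, where both observables live on the same configurations:

import numpy as np
from pyerrors.obs import Obs, correlate

rng = np.random.default_rng(5)
samples = rng.normal(1.0, 0.1, 1000)
obs_a = Obs([samples], ['ensemble1'])
obs_b = Obs([samples ** 2], ['ensemble1'])  # toy second observable on the same configs

ab = correlate(obs_a, obs_b)  # configuration-wise product, keeps the correlations
ab.gamma_method()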
View Source
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the covariance matrix of a set of observables.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation instead of the covariance is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and
    then rescaling the correlation matrix by the full errors including the previous gamma method
    estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed
    to be positive semi-definite
    $$v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$
    for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that
    the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of
    the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to
    numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    cov = np.zeros((length, length))
    for i in range(length):
        for j in range(i, length):
            cov[i, j] = _covariance_element(obs[i], obs[j])
    cov = cov + cov.T - np.diag(np.diag(cov))

    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    errors = [o.dvalue for o in obs]
    cov = np.diag(errors) @ corr @ np.diag(errors)

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr
    else:
        return cov
Calculates the covariance matrix of a set of observables.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation instead of the covariance is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors, including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite,
$$v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$
for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements,
$$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}\,.$$
This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
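A minimal sketch with two artificially correlated toy observables; gamma_method has to be called on every entry first:

import numpy as np
from pyerrors.obs import Obs, covariance

rng = np.random.default_rng(11)
base = rng.normal(0.0, 1.0, 1000)
obs1 = Obs([1.0 + 0.1 * base + 0.05 * rng.normal(size=1000)], ['ensemble1'])
obs2 = Obs([2.0 - 0.1 * base + 0.05 * rng.normal(size=1000)], ['ensemble1'])
for o in (obs1, obs2):
    o.gamma_method()  # required before calling covariance

cov = covariance([obs1, obs2])                     # 2x2 covariance matrix
corr = covariance([obs1, obs2], correlation=True)  # normalized correlation matrix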
View Source
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list, optional
        list of ranges or lists on which the samples are defined.
    """
    length = len(jacks) - 1
    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
    samples = jacks[1:] @ prj
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    new_obs._value = jacks[0]
    return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
- idl (list, optional): list of ranges or lists on which the samples are defined.
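A round-trip sketch with toy data, assuming the companion method Obs.export_jackknife, which produces exactly this layout (mean as zeroth entry, then the N jackknife samples):

import numpy as np
from pyerrors.obs import Obs, import_jackknife

rng = np.random.default_rng(2)
my_obs = Obs([rng.normal(1.0, 0.1, 500)], ['ensemble1'])

jacks = my_obs.export_jackknife()  # mean first, then the N jackknife samples
reconstructed = import_jackknife(jacks, 'ensemble1')
reconstructed.gamma_method()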
View Source
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs objects to be combined

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if (len(replist) == len(set(replist))) is False:
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any([len(o.cov_names) for o in list_of_obs]):
        raise Exception('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
    return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs objects to be combined
Notes
It is not possible to combine obs which are based on the same replicum
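A sketch combining two replica of one ensemble, using the 'ensemble|replicum' naming convention (toy data):

import numpy as np
from pyerrors.obs import Obs, merge_obs

rng = np.random.default_rng(9)
obs_r1 = Obs([rng.normal(1.0, 0.1, 500)], ['ensemble1|r01'])
obs_r2 = Obs([rng.normal(1.0, 0.1, 500)], ['ensemble1|r02'])

combined = merge_obs([obs_r1, obs_r2])  # one Obs defined on both replica
combined.gamma_method()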
View Source
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    ol = []
    if isinstance(means, (float, int)):
        means = [means]

    for i in range(len(means)):
        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
    if ol[0].covobs[name].N != len(means):
        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
    if len(ol) == 1:
        return ol[0]
    return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- means (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.
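Two short sketches with made-up numbers; note that cov is the (co)variance, i.e. the squared error in the scalar case:

from pyerrors.obs import cov_Obs

# Single external input: mean 0.92 with a standard error of 0.01
renorm = cov_Obs(0.92, 0.01 ** 2, 'Z_factor')

# Two correlated inputs described by a 2x2 covariance matrix
a, b = cov_Obs([1.0, 2.0], [[0.01, 0.002], [0.002, 0.02]], 'external_input')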