pyerrors.obs

   1import warnings
   2import hashlib
   3import pickle
   4from math import gcd
   5from functools import reduce
   6import numpy as np
   7import autograd.numpy as anp  # Thinly-wrapped numpy
   8from autograd import jacobian
   9import matplotlib.pyplot as plt
  10from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  11import numdifftools as nd
  12from itertools import groupby
  13from .covobs import Covobs
  14
  15# Improve print output of numpy.ndarrays containing Obs objects.
  16np.set_printoptions(formatter={'object': lambda x: str(x)})
  17
  18
  19class Obs:
  20    """Class for a general observable.
  21
  22    Instances of Obs are the basic objects of a pyerrors error analysis.
  23    They are initialized with a list which contains arrays of samples for
  24    different ensembles/replica and another list of same length which contains
  25    the names of the ensembles/replica. Mathematical operations can be
  26    performed on instances. The result is another instance of Obs. The error of
  27    an instance can be computed with the gamma_method. Also contains additional
  28    methods for output and visualization of the error calculation.
  29
  30    Attributes
  31    ----------
  32    S_global : float
  33        Standard value for S (default 2.0)
  34    S_dict : dict
  35        Dictionary for S values. If an entry for a given ensemble
  36        exists this overwrites the standard value for that ensemble.
  37    tau_exp_global : float
  38        Standard value for tau_exp (default 0.0)
  39    tau_exp_dict : dict
  40        Dictionary for tau_exp values. If an entry for a given ensemble exists
  41        this overwrites the standard value for that ensemble.
  42    N_sigma_global : float
  43        Standard value for N_sigma (default 1.0)
  44    N_sigma_dict : dict
  45        Dictionary for N_sigma values. If an entry for a given ensemble exists
  46        this overwrites the standard value for that ensemble.
  47    """
  48    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  49                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  50                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  51                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  52                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
  53
  54    S_global = 2.0
  55    S_dict = {}
  56    tau_exp_global = 0.0
  57    tau_exp_dict = {}
  58    N_sigma_global = 1.0
  59    N_sigma_dict = {}
  60    filter_eps = 1e-10
  61
  62    def __init__(self, samples, names, idl=None, **kwargs):
  63        """ Initialize Obs object.
  64
  65        Parameters
  66        ----------
  67        samples : list
  68            list of numpy arrays containing the Monte Carlo samples
  69        names : list
  70            list of strings labeling the individual samples
  71        idl : list, optional
  72            list of ranges or lists on which the samples are defined
  73        """
  74
  75        if kwargs.get("means") is None and len(samples):
  76            if len(samples) != len(names):
  77                raise Exception('Length of samples and names incompatible.')
  78            if idl is not None:
  79                if len(idl) != len(names):
  80                    raise Exception('Length of idl incompatible with samples and names.')
  81            name_length = len(names)
  82            if name_length > 1:
  83                if name_length != len(set(names)):
  84                    raise Exception('names are not unique.')
  85                if not all(isinstance(x, str) for x in names):
  86                    raise TypeError('All names have to be strings.')
  87            else:
  88                if not isinstance(names[0], str):
  89                    raise TypeError('All names have to be strings.')
  90            if min(len(x) for x in samples) <= 4:
  91                raise Exception('Samples have to have at least 5 entries.')
  92
  93        self.names = sorted(names)
  94        self.shape = {}
  95        self.r_values = {}
  96        self.deltas = {}
  97        self._covobs = {}
  98
  99        self._value = 0
 100        self.N = 0
 101        self.is_merged = {}
 102        self.idl = {}
 103        if idl is not None:
 104            for name, idx in sorted(zip(names, idl)):
 105                if isinstance(idx, range):
 106                    self.idl[name] = idx
 107                elif isinstance(idx, (list, np.ndarray)):
 108                    dc = np.unique(np.diff(idx))
 109                    if np.any(dc < 0):
 110                        raise Exception("Unsorted idx for idl[%s]" % (name))
 111                    if len(dc) == 1:
 112                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 113                    else:
 114                        self.idl[name] = list(idx)
 115                else:
 116                    raise Exception('incompatible type for idl[%s].' % (name))
 117        else:
 118            for name, sample in sorted(zip(names, samples)):
 119                self.idl[name] = range(1, len(sample) + 1)
 120
 121        if kwargs.get("means") is not None:
 122            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 123                self.shape[name] = len(self.idl[name])
 124                self.N += self.shape[name]
 125                self.r_values[name] = mean
 126                self.deltas[name] = sample
 127        else:
 128            for name, sample in sorted(zip(names, samples)):
 129                self.shape[name] = len(self.idl[name])
 130                self.N += self.shape[name]
 131                if len(sample) != self.shape[name]:
 132                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 133                self.r_values[name] = np.mean(sample)
 134                self.deltas[name] = sample - self.r_values[name]
 135                self._value += self.shape[name] * self.r_values[name]
 136            self._value /= self.N
 137
 138        self._dvalue = 0.0
 139        self.ddvalue = 0.0
 140        self.reweighted = False
 141
 142        self.tag = None
 143
 144    @property
 145    def value(self):
 146        return self._value
 147
 148    @property
 149    def dvalue(self):
 150        return self._dvalue
 151
 152    @property
 153    def e_names(self):
 154        return sorted(set([o.split('|')[0] for o in self.names]))
 155
 156    @property
 157    def cov_names(self):
 158        return sorted(set([o for o in self.covobs.keys()]))
 159
 160    @property
 161    def mc_names(self):
 162        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 163
 164    @property
 165    def e_content(self):
 166        res = {}
 167        for e, e_name in enumerate(self.e_names):
 168            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 169            if e_name in self.names:
 170                res[e_name].append(e_name)
 171        return res
 172
 173    @property
 174    def covobs(self):
 175        return self._covobs
 176
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        The per-ensemble results are stored in the dictionaries e_dvalue,
        e_ddvalue, e_tauint, e_dtauint, e_windowsize, e_rho, e_drho,
        e_n_tauint and e_n_dtauint; the combined error and its error are
        stored in _dvalue and ddvalue. The parameters actually used are kept
        in the dictionaries S, tau_exp and N_sigma. All of these are reset at
        the beginning of every call.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimates coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)

        Raises
        ------
        Exception
            If S, tau_exp or N_sigma is negative, if the replica of an
            ensemble are not equally spaced, or if tau_exp > 0 and the
            summation window is too short (w_max // 2 <= 1).
        TypeError
            If S, tau_exp or N_sigma is passed but is neither int nor float.
        """

        e_content = self.e_content
        # Reset all results of a previous run.
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        # Only an explicit fft=False disables the FFT; a missing keyword or
        # any other value keeps it enabled.
        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            """Fill self.<kwarg_name> with one value per ensemble.

            A value passed as keyword argument is used for all ensembles.
            Otherwise the entry of the class-level <kwarg_name>_dict is used
            if present, and the class-level <kwarg_name>_global if not.
            """
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        # Monte Carlo ensembles: error from the autocorrelation function.
        for e, e_name in enumerate(self.mc_names):
            # Extent of every replicum: number of entries for a range,
            # otherwise the span from first to last configuration number.
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            # Total number of configurations of the ensemble.
            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            # Largest lag considered: half the extent of the longest replicum.
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            # Sum the unnormalised autocorrelation function over all replica.
            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # Normalisation per lag: the same routine applied to arrays of
            # ones. Entries below 1 are set to 1 so the division is defined.
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                # Vanishing variance: this ensemble contributes no error.
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            # Spacing of the measurements: 1 for a range idl, otherwise the
            # smallest difference between consecutive configuration numbers.
            gaps = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    gaps.append(1)
                else:
                    gaps.append(np.min(np.diff(self.idl[r_name])))

            if not np.all([gi == gaps[0] for gi in gaps]):
                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
            else:
                gapsize = gaps[0]

            # Normalised autocorrelation function and its running sum
            # (integrated autocorrelation time as function of the window).
            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                """Compute the error of rho at lag i and store it in self.e_drho[e_name][i]."""
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(gapsize)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(gapsize, w_max // 2, gapsize):
                    _compute_drho(n + gapsize)
                    # Stop at the first lag where rho is compatible with zero
                    # within N_sigma errors, or close to the end of the range.
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # No autocorrelation assumed: tau_int fixed to 0.5 and the
                    # error computed from gamma at lag zero only.
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    # n counts windows in units of gapsize here.
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(gapsize * n + gapsize)
                        # The window is the first n at which g_w turns
                        # negative (or the last admissible n).
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            # Convert to an index into the per-lag arrays.
                            n *= gapsize
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Accumulate the squared errors of the individual ensembles.
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        # Covobs entries contribute their own squared error, with no error
        # of the error.
        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        # Up to here _dvalue and ddvalue hold sums of squares.
        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

    # Short alias for gamma_method.
    gm = gamma_method
 353
 354    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
 355        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 356           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 357
 358        Parameters
 359        ----------
 360        deltas : list
 361            List of fluctuations
 362        idx : list
 363            List or range of configurations on which the deltas are defined.
 364        shape : int
 365            Number of configurations in idx.
 366        w_max : int
 367            Upper bound for the summation window.
 368        fft : bool
 369            determines whether the fft algorithm is used for the computation
 370            of the autocorrelation function.
 371        """
 372        gamma = np.zeros(w_max)
 373        deltas = _expand_deltas(deltas, idx, shape)
 374        new_shape = len(deltas)
 375        if fft:
 376            max_gamma = min(new_shape, w_max)
 377            # The padding for the fft has to be even
 378            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 379            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 380        else:
 381            for n in range(w_max):
 382                if new_shape - n >= 0:
 383                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 384
 385        return gamma
 386
 387    def details(self, ens_content=True):
 388        """Output detailed properties of the Obs.
 389
 390        Parameters
 391        ----------
 392        ens_content : bool
 393            print details about the ensembles and replica if true.
 394        """
 395        if self.tag is not None:
 396            print("Description:", self.tag)
 397        if not hasattr(self, 'e_dvalue'):
 398            print('Result\t %3.8e' % (self.value))
 399        else:
 400            if self.value == 0.0:
 401                percentage = np.nan
 402            else:
 403                percentage = np.abs(self._dvalue / self.value) * 100
 404            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 405            if len(self.e_names) > 1:
 406                print(' Ensemble errors:')
 407            e_content = self.e_content
 408            for e_name in self.mc_names:
 409                if isinstance(self.idl[e_content[e_name][0]], range):
 410                    gap = self.idl[e_content[e_name][0]].step
 411                else:
 412                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
 413
 414                if len(self.e_names) > 1:
 415                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 416                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 417                tau_string += f" in units of {gap} config"
 418                if gap > 1:
 419                    tau_string += "s"
 420                if self.tau_exp[e_name] > 0:
 421                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 422                else:
 423                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 424                print(tau_string)
 425            for e_name in self.cov_names:
 426                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 427        if ens_content is True:
 428            if len(self.e_names) == 1:
 429                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 430            else:
 431                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 432            my_string_list = []
 433            for key, value in sorted(self.e_content.items()):
 434                if key not in self.covobs:
 435                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 436                    if len(value) == 1:
 437                        my_string += f': {self.shape[value[0]]} configurations'
 438                        if isinstance(self.idl[value[0]], range):
 439                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 440                        else:
 441                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 442                    else:
 443                        sublist = []
 444                        for v in value:
 445                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 446                            my_substring += f': {self.shape[v]} configurations'
 447                            if isinstance(self.idl[v], range):
 448                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 449                            else:
 450                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 451                            sublist.append(my_substring)
 452
 453                        my_string += '\n' + '\n'.join(sublist)
 454                else:
 455                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 456                my_string_list.append(my_string)
 457            print('\n'.join(my_string_list))
 458
 459    def reweight(self, weight):
 460        """Reweight the obs with given rewighting factors.
 461
 462        Parameters
 463        ----------
 464        weight : Obs
 465            Reweighting factor. An Observable that has to be defined on a superset of the
 466            configurations in obs[i].idl for all i.
 467        all_configs : bool
 468            if True, the reweighted observables are normalized by the average of
 469            the reweighting factor on all configurations in weight.idl and not
 470            on the configurations in obs[i].idl. Default False.
 471        """
 472        return reweight(weight, [self])[0]
 473
 474    def is_zero_within_error(self, sigma=1):
 475        """Checks whether the observable is zero within 'sigma' standard errors.
 476
 477        Parameters
 478        ----------
 479        sigma : int
 480            Number of standard errors used for the check.
 481
 482        Works only properly when the gamma method was run.
 483        """
 484        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 485
 486    def is_zero(self, atol=1e-10):
 487        """Checks whether the observable is zero within a given tolerance.
 488
 489        Parameters
 490        ----------
 491        atol : float
 492            Absolute tolerance (for details see numpy documentation).
 493        """
 494        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 495
 496    def plot_tauint(self, save=None):
 497        """Plot integrated autocorrelation time for each ensemble.
 498
 499        Parameters
 500        ----------
 501        save : str
 502            saves the figure to a file named 'save' if.
 503        """
 504        if not hasattr(self, 'e_dvalue'):
 505            raise Exception('Run the gamma method first.')
 506
 507        for e, e_name in enumerate(self.mc_names):
 508            fig = plt.figure()
 509            plt.xlabel(r'$W$')
 510            plt.ylabel(r'$\tau_\mathrm{int}$')
 511            length = int(len(self.e_n_tauint[e_name]))
 512            if self.tau_exp[e_name] > 0:
 513                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 514                x_help = np.arange(2 * self.tau_exp[e_name])
 515                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 516                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 517                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 518                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 519                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 520                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 521                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 522            else:
 523                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 524                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 525
 526            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 527            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 528            plt.legend()
 529            plt.xlim(-0.5, xmax)
 530            ylim = plt.ylim()
 531            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 532            plt.draw()
 533            if save:
 534                fig.savefig(save + "_" + str(e))
 535
 536    def plot_rho(self, save=None):
 537        """Plot normalized autocorrelation function time for each ensemble.
 538
 539        Parameters
 540        ----------
 541        save : str
 542            saves the figure to a file named 'save' if.
 543        """
 544        if not hasattr(self, 'e_dvalue'):
 545            raise Exception('Run the gamma method first.')
 546        for e, e_name in enumerate(self.mc_names):
 547            fig = plt.figure()
 548            plt.xlabel('W')
 549            plt.ylabel('rho')
 550            length = int(len(self.e_drho[e_name]))
 551            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 552            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 553            if self.tau_exp[e_name] > 0:
 554                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 555                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 556                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 557                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 558            else:
 559                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 560                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 561            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 562            plt.xlim(-0.5, xmax)
 563            plt.draw()
 564            if save:
 565                fig.savefig(save + "_" + str(e))
 566
 567    def plot_rep_dist(self):
 568        """Plot replica distribution for each ensemble with more than one replicum."""
 569        if not hasattr(self, 'e_dvalue'):
 570            raise Exception('Run the gamma method first.')
 571        for e, e_name in enumerate(self.mc_names):
 572            if len(self.e_content[e_name]) == 1:
 573                print('No replica distribution for a single replicum (', e_name, ')')
 574                continue
 575            r_length = []
 576            sub_r_mean = 0
 577            for r, r_name in enumerate(self.e_content[e_name]):
 578                r_length.append(len(self.deltas[r_name]))
 579                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 580            e_N = np.sum(r_length)
 581            sub_r_mean /= e_N
 582            arr = np.zeros(len(self.e_content[e_name]))
 583            for r, r_name in enumerate(self.e_content[e_name]):
 584                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 585            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 586            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 587            plt.draw()
 588
 589    def plot_history(self, expand=True):
 590        """Plot derived Monte Carlo history for each ensemble
 591
 592        Parameters
 593        ----------
 594        expand : bool
 595            show expanded history for irregular Monte Carlo chains (default: True).
 596        """
 597        for e, e_name in enumerate(self.mc_names):
 598            plt.figure()
 599            r_length = []
 600            tmp = []
 601            tmp_expanded = []
 602            for r, r_name in enumerate(self.e_content[e_name]):
 603                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 604                if expand:
 605                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
 606                    r_length.append(len(tmp_expanded[-1]))
 607                else:
 608                    r_length.append(len(tmp[-1]))
 609            e_N = np.sum(r_length)
 610            x = np.arange(e_N)
 611            y_test = np.concatenate(tmp, axis=0)
 612            if expand:
 613                y = np.concatenate(tmp_expanded, axis=0)
 614            else:
 615                y = y_test
 616            plt.errorbar(x, y, fmt='.', markersize=3)
 617            plt.xlim(-0.5, e_N - 0.5)
 618            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 619            plt.draw()
 620
 621    def plot_piechart(self, save=None):
 622        """Plot piechart which shows the fractional contribution of each
 623        ensemble to the error and returns a dictionary containing the fractions.
 624
 625        Parameters
 626        ----------
 627        save : str
 628            saves the figure to a file named 'save' if.
 629        """
 630        if not hasattr(self, 'e_dvalue'):
 631            raise Exception('Run the gamma method first.')
 632        if np.isclose(0.0, self._dvalue, atol=1e-15):
 633            raise Exception('Error is 0.0')
 634        labels = self.e_names
 635        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 636        fig1, ax1 = plt.subplots()
 637        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 638        ax1.axis('equal')
 639        plt.draw()
 640        if save:
 641            fig1.savefig(save)
 642
 643        return dict(zip(self.e_names, sizes))
 644
 645    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 646        """Dump the Obs to a file 'name' of chosen format.
 647
 648        Parameters
 649        ----------
 650        filename : str
 651            name of the file to be saved.
 652        datatype : str
 653            Format of the exported file. Supported formats include
 654            "json.gz" and "pickle"
 655        description : str
 656            Description for output file, only relevant for json.gz format.
 657        path : str
 658            specifies a custom path for the file (default '.')
 659        """
 660        if 'path' in kwargs:
 661            file_name = kwargs.get('path') + '/' + filename
 662        else:
 663            file_name = filename
 664
 665        if datatype == "json.gz":
 666            from .input.json import dump_to_json
 667            dump_to_json([self], file_name, description=description)
 668        elif datatype == "pickle":
 669            with open(file_name + '.p', 'wb') as fb:
 670                pickle.dump(self, fb)
 671        else:
 672            raise Exception("Unknown datatype " + str(datatype))
 673
 674    def export_jackknife(self):
 675        """Export jackknife samples from the Obs
 676
 677        Returns
 678        -------
 679        numpy.ndarray
 680            Returns a numpy array of length N + 1 where N is the number of samples
 681            for the given ensemble and replicum. The zeroth entry of the array contains
 682            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 683            derived from the Obs. The current implementation only works for observables
 684            defined on exactly one ensemble and replicum. The derived jackknife samples
 685            should agree with samples from a full jackknife analysis up to O(1/N).
 686        """
 687
 688        if len(self.names) != 1:
 689            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 690
 691        name = self.names[0]
 692        full_data = self.deltas[name] + self.r_values[name]
 693        n = full_data.size
 694        mean = self.value
 695        tmp_jacks = np.zeros(n + 1)
 696        tmp_jacks[0] = mean
 697        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 698        return tmp_jacks
 699
 700    def __float__(self):
 701        return float(self.value)
 702
 703    def __repr__(self):
 704        return 'Obs[' + str(self) + ']'
 705
 706    def __str__(self):
 707        return _format_uncertainty(self.value, self._dvalue)
 708
 709    def __hash__(self):
 710        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 711        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 712        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 713        hash_tuple += tuple([o.encode() for o in self.names])
 714        m = hashlib.md5()
 715        [m.update(o) for o in hash_tuple]
 716        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 717
 718    # Overload comparisons
 719    def __lt__(self, other):
 720        return self.value < other
 721
 722    def __le__(self, other):
 723        return self.value <= other
 724
 725    def __gt__(self, other):
 726        return self.value > other
 727
 728    def __ge__(self, other):
 729        return self.value >= other
 730
 731    def __eq__(self, other):
 732        return (self - other).is_zero()
 733
 734    def __ne__(self, other):
 735        return not (self - other).is_zero()
 736
 737    # Overload math operations
 738    def __add__(self, y):
 739        if isinstance(y, Obs):
 740            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 741        else:
 742            if isinstance(y, np.ndarray):
 743                return np.array([self + o for o in y])
 744            elif y.__class__.__name__ in ['Corr', 'CObs']:
 745                return NotImplemented
 746            else:
 747                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 748
 749    def __radd__(self, y):
 750        return self + y
 751
 752    def __mul__(self, y):
 753        if isinstance(y, Obs):
 754            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 755        else:
 756            if isinstance(y, np.ndarray):
 757                return np.array([self * o for o in y])
 758            elif isinstance(y, complex):
 759                return CObs(self * y.real, self * y.imag)
 760            elif y.__class__.__name__ in ['Corr', 'CObs']:
 761                return NotImplemented
 762            else:
 763                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 764
 765    def __rmul__(self, y):
 766        return self * y
 767
 768    def __sub__(self, y):
 769        if isinstance(y, Obs):
 770            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 771        else:
 772            if isinstance(y, np.ndarray):
 773                return np.array([self - o for o in y])
 774            elif y.__class__.__name__ in ['Corr', 'CObs']:
 775                return NotImplemented
 776            else:
 777                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 778
 779    def __rsub__(self, y):
 780        return -1 * (self - y)
 781
 782    def __pos__(self):
 783        return self
 784
 785    def __neg__(self):
 786        return -1 * self
 787
 788    def __truediv__(self, y):
 789        if isinstance(y, Obs):
 790            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 791        else:
 792            if isinstance(y, np.ndarray):
 793                return np.array([self / o for o in y])
 794            elif y.__class__.__name__ in ['Corr', 'CObs']:
 795                return NotImplemented
 796            else:
 797                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 798
 799    def __rtruediv__(self, y):
 800        if isinstance(y, Obs):
 801            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 802        else:
 803            if isinstance(y, np.ndarray):
 804                return np.array([o / self for o in y])
 805            elif y.__class__.__name__ in ['Corr', 'CObs']:
 806                return NotImplemented
 807            else:
 808                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 809
 810    def __pow__(self, y):
 811        if isinstance(y, Obs):
 812            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 813        else:
 814            return derived_observable(lambda x: x[0] ** y, [self])
 815
 816    def __rpow__(self, y):
 817        if isinstance(y, Obs):
 818            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 819        else:
 820            return derived_observable(lambda x: y ** x[0], [self])
 821
 822    def __abs__(self):
 823        return derived_observable(lambda x: anp.abs(x[0]), [self])
 824
 825    # Overload numpy functions
 826    def sqrt(self):
 827        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 828
 829    def log(self):
 830        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 831
 832    def exp(self):
 833        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 834
 835    def sin(self):
 836        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 837
 838    def cos(self):
 839        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 840
 841    def tan(self):
 842        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 843
 844    def arcsin(self):
 845        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 846
 847    def arccos(self):
 848        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 849
 850    def arctan(self):
 851        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 852
 853    def sinh(self):
 854        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 855
 856    def cosh(self):
 857        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 858
 859    def tanh(self):
 860        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 861
 862    def arcsinh(self):
 863        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 864
 865    def arccosh(self):
 866        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 867
 868    def arctanh(self):
 869        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 870
 871
 872class CObs:
 873    """Class for a complex valued observable."""
 874    __slots__ = ['_real', '_imag', 'tag']
 875
 876    def __init__(self, real, imag=0.0):
 877        self._real = real
 878        self._imag = imag
 879        self.tag = None
 880
 881    @property
 882    def real(self):
 883        return self._real
 884
 885    @property
 886    def imag(self):
 887        return self._imag
 888
 889    def gamma_method(self, **kwargs):
 890        """Executes the gamma_method for the real and the imaginary part."""
 891        if isinstance(self.real, Obs):
 892            self.real.gamma_method(**kwargs)
 893        if isinstance(self.imag, Obs):
 894            self.imag.gamma_method(**kwargs)
 895
 896    def is_zero(self):
 897        """Checks whether both real and imaginary part are zero within machine precision."""
 898        return self.real == 0.0 and self.imag == 0.0
 899
 900    def conjugate(self):
 901        return CObs(self.real, -self.imag)
 902
 903    def __add__(self, other):
 904        if isinstance(other, np.ndarray):
 905            return other + self
 906        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 907            return CObs(self.real + other.real,
 908                        self.imag + other.imag)
 909        else:
 910            return CObs(self.real + other, self.imag)
 911
 912    def __radd__(self, y):
 913        return self + y
 914
 915    def __sub__(self, other):
 916        if isinstance(other, np.ndarray):
 917            return -1 * (other - self)
 918        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 919            return CObs(self.real - other.real, self.imag - other.imag)
 920        else:
 921            return CObs(self.real - other, self.imag)
 922
 923    def __rsub__(self, other):
 924        return -1 * (self - other)
 925
 926    def __mul__(self, other):
 927        if isinstance(other, np.ndarray):
 928            return other * self
 929        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 930            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 931                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 932                                               [self.real, other.real, self.imag, other.imag],
 933                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 934                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 935                                               [self.real, other.real, self.imag, other.imag],
 936                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 937            elif getattr(other, 'imag', 0) != 0:
 938                return CObs(self.real * other.real - self.imag * other.imag,
 939                            self.imag * other.real + self.real * other.imag)
 940            else:
 941                return CObs(self.real * other.real, self.imag * other.real)
 942        else:
 943            return CObs(self.real * other, self.imag * other)
 944
 945    def __rmul__(self, other):
 946        return self * other
 947
 948    def __truediv__(self, other):
 949        if isinstance(other, np.ndarray):
 950            return 1 / (other / self)
 951        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 952            r = other.real ** 2 + other.imag ** 2
 953            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 954        else:
 955            return CObs(self.real / other, self.imag / other)
 956
 957    def __rtruediv__(self, other):
 958        r = self.real ** 2 + self.imag ** 2
 959        if hasattr(other, 'real') and hasattr(other, 'imag'):
 960            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 961        else:
 962            return CObs(self.real * other / r, -self.imag * other / r)
 963
 964    def __abs__(self):
 965        return np.sqrt(self.real**2 + self.imag**2)
 966
 967    def __pos__(self):
 968        return self
 969
 970    def __neg__(self):
 971        return -1 * self
 972
 973    def __eq__(self, other):
 974        return self.real == other.real and self.imag == other.imag
 975
 976    def __str__(self):
 977        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 978
 979    def __repr__(self):
 980        return 'CObs[' + str(self) + ']'
 981
 982
 983def _format_uncertainty(value, dvalue):
 984    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
 985    if dvalue == 0.0:
 986        return str(value)
 987    fexp = np.floor(np.log10(dvalue))
 988    if fexp < 0.0:
 989        return '{:{form}}({:2.0f})'.format(value, dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
 990    elif fexp == 0.0:
 991        return '{:.1f}({:1.1f})'.format(value, dvalue)
 992    else:
 993        return '{:.0f}({:2.0f})'.format(value, dvalue)
 994
 995
 996def _expand_deltas(deltas, idx, shape):
 997    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
 998       If idx is of type range, the deltas are not changed
 999
1000    Parameters
1001    ----------
1002    deltas : list
1003        List of fluctuations
1004    idx : list
1005        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1006    shape : int
1007        Number of configs in idx.
1008    """
1009    if isinstance(idx, range):
1010        return deltas
1011    else:
1012        ret = np.zeros(idx[-1] - idx[0] + 1)
1013        for i in range(shape):
1014            ret[idx[i] - idx[0]] = deltas[i]
1015        return ret
1016
1017
1018def _merge_idx(idl):
1019    """Returns the union of all lists in idl as sorted list
1020
1021    Parameters
1022    ----------
1023    idl : list
1024        List of lists or ranges.
1025    """
1026
1027    # Use groupby to efficiently check whether all elements of idl are identical
1028    try:
1029        g = groupby(idl)
1030        if next(g, True) and not next(g, False):
1031            return idl[0]
1032    except Exception:
1033        pass
1034
1035    if np.all([type(idx) is range for idx in idl]):
1036        if len(set([idx[0] for idx in idl])) == 1:
1037            idstart = min([idx.start for idx in idl])
1038            idstop = max([idx.stop for idx in idl])
1039            idstep = min([idx.step for idx in idl])
1040            return range(idstart, idstop, idstep)
1041
1042    return sorted(set().union(*idl))
1043
1044
1045def _intersection_idx(idl):
1046    """Returns the intersection of all lists in idl as sorted list
1047
1048    Parameters
1049    ----------
1050    idl : list
1051        List of lists or ranges.
1052    """
1053
1054    def _lcm(*args):
1055        """Returns the lowest common multiple of args.
1056
1057        From python 3.9 onwards the math library contains an lcm function."""
1058        return reduce(lambda a, b: a * b // gcd(a, b), args)
1059
1060    # Use groupby to efficiently check whether all elements of idl are identical
1061    try:
1062        g = groupby(idl)
1063        if next(g, True) and not next(g, False):
1064            return idl[0]
1065    except Exception:
1066        pass
1067
1068    if np.all([type(idx) is range for idx in idl]):
1069        if len(set([idx[0] for idx in idl])) == 1:
1070            idstart = max([idx.start for idx in idl])
1071            idstop = min([idx.stop for idx in idl])
1072            idstep = _lcm(*[idx.step for idx in idl])
1073            return range(idstart, idstop, idstep)
1074
1075    return sorted(set.intersection(*[set(o) for o in idl]))
1076
1077
1078def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1079    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1080       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1081       common divisor of the step sizes is used as new step size.
1082
1083    Parameters
1084    ----------
1085    deltas : list
1086        List of fluctuations
1087    idx : list
1088        List or range of configs on which the deltas are defined.
1089        Has to be a subset of new_idx and has to be sorted in ascending order.
1090    shape : list
1091        Number of configs in idx.
1092    new_idx : list
1093        List of configs that defines the new range, has to be sorted in ascending order.
1094    """
1095
1096    if type(idx) is range and type(new_idx) is range:
1097        if idx == new_idx:
1098            return deltas
1099    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1100    for i in range(shape):
1101        ret[idx[i] - new_idx[0]] = deltas[i]
1102    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1103
1104
def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
    """Drop all configurations with vanishing fluctuation such that they do not
       contribute to the error estimate anymore. Returns the new deltas and
       idx according to the filtering.
       A fluctuation counts as vanishing if its absolute value does not exceed
       eps times the mean of the absolute values of all deltas.
       If no configuration survives, deltas and idx are returned unchanged.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or ranges of configs on which the deltas are defined.
    eps : float
        Prefactor that enters the filter criterion.
    """
    mean_abs = np.mean(np.fabs(deltas))
    kept = [pos for pos in range(len(deltas)) if abs(deltas[pos]) > eps * mean_abs]
    if not kept:
        return deltas, idx

    kept_deltas = np.array([deltas[pos] for pos in kept])
    kept_idx = [idx[pos] for pos in kept]
    return kept_deltas, kept_idx
1132
1133
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    array_mode : bool
        if True, the deltas and covobs gradients are first gathered for every
        entry along the first axis of data and then contracted with the
        derivative via numpy.tensordot instead of looping over the individual
        Obs (default False).
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Returns
    -------
    Obs or numpy.ndarray
        A single Obs if func does not return a numpy.ndarray, otherwise an
        object array of Obs with the shape of the return value of func.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    # Collect the covariance matrix of every covobs name and make sure that
    # all inputs sharing a name also share the matrix.
    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    # Names that carry samples (deltas) are all names that are not covobs names.
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    # A name counts as merged / the result as reweighted as soon as one input is flagged accordingly.
    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    # 1 if func returned an array (several derived Obs), 0 for a single result.
    multi = int(isinstance(new_values, np.ndarray))

    # Evaluate func on the per-name r_values (falling back to the plain value
    # for Obs that do not know the name) and merge the idl of all inputs.
    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            # Differing lengths among the input idl and the merged idl also mark the name as merged.
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    # Derivative of func: manual jacobian, numerical gradient or autograd jacobian.
    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        # NOTE(review): relies on new_values exposing .shape, i.e. func returning a numpy scalar or array.
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        # Stand-in exposing an all-zero .grad for Obs that lack a given covobs.
        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        # Gather the deltas (expanded to the merged idl) per entry of data ...
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        # ... and likewise the covobs gradients.
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    # Build one Obs per entry of new_values by linear error propagation.
    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                # Only merged names are passed through _filter_zeroes.
                if is_merged[name]:
                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
                else:
                    filtered_deltas = new_deltas[name]
                    filtered_idl_d = new_idl_d[name]

                new_samples.append(filtered_deltas)
                new_idl.append(filtered_idl_d)
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        # The covobs names are attached after construction, together with the
        # covobs themselves and the exact central value.
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    # Unwrap the single element if func returned a scalar.
    if multi == 0:
        final_result = final_result.item()

    return final_result
1312
1313
1314def _reduce_deltas(deltas, idx_old, idx_new):
1315    """Extract deltas defined on idx_old on all configs of idx_new.
1316
1317    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1318    are ordered in an ascending order.
1319
1320    Parameters
1321    ----------
1322    deltas : list
1323        List of fluctuations
1324    idx_old : list
1325        List or range of configs on which the deltas are defined
1326    idx_new : list
1327        List of configs for which we want to extract the deltas.
1328        Has to be a subset of idx_old.
1329    """
1330    if not len(deltas) == len(idx_old):
1331        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1332    if type(idx_old) is range and type(idx_new) is range:
1333        if idx_old == idx_new:
1334            return deltas
1335    # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical
1336    try:
1337        g = groupby([idx_old, idx_new])
1338        if next(g, True) and not next(g, False):
1339            return deltas
1340    except Exception:
1341        pass
1342    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1343    if len(indices) < len(idx_new):
1344        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1345    return np.array(deltas)[indices]
1346
1347
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Every observable is multiplied sample by sample with the reweighting
    factor and divided by the reweighting factor itself; the results are
    flagged as reweighted and inherit is_merged from the input.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.

    Returns
    -------
    list
        The reweighted observables, in the order of obs.
    """
    result = []
    for i, current in enumerate(obs):
        if len(current.cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(current.names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in current.names:
            if not set(current.idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))

        ens_names = sorted(current.names)
        # Weight fluctuations restricted to the configs of the observable.
        w_deltas = {}
        new_samples = []
        for name in ens_names:
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], current.idl[name])
            new_samples.append((w_deltas[name] + weight.r_values[name]) * (current.deltas[name] + current.r_values[name]))
        tmp_obs = Obs(new_samples, list(ens_names), idl=[current.idl[name] for name in ens_names])

        if kwargs.get('all_configs'):
            new_weight = weight
        else:
            weight_samples = [w_deltas[name] + weight.r_values[name] for name in ens_names]
            new_weight = Obs(weight_samples, list(ens_names), idl=[current.idl[name] for name in ens_names])

        reweighted_obs = tmp_obs / new_weight
        result.append(reweighted_obs)
        reweighted_obs.reweighted = True
        reweighted_obs.is_merged = current.is_merged

    return result
1389
1390
def correlate(obs_a, obs_b):
    """Correlate two observables.

    The samples of both observables are multiplied config by config and a new
    Obs is built from the products.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Returns
    -------
    Obs
        Observable built from the sample-wise products. It is flagged as
        merged/reweighted if either input is.

    Raises
    ------
    Exception
        If the names, shapes or idl of the two observables differ or if one
        of them contains covobs.

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        # Format the message into one string; passing the pieces as separate
        # arguments made the exception print as a tuple.
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception(f'Shapes of ensemble {name} do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception(f'idl of ensemble {name} do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o
1433
1434
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
             ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Returns
    -------
    numpy.ndarray
        The correlation matrix if `correlation` is True, otherwise the correlation matrix
        rescaled by the errors (`dvalue`) of the observables.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    dim = len(obs)

    # Warn about rank deficiency unless at least one observable carries covobs.
    max_samples = np.max([o.N for o in obs])
    if max_samples <= dim and not any(len(o.cov_names) for o in obs):
        warnings.warn(f"The dimension of the covariance matrix ({dim}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Fill the upper triangle only and mirror it afterwards.
    upper = np.zeros((dim, dim))
    for row, obs_row in enumerate(obs):
        for col in range(row, dim):
            upper[row, col] = _covariance_element(obs_row, obs[col])
    cov = upper + upper.T - np.diag(np.diag(upper))

    # Normalise to unit diagonal.
    inv_sigma = np.diag(1 / np.sqrt(np.diag(cov)))
    corr = inv_sigma @ cov @ inv_sigma

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    # Rescale the correlation matrix with the errors of the observables.
    error_matrix = np.diag([o.dvalue for o in obs])
    cov = error_matrix @ corr @ error_matrix

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov
1503
1504
1505def _smooth_eigenvalues(corr, E):
1506    """Eigenvalue smoothing as described in hep-lat/9412087
1507
1508    corr : np.ndarray
1509        correlation matrix
1510    E : integer
1511        Number of eigenvalues to be left substantially unchanged
1512    """
1513    if not (2 < E < corr.shape[0] - 1):
1514        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1515    vals, vec = np.linalg.eigh(corr)
1516    lambda_min = np.mean(vals[:-E])
1517    vals[vals < lambda_min] = lambda_min
1518    vals /= np.mean(vals)
1519    return vec @ np.diag(vals) @ vec.T
1520
1521
1522def _covariance_element(obs1, obs2):
1523    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1524
1525    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1526        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1527        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1528        return np.sum(deltas1 * deltas2)
1529
1530    if set(obs1.names).isdisjoint(set(obs2.names)):
1531        return 0.0
1532
1533    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1534        raise Exception('The gamma method has to be applied to both Obs first.')
1535
1536    dvalue = 0.0
1537
1538    for e_name in obs1.mc_names:
1539
1540        if e_name not in obs2.mc_names:
1541            continue
1542
1543        idl_d = {}
1544        for r_name in obs1.e_content[e_name]:
1545            if r_name not in obs2.e_content[e_name]:
1546                continue
1547            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1548
1549        gamma = 0.0
1550
1551        for r_name in obs1.e_content[e_name]:
1552            if r_name not in obs2.e_content[e_name]:
1553                continue
1554            if len(idl_d[r_name]) == 0:
1555                continue
1556            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1557
1558        if gamma == 0.0:
1559            continue
1560
1561        gamma_div = 0.0
1562        for r_name in obs1.e_content[e_name]:
1563            if r_name not in obs2.e_content[e_name]:
1564                continue
1565            if len(idl_d[r_name]) == 0:
1566                continue
1567            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1568        gamma /= gamma_div
1569
1570        dvalue += gamma
1571
1572    for e_name in obs1.cov_names:
1573
1574        if e_name not in obs2.cov_names:
1575            continue
1576
1577        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1578
1579    return dvalue
1580
1581
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list, optional
        passed on unchanged as `idl` to the Obs constructor.
    """
    n_jack = len(jacks) - 1
    # Linear map applied to the jackknife samples to obtain the samples of the Obs.
    projector = np.ones((n_jack, n_jack)) - (n_jack - 1) * np.identity(n_jack)
    samples = jacks[1:] @ projector
    sample_mean = np.mean(samples)
    result = Obs([samples - sample_mean], [name], idl=idl, means=[sample_mean])
    # The central value is taken from the zeroth entry of the input.
    result._value = jacks[0]
    return result
1600
1601
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Returns
    -------
    Obs
        Observable defined on the union of all replica of the input.

    Raises
    ------
    Exception
        If a replicum appears more than once or if any input contains covobs.

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if len(replist) != len(set(replist)):
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any(len(o.cov_names) for o in list_of_obs):
        raise Exception('Not possible to merge data that contains covobs!')

    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        # Restore the full samples (fluctuation + replica mean) of every replicum.
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    # Builtin any() yields plain Python bools. The previous np.any/np.max
    # stored numpy bools, for which checks like `flag is True` fail.
    o.is_merged = {name: any(oi.is_merged.get(name, False) for oi in list_of_obs) for name in o.names}
    o.reweighted = any(oi.reweighted for oi in list_of_obs)
    return o
1631
1632
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.

    Returns
    -------
    Obs or list of Obs
        A single Obs if exactly one mean value is given, otherwise a list
        with one Obs per mean value.

    Raises
    ------
    Exception
        If the number of mean values does not match the dimension reported
        by the Covobs.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    if isinstance(means, (float, int)):
        means = [means]

    ol = [covobs_to_obs(Covobs(mean, cov, name, pos=i, grad=grad)) for i, mean in enumerate(means)]

    expected = ol[0].covobs[name].N
    if expected != len(means):
        # Report the dimension of the covariance; the `N` attribute of the Obs
        # itself counts Monte Carlo samples and is 0 for an Obs built here.
        raise Exception('You have to provide %d mean values!' % (expected))
    if len(ol) == 1:
        return ol[0]
    return ol
class Obs:
 20class Obs:
 21    """Class for a general observable.
 22
 23    Instances of Obs are the basic objects of a pyerrors error analysis.
 24    They are initialized with a list which contains arrays of samples for
 25    different ensembles/replica and another list of same length which contains
 26    the names of the ensembles/replica. Mathematical operations can be
 27    performed on instances. The result is another instance of Obs. The error of
 28    an instance can be computed with the gamma_method. Also contains additional
 29    methods for output and visualization of the error calculation.
 30
 31    Attributes
 32    ----------
 33    S_global : float
 34        Standard value for S (default 2.0)
 35    S_dict : dict
 36        Dictionary for S values. If an entry for a given ensemble
 37        exists this overwrites the standard value for that ensemble.
 38    tau_exp_global : float
 39        Standard value for tau_exp (default 0.0)
 40    tau_exp_dict : dict
 41        Dictionary for tau_exp values. If an entry for a given ensemble exists
 42        this overwrites the standard value for that ensemble.
 43    N_sigma_global : float
 44        Standard value for N_sigma (default 1.0)
 45    N_sigma_dict : dict
 46        Dictionary for N_sigma values. If an entry for a given ensemble exists
 47        this overwrites the standard value for that ensemble.
 48    """
 49    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 50                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 51                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 52                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 53                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
 54
 55    S_global = 2.0
 56    S_dict = {}
 57    tau_exp_global = 0.0
 58    tau_exp_dict = {}
 59    N_sigma_global = 1.0
 60    N_sigma_dict = {}
 61    filter_eps = 1e-10
 62
 63    def __init__(self, samples, names, idl=None, **kwargs):
 64        """ Initialize Obs object.
 65
 66        Parameters
 67        ----------
 68        samples : list
 69            list of numpy arrays containing the Monte Carlo samples
 70        names : list
 71            list of strings labeling the individual samples
 72        idl : list, optional
 73            list of ranges or lists on which the samples are defined
 74        """
 75
 76        if kwargs.get("means") is None and len(samples):
 77            if len(samples) != len(names):
 78                raise Exception('Length of samples and names incompatible.')
 79            if idl is not None:
 80                if len(idl) != len(names):
 81                    raise Exception('Length of idl incompatible with samples and names.')
 82            name_length = len(names)
 83            if name_length > 1:
 84                if name_length != len(set(names)):
 85                    raise Exception('names are not unique.')
 86                if not all(isinstance(x, str) for x in names):
 87                    raise TypeError('All names have to be strings.')
 88            else:
 89                if not isinstance(names[0], str):
 90                    raise TypeError('All names have to be strings.')
 91            if min(len(x) for x in samples) <= 4:
 92                raise Exception('Samples have to have at least 5 entries.')
 93
 94        self.names = sorted(names)
 95        self.shape = {}
 96        self.r_values = {}
 97        self.deltas = {}
 98        self._covobs = {}
 99
100        self._value = 0
101        self.N = 0
102        self.is_merged = {}
103        self.idl = {}
104        if idl is not None:
105            for name, idx in sorted(zip(names, idl)):
106                if isinstance(idx, range):
107                    self.idl[name] = idx
108                elif isinstance(idx, (list, np.ndarray)):
109                    dc = np.unique(np.diff(idx))
110                    if np.any(dc < 0):
111                        raise Exception("Unsorted idx for idl[%s]" % (name))
112                    if len(dc) == 1:
113                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
114                    else:
115                        self.idl[name] = list(idx)
116                else:
117                    raise Exception('incompatible type for idl[%s].' % (name))
118        else:
119            for name, sample in sorted(zip(names, samples)):
120                self.idl[name] = range(1, len(sample) + 1)
121
122        if kwargs.get("means") is not None:
123            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
124                self.shape[name] = len(self.idl[name])
125                self.N += self.shape[name]
126                self.r_values[name] = mean
127                self.deltas[name] = sample
128        else:
129            for name, sample in sorted(zip(names, samples)):
130                self.shape[name] = len(self.idl[name])
131                self.N += self.shape[name]
132                if len(sample) != self.shape[name]:
133                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
134                self.r_values[name] = np.mean(sample)
135                self.deltas[name] = sample - self.r_values[name]
136                self._value += self.shape[name] * self.r_values[name]
137            self._value /= self.N
138
139        self._dvalue = 0.0
140        self.ddvalue = 0.0
141        self.reweighted = False
142
143        self.tag = None
144
    @property
    def value(self):
        """Value of the observable as stored in `_value` (read-only)."""
        return self._value
148
    @property
    def dvalue(self):
        """Error of the observable as stored in `_dvalue` (read-only).

        `_dvalue` is initialised to 0.0 and set by `gamma_method`.
        """
        return self._dvalue
152
153    @property
154    def e_names(self):
155        return sorted(set([o.split('|')[0] for o in self.names]))
156
157    @property
158    def cov_names(self):
159        return sorted(set([o for o in self.covobs.keys()]))
160
161    @property
162    def mc_names(self):
163        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
164
165    @property
166    def e_content(self):
167        res = {}
168        for e, e_name in enumerate(self.e_names):
169            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
170            if e_name in self.names:
171                res[e_name].append(e_name)
172        return res
173
    @property
    def covobs(self):
        """Dictionary of the covobs attached to this Obs, keyed by name (the `_covobs` dict itself, not a copy)."""
        return self._covobs
177
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        The results are stored on the instance: `_dvalue` and `ddvalue` for
        the total error and its error, and the per-ensemble dictionaries
        `e_dvalue`, `e_ddvalue`, `e_tauint`, `e_dtauint`, `e_windowsize`,
        `e_rho`, `e_drho`, `e_n_tauint` and `e_n_dtauint`.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimates coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
        """

        # Discard the results of any previous call.
        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        # Only an explicit fft=False switches the fft computation off.
        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # Fill self.<kwarg_name> for every ensemble. A keyword argument
            # applies to all ensembles; otherwise the class-level
            # <kwarg_name>_dict entry is used if present, and the class-level
            # <kwarg_name>_global value as fallback.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            # Extent of every replicum: number of entries for a range,
            # otherwise the span between first and last configuration index.
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            # Total number of configurations of this ensemble (all replica).
            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            # Number of lags considered: half the extent of the longest replicum.
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            # Sum the unnormalised autocorrelation function over all replica.
            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # Normalisation per lag, obtained by running the same routine on
            # all-ones input; entries below 1 are replaced by 1 before dividing.
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                # Vanishing variance: the ensemble does not contribute to the error.
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            # Spacing of the configurations: fixed to 1 for range idl, smallest
            # index difference for irregular lists. All replica have to agree.
            gaps = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    gaps.append(1)
                else:
                    gaps.append(np.min(np.diff(self.idl[r_name])))

            if not np.all([gi == gaps[0] for gi in gaps]):
                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
            else:
                gapsize = gaps[0]

            # Normalised autocorrelation function and its running sum
            # (0.5 at lag 0 plus the sum of rho over the following lags).
            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Fills self.e_drho[e_name][i], the error estimate of rho at lag i.
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(gapsize)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(gapsize, w_max // 2, gapsize):
                    _compute_drho(n + gapsize)
                    # Stop at the first lag where rho is compatible with zero
                    # within N_sigma errors, or close to the end of the scanned range.
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # S = 0: no autocorrelation assumed, tau_int is fixed to 0.5.
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(gapsize * n + gapsize)
                        # The window is the first n at which g_w becomes
                        # negative, or the last n of the loop.
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            # Convert the window from units of gapsize to an index.
                            n *= gapsize
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Accumulate the squared contributions of the ensembles.
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        # Covobs contribute their own squared error and no error of the error.
        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        # Up to here _dvalue and ddvalue hold sums of squares.
        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return
352
353    gm = gamma_method
354
355    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
356        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
357           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
358
359        Parameters
360        ----------
361        deltas : list
362            List of fluctuations
363        idx : list
364            List or range of configurations on which the deltas are defined.
365        shape : int
366            Number of configurations in idx.
367        w_max : int
368            Upper bound for the summation window.
369        fft : bool
370            determines whether the fft algorithm is used for the computation
371            of the autocorrelation function.
372        """
373        gamma = np.zeros(w_max)
374        deltas = _expand_deltas(deltas, idx, shape)
375        new_shape = len(deltas)
376        if fft:
377            max_gamma = min(new_shape, w_max)
378            # The padding for the fft has to be even
379            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
380            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
381        else:
382            for n in range(w_max):
383                if new_shape - n >= 0:
384                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
385
386        return gamma
387
388    def details(self, ens_content=True):
389        """Output detailed properties of the Obs.
390
391        Parameters
392        ----------
393        ens_content : bool
394            print details about the ensembles and replica if true.
395        """
396        if self.tag is not None:
397            print("Description:", self.tag)
398        if not hasattr(self, 'e_dvalue'):
399            print('Result\t %3.8e' % (self.value))
400        else:
401            if self.value == 0.0:
402                percentage = np.nan
403            else:
404                percentage = np.abs(self._dvalue / self.value) * 100
405            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
406            if len(self.e_names) > 1:
407                print(' Ensemble errors:')
408            e_content = self.e_content
409            for e_name in self.mc_names:
410                if isinstance(self.idl[e_content[e_name][0]], range):
411                    gap = self.idl[e_content[e_name][0]].step
412                else:
413                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
414
415                if len(self.e_names) > 1:
416                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
417                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
418                tau_string += f" in units of {gap} config"
419                if gap > 1:
420                    tau_string += "s"
421                if self.tau_exp[e_name] > 0:
422                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
423                else:
424                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
425                print(tau_string)
426            for e_name in self.cov_names:
427                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
428        if ens_content is True:
429            if len(self.e_names) == 1:
430                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
431            else:
432                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
433            my_string_list = []
434            for key, value in sorted(self.e_content.items()):
435                if key not in self.covobs:
436                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
437                    if len(value) == 1:
438                        my_string += f': {self.shape[value[0]]} configurations'
439                        if isinstance(self.idl[value[0]], range):
440                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
441                        else:
442                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
443                    else:
444                        sublist = []
445                        for v in value:
446                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
447                            my_substring += f': {self.shape[v]} configurations'
448                            if isinstance(self.idl[v], range):
449                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
450                            else:
451                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
452                            sublist.append(my_substring)
453
454                        my_string += '\n' + '\n'.join(sublist)
455                else:
456                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
457                my_string_list.append(my_string)
458            print('\n'.join(my_string_list))
459
460    def reweight(self, weight):
461        """Reweight the obs with given rewighting factors.
462
463        Parameters
464        ----------
465        weight : Obs
466            Reweighting factor. An Observable that has to be defined on a superset of the
467            configurations in obs[i].idl for all i.
468        all_configs : bool
469            if True, the reweighted observables are normalized by the average of
470            the reweighting factor on all configurations in weight.idl and not
471            on the configurations in obs[i].idl. Default False.
472        """
473        return reweight(weight, [self])[0]
474
475    def is_zero_within_error(self, sigma=1):
476        """Checks whether the observable is zero within 'sigma' standard errors.
477
478        Parameters
479        ----------
480        sigma : int
481            Number of standard errors used for the check.
482
483        Works only properly when the gamma method was run.
484        """
485        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
486
487    def is_zero(self, atol=1e-10):
488        """Checks whether the observable is zero within a given tolerance.
489
490        Parameters
491        ----------
492        atol : float
493            Absolute tolerance (for details see numpy documentation).
494        """
495        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
496
497    def plot_tauint(self, save=None):
498        """Plot integrated autocorrelation time for each ensemble.
499
500        Parameters
501        ----------
502        save : str
503            saves the figure to a file named 'save' if.
504        """
505        if not hasattr(self, 'e_dvalue'):
506            raise Exception('Run the gamma method first.')
507
508        for e, e_name in enumerate(self.mc_names):
509            fig = plt.figure()
510            plt.xlabel(r'$W$')
511            plt.ylabel(r'$\tau_\mathrm{int}$')
512            length = int(len(self.e_n_tauint[e_name]))
513            if self.tau_exp[e_name] > 0:
514                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
515                x_help = np.arange(2 * self.tau_exp[e_name])
516                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
517                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
518                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
519                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
520                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
521                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
522                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
523            else:
524                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
525                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
526
527            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
528            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
529            plt.legend()
530            plt.xlim(-0.5, xmax)
531            ylim = plt.ylim()
532            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
533            plt.draw()
534            if save:
535                fig.savefig(save + "_" + str(e))
536
537    def plot_rho(self, save=None):
538        """Plot normalized autocorrelation function time for each ensemble.
539
540        Parameters
541        ----------
542        save : str
543            saves the figure to a file named 'save' if.
544        """
545        if not hasattr(self, 'e_dvalue'):
546            raise Exception('Run the gamma method first.')
547        for e, e_name in enumerate(self.mc_names):
548            fig = plt.figure()
549            plt.xlabel('W')
550            plt.ylabel('rho')
551            length = int(len(self.e_drho[e_name]))
552            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
553            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
554            if self.tau_exp[e_name] > 0:
555                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
556                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
557                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
558                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
559            else:
560                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
561                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
562            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
563            plt.xlim(-0.5, xmax)
564            plt.draw()
565            if save:
566                fig.savefig(save + "_" + str(e))
567
568    def plot_rep_dist(self):
569        """Plot replica distribution for each ensemble with more than one replicum."""
570        if not hasattr(self, 'e_dvalue'):
571            raise Exception('Run the gamma method first.')
572        for e, e_name in enumerate(self.mc_names):
573            if len(self.e_content[e_name]) == 1:
574                print('No replica distribution for a single replicum (', e_name, ')')
575                continue
576            r_length = []
577            sub_r_mean = 0
578            for r, r_name in enumerate(self.e_content[e_name]):
579                r_length.append(len(self.deltas[r_name]))
580                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
581            e_N = np.sum(r_length)
582            sub_r_mean /= e_N
583            arr = np.zeros(len(self.e_content[e_name]))
584            for r, r_name in enumerate(self.e_content[e_name]):
585                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
586            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
587            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
588            plt.draw()
589
590    def plot_history(self, expand=True):
591        """Plot derived Monte Carlo history for each ensemble
592
593        Parameters
594        ----------
595        expand : bool
596            show expanded history for irregular Monte Carlo chains (default: True).
597        """
598        for e, e_name in enumerate(self.mc_names):
599            plt.figure()
600            r_length = []
601            tmp = []
602            tmp_expanded = []
603            for r, r_name in enumerate(self.e_content[e_name]):
604                tmp.append(self.deltas[r_name] + self.r_values[r_name])
605                if expand:
606                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
607                    r_length.append(len(tmp_expanded[-1]))
608                else:
609                    r_length.append(len(tmp[-1]))
610            e_N = np.sum(r_length)
611            x = np.arange(e_N)
612            y_test = np.concatenate(tmp, axis=0)
613            if expand:
614                y = np.concatenate(tmp_expanded, axis=0)
615            else:
616                y = y_test
617            plt.errorbar(x, y, fmt='.', markersize=3)
618            plt.xlim(-0.5, e_N - 0.5)
619            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
620            plt.draw()
621
622    def plot_piechart(self, save=None):
623        """Plot piechart which shows the fractional contribution of each
624        ensemble to the error and returns a dictionary containing the fractions.
625
626        Parameters
627        ----------
628        save : str
629            saves the figure to a file named 'save' if.
630        """
631        if not hasattr(self, 'e_dvalue'):
632            raise Exception('Run the gamma method first.')
633        if np.isclose(0.0, self._dvalue, atol=1e-15):
634            raise Exception('Error is 0.0')
635        labels = self.e_names
636        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
637        fig1, ax1 = plt.subplots()
638        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
639        ax1.axis('equal')
640        plt.draw()
641        if save:
642            fig1.savefig(save)
643
644        return dict(zip(self.e_names, sizes))
645
646    def dump(self, filename, datatype="json.gz", description="", **kwargs):
647        """Dump the Obs to a file 'name' of chosen format.
648
649        Parameters
650        ----------
651        filename : str
652            name of the file to be saved.
653        datatype : str
654            Format of the exported file. Supported formats include
655            "json.gz" and "pickle"
656        description : str
657            Description for output file, only relevant for json.gz format.
658        path : str
659            specifies a custom path for the file (default '.')
660        """
661        if 'path' in kwargs:
662            file_name = kwargs.get('path') + '/' + filename
663        else:
664            file_name = filename
665
666        if datatype == "json.gz":
667            from .input.json import dump_to_json
668            dump_to_json([self], file_name, description=description)
669        elif datatype == "pickle":
670            with open(file_name + '.p', 'wb') as fb:
671                pickle.dump(self, fb)
672        else:
673            raise Exception("Unknown datatype " + str(datatype))
674
675    def export_jackknife(self):
676        """Export jackknife samples from the Obs
677
678        Returns
679        -------
680        numpy.ndarray
681            Returns a numpy array of length N + 1 where N is the number of samples
682            for the given ensemble and replicum. The zeroth entry of the array contains
683            the mean value of the Obs, entries 1 to N contain the N jackknife samples
684            derived from the Obs. The current implementation only works for observables
685            defined on exactly one ensemble and replicum. The derived jackknife samples
686            should agree with samples from a full jackknife analysis up to O(1/N).
687        """
688
689        if len(self.names) != 1:
690            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
691
692        name = self.names[0]
693        full_data = self.deltas[name] + self.r_values[name]
694        n = full_data.size
695        mean = self.value
696        tmp_jacks = np.zeros(n + 1)
697        tmp_jacks[0] = mean
698        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
699        return tmp_jacks
700
    def __float__(self):
        """Return the central value as a Python float; the error information is dropped."""
        return float(self.value)
703
    def __repr__(self):
        """Return the string form of the Obs wrapped as 'Obs[...]'."""
        return 'Obs[' + str(self) + ']'
706
    def __str__(self):
        """Format the central value together with the stored error via _format_uncertainty."""
        return _format_uncertainty(self.value, self._dvalue)
709
710    def __hash__(self):
711        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
712        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
713        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
714        hash_tuple += tuple([o.encode() for o in self.names])
715        m = hashlib.md5()
716        [m.update(o) for o in hash_tuple]
717        return int(m.hexdigest(), 16) & 0xFFFFFFFF
718
719    # Overload comparisons
    def __lt__(self, other):
        """Compare the central value with other; errors are not taken into account."""
        return self.value < other
722
    def __le__(self, other):
        """Compare the central value with other; errors are not taken into account."""
        return self.value <= other
725
    def __gt__(self, other):
        """Compare the central value with other; errors are not taken into account."""
        return self.value > other
728
    def __ge__(self, other):
        """Compare the central value with other; errors are not taken into account."""
        return self.value >= other
731
732    def __eq__(self, other):
733        return (self - other).is_zero()
734
735    def __ne__(self, other):
736        return not (self - other).is_zero()
737
738    # Overload math operations
739    def __add__(self, y):
740        if isinstance(y, Obs):
741            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
742        else:
743            if isinstance(y, np.ndarray):
744                return np.array([self + o for o in y])
745            elif y.__class__.__name__ in ['Corr', 'CObs']:
746                return NotImplemented
747            else:
748                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
749
    def __radd__(self, y):
        """Reflected addition, delegated to self + y."""
        return self + y
752
753    def __mul__(self, y):
754        if isinstance(y, Obs):
755            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
756        else:
757            if isinstance(y, np.ndarray):
758                return np.array([self * o for o in y])
759            elif isinstance(y, complex):
760                return CObs(self * y.real, self * y.imag)
761            elif y.__class__.__name__ in ['Corr', 'CObs']:
762                return NotImplemented
763            else:
764                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
765
    def __rmul__(self, y):
        """Reflected multiplication, delegated to self * y."""
        return self * y
768
769    def __sub__(self, y):
770        if isinstance(y, Obs):
771            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
772        else:
773            if isinstance(y, np.ndarray):
774                return np.array([self - o for o in y])
775            elif y.__class__.__name__ in ['Corr', 'CObs']:
776                return NotImplemented
777            else:
778                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
779
    def __rsub__(self, y):
        """Reflected subtraction y - self, computed as the negative of self - y."""
        return -1 * (self - y)
782
    def __pos__(self):
        """Unary plus; returns this very object, not a copy."""
        return self
785
    def __neg__(self):
        """Unary minus, computed as -1 * self."""
        return -1 * self
788
789    def __truediv__(self, y):
790        if isinstance(y, Obs):
791            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
792        else:
793            if isinstance(y, np.ndarray):
794                return np.array([self / o for o in y])
795            elif y.__class__.__name__ in ['Corr', 'CObs']:
796                return NotImplemented
797            else:
798                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
799
800    def __rtruediv__(self, y):
801        if isinstance(y, Obs):
802            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
803        else:
804            if isinstance(y, np.ndarray):
805                return np.array([o / self for o in y])
806            elif y.__class__.__name__ in ['Corr', 'CObs']:
807                return NotImplemented
808            else:
809                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
810
811    def __pow__(self, y):
812        if isinstance(y, Obs):
813            return derived_observable(lambda x: x[0] ** x[1], [self, y])
814        else:
815            return derived_observable(lambda x: x[0] ** y, [self])
816
817    def __rpow__(self, y):
818        if isinstance(y, Obs):
819            return derived_observable(lambda x: x[0] ** x[1], [y, self])
820        else:
821            return derived_observable(lambda x: y ** x[0], [self])
822
    def __abs__(self):
        """Absolute value, evaluated with autograd.numpy inside derived_observable (no manual gradient passed)."""
        return derived_observable(lambda x: anp.abs(x[0]), [self])
825
826    # Overload numpy functions
    def sqrt(self):
        """Square root via derived_observable, with the manual gradient 1 / (2 sqrt(value))."""
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
829
    def log(self):
        """Natural logarithm via derived_observable, with the manual gradient 1 / value."""
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
832
    def exp(self):
        """Exponential via derived_observable, with the manual gradient exp(value)."""
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
835
    def sin(self):
        """Sine via derived_observable, with the manual gradient cos(value)."""
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
838
    def cos(self):
        """Cosine via derived_observable, with the manual gradient -sin(value)."""
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
841
    def tan(self):
        """Tangent via derived_observable, with the manual gradient 1 / cos(value) ** 2."""
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
844
    def arcsin(self):
        """Inverse sine, evaluated with autograd.numpy inside derived_observable (no manual gradient passed)."""
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
847
    def arccos(self):
        """Inverse cosine, evaluated with autograd.numpy inside derived_observable (no manual gradient passed)."""
        return derived_observable(lambda x: anp.arccos(x[0]), [self])
850
    def arctan(self):
        """Inverse tangent, evaluated with autograd.numpy inside derived_observable (no manual gradient passed)."""
        return derived_observable(lambda x: anp.arctan(x[0]), [self])
853
    def sinh(self):
        """Hyperbolic sine via derived_observable, with the manual gradient cosh(value)."""
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
856
    def cosh(self):
        """Hyperbolic cosine via derived_observable, with the manual gradient sinh(value)."""
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
859
    def tanh(self):
        """Hyperbolic tangent via derived_observable, with the manual gradient 1 / cosh(value) ** 2."""
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
862
    def arcsinh(self):
        """Inverse hyperbolic sine, evaluated with autograd.numpy inside derived_observable (no manual gradient passed)."""
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
865
    def arccosh(self):
        """Inverse hyperbolic cosine, evaluated with autograd.numpy inside derived_observable (no manual gradient passed)."""
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
868
    def arctanh(self):
        """Inverse hyperbolic tangent, evaluated with autograd.numpy inside derived_observable (no manual gradient passed)."""
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 63    def __init__(self, samples, names, idl=None, **kwargs):
 64        """ Initialize Obs object.
 65
 66        Parameters
 67        ----------
 68        samples : list
 69            list of numpy arrays containing the Monte Carlo samples
 70        names : list
 71            list of strings labeling the individual samples
 72        idl : list, optional
 73            list of ranges or lists on which the samples are defined
 74        """
 75
 76        if kwargs.get("means") is None and len(samples):
 77            if len(samples) != len(names):
 78                raise Exception('Length of samples and names incompatible.')
 79            if idl is not None:
 80                if len(idl) != len(names):
 81                    raise Exception('Length of idl incompatible with samples and names.')
 82            name_length = len(names)
 83            if name_length > 1:
 84                if name_length != len(set(names)):
 85                    raise Exception('names are not unique.')
 86                if not all(isinstance(x, str) for x in names):
 87                    raise TypeError('All names have to be strings.')
 88            else:
 89                if not isinstance(names[0], str):
 90                    raise TypeError('All names have to be strings.')
 91            if min(len(x) for x in samples) <= 4:
 92                raise Exception('Samples have to have at least 5 entries.')
 93
 94        self.names = sorted(names)
 95        self.shape = {}
 96        self.r_values = {}
 97        self.deltas = {}
 98        self._covobs = {}
 99
100        self._value = 0
101        self.N = 0
102        self.is_merged = {}
103        self.idl = {}
104        if idl is not None:
105            for name, idx in sorted(zip(names, idl)):
106                if isinstance(idx, range):
107                    self.idl[name] = idx
108                elif isinstance(idx, (list, np.ndarray)):
109                    dc = np.unique(np.diff(idx))
110                    if np.any(dc < 0):
111                        raise Exception("Unsorted idx for idl[%s]" % (name))
112                    if len(dc) == 1:
113                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
114                    else:
115                        self.idl[name] = list(idx)
116                else:
117                    raise Exception('incompatible type for idl[%s].' % (name))
118        else:
119            for name, sample in sorted(zip(names, samples)):
120                self.idl[name] = range(1, len(sample) + 1)
121
122        if kwargs.get("means") is not None:
123            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
124                self.shape[name] = len(self.idl[name])
125                self.N += self.shape[name]
126                self.r_values[name] = mean
127                self.deltas[name] = sample
128        else:
129            for name, sample in sorted(zip(names, samples)):
130                self.shape[name] = len(self.idl[name])
131                self.N += self.shape[name]
132                if len(sample) != self.shape[name]:
133                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
134                self.r_values[name] = np.mean(sample)
135                self.deltas[name] = sample - self.r_values[name]
136                self._value += self.shape[name] * self.r_values[name]
137            self._value /= self.N
138
139        self._dvalue = 0.0
140        self.ddvalue = 0.0
141        self.reweighted = False
142
143        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
def gamma_method(self, **kwargs):
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        The results are stored on the instance: per-ensemble quantities in the
        dictionaries e_dvalue, e_ddvalue, e_tauint, e_dtauint, e_windowsize,
        e_n_tauint, e_n_dtauint, e_rho and e_drho, the parameters actually
        used in S, tau_exp and N_sigma, and the combined error and its
        uncertainty in _dvalue and ddvalue. All of them are reset at the start
        of every call.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)

        Raises
        ------
        Exception
            If S, tau_exp or N_sigma is negative, if the replica of an
            ensemble are not equally spaced, or if there are too few samples
            for the tau_exp analysis.
        TypeError
            If S, tau_exp or N_sigma is passed but is neither int nor float.
        """

        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        # Only an explicit fft=False switches the fft off.
        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # Fill self.<kwarg_name> for every ensemble: an explicit keyword
            # argument wins, then the class level <kwarg_name>_dict entry of
            # the ensemble, then the class level <kwarg_name>_global value.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            # Extent of every replicum in configuration numbers.
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            # The autocorrelation function is computed up to half of the
            # longest replicum.
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # Normalization: the same sum evaluated on arrays of ones, with
            # entries below 1 set to 1 to avoid dividing by zero.
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            # Smallest spacing between configurations of every replicum; all
            # replica of one ensemble have to agree on it.
            gaps = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    gaps.append(1)
                else:
                    gaps.append(np.min(np.diff(self.idl[r_name])))

            if not np.all([gi == gaps[0] for gi in gaps]):
                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
            else:
                gapsize = gaps[0]

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Store the error estimate of rho at separation i in e_drho.
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(gapsize)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(gapsize, w_max // 2, gapsize):
                    _compute_drho(n + gapsize)
                    # Attach the tail once rho is compatible with zero within
                    # N_sigma errors, or when the end of the range is reached.
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # No autocorrelation assumed: tau_int is fixed to 0.5.
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(gapsize * n + gapsize)
                        # The window is the first n with negative g_w, or the
                        # last index of the loop.
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            n *= gapsize
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Accumulate squared errors; the square root is taken at the end.
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        # Covobs contribute their squared error and no error of the error.
        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def gm(self, **kwargs):
178    def gamma_method(self, **kwargs):
179        """Estimate the error and related properties of the Obs.
180
181        Parameters
182        ----------
183        S : float
184            specifies a custom value for the parameter S (default 2.0).
185            If set to 0 it is assumed that the data exhibits no
186            autocorrelation. In this case the error estimate coincides
187            with the sample standard error.
188        tau_exp : float
189            positive value triggers the critical slowing down analysis
190            (default 0.0).
191        N_sigma : float
192            number of standard deviations from zero until the tail is
193            attached to the autocorrelation function (default 1).
194        fft : bool
195            determines whether the fft algorithm is used for the computation
196            of the autocorrelation function (default True)

           Raises
           ------
           Exception
               if S, tau_exp or N_sigma is negative, if the replica of an
               ensemble are not equally spaced, or if the chain is too short
               for the tau_exp analysis.
           TypeError
               if S, tau_exp or N_sigma is passed but is neither int nor float.
197        """
198
           # Reset every per-ensemble result so that a repeated call starts clean.
199        e_content = self.e_content
200        self.e_dvalue = {}
201        self.e_ddvalue = {}
202        self.e_tauint = {}
203        self.e_dtauint = {}
204        self.e_windowsize = {}
205        self.e_n_tauint = {}
206        self.e_n_dtauint = {}
207        e_gamma = {}
208        self.e_rho = {}
209        self.e_drho = {}
210        self._dvalue = 0
211        self.ddvalue = 0
212
213        self.S = {}
214        self.tau_exp = {}
215        self.N_sigma = {}
216
           # Only an explicit fft=False switches the fft off; anything else keeps it on.
217        if kwargs.get('fft') is False:
218            fft = False
219        else:
220            fft = True
221
           # Fill self.<kwarg_name> for every ensemble: an explicit keyword argument
           # wins, otherwise the class-level <kwarg_name>_dict entry of the ensemble
           # is used, otherwise the class-level <kwarg_name>_global value.
222        def _parse_kwarg(kwarg_name):
223            if kwarg_name in kwargs:
224                tmp = kwargs.get(kwarg_name)
225                if isinstance(tmp, (int, float)):
226                    if tmp < 0:
227                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
228                    for e, e_name in enumerate(self.e_names):
229                        getattr(self, kwarg_name)[e_name] = tmp
230                else:
231                    raise TypeError(kwarg_name + ' is not in proper format.')
232            else:
233                for e, e_name in enumerate(self.e_names):
234                    if e_name in getattr(Obs, kwarg_name + '_dict'):
235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
236                    else:
237                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
238
239        _parse_kwarg('S')
240        _parse_kwarg('tau_exp')
241        _parse_kwarg('N_sigma')
242
243        for e, e_name in enumerate(self.mc_names):
               # Extent of each replicum: number of entries for a range, otherwise
               # last minus first configuration number plus one.
244            r_length = []
245            for r_name in e_content[e_name]:
246                if isinstance(self.idl[r_name], range):
247                    r_length.append(len(self.idl[r_name]))
248                else:
249                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
250
               # e_N sums self.shape over the replica; w_max (half the longest
               # replicum) is the length of all per-lag arrays below.
251            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
252            w_max = max(r_length) // 2
253            e_gamma[e_name] = np.zeros(w_max)
254            self.e_rho[e_name] = np.zeros(w_max)
255            self.e_drho[e_name] = np.zeros(w_max)
256
257            for r_name in e_content[e_name]:
258                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
259
               # Normalisation: the same _calc_gamma call on arrays of ones; entries
               # below 1 are set to 1 before dividing.
260            gamma_div = np.zeros(w_max)
261            for r_name in e_content[e_name]:
262                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
263            gamma_div[gamma_div < 1] = 1.0
264            e_gamma[e_name] /= gamma_div[:w_max]
265
266            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                   # Store zero errors and skip the windowing for this ensemble.
267                self.e_tauint[e_name] = 0.5
268                self.e_dtauint[e_name] = 0.0
269                self.e_dvalue[e_name] = 0.0
270                self.e_ddvalue[e_name] = 0.0
271                self.e_windowsize[e_name] = 0
272                continue
273
               # Spacing per replicum: 1 is used for range idls, otherwise the
               # smallest difference between neighbouring configuration numbers.
274            gaps = []
275            for r_name in e_content[e_name]:
276                if isinstance(self.idl[r_name], range):
277                    gaps.append(1)
278                else:
279                    gaps.append(np.min(np.diff(self.idl[r_name])))
280
281            if not np.all([gi == gaps[0] for gi in gaps]):
282                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
283            else:
284                gapsize = gaps[0]
285
286            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
287            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
288            # Make sure no entry of tauint is smaller than 0.5
289            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
290            # hep-lat/0306017 eq. (42)
291            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
292            self.e_n_dtauint[e_name][0] = 0.0
293
               # Computes the entry of self.e_drho[e_name] at lag i from e_rho.
294            def _compute_drho(i):
295                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
296                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
297
298            _compute_drho(gapsize)
299            if self.tau_exp[e_name] > 0:
300                texp = self.tau_exp[e_name]
301                # Critical slowing down analysis
302                if w_max // 2 <= 1:
303                    raise Exception("Need at least 8 samples for tau_exp error analysis")
304                for n in range(gapsize, w_max // 2, gapsize):
305                    _compute_drho(n + gapsize)
                       # Stop at the first lag where rho is compatible with zero within
                       # N_sigma errors, or close to the end of the scanned range.
306                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
307                        # Bias correction hep-lat/0306017 eq. (49) included
308                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
309                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
310                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
311                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
312                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
313                        self.e_windowsize[e_name] = n
314                        break
315            else:
316                if self.S[e_name] == 0.0:
                       # S = 0: tau_int is fixed to 0.5 and the window to 0.
317                    self.e_tauint[e_name] = 0.5
318                    self.e_dtauint[e_name] = 0.0
319                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
320                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
321                    self.e_windowsize[e_name] = 0
322                else:
323                    # Standard automatic windowing procedure
324                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
325                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
326                    for n in range(1, w_max):
327                        if n < w_max // 2 - 2:
328                            _compute_drho(gapsize * n + gapsize)
                           # The window is the first n with negative g_w (or the last n);
                           # n is then converted to a lag by multiplying with gapsize.
329                        if g_w[n - 1] < 0 or n >= w_max - 1:
330                            n *= gapsize
331                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
332                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
333                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
334                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
335                            self.e_windowsize[e_name] = n
336                            break
337
               # Accumulate the squared contributions of this ensemble.
338            self._dvalue += self.e_dvalue[e_name] ** 2
339            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
340
           # Covobs entries add their errsq() to the error; their e_ddvalue is 0.
341        for e_name in self.cov_names:
342            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
343            self.e_ddvalue[e_name] = 0
344            self._dvalue += self.e_dvalue[e_name]**2
345
           # Turn the accumulated squares into the total error and its error.
346        self._dvalue = np.sqrt(self._dvalue)
347        if self._dvalue == 0.0:
348            self.ddvalue = 0.0
349        else:
350            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
351        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True):
388    def details(self, ens_content=True):
389        """Output detailed properties of the Obs.
390
391        Parameters
392        ----------
393        ens_content : bool
394            print details about the ensembles and replica if true.
395        """
396        if self.tag is not None:
397            print("Description:", self.tag)
398        if not hasattr(self, 'e_dvalue'):
399            print('Result\t %3.8e' % (self.value))
400        else:
401            if self.value == 0.0:
402                percentage = np.nan
403            else:
404                percentage = np.abs(self._dvalue / self.value) * 100
405            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
406            if len(self.e_names) > 1:
407                print(' Ensemble errors:')
408            e_content = self.e_content
409            for e_name in self.mc_names:
410                if isinstance(self.idl[e_content[e_name][0]], range):
411                    gap = self.idl[e_content[e_name][0]].step
412                else:
413                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
414
415                if len(self.e_names) > 1:
416                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
417                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
418                tau_string += f" in units of {gap} config"
419                if gap > 1:
420                    tau_string += "s"
421                if self.tau_exp[e_name] > 0:
422                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
423                else:
424                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
425                print(tau_string)
426            for e_name in self.cov_names:
427                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
428        if ens_content is True:
429            if len(self.e_names) == 1:
430                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
431            else:
432                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
433            my_string_list = []
434            for key, value in sorted(self.e_content.items()):
435                if key not in self.covobs:
436                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
437                    if len(value) == 1:
438                        my_string += f': {self.shape[value[0]]} configurations'
439                        if isinstance(self.idl[value[0]], range):
440                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
441                        else:
442                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
443                    else:
444                        sublist = []
445                        for v in value:
446                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
447                            my_substring += f': {self.shape[v]} configurations'
448                            if isinstance(self.idl[v], range):
449                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
450                            else:
451                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
452                            sublist.append(my_substring)
453
454                        my_string += '\n' + '\n'.join(sublist)
455                else:
456                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
457                my_string_list.append(my_string)
458            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight):
460    def reweight(self, weight):
461        """Reweight the obs with given rewighting factors.
462
463        Parameters
464        ----------
465        weight : Obs
466            Reweighting factor. An Observable that has to be defined on a superset of the
467            configurations in obs[i].idl for all i.
468        all_configs : bool
469            if True, the reweighted observables are normalized by the average of
470            the reweighting factor on all configurations in weight.idl and not
471            on the configurations in obs[i].idl. Default False.
472        """
473        return reweight(weight, [self])[0]

Reweight the obs with given reweighting factors.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
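  • all_configs (bool): option of the module-level reweight function that this method calls; it is not a parameter of this method and is not passed on. If True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.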
def is_zero_within_error(self, sigma=1):
475    def is_zero_within_error(self, sigma=1):
476        """Checks whether the observable is zero within 'sigma' standard errors.
477
478        Parameters
479        ----------
480        sigma : int
481            Number of standard errors used for the check.
482
483        Works only properly when the gamma method was run.
484        """
485        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Note: only works properly after the gamma method has been run.
def is_zero(self, atol=1e-10):
487    def is_zero(self, atol=1e-10):
488        """Checks whether the observable is zero within a given tolerance.
489
490        Parameters
491        ----------
492        atol : float
493            Absolute tolerance (for details see numpy documentation).
494        """
495        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
497    def plot_tauint(self, save=None):
498        """Plot integrated autocorrelation time for each ensemble.
499
500        Parameters
501        ----------
502        save : str
503            saves the figure to a file named 'save' if.
504        """
505        if not hasattr(self, 'e_dvalue'):
506            raise Exception('Run the gamma method first.')
507
508        for e, e_name in enumerate(self.mc_names):
509            fig = plt.figure()
510            plt.xlabel(r'$W$')
511            plt.ylabel(r'$\tau_\mathrm{int}$')
512            length = int(len(self.e_n_tauint[e_name]))
513            if self.tau_exp[e_name] > 0:
514                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
515                x_help = np.arange(2 * self.tau_exp[e_name])
516                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
517                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
518                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
519                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
520                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
521                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
522                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
523            else:
524                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
525                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
526
527            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
528            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
529            plt.legend()
530            plt.xlim(-0.5, xmax)
531            ylim = plt.ylim()
532            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
533            plt.draw()
534            if save:
535                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): if given, each figure is saved to a file named 'save' followed by '_' and the index of the ensemble.
def plot_rho(self, save=None):
537    def plot_rho(self, save=None):
538        """Plot normalized autocorrelation function time for each ensemble.
539
540        Parameters
541        ----------
542        save : str
543            saves the figure to a file named 'save' if.
544        """
545        if not hasattr(self, 'e_dvalue'):
546            raise Exception('Run the gamma method first.')
547        for e, e_name in enumerate(self.mc_names):
548            fig = plt.figure()
549            plt.xlabel('W')
550            plt.ylabel('rho')
551            length = int(len(self.e_drho[e_name]))
552            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
553            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
554            if self.tau_exp[e_name] > 0:
555                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
556                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
557                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
558                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
559            else:
560                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
561                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
562            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
563            plt.xlim(-0.5, xmax)
564            plt.draw()
565            if save:
566                fig.savefig(save + "_" + str(e))

Plot the normalized autocorrelation function for each ensemble.

Parameters
  • save (str): if given, each figure is saved to a file named 'save' followed by '_' and the index of the ensemble.
def plot_rep_dist(self):
568    def plot_rep_dist(self):
569        """Plot replica distribution for each ensemble with more than one replicum."""
570        if not hasattr(self, 'e_dvalue'):
571            raise Exception('Run the gamma method first.')
572        for e, e_name in enumerate(self.mc_names):
573            if len(self.e_content[e_name]) == 1:
574                print('No replica distribution for a single replicum (', e_name, ')')
575                continue
576            r_length = []
577            sub_r_mean = 0
578            for r, r_name in enumerate(self.e_content[e_name]):
579                r_length.append(len(self.deltas[r_name]))
580                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
581            e_N = np.sum(r_length)
582            sub_r_mean /= e_N
583            arr = np.zeros(len(self.e_content[e_name]))
584            for r, r_name in enumerate(self.e_content[e_name]):
585                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
586            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
587            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
588            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
590    def plot_history(self, expand=True):
591        """Plot derived Monte Carlo history for each ensemble
592
593        Parameters
594        ----------
595        expand : bool
596            show expanded history for irregular Monte Carlo chains (default: True).
597        """
598        for e, e_name in enumerate(self.mc_names):
599            plt.figure()
600            r_length = []
601            tmp = []
602            tmp_expanded = []
603            for r, r_name in enumerate(self.e_content[e_name]):
604                tmp.append(self.deltas[r_name] + self.r_values[r_name])
605                if expand:
606                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
607                    r_length.append(len(tmp_expanded[-1]))
608                else:
609                    r_length.append(len(tmp[-1]))
610            e_N = np.sum(r_length)
611            x = np.arange(e_N)
612            y_test = np.concatenate(tmp, axis=0)
613            if expand:
614                y = np.concatenate(tmp_expanded, axis=0)
615            else:
616                y = y_test
617            plt.errorbar(x, y, fmt='.', markersize=3)
618            plt.xlim(-0.5, e_N - 0.5)
619            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
620            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
622    def plot_piechart(self, save=None):
623        """Plot piechart which shows the fractional contribution of each
624        ensemble to the error and returns a dictionary containing the fractions.
625
626        Parameters
627        ----------
628        save : str
629            saves the figure to a file named 'save' if.
630        """
631        if not hasattr(self, 'e_dvalue'):
632            raise Exception('Run the gamma method first.')
633        if np.isclose(0.0, self._dvalue, atol=1e-15):
634            raise Exception('Error is 0.0')
635        labels = self.e_names
636        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
637        fig1, ax1 = plt.subplots()
638        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
639        ax1.axis('equal')
640        plt.draw()
641        if save:
642            fig1.savefig(save)
643
644        return dict(zip(self.e_names, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): if given, the figure is saved to a file named 'save'.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
646    def dump(self, filename, datatype="json.gz", description="", **kwargs):
647        """Dump the Obs to a file 'name' of chosen format.
648
649        Parameters
650        ----------
651        filename : str
652            name of the file to be saved.
653        datatype : str
654            Format of the exported file. Supported formats include
655            "json.gz" and "pickle"
656        description : str
657            Description for output file, only relevant for json.gz format.
658        path : str
659            specifies a custom path for the file (default '.')
660        """
661        if 'path' in kwargs:
662            file_name = kwargs.get('path') + '/' + filename
663        else:
664            file_name = filename
665
666        if datatype == "json.gz":
667            from .input.json import dump_to_json
668            dump_to_json([self], file_name, description=description)
669        elif datatype == "pickle":
670            with open(file_name + '.p', 'wb') as fb:
671                pickle.dump(self, fb)
672        else:
673            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'filename' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
def export_jackknife(self):
675    def export_jackknife(self):
676        """Export jackknife samples from the Obs
677
678        Returns
679        -------
680        numpy.ndarray
681            Returns a numpy array of length N + 1 where N is the number of samples
682            for the given ensemble and replicum. The zeroth entry of the array contains
683            the mean value of the Obs, entries 1 to N contain the N jackknife samples
684            derived from the Obs. The current implementation only works for observables
685            defined on exactly one ensemble and replicum. The derived jackknife samples
686            should agree with samples from a full jackknife analysis up to O(1/N).
687        """
688
689        if len(self.names) != 1:
690            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
691
692        name = self.names[0]
693        full_data = self.deltas[name] + self.r_values[name]
694        n = full_data.size
695        mean = self.value
696        tmp_jacks = np.zeros(n + 1)
697        tmp_jacks[0] = mean
698        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
699        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
def sqrt(self):
827    def sqrt(self):
828        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
830    def log(self):
831        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
833    def exp(self):
834        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
836    def sin(self):
837        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
839    def cos(self):
840        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
842    def tan(self):
843        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
845    def arcsin(self):
846        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
848    def arccos(self):
849        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
851    def arctan(self):
852        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
854    def sinh(self):
855        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
857    def cosh(self):
858        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
860    def tanh(self):
861        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
863    def arcsinh(self):
864        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
866    def arccosh(self):
867        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
869    def arctanh(self):
870        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
class CObs:
class CObs:
    """Class for a complex valued observable.

    The real and imaginary parts are stored separately. The arithmetic
    below only relies on the usual numeric operators of the two parts, so
    they may be Obs instances or plain numbers.
    """
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        """Store the real and imaginary part; the tag starts out as None."""
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        """Real part of the observable."""
        return self._real

    @property
    def imag(self):
        """Imaginary part of the observable."""
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        # Parts that are plain numbers carry no error and are skipped.
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        """Return the complex conjugate as a new CObs."""
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            # Let numpy broadcast the addition element-wise.
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                # All four parts are Obs: build real and imaginary part in one
                # derived_observable call each, with the jacobian supplied by hand.
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                # Purely real factor: no cross terms needed.
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            # self / other = self * conj(other) / |other|^2
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        # other / self = other * conj(self) / |self|^2
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        # Objects without real/imag parts (None, str, ...) are not comparable;
        # returning NotImplemented lets Python fall back to its default
        # instead of raising an AttributeError.
        if not (hasattr(other, 'real') and hasattr(other, 'imag')):
            return NotImplemented
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        # A '+' is only inserted for a non-negative imaginary part; a negative
        # one already brings its own sign.
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
877    def __init__(self, real, imag=0.0):
878        self._real = real
879        self._imag = imag
880        self.tag = None
def gamma_method(self, **kwargs):
890    def gamma_method(self, **kwargs):
891        """Executes the gamma_method for the real and the imaginary part."""
892        if isinstance(self.real, Obs):
893            self.real.gamma_method(**kwargs)
894        if isinstance(self.imag, Obs):
895            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
897    def is_zero(self):
898        """Checks whether both real and imaginary part are zero within machine precision."""
899        return self.real == 0.0 and self.imag == 0.0

Checks whether both the real and the imaginary parts are zero within machine precision.

def conjugate(self):
901    def conjugate(self):
902        return CObs(self.real, -self.imag)
def derived_observable(func, data, array_mode=False, **kwargs):
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    array_mode : bool
        if True, the fluctuations and covobs gradients are collected per
        entry along the first axis of data and contracted with the jacobian
        via np.tensordot instead of looping over every single Obs
        (default False).
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Returns
    -------
    A single Obs if func returns a scalar, otherwise a numpy object array
    of Obs with the shape of the function value.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    # Plain ints/floats are replaced by a zero-covariance Obs under a dummy name.
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    # Collect one covariance matrix per covobs name and make sure that all
    # inputs sharing a name agree on it.
    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    # Union of all names, split into covobs names and names backed by samples.
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    # A name counts as merged / the result as reweighted as soon as a single
    # input carries the flag (checked by identity against True).
    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    # 1 if func returned an array, 0 for a scalar result.
    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            # Inputs without an entry for this name contribute their value instead.
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            # Also flag the name as merged when the index lists of the inputs
            # and the merged index list do not all have the same length.
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    # Jacobian of func: user supplied, numerical, or via autograd (default).
    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        # Defaults above can be overridden through kwargs of the same name.
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        # Stand-in for inputs that do not carry a given covobs: an all-zero gradient.
        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        # For every entry along the first axis of data, stack the (expanded)
        # deltas of its Obs into one array with a trailing sample axis.
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        # Same for the covobs gradients.
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    # Build one Obs per element of the function value.
    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            # Linear error propagation: accumulate derivative times fluctuation
            # (or covobs gradient) over all input observables.
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                if is_merged[name]:
                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
                else:
                    filtered_deltas = new_deltas[name]
                    filtered_idl_d = new_idl_d[name]

                new_samples.append(filtered_deltas)
                new_idl.append(filtered_idl_d)
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        # The covobs names are attached after construction, and the value is
        # overwritten with the function value at the input values.
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        final_result = final_result.item()

    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs):
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.

    Returns
    -------
    list with one reweighted Obs per entry of obs, each flagged with
    reweighted = True.
    """
    reweighted_list = []
    for position, current in enumerate(obs):
        # Validate the observable against the weight before touching any data.
        if len(current.cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(current.names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in current.names:
            if not set(current.idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (position, name))

        ordered_names = sorted(current.names)
        w_deltas = {}
        products = []
        for name in ordered_names:
            # Restrict the weight to the configurations of the observable and
            # multiply the full samples (fluctuation + replica mean).
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], current.idl[name])
            weight_samples = w_deltas[name] + weight.r_values[name]
            obs_samples = current.deltas[name] + current.r_values[name]
            products.append(weight_samples * obs_samples)
        numerator = Obs(products, list(ordered_names), idl=[current.idl[name] for name in ordered_names])

        if kwargs.get('all_configs'):
            denominator = weight
        else:
            denominator = Obs([w_deltas[name] + weight.r_values[name] for name in ordered_names],
                              list(ordered_names),
                              idl=[current.idl[name] for name in ordered_names])

        ratio = numerator / denominator
        ratio.reweighted = True
        ratio.is_merged = current.is_merged
        reweighted_list.append(ratio)

    return reweighted_list

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def correlate(obs_a, obs_b):
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Returns
    -------
    Obs built from the sample-wise product of the two observables.

    Raises
    ------
    Exception
        If the ensembles, shapes or idl of the two observables differ, or if
        one of them contains covobs.

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception(f"Ensembles do not fit {set(obs_a.names) ^ set(obs_b.names)}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        # The messages are formatted into a single string; passing several
        # arguments to Exception would render them as a tuple.
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception(f'Shapes of ensemble {name} do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception(f'idl of ensemble {name} do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    names = sorted(obs_a.names)
    # Multiply the full samples (fluctuation + replica mean) ensemble by ensemble.
    new_samples = [(obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])
                   for name in names]
    new_idl = [obs_a.idl[name] for name in names]

    o = Obs(new_samples, names, idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
             ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smooths the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    dim = len(obs)

    largest_n = np.max([o.N for o in obs])
    if largest_n <= dim and not any(len(o.cov_names) for o in obs):
        warnings.warn(f"The dimension of the covariance matrix ({dim}) is larger or equal to the number of samples ({largest_n}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Only the upper triangle is computed; the matrix is symmetrized afterwards.
    upper = np.zeros((dim, dim))
    for row, left in enumerate(obs):
        for col in range(row, dim):
            upper[row, col] = _covariance_element(left, obs[col])
    cov = upper + upper.T - np.diag(np.diag(upper))

    # Normalize to a correlation matrix with unit diagonal.
    inv_std = np.diag(1 / np.sqrt(np.diag(cov)))
    corr = inv_std @ cov @ inv_std

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    # Rescale the correlation matrix with the errors stored on the observables.
    err_matrix = np.diag([o.dvalue for o in obs])
    cov = err_matrix @ corr @ err_matrix

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn(f"Covariance matrix is not positive semi-definite (Eigenvalues: {eigenvalues!s})", RuntimeWarning)

    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smooths the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None):
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : optional
        passed on unchanged to the Obs constructor (default None).
    """
    n_samples = len(jacks) - 1
    # Linear map taking the N jackknife samples back to the N original samples.
    projector = np.ones((n_samples, n_samples)) - (n_samples - 1) * np.identity(n_samples)
    recovered = jacks[1:] @ projector
    center = np.mean(recovered)
    result = Obs([recovered - center], [name], idl=idl, means=[center])
    # The central value is taken from the zeroth entry of jacks.
    result._value = jacks[0]
    return result

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
def merge_obs(list_of_obs):
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Returns
    -------
    Obs defined on the union of the replica of all entries.

    Raises
    ------
    ValueError
        If list_of_obs is empty.
    Exception
        If a replicum appears more than once or an entry contains covobs.

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    if len(list_of_obs) == 0:
        raise ValueError('list_of_obs must contain at least one Obs.')
    replist = [item for obs in list_of_obs for item in obs.names]
    if len(replist) != len(set(replist)):
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any(len(o.cov_names) for o in list_of_obs):
        raise Exception('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        # Full samples per replicum: fluctuations plus replica mean.
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    # Use the builtin any() so that the flags are plain Python bools: other
    # code tests them with `is True`, which a numpy.bool_ never satisfies.
    o.is_merged = {name: any(oi.is_merged.get(name, False) for oi in list_of_obs) for name in o.names}
    o.reweighted = any(oi.reweighted for oi in list_of_obs)
    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None):
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.

    Returns
    -------
    A single Obs if exactly one mean value is given, otherwise a list with
    one Obs per mean value.

    Raises
    ------
    Exception
        If the number of mean values does not match the dimension N of the
        Covobs.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    if isinstance(means, (float, int)):
        means = [means]

    ol = [covobs_to_obs(Covobs(mean, cov, name, pos=i, grad=grad)) for i, mean in enumerate(means)]

    # Report the dimension of the covariance that was actually compared, not
    # Obs.N (the number of samples of the Obs).
    n_expected = ol[0].covobs[name].N
    if n_expected != len(means):
        raise Exception('You have to provide %d mean values!' % (n_expected))
    if len(ol) == 1:
        return ol[0]
    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • means (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.