pyerrors.obs

   1import warnings
   2import hashlib
   3import pickle
   4from math import gcd
   5from functools import reduce
   6import numpy as np
   7import autograd.numpy as anp  # Thinly-wrapped numpy
   8from autograd import jacobian
   9import matplotlib.pyplot as plt
  10from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  11import numdifftools as nd
  12from itertools import groupby
  13from .covobs import Covobs
  14
  15# Improve print output of numpy.ndarrays containing Obs objects.
  16np.set_printoptions(formatter={'object': lambda x: str(x)})
  17
  18
  19class Obs:
  20    """Class for a general observable.
  21
  22    Instances of Obs are the basic objects of a pyerrors error analysis.
  23    They are initialized with a list which contains arrays of samples for
  24    different ensembles/replica and another list of same length which contains
  25    the names of the ensembles/replica. Mathematical operations can be
  26    performed on instances. The result is another instance of Obs. The error of
  27    an instance can be computed with the gamma_method. Also contains additional
  28    methods for output and visualization of the error calculation.
  29
  30    Attributes
  31    ----------
  32    S_global : float
  33        Standard value for S (default 2.0)
  34    S_dict : dict
  35        Dictionary for S values. If an entry for a given ensemble
  36        exists this overwrites the standard value for that ensemble.
  37    tau_exp_global : float
  38        Standard value for tau_exp (default 0.0)
  39    tau_exp_dict : dict
  40        Dictionary for tau_exp values. If an entry for a given ensemble exists
  41        this overwrites the standard value for that ensemble.
  42    N_sigma_global : float
  43        Standard value for N_sigma (default 1.0)
  44    N_sigma_dict : dict
  45        Dictionary for N_sigma values. If an entry for a given ensemble exists
  46        this overwrites the standard value for that ensemble.
  47    """
  48    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  49                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  50                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  51                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  52                 'idl', 'tag', '_covobs', '__dict__']
  53
  54    S_global = 2.0
  55    S_dict = {}
  56    tau_exp_global = 0.0
  57    tau_exp_dict = {}
  58    N_sigma_global = 1.0
  59    N_sigma_dict = {}
  60
  61    def __init__(self, samples, names, idl=None, **kwargs):
  62        """ Initialize Obs object.
  63
  64        Parameters
  65        ----------
  66        samples : list
  67            list of numpy arrays containing the Monte Carlo samples
  68        names : list
  69            list of strings labeling the individual samples
  70        idl : list, optional
  71            list of ranges or lists on which the samples are defined
  72        """
  73
  74        if kwargs.get("means") is None and len(samples):
  75            if len(samples) != len(names):
  76                raise ValueError('Length of samples and names incompatible.')
  77            if idl is not None:
  78                if len(idl) != len(names):
  79                    raise ValueError('Length of idl incompatible with samples and names.')
  80            name_length = len(names)
  81            if name_length > 1:
  82                if name_length != len(set(names)):
  83                    raise ValueError('Names are not unique.')
  84                if not all(isinstance(x, str) for x in names):
  85                    raise TypeError('All names have to be strings.')
  86            else:
  87                if not isinstance(names[0], str):
  88                    raise TypeError('All names have to be strings.')
  89            if min(len(x) for x in samples) <= 4:
  90                raise ValueError('Samples have to have at least 5 entries.')
  91
  92        self.names = sorted(names)
  93        self.shape = {}
  94        self.r_values = {}
  95        self.deltas = {}
  96        self._covobs = {}
  97
  98        self._value = 0
  99        self.N = 0
 100        self.idl = {}
 101        if idl is not None:
 102            for name, idx in sorted(zip(names, idl)):
 103                if isinstance(idx, range):
 104                    self.idl[name] = idx
 105                elif isinstance(idx, (list, np.ndarray)):
 106                    dc = np.unique(np.diff(idx))
 107                    if np.any(dc < 0):
 108                        raise ValueError("Unsorted idx for idl[%s]" % (name))
 109                    if len(dc) == 1:
 110                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 111                    else:
 112                        self.idl[name] = list(idx)
 113                else:
 114                    raise TypeError('incompatible type for idl[%s].' % (name))
 115        else:
 116            for name, sample in sorted(zip(names, samples)):
 117                self.idl[name] = range(1, len(sample) + 1)
 118
 119        if kwargs.get("means") is not None:
 120            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 121                self.shape[name] = len(self.idl[name])
 122                self.N += self.shape[name]
 123                self.r_values[name] = mean
 124                self.deltas[name] = sample
 125        else:
 126            for name, sample in sorted(zip(names, samples)):
 127                self.shape[name] = len(self.idl[name])
 128                self.N += self.shape[name]
 129                if len(sample) != self.shape[name]:
 130                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 131                self.r_values[name] = np.mean(sample)
 132                self.deltas[name] = sample - self.r_values[name]
 133                self._value += self.shape[name] * self.r_values[name]
 134            self._value /= self.N
 135
 136        self._dvalue = 0.0
 137        self.ddvalue = 0.0
 138        self.reweighted = False
 139
 140        self.tag = None
 141
 142    @property
 143    def value(self):
 144        return self._value
 145
 146    @property
 147    def dvalue(self):
 148        return self._dvalue
 149
 150    @property
 151    def e_names(self):
 152        return sorted(set([o.split('|')[0] for o in self.names]))
 153
 154    @property
 155    def cov_names(self):
 156        return sorted(set([o for o in self.covobs.keys()]))
 157
 158    @property
 159    def mc_names(self):
 160        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 161
 162    @property
 163    def e_content(self):
 164        res = {}
 165        for e, e_name in enumerate(self.e_names):
 166            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 167            if e_name in self.names:
 168                res[e_name].append(e_name)
 169        return res
 170
 171    @property
 172    def covobs(self):
 173        return self._covobs
 174
 175    def gamma_method(self, **kwargs):
 176        """Estimate the error and related properties of the Obs.
 177
 178        Parameters
 179        ----------
 180        S : float
 181            specifies a custom value for the parameter S (default 2.0).
 182            If set to 0 it is assumed that the data exhibits no
 183            autocorrelation. In this case the error estimates coincides
 184            with the sample standard error.
 185        tau_exp : float
 186            positive value triggers the critical slowing down analysis
 187            (default 0.0).
 188        N_sigma : float
 189            number of standard deviations from zero until the tail is
 190            attached to the autocorrelation function (default 1).
 191        fft : bool
 192            determines whether the fft algorithm is used for the computation
 193            of the autocorrelation function (default True)
 194        """
 195
 196        e_content = self.e_content
 197        self.e_dvalue = {}
 198        self.e_ddvalue = {}
 199        self.e_tauint = {}
 200        self.e_dtauint = {}
 201        self.e_windowsize = {}
 202        self.e_n_tauint = {}
 203        self.e_n_dtauint = {}
 204        e_gamma = {}
 205        self.e_rho = {}
 206        self.e_drho = {}
 207        self._dvalue = 0
 208        self.ddvalue = 0
 209
 210        self.S = {}
 211        self.tau_exp = {}
 212        self.N_sigma = {}
 213
 214        if kwargs.get('fft') is False:
 215            fft = False
 216        else:
 217            fft = True
 218
 219        def _parse_kwarg(kwarg_name):
 220            if kwarg_name in kwargs:
 221                tmp = kwargs.get(kwarg_name)
 222                if isinstance(tmp, (int, float)):
 223                    if tmp < 0:
 224                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 225                    for e, e_name in enumerate(self.e_names):
 226                        getattr(self, kwarg_name)[e_name] = tmp
 227                else:
 228                    raise TypeError(kwarg_name + ' is not in proper format.')
 229            else:
 230                for e, e_name in enumerate(self.e_names):
 231                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 232                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 233                    else:
 234                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 235
 236        _parse_kwarg('S')
 237        _parse_kwarg('tau_exp')
 238        _parse_kwarg('N_sigma')
 239
 240        for e, e_name in enumerate(self.mc_names):
 241            gapsize = _determine_gap(self, e_content, e_name)
 242
 243            r_length = []
 244            for r_name in e_content[e_name]:
 245                if isinstance(self.idl[r_name], range):
 246                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
 247                else:
 248                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
 249
 250            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 251            w_max = max(r_length) // 2
 252            e_gamma[e_name] = np.zeros(w_max)
 253            self.e_rho[e_name] = np.zeros(w_max)
 254            self.e_drho[e_name] = np.zeros(w_max)
 255
 256            for r_name in e_content[e_name]:
 257                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 258
 259            gamma_div = np.zeros(w_max)
 260            for r_name in e_content[e_name]:
 261                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
 262            gamma_div[gamma_div < 1] = 1.0
 263            e_gamma[e_name] /= gamma_div[:w_max]
 264
 265            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 266                self.e_tauint[e_name] = 0.5
 267                self.e_dtauint[e_name] = 0.0
 268                self.e_dvalue[e_name] = 0.0
 269                self.e_ddvalue[e_name] = 0.0
 270                self.e_windowsize[e_name] = 0
 271                continue
 272
 273            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 274            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 275            # Make sure no entry of tauint is smaller than 0.5
 276            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 277            # hep-lat/0306017 eq. (42)
 278            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
 279            self.e_n_dtauint[e_name][0] = 0.0
 280
 281            def _compute_drho(i):
 282                tmp = (self.e_rho[e_name][i + 1:w_max]
 283                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1],
 284                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
 285                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
 286                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 287
 288            if self.tau_exp[e_name] > 0:
 289                _compute_drho(1)
 290                texp = self.tau_exp[e_name]
 291                # Critical slowing down analysis
 292                if w_max // 2 <= 1:
 293                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 294                for n in range(1, w_max // 2):
 295                    _compute_drho(n + 1)
 296                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 297                        # Bias correction hep-lat/0306017 eq. (49) included
 298                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 299                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 300                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 301                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 302                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 303                        self.e_windowsize[e_name] = n
 304                        break
 305            else:
 306                if self.S[e_name] == 0.0:
 307                    self.e_tauint[e_name] = 0.5
 308                    self.e_dtauint[e_name] = 0.0
 309                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 310                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 311                    self.e_windowsize[e_name] = 0
 312                else:
 313                    # Standard automatic windowing procedure
 314                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
 315                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
 316                    for n in range(1, w_max):
 317                        if g_w[n - 1] < 0 or n >= w_max - 1:
 318                            _compute_drho(n)
 319                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 320                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 321                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 322                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
 323                            self.e_windowsize[e_name] = n
 324                            break
 325
 326            self._dvalue += self.e_dvalue[e_name] ** 2
 327            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 328
 329        for e_name in self.cov_names:
 330            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 331            self.e_ddvalue[e_name] = 0
 332            self._dvalue += self.e_dvalue[e_name]**2
 333
 334        self._dvalue = np.sqrt(self._dvalue)
 335        if self._dvalue == 0.0:
 336            self.ddvalue = 0.0
 337        else:
 338            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 339        return
 340
 341    gm = gamma_method
 342
 343    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
 344        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 345           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 346
 347        Parameters
 348        ----------
 349        deltas : list
 350            List of fluctuations
 351        idx : list
 352            List or range of configurations on which the deltas are defined.
 353        shape : int
 354            Number of configurations in idx.
 355        w_max : int
 356            Upper bound for the summation window.
 357        fft : bool
 358            determines whether the fft algorithm is used for the computation
 359            of the autocorrelation function.
 360        gapsize : int
 361            The target distance between two configurations. If longer distances
 362            are found in idx, the data is expanded.
 363        """
 364        gamma = np.zeros(w_max)
 365        deltas = _expand_deltas(deltas, idx, shape, gapsize)
 366        new_shape = len(deltas)
 367        if fft:
 368            max_gamma = min(new_shape, w_max)
 369            # The padding for the fft has to be even
 370            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 371            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 372        else:
 373            for n in range(w_max):
 374                if new_shape - n >= 0:
 375                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 376
 377        return gamma
 378
 379    def details(self, ens_content=True):
 380        """Output detailed properties of the Obs.
 381
 382        Parameters
 383        ----------
 384        ens_content : bool
 385            print details about the ensembles and replica if true.
 386        """
 387        if self.tag is not None:
 388            print("Description:", self.tag)
 389        if not hasattr(self, 'e_dvalue'):
 390            print('Result\t %3.8e' % (self.value))
 391        else:
 392            if self.value == 0.0:
 393                percentage = np.nan
 394            else:
 395                percentage = np.abs(self._dvalue / self.value) * 100
 396            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 397            if len(self.e_names) > 1:
 398                print(' Ensemble errors:')
 399            e_content = self.e_content
 400            for e_name in self.mc_names:
 401                gap = _determine_gap(self, e_content, e_name)
 402
 403                if len(self.e_names) > 1:
 404                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 405                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 406                tau_string += f" in units of {gap} config"
 407                if gap > 1:
 408                    tau_string += "s"
 409                if self.tau_exp[e_name] > 0:
 410                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 411                else:
 412                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 413                print(tau_string)
 414            for e_name in self.cov_names:
 415                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 416        if ens_content is True:
 417            if len(self.e_names) == 1:
 418                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 419            else:
 420                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 421            my_string_list = []
 422            for key, value in sorted(self.e_content.items()):
 423                if key not in self.covobs:
 424                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 425                    if len(value) == 1:
 426                        my_string += f': {self.shape[value[0]]} configurations'
 427                        if isinstance(self.idl[value[0]], range):
 428                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 429                        else:
 430                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 431                    else:
 432                        sublist = []
 433                        for v in value:
 434                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 435                            my_substring += f': {self.shape[v]} configurations'
 436                            if isinstance(self.idl[v], range):
 437                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 438                            else:
 439                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 440                            sublist.append(my_substring)
 441
 442                        my_string += '\n' + '\n'.join(sublist)
 443                else:
 444                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 445                my_string_list.append(my_string)
 446            print('\n'.join(my_string_list))
 447
 448    def reweight(self, weight):
 449        """Reweight the obs with given rewighting factors.
 450
 451        Parameters
 452        ----------
 453        weight : Obs
 454            Reweighting factor. An Observable that has to be defined on a superset of the
 455            configurations in obs[i].idl for all i.
 456        all_configs : bool
 457            if True, the reweighted observables are normalized by the average of
 458            the reweighting factor on all configurations in weight.idl and not
 459            on the configurations in obs[i].idl. Default False.
 460        """
 461        return reweight(weight, [self])[0]
 462
 463    def is_zero_within_error(self, sigma=1):
 464        """Checks whether the observable is zero within 'sigma' standard errors.
 465
 466        Parameters
 467        ----------
 468        sigma : int
 469            Number of standard errors used for the check.
 470
 471        Works only properly when the gamma method was run.
 472        """
 473        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 474
 475    def is_zero(self, atol=1e-10):
 476        """Checks whether the observable is zero within a given tolerance.
 477
 478        Parameters
 479        ----------
 480        atol : float
 481            Absolute tolerance (for details see numpy documentation).
 482        """
 483        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 484
 485    def plot_tauint(self, save=None):
 486        """Plot integrated autocorrelation time for each ensemble.
 487
 488        Parameters
 489        ----------
 490        save : str
 491            saves the figure to a file named 'save' if.
 492        """
 493        if not hasattr(self, 'e_dvalue'):
 494            raise Exception('Run the gamma method first.')
 495
 496        for e, e_name in enumerate(self.mc_names):
 497            fig = plt.figure()
 498            plt.xlabel(r'$W$')
 499            plt.ylabel(r'$\tau_\mathrm{int}$')
 500            length = int(len(self.e_n_tauint[e_name]))
 501            if self.tau_exp[e_name] > 0:
 502                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 503                x_help = np.arange(2 * self.tau_exp[e_name])
 504                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 505                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 506                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 507                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 508                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 509                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 510                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 511            else:
 512                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 513                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 514
 515            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 516            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 517            plt.legend()
 518            plt.xlim(-0.5, xmax)
 519            ylim = plt.ylim()
 520            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 521            plt.draw()
 522            if save:
 523                fig.savefig(save + "_" + str(e))
 524
 525    def plot_rho(self, save=None):
 526        """Plot normalized autocorrelation function time for each ensemble.
 527
 528        Parameters
 529        ----------
 530        save : str
 531            saves the figure to a file named 'save' if.
 532        """
 533        if not hasattr(self, 'e_dvalue'):
 534            raise Exception('Run the gamma method first.')
 535        for e, e_name in enumerate(self.mc_names):
 536            fig = plt.figure()
 537            plt.xlabel('W')
 538            plt.ylabel('rho')
 539            length = int(len(self.e_drho[e_name]))
 540            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 541            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 542            if self.tau_exp[e_name] > 0:
 543                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 544                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 545                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 546                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 547            else:
 548                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 549                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 550            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 551            plt.xlim(-0.5, xmax)
 552            plt.draw()
 553            if save:
 554                fig.savefig(save + "_" + str(e))
 555
 556    def plot_rep_dist(self):
 557        """Plot replica distribution for each ensemble with more than one replicum."""
 558        if not hasattr(self, 'e_dvalue'):
 559            raise Exception('Run the gamma method first.')
 560        for e, e_name in enumerate(self.mc_names):
 561            if len(self.e_content[e_name]) == 1:
 562                print('No replica distribution for a single replicum (', e_name, ')')
 563                continue
 564            r_length = []
 565            sub_r_mean = 0
 566            for r, r_name in enumerate(self.e_content[e_name]):
 567                r_length.append(len(self.deltas[r_name]))
 568                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 569            e_N = np.sum(r_length)
 570            sub_r_mean /= e_N
 571            arr = np.zeros(len(self.e_content[e_name]))
 572            for r, r_name in enumerate(self.e_content[e_name]):
 573                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 574            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 575            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 576            plt.draw()
 577
 578    def plot_history(self, expand=True):
 579        """Plot derived Monte Carlo history for each ensemble
 580
 581        Parameters
 582        ----------
 583        expand : bool
 584            show expanded history for irregular Monte Carlo chains (default: True).
 585        """
 586        for e, e_name in enumerate(self.mc_names):
 587            plt.figure()
 588            r_length = []
 589            tmp = []
 590            tmp_expanded = []
 591            for r, r_name in enumerate(self.e_content[e_name]):
 592                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 593                if expand:
 594                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
 595                    r_length.append(len(tmp_expanded[-1]))
 596                else:
 597                    r_length.append(len(tmp[-1]))
 598            e_N = np.sum(r_length)
 599            x = np.arange(e_N)
 600            y_test = np.concatenate(tmp, axis=0)
 601            if expand:
 602                y = np.concatenate(tmp_expanded, axis=0)
 603            else:
 604                y = y_test
 605            plt.errorbar(x, y, fmt='.', markersize=3)
 606            plt.xlim(-0.5, e_N - 0.5)
 607            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 608            plt.draw()
 609
 610    def plot_piechart(self, save=None):
 611        """Plot piechart which shows the fractional contribution of each
 612        ensemble to the error and returns a dictionary containing the fractions.
 613
 614        Parameters
 615        ----------
 616        save : str
 617            saves the figure to a file named 'save' if.
 618        """
 619        if not hasattr(self, 'e_dvalue'):
 620            raise Exception('Run the gamma method first.')
 621        if np.isclose(0.0, self._dvalue, atol=1e-15):
 622            raise Exception('Error is 0.0')
 623        labels = self.e_names
 624        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 625        fig1, ax1 = plt.subplots()
 626        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 627        ax1.axis('equal')
 628        plt.draw()
 629        if save:
 630            fig1.savefig(save)
 631
 632        return dict(zip(self.e_names, sizes))
 633
 634    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 635        """Dump the Obs to a file 'name' of chosen format.
 636
 637        Parameters
 638        ----------
 639        filename : str
 640            name of the file to be saved.
 641        datatype : str
 642            Format of the exported file. Supported formats include
 643            "json.gz" and "pickle"
 644        description : str
 645            Description for output file, only relevant for json.gz format.
 646        path : str
 647            specifies a custom path for the file (default '.')
 648        """
 649        if 'path' in kwargs:
 650            file_name = kwargs.get('path') + '/' + filename
 651        else:
 652            file_name = filename
 653
 654        if datatype == "json.gz":
 655            from .input.json import dump_to_json
 656            dump_to_json([self], file_name, description=description)
 657        elif datatype == "pickle":
 658            with open(file_name + '.p', 'wb') as fb:
 659                pickle.dump(self, fb)
 660        else:
 661            raise Exception("Unknown datatype " + str(datatype))
 662
 663    def export_jackknife(self):
 664        """Export jackknife samples from the Obs
 665
 666        Returns
 667        -------
 668        numpy.ndarray
 669            Returns a numpy array of length N + 1 where N is the number of samples
 670            for the given ensemble and replicum. The zeroth entry of the array contains
 671            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 672            derived from the Obs. The current implementation only works for observables
 673            defined on exactly one ensemble and replicum. The derived jackknife samples
 674            should agree with samples from a full jackknife analysis up to O(1/N).
 675        """
 676
 677        if len(self.names) != 1:
 678            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 679
 680        name = self.names[0]
 681        full_data = self.deltas[name] + self.r_values[name]
 682        n = full_data.size
 683        mean = self.value
 684        tmp_jacks = np.zeros(n + 1)
 685        tmp_jacks[0] = mean
 686        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 687        return tmp_jacks
 688
 689    def __float__(self):
 690        return float(self.value)
 691
 692    def __repr__(self):
 693        return 'Obs[' + str(self) + ']'
 694
 695    def __str__(self):
 696        return _format_uncertainty(self.value, self._dvalue)
 697
 698    def __format__(self, format_type):
 699        my_str = _format_uncertainty(self.value, self._dvalue,
 700                                     significance=int(float(format_type.replace("+", "").replace("-", ""))))
 701        for char in ["+", " "]:
 702            if format_type.startswith(char):
 703                if my_str[0] != "-":
 704                    my_str = char + my_str
 705        return my_str
 706
 707    def __hash__(self):
 708        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 709        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 710        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 711        hash_tuple += tuple([o.encode() for o in self.names])
 712        m = hashlib.md5()
 713        [m.update(o) for o in hash_tuple]
 714        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 715
 716    # Overload comparisons
 717    def __lt__(self, other):
 718        return self.value < other
 719
 720    def __le__(self, other):
 721        return self.value <= other
 722
 723    def __gt__(self, other):
 724        return self.value > other
 725
 726    def __ge__(self, other):
 727        return self.value >= other
 728
 729    def __eq__(self, other):
 730        return (self - other).is_zero()
 731
 732    def __ne__(self, other):
 733        return not (self - other).is_zero()
 734
 735    # Overload math operations
 736    def __add__(self, y):
 737        if isinstance(y, Obs):
 738            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 739        else:
 740            if isinstance(y, np.ndarray):
 741                return np.array([self + o for o in y])
 742            elif y.__class__.__name__ in ['Corr', 'CObs']:
 743                return NotImplemented
 744            else:
 745                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 746
 747    def __radd__(self, y):
 748        return self + y
 749
 750    def __mul__(self, y):
 751        if isinstance(y, Obs):
 752            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 753        else:
 754            if isinstance(y, np.ndarray):
 755                return np.array([self * o for o in y])
 756            elif isinstance(y, complex):
 757                return CObs(self * y.real, self * y.imag)
 758            elif y.__class__.__name__ in ['Corr', 'CObs']:
 759                return NotImplemented
 760            else:
 761                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 762
 763    def __rmul__(self, y):
 764        return self * y
 765
 766    def __sub__(self, y):
 767        if isinstance(y, Obs):
 768            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 769        else:
 770            if isinstance(y, np.ndarray):
 771                return np.array([self - o for o in y])
 772            elif y.__class__.__name__ in ['Corr', 'CObs']:
 773                return NotImplemented
 774            else:
 775                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 776
 777    def __rsub__(self, y):
 778        return -1 * (self - y)
 779
 780    def __pos__(self):
 781        return self
 782
 783    def __neg__(self):
 784        return -1 * self
 785
 786    def __truediv__(self, y):
 787        if isinstance(y, Obs):
 788            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 789        else:
 790            if isinstance(y, np.ndarray):
 791                return np.array([self / o for o in y])
 792            elif y.__class__.__name__ in ['Corr', 'CObs']:
 793                return NotImplemented
 794            else:
 795                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 796
 797    def __rtruediv__(self, y):
 798        if isinstance(y, Obs):
 799            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 800        else:
 801            if isinstance(y, np.ndarray):
 802                return np.array([o / self for o in y])
 803            elif y.__class__.__name__ in ['Corr', 'CObs']:
 804                return NotImplemented
 805            else:
 806                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 807
 808    def __pow__(self, y):
 809        if isinstance(y, Obs):
 810            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 811        else:
 812            return derived_observable(lambda x: x[0] ** y, [self])
 813
 814    def __rpow__(self, y):
 815        if isinstance(y, Obs):
 816            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 817        else:
 818            return derived_observable(lambda x: y ** x[0], [self])
 819
 820    def __abs__(self):
 821        return derived_observable(lambda x: anp.abs(x[0]), [self])
 822
 823    # Overload numpy functions
 824    def sqrt(self):
 825        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 826
 827    def log(self):
 828        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 829
 830    def exp(self):
 831        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 832
 833    def sin(self):
 834        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 835
 836    def cos(self):
 837        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 838
 839    def tan(self):
 840        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 841
 842    def arcsin(self):
 843        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 844
 845    def arccos(self):
 846        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 847
 848    def arctan(self):
 849        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 850
 851    def sinh(self):
 852        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 853
 854    def cosh(self):
 855        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 856
 857    def tanh(self):
 858        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 859
 860    def arcsinh(self):
 861        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 862
 863    def arccosh(self):
 864        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 865
 866    def arctanh(self):
 867        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 868
 869
 870class CObs:
 871    """Class for a complex valued observable."""
 872    __slots__ = ['_real', '_imag', 'tag']
 873
 874    def __init__(self, real, imag=0.0):
 875        self._real = real
 876        self._imag = imag
 877        self.tag = None
 878
 879    @property
 880    def real(self):
 881        return self._real
 882
 883    @property
 884    def imag(self):
 885        return self._imag
 886
 887    def gamma_method(self, **kwargs):
 888        """Executes the gamma_method for the real and the imaginary part."""
 889        if isinstance(self.real, Obs):
 890            self.real.gamma_method(**kwargs)
 891        if isinstance(self.imag, Obs):
 892            self.imag.gamma_method(**kwargs)
 893
 894    def is_zero(self):
 895        """Checks whether both real and imaginary part are zero within machine precision."""
 896        return self.real == 0.0 and self.imag == 0.0
 897
 898    def conjugate(self):
 899        return CObs(self.real, -self.imag)
 900
 901    def __add__(self, other):
 902        if isinstance(other, np.ndarray):
 903            return other + self
 904        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 905            return CObs(self.real + other.real,
 906                        self.imag + other.imag)
 907        else:
 908            return CObs(self.real + other, self.imag)
 909
 910    def __radd__(self, y):
 911        return self + y
 912
 913    def __sub__(self, other):
 914        if isinstance(other, np.ndarray):
 915            return -1 * (other - self)
 916        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 917            return CObs(self.real - other.real, self.imag - other.imag)
 918        else:
 919            return CObs(self.real - other, self.imag)
 920
 921    def __rsub__(self, other):
 922        return -1 * (self - other)
 923
 924    def __mul__(self, other):
 925        if isinstance(other, np.ndarray):
 926            return other * self
 927        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 928            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 929                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 930                                               [self.real, other.real, self.imag, other.imag],
 931                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 932                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 933                                               [self.real, other.real, self.imag, other.imag],
 934                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 935            elif getattr(other, 'imag', 0) != 0:
 936                return CObs(self.real * other.real - self.imag * other.imag,
 937                            self.imag * other.real + self.real * other.imag)
 938            else:
 939                return CObs(self.real * other.real, self.imag * other.real)
 940        else:
 941            return CObs(self.real * other, self.imag * other)
 942
 943    def __rmul__(self, other):
 944        return self * other
 945
 946    def __truediv__(self, other):
 947        if isinstance(other, np.ndarray):
 948            return 1 / (other / self)
 949        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 950            r = other.real ** 2 + other.imag ** 2
 951            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 952        else:
 953            return CObs(self.real / other, self.imag / other)
 954
 955    def __rtruediv__(self, other):
 956        r = self.real ** 2 + self.imag ** 2
 957        if hasattr(other, 'real') and hasattr(other, 'imag'):
 958            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 959        else:
 960            return CObs(self.real * other / r, -self.imag * other / r)
 961
 962    def __abs__(self):
 963        return np.sqrt(self.real**2 + self.imag**2)
 964
 965    def __pos__(self):
 966        return self
 967
 968    def __neg__(self):
 969        return -1 * self
 970
 971    def __eq__(self, other):
 972        return self.real == other.real and self.imag == other.imag
 973
 974    def __str__(self):
 975        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 976
 977    def __repr__(self):
 978        return 'CObs[' + str(self) + ']'
 979
 980
 981def _format_uncertainty(value, dvalue, significance=2):
 982    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
 983    if dvalue == 0.0:
 984        return str(value)
 985    if not isinstance(significance, int):
 986        raise TypeError("significance needs to be an integer.")
 987    if significance < 1:
 988        raise ValueError("significance needs to be larger than zero.")
 989    fexp = np.floor(np.log10(dvalue))
 990    if fexp < 0.0:
 991        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
 992    elif fexp == 0.0:
 993        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
 994    else:
 995        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"
 996
 997
 998def _expand_deltas(deltas, idx, shape, gapsize):
 999    """Expand deltas defined on idx to a regular range with spacing gapsize between two
1000       configurations and where holes are filled by 0.
1001       If idx is of type range, the deltas are not changed if the idx.step == gapsize.
1002
1003    Parameters
1004    ----------
1005    deltas : list
1006        List of fluctuations
1007    idx : list
1008        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1009    shape : int
1010        Number of configs in idx.
1011    gapsize : int
1012        The target distance between two configurations. If longer distances
1013        are found in idx, the data is expanded.
1014    """
1015    if isinstance(idx, range):
1016        if (idx.step == gapsize):
1017            return deltas
1018    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
1019    for i in range(shape):
1020        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
1021    return ret
1022
1023
1024def _merge_idx(idl):
1025    """Returns the union of all lists in idl as sorted list
1026
1027    Parameters
1028    ----------
1029    idl : list
1030        List of lists or ranges.
1031    """
1032
1033    # Use groupby to efficiently check whether all elements of idl are identical
1034    try:
1035        g = groupby(idl)
1036        if next(g, True) and not next(g, False):
1037            return idl[0]
1038    except Exception:
1039        pass
1040
1041    if np.all([type(idx) is range for idx in idl]):
1042        if len(set([idx[0] for idx in idl])) == 1:
1043            idstart = min([idx.start for idx in idl])
1044            idstop = max([idx.stop for idx in idl])
1045            idstep = min([idx.step for idx in idl])
1046            return range(idstart, idstop, idstep)
1047
1048    return sorted(set().union(*idl))
1049
1050
1051def _intersection_idx(idl):
1052    """Returns the intersection of all lists in idl as sorted list
1053
1054    Parameters
1055    ----------
1056    idl : list
1057        List of lists or ranges.
1058    """
1059
1060    def _lcm(*args):
1061        """Returns the lowest common multiple of args.
1062
1063        From python 3.9 onwards the math library contains an lcm function."""
1064        return reduce(lambda a, b: a * b // gcd(a, b), args)
1065
1066    # Use groupby to efficiently check whether all elements of idl are identical
1067    try:
1068        g = groupby(idl)
1069        if next(g, True) and not next(g, False):
1070            return idl[0]
1071    except Exception:
1072        pass
1073
1074    if np.all([type(idx) is range for idx in idl]):
1075        if len(set([idx[0] for idx in idl])) == 1:
1076            idstart = max([idx.start for idx in idl])
1077            idstop = min([idx.stop for idx in idl])
1078            idstep = _lcm(*[idx.step for idx in idl])
1079            return range(idstart, idstop, idstep)
1080
1081    return sorted(set.intersection(*[set(o) for o in idl]))
1082
1083
1084def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1085    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1086       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1087       common divisor of the step sizes is used as new step size.
1088
1089    Parameters
1090    ----------
1091    deltas : list
1092        List of fluctuations
1093    idx : list
1094        List or range of configs on which the deltas are defined.
1095        Has to be a subset of new_idx and has to be sorted in ascending order.
1096    shape : list
1097        Number of configs in idx.
1098    new_idx : list
1099        List of configs that defines the new range, has to be sorted in ascending order.
1100    """
1101
1102    if type(idx) is range and type(new_idx) is range:
1103        if idx == new_idx:
1104            return deltas
1105    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1106    for i in range(shape):
1107        ret[idx[i] - new_idx[0]] = deltas[i]
1108    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)
1109
1110
1111def derived_observable(func, data, array_mode=False, **kwargs):
1112    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1113
1114    Parameters
1115    ----------
1116    func : object
1117        arbitrary function of the form func(data, **kwargs). For the
1118        automatic differentiation to work, all numpy functions have to have
1119        the autograd wrapper (use 'import autograd.numpy as anp').
1120    data : list
1121        list of Obs, e.g. [obs1, obs2, obs3].
1122    num_grad : bool
1123        if True, numerical derivatives are used instead of autograd
1124        (default False). To control the numerical differentiation the
1125        kwargs of numdifftools.step_generators.MaxStepGenerator
1126        can be used.
1127    man_grad : list
1128        manually supply a list or an array which contains the jacobian
1129        of func. Use cautiously, supplying the wrong derivative will
1130        not be intercepted.
1131
1132    Notes
1133    -----
1134    For simple mathematical operations it can be practical to use anonymous
1135    functions. For the ratio of two observables one can e.g. use
1136
1137    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1138    """
1139
1140    data = np.asarray(data)
1141    raveled_data = data.ravel()
1142
1143    # Workaround for matrix operations containing non Obs data
1144    if not all(isinstance(x, Obs) for x in raveled_data):
1145        for i in range(len(raveled_data)):
1146            if isinstance(raveled_data[i], (int, float)):
1147                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1148
1149    allcov = {}
1150    for o in raveled_data:
1151        for name in o.cov_names:
1152            if name in allcov:
1153                if not np.allclose(allcov[name], o.covobs[name].cov):
1154                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1155            else:
1156                allcov[name] = o.covobs[name].cov
1157
1158    n_obs = len(raveled_data)
1159    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1160    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1161    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1162
1163    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1164
1165    if data.ndim == 1:
1166        values = np.array([o.value for o in data])
1167    else:
1168        values = np.vectorize(lambda x: x.value)(data)
1169
1170    new_values = func(values, **kwargs)
1171
1172    multi = int(isinstance(new_values, np.ndarray))
1173
1174    new_r_values = {}
1175    new_idl_d = {}
1176    for name in new_sample_names:
1177        idl = []
1178        tmp_values = np.zeros(n_obs)
1179        for i, item in enumerate(raveled_data):
1180            tmp_values[i] = item.r_values.get(name, item.value)
1181            tmp_idl = item.idl.get(name)
1182            if tmp_idl is not None:
1183                idl.append(tmp_idl)
1184        if multi > 0:
1185            tmp_values = np.array(tmp_values).reshape(data.shape)
1186        new_r_values[name] = func(tmp_values, **kwargs)
1187        new_idl_d[name] = _merge_idx(idl)
1188
1189    if 'man_grad' in kwargs:
1190        deriv = np.asarray(kwargs.get('man_grad'))
1191        if new_values.shape + data.shape != deriv.shape:
1192            raise Exception('Manual derivative does not have correct shape.')
1193    elif kwargs.get('num_grad') is True:
1194        if multi > 0:
1195            raise Exception('Multi mode currently not supported for numerical derivative')
1196        options = {
1197            'base_step': 0.1,
1198            'step_ratio': 2.5}
1199        for key in options.keys():
1200            kwarg = kwargs.get(key)
1201            if kwarg is not None:
1202                options[key] = kwarg
1203        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1204        if tmp_df.size == 1:
1205            deriv = np.array([tmp_df.real])
1206        else:
1207            deriv = tmp_df.real
1208    else:
1209        deriv = jacobian(func)(values, **kwargs)
1210
1211    final_result = np.zeros(new_values.shape, dtype=object)
1212
1213    if array_mode is True:
1214
1215        class _Zero_grad():
1216            def __init__(self, N):
1217                self.grad = np.zeros((N, 1))
1218
1219        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1220        d_extracted = {}
1221        g_extracted = {}
1222        for name in new_sample_names:
1223            d_extracted[name] = []
1224            ens_length = len(new_idl_d[name])
1225            for i_dat, dat in enumerate(data):
1226                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1227        for name in new_cov_names:
1228            g_extracted[name] = []
1229            zero_grad = _Zero_grad(new_covobs_lengths[name])
1230            for i_dat, dat in enumerate(data):
1231                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1232
1233    for i_val, new_val in np.ndenumerate(new_values):
1234        new_deltas = {}
1235        new_grad = {}
1236        if array_mode is True:
1237            for name in new_sample_names:
1238                ens_length = d_extracted[name][0].shape[-1]
1239                new_deltas[name] = np.zeros(ens_length)
1240                for i_dat, dat in enumerate(d_extracted[name]):
1241                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1242            for name in new_cov_names:
1243                new_grad[name] = 0
1244                for i_dat, dat in enumerate(g_extracted[name]):
1245                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1246        else:
1247            for j_obs, obs in np.ndenumerate(data):
1248                for name in obs.names:
1249                    if name in obs.cov_names:
1250                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1251                    else:
1252                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1253
1254        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1255
1256        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1257            raise Exception('The same name has been used for deltas and covobs!')
1258        new_samples = []
1259        new_means = []
1260        new_idl = []
1261        new_names_obs = []
1262        for name in new_names:
1263            if name not in new_covobs:
1264                new_samples.append(new_deltas[name])
1265                new_idl.append(new_idl_d[name])
1266                new_means.append(new_r_values[name][i_val])
1267                new_names_obs.append(name)
1268        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1269        for name in new_covobs:
1270            final_result[i_val].names.append(name)
1271        final_result[i_val]._covobs = new_covobs
1272        final_result[i_val]._value = new_val
1273        final_result[i_val].reweighted = reweighted
1274
1275    if multi == 0:
1276        final_result = final_result.item()
1277
1278    return final_result
1279
1280
1281def _reduce_deltas(deltas, idx_old, idx_new):
1282    """Extract deltas defined on idx_old on all configs of idx_new.
1283
1284    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1285    are ordered in an ascending order.
1286
1287    Parameters
1288    ----------
1289    deltas : list
1290        List of fluctuations
1291    idx_old : list
1292        List or range of configs on which the deltas are defined
1293    idx_new : list
1294        List of configs for which we want to extract the deltas.
1295        Has to be a subset of idx_old.
1296    """
1297    if not len(deltas) == len(idx_old):
1298        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1299    if type(idx_old) is range and type(idx_new) is range:
1300        if idx_old == idx_new:
1301            return deltas
1302    # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical
1303    try:
1304        g = groupby([idx_old, idx_new])
1305        if next(g, True) and not next(g, False):
1306            return deltas
1307    except Exception:
1308        pass
1309    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1310    if len(indices) < len(idx_new):
1311        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1312    return np.array(deltas)[indices]
1313
1314
1315def reweight(weight, obs, **kwargs):
1316    """Reweight a list of observables.
1317
1318    Parameters
1319    ----------
1320    weight : Obs
1321        Reweighting factor. An Observable that has to be defined on a superset of the
1322        configurations in obs[i].idl for all i.
1323    obs : list
1324        list of Obs, e.g. [obs1, obs2, obs3].
1325    all_configs : bool
1326        if True, the reweighted observables are normalized by the average of
1327        the reweighting factor on all configurations in weight.idl and not
1328        on the configurations in obs[i].idl. Default False.
1329    """
1330    result = []
1331    for i in range(len(obs)):
1332        if len(obs[i].cov_names):
1333            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1334        if not set(obs[i].names).issubset(weight.names):
1335            raise Exception('Error: Ensembles do not fit')
1336        for name in obs[i].names:
1337            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1338                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1339        new_samples = []
1340        w_deltas = {}
1341        for name in sorted(obs[i].names):
1342            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1343            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1344        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1345
1346        if kwargs.get('all_configs'):
1347            new_weight = weight
1348        else:
1349            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1350
1351        result.append(tmp_obs / new_weight)
1352        result[-1].reweighted = True
1353
1354    return result
1355
1356
1357def correlate(obs_a, obs_b):
1358    """Correlate two observables.
1359
1360    Parameters
1361    ----------
1362    obs_a : Obs
1363        First observable
1364    obs_b : Obs
1365        Second observable
1366
1367    Notes
1368    -----
1369    Keep in mind to only correlate primary observables which have not been reweighted
1370    yet. The reweighting has to be applied after correlating the observables.
1371    Currently only works if ensembles are identical (this is not strictly necessary).
1372    """
1373
1374    if sorted(obs_a.names) != sorted(obs_b.names):
1375        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1376    if len(obs_a.cov_names) or len(obs_b.cov_names):
1377        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1378    for name in obs_a.names:
1379        if obs_a.shape[name] != obs_b.shape[name]:
1380            raise Exception('Shapes of ensemble', name, 'do not fit')
1381        if obs_a.idl[name] != obs_b.idl[name]:
1382            raise Exception('idl of ensemble', name, 'do not fit')
1383
1384    if obs_a.reweighted is True:
1385        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1386    if obs_b.reweighted is True:
1387        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1388
1389    new_samples = []
1390    new_idl = []
1391    for name in sorted(obs_a.names):
1392        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1393        new_idl.append(obs_a.idl[name])
1394
1395    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1396    o.reweighted = obs_a.reweighted or obs_b.reweighted
1397    return o
1398
1399
1400def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1401    r'''Calculates the error covariance matrix of a set of observables.
1402
1403    WARNING: This function should be used with care, especially for observables with support on multiple
1404             ensembles with differing autocorrelations. See the notes below for details.
1405
1406    The gamma method has to be applied first to all observables.
1407
1408    Parameters
1409    ----------
1410    obs : list or numpy.ndarray
1411        List or one dimensional array of Obs
1412    visualize : bool
1413        If True plots the corresponding normalized correlation matrix (default False).
1414    correlation : bool
1415        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1416    smooth : None or int
1417        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1418        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1419        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1420        small ones.
1421
1422    Notes
1423    -----
1424    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1425    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1426    in the absence of autocorrelation.
1427    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1428    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1429    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1430    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1431    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1432    '''
1433
1434    length = len(obs)
1435
1436    max_samples = np.max([o.N for o in obs])
1437    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1438        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1439
1440    cov = np.zeros((length, length))
1441    for i in range(length):
1442        for j in range(i, length):
1443            cov[i, j] = _covariance_element(obs[i], obs[j])
1444    cov = cov + cov.T - np.diag(np.diag(cov))
1445
1446    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1447
1448    if isinstance(smooth, int):
1449        corr = _smooth_eigenvalues(corr, smooth)
1450
1451    if visualize:
1452        plt.matshow(corr, vmin=-1, vmax=1)
1453        plt.set_cmap('RdBu')
1454        plt.colorbar()
1455        plt.draw()
1456
1457    if correlation is True:
1458        return corr
1459
1460    errors = [o.dvalue for o in obs]
1461    cov = np.diag(errors) @ corr @ np.diag(errors)
1462
1463    eigenvalues = np.linalg.eigh(cov)[0]
1464    if not np.all(eigenvalues >= 0):
1465        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1466
1467    return cov
1468
1469
1470def _smooth_eigenvalues(corr, E):
1471    """Eigenvalue smoothing as described in hep-lat/9412087
1472
1473    corr : np.ndarray
1474        correlation matrix
1475    E : integer
1476        Number of eigenvalues to be left substantially unchanged
1477    """
1478    if not (2 < E < corr.shape[0] - 1):
1479        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1480    vals, vec = np.linalg.eigh(corr)
1481    lambda_min = np.mean(vals[:-E])
1482    vals[vals < lambda_min] = lambda_min
1483    vals /= np.mean(vals)
1484    return vec @ np.diag(vals) @ vec.T
1485
1486
1487def _covariance_element(obs1, obs2):
1488    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1489
1490    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1491        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1492        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1493        return np.sum(deltas1 * deltas2)
1494
1495    if set(obs1.names).isdisjoint(set(obs2.names)):
1496        return 0.0
1497
1498    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1499        raise Exception('The gamma method has to be applied to both Obs first.')
1500
1501    dvalue = 0.0
1502
1503    for e_name in obs1.mc_names:
1504
1505        if e_name not in obs2.mc_names:
1506            continue
1507
1508        idl_d = {}
1509        for r_name in obs1.e_content[e_name]:
1510            if r_name not in obs2.e_content[e_name]:
1511                continue
1512            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1513
1514        gamma = 0.0
1515
1516        for r_name in obs1.e_content[e_name]:
1517            if r_name not in obs2.e_content[e_name]:
1518                continue
1519            if len(idl_d[r_name]) == 0:
1520                continue
1521            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1522
1523        if gamma == 0.0:
1524            continue
1525
1526        gamma_div = 0.0
1527        for r_name in obs1.e_content[e_name]:
1528            if r_name not in obs2.e_content[e_name]:
1529                continue
1530            if len(idl_d[r_name]) == 0:
1531                continue
1532            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1533        gamma /= gamma_div
1534
1535        dvalue += gamma
1536
1537    for e_name in obs1.cov_names:
1538
1539        if e_name not in obs2.cov_names:
1540            continue
1541
1542        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1543
1544    return dvalue
1545
1546
1547def import_jackknife(jacks, name, idl=None):
1548    """Imports jackknife samples and returns an Obs
1549
1550    Parameters
1551    ----------
1552    jacks : numpy.ndarray
1553        numpy array containing the mean value as zeroth entry and
1554        the N jackknife samples as first to Nth entry.
1555    name : str
1556        name of the ensemble the samples are defined on.
1557    """
1558    length = len(jacks) - 1
1559    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1560    samples = jacks[1:] @ prj
1561    mean = np.mean(samples)
1562    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1563    new_obs._value = jacks[0]
1564    return new_obs
1565
1566
1567def merge_obs(list_of_obs):
1568    """Combine all observables in list_of_obs into one new observable
1569
1570    Parameters
1571    ----------
1572    list_of_obs : list
1573        list of the Obs object to be combined
1574
1575    Notes
1576    -----
1577    It is not possible to combine obs which are based on the same replicum
1578    """
1579    replist = [item for obs in list_of_obs for item in obs.names]
1580    if (len(replist) == len(set(replist))) is False:
1581        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1582    if any([len(o.cov_names) for o in list_of_obs]):
1583        raise Exception('Not possible to merge data that contains covobs!')
1584    new_dict = {}
1585    idl_dict = {}
1586    for o in list_of_obs:
1587        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1588                        for key in set(o.deltas) | set(o.r_values)})
1589        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1590
1591    names = sorted(new_dict.keys())
1592    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1593    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1594    return o
1595
1596
1597def cov_Obs(means, cov, name, grad=None):
1598    """Create an Obs based on mean(s) and a covariance matrix
1599
1600    Parameters
1601    ----------
1602    mean : list of floats or float
1603        N mean value(s) of the new Obs
1604    cov : list or array
1605        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1606    name : str
1607        identifier for the covariance matrix
1608    grad : list or array
1609        Gradient of the Covobs wrt. the means belonging to cov.
1610    """
1611
1612    def covobs_to_obs(co):
1613        """Make an Obs out of a Covobs
1614
1615        Parameters
1616        ----------
1617        co : Covobs
1618            Covobs to be embedded into the Obs
1619        """
1620        o = Obs([], [], means=[])
1621        o._value = co.value
1622        o.names.append(co.name)
1623        o._covobs[co.name] = co
1624        o._dvalue = np.sqrt(co.errsq())
1625        return o
1626
1627    ol = []
1628    if isinstance(means, (float, int)):
1629        means = [means]
1630
1631    for i in range(len(means)):
1632        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1633    if ol[0].covobs[name].N != len(means):
1634        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1635    if len(ol) == 1:
1636        return ol[0]
1637    return ol
1638
1639
1640def _determine_gap(o, e_content, e_name):
1641    gaps = []
1642    for r_name in e_content[e_name]:
1643        if isinstance(o.idl[r_name], range):
1644            gaps.append(o.idl[r_name].step)
1645        else:
1646            gaps.append(np.min(np.diff(o.idl[r_name])))
1647
1648    gap = min(gaps)
1649    if not np.all([gi % gap == 0 for gi in gaps]):
1650        raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)
1651
1652    return gap
class Obs:
 20class Obs:
 21    """Class for a general observable.
 22
 23    Instances of Obs are the basic objects of a pyerrors error analysis.
 24    They are initialized with a list which contains arrays of samples for
 25    different ensembles/replica and another list of same length which contains
 26    the names of the ensembles/replica. Mathematical operations can be
 27    performed on instances. The result is another instance of Obs. The error of
 28    an instance can be computed with the gamma_method. Also contains additional
 29    methods for output and visualization of the error calculation.
 30
 31    Attributes
 32    ----------
 33    S_global : float
 34        Standard value for S (default 2.0)
 35    S_dict : dict
 36        Dictionary for S values. If an entry for a given ensemble
 37        exists this overwrites the standard value for that ensemble.
 38    tau_exp_global : float
 39        Standard value for tau_exp (default 0.0)
 40    tau_exp_dict : dict
 41        Dictionary for tau_exp values. If an entry for a given ensemble exists
 42        this overwrites the standard value for that ensemble.
 43    N_sigma_global : float
 44        Standard value for N_sigma (default 1.0)
 45    N_sigma_dict : dict
 46        Dictionary for N_sigma values. If an entry for a given ensemble exists
 47        this overwrites the standard value for that ensemble.
 48    """
 49    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 50                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 51                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 52                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 53                 'idl', 'tag', '_covobs', '__dict__']
 54
 55    S_global = 2.0
 56    S_dict = {}
 57    tau_exp_global = 0.0
 58    tau_exp_dict = {}
 59    N_sigma_global = 1.0
 60    N_sigma_dict = {}
 61
 62    def __init__(self, samples, names, idl=None, **kwargs):
 63        """ Initialize Obs object.
 64
 65        Parameters
 66        ----------
 67        samples : list
 68            list of numpy arrays containing the Monte Carlo samples
 69        names : list
 70            list of strings labeling the individual samples
 71        idl : list, optional
 72            list of ranges or lists on which the samples are defined
 73        """
 74
 75        if kwargs.get("means") is None and len(samples):
 76            if len(samples) != len(names):
 77                raise ValueError('Length of samples and names incompatible.')
 78            if idl is not None:
 79                if len(idl) != len(names):
 80                    raise ValueError('Length of idl incompatible with samples and names.')
 81            name_length = len(names)
 82            if name_length > 1:
 83                if name_length != len(set(names)):
 84                    raise ValueError('Names are not unique.')
 85                if not all(isinstance(x, str) for x in names):
 86                    raise TypeError('All names have to be strings.')
 87            else:
 88                if not isinstance(names[0], str):
 89                    raise TypeError('All names have to be strings.')
 90            if min(len(x) for x in samples) <= 4:
 91                raise ValueError('Samples have to have at least 5 entries.')
 92
 93        self.names = sorted(names)
 94        self.shape = {}
 95        self.r_values = {}
 96        self.deltas = {}
 97        self._covobs = {}
 98
 99        self._value = 0
100        self.N = 0
101        self.idl = {}
102        if idl is not None:
103            for name, idx in sorted(zip(names, idl)):
104                if isinstance(idx, range):
105                    self.idl[name] = idx
106                elif isinstance(idx, (list, np.ndarray)):
107                    dc = np.unique(np.diff(idx))
108                    if np.any(dc < 0):
109                        raise ValueError("Unsorted idx for idl[%s]" % (name))
110                    if len(dc) == 1:
111                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
112                    else:
113                        self.idl[name] = list(idx)
114                else:
115                    raise TypeError('incompatible type for idl[%s].' % (name))
116        else:
117            for name, sample in sorted(zip(names, samples)):
118                self.idl[name] = range(1, len(sample) + 1)
119
120        if kwargs.get("means") is not None:
121            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
122                self.shape[name] = len(self.idl[name])
123                self.N += self.shape[name]
124                self.r_values[name] = mean
125                self.deltas[name] = sample
126        else:
127            for name, sample in sorted(zip(names, samples)):
128                self.shape[name] = len(self.idl[name])
129                self.N += self.shape[name]
130                if len(sample) != self.shape[name]:
131                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
132                self.r_values[name] = np.mean(sample)
133                self.deltas[name] = sample - self.r_values[name]
134                self._value += self.shape[name] * self.r_values[name]
135            self._value /= self.N
136
137        self._dvalue = 0.0
138        self.ddvalue = 0.0
139        self.reweighted = False
140
141        self.tag = None
142
143    @property
144    def value(self):
145        return self._value
146
147    @property
148    def dvalue(self):
149        return self._dvalue
150
151    @property
152    def e_names(self):
153        return sorted(set([o.split('|')[0] for o in self.names]))
154
155    @property
156    def cov_names(self):
157        return sorted(set([o for o in self.covobs.keys()]))
158
159    @property
160    def mc_names(self):
161        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
162
163    @property
164    def e_content(self):
165        res = {}
166        for e, e_name in enumerate(self.e_names):
167            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
168            if e_name in self.names:
169                res[e_name].append(e_name)
170        return res
171
172    @property
173    def covobs(self):
174        return self._covobs
175
176    def gamma_method(self, **kwargs):
177        """Estimate the error and related properties of the Obs.
178
179        Parameters
180        ----------
181        S : float
182            specifies a custom value for the parameter S (default 2.0).
183            If set to 0 it is assumed that the data exhibits no
184            autocorrelation. In this case the error estimates coincides
185            with the sample standard error.
186        tau_exp : float
187            positive value triggers the critical slowing down analysis
188            (default 0.0).
189        N_sigma : float
190            number of standard deviations from zero until the tail is
191            attached to the autocorrelation function (default 1).
192        fft : bool
193            determines whether the fft algorithm is used for the computation
194            of the autocorrelation function (default True)
195        """
196
197        e_content = self.e_content
198        self.e_dvalue = {}
199        self.e_ddvalue = {}
200        self.e_tauint = {}
201        self.e_dtauint = {}
202        self.e_windowsize = {}
203        self.e_n_tauint = {}
204        self.e_n_dtauint = {}
205        e_gamma = {}
206        self.e_rho = {}
207        self.e_drho = {}
208        self._dvalue = 0
209        self.ddvalue = 0
210
211        self.S = {}
212        self.tau_exp = {}
213        self.N_sigma = {}
214
215        if kwargs.get('fft') is False:
216            fft = False
217        else:
218            fft = True
219
220        def _parse_kwarg(kwarg_name):
221            if kwarg_name in kwargs:
222                tmp = kwargs.get(kwarg_name)
223                if isinstance(tmp, (int, float)):
224                    if tmp < 0:
225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
226                    for e, e_name in enumerate(self.e_names):
227                        getattr(self, kwarg_name)[e_name] = tmp
228                else:
229                    raise TypeError(kwarg_name + ' is not in proper format.')
230            else:
231                for e, e_name in enumerate(self.e_names):
232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
234                    else:
235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
236
237        _parse_kwarg('S')
238        _parse_kwarg('tau_exp')
239        _parse_kwarg('N_sigma')
240
241        for e, e_name in enumerate(self.mc_names):
242            gapsize = _determine_gap(self, e_content, e_name)
243
244            r_length = []
245            for r_name in e_content[e_name]:
246                if isinstance(self.idl[r_name], range):
247                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
248                else:
249                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
250
251            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
252            w_max = max(r_length) // 2
253            e_gamma[e_name] = np.zeros(w_max)
254            self.e_rho[e_name] = np.zeros(w_max)
255            self.e_drho[e_name] = np.zeros(w_max)
256
257            for r_name in e_content[e_name]:
258                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
259
260            gamma_div = np.zeros(w_max)
261            for r_name in e_content[e_name]:
262                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
263            gamma_div[gamma_div < 1] = 1.0
264            e_gamma[e_name] /= gamma_div[:w_max]
265
266            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
267                self.e_tauint[e_name] = 0.5
268                self.e_dtauint[e_name] = 0.0
269                self.e_dvalue[e_name] = 0.0
270                self.e_ddvalue[e_name] = 0.0
271                self.e_windowsize[e_name] = 0
272                continue
273
274            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
275            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
276            # Make sure no entry of tauint is smaller than 0.5
277            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
278            # hep-lat/0306017 eq. (42)
279            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
280            self.e_n_dtauint[e_name][0] = 0.0
281
282            def _compute_drho(i):
283                tmp = (self.e_rho[e_name][i + 1:w_max]
284                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1],
285                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
286                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
287                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
288
289            if self.tau_exp[e_name] > 0:
290                _compute_drho(1)
291                texp = self.tau_exp[e_name]
292                # Critical slowing down analysis
293                if w_max // 2 <= 1:
294                    raise Exception("Need at least 8 samples for tau_exp error analysis")
295                for n in range(1, w_max // 2):
296                    _compute_drho(n + 1)
297                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
298                        # Bias correction hep-lat/0306017 eq. (49) included
299                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
300                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
301                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
302                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
303                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
304                        self.e_windowsize[e_name] = n
305                        break
306            else:
307                if self.S[e_name] == 0.0:
308                    self.e_tauint[e_name] = 0.5
309                    self.e_dtauint[e_name] = 0.0
310                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
311                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
312                    self.e_windowsize[e_name] = 0
313                else:
314                    # Standard automatic windowing procedure
315                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
316                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
317                    for n in range(1, w_max):
318                        if g_w[n - 1] < 0 or n >= w_max - 1:
319                            _compute_drho(n)
320                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
321                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
322                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
323                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
324                            self.e_windowsize[e_name] = n
325                            break
326
327            self._dvalue += self.e_dvalue[e_name] ** 2
328            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
329
330        for e_name in self.cov_names:
331            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
332            self.e_ddvalue[e_name] = 0
333            self._dvalue += self.e_dvalue[e_name]**2
334
335        self._dvalue = np.sqrt(self._dvalue)
336        if self._dvalue == 0.0:
337            self.ddvalue = 0.0
338        else:
339            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
340        return
341
342    gm = gamma_method
343
344    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
345        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
346           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
347
348        Parameters
349        ----------
350        deltas : list
351            List of fluctuations
352        idx : list
353            List or range of configurations on which the deltas are defined.
354        shape : int
355            Number of configurations in idx.
356        w_max : int
357            Upper bound for the summation window.
358        fft : bool
359            determines whether the fft algorithm is used for the computation
360            of the autocorrelation function.
361        gapsize : int
362            The target distance between two configurations. If longer distances
363            are found in idx, the data is expanded.
364        """
365        gamma = np.zeros(w_max)
366        deltas = _expand_deltas(deltas, idx, shape, gapsize)
367        new_shape = len(deltas)
368        if fft:
369            max_gamma = min(new_shape, w_max)
370            # The padding for the fft has to be even
371            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
372            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
373        else:
374            for n in range(w_max):
375                if new_shape - n >= 0:
376                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
377
378        return gamma
379
380    def details(self, ens_content=True):
381        """Output detailed properties of the Obs.
382
383        Parameters
384        ----------
385        ens_content : bool
386            print details about the ensembles and replica if true.
387        """
388        if self.tag is not None:
389            print("Description:", self.tag)
390        if not hasattr(self, 'e_dvalue'):
391            print('Result\t %3.8e' % (self.value))
392        else:
393            if self.value == 0.0:
394                percentage = np.nan
395            else:
396                percentage = np.abs(self._dvalue / self.value) * 100
397            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
398            if len(self.e_names) > 1:
399                print(' Ensemble errors:')
400            e_content = self.e_content
401            for e_name in self.mc_names:
402                gap = _determine_gap(self, e_content, e_name)
403
404                if len(self.e_names) > 1:
405                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
406                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
407                tau_string += f" in units of {gap} config"
408                if gap > 1:
409                    tau_string += "s"
410                if self.tau_exp[e_name] > 0:
411                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
412                else:
413                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
414                print(tau_string)
415            for e_name in self.cov_names:
416                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
417        if ens_content is True:
418            if len(self.e_names) == 1:
419                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
420            else:
421                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
422            my_string_list = []
423            for key, value in sorted(self.e_content.items()):
424                if key not in self.covobs:
425                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
426                    if len(value) == 1:
427                        my_string += f': {self.shape[value[0]]} configurations'
428                        if isinstance(self.idl[value[0]], range):
429                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
430                        else:
431                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
432                    else:
433                        sublist = []
434                        for v in value:
435                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
436                            my_substring += f': {self.shape[v]} configurations'
437                            if isinstance(self.idl[v], range):
438                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
439                            else:
440                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
441                            sublist.append(my_substring)
442
443                        my_string += '\n' + '\n'.join(sublist)
444                else:
445                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
446                my_string_list.append(my_string)
447            print('\n'.join(my_string_list))
448
449    def reweight(self, weight):
450        """Reweight the obs with given rewighting factors.
451
452        Parameters
453        ----------
454        weight : Obs
455            Reweighting factor. An Observable that has to be defined on a superset of the
456            configurations in obs[i].idl for all i.
457        all_configs : bool
458            if True, the reweighted observables are normalized by the average of
459            the reweighting factor on all configurations in weight.idl and not
460            on the configurations in obs[i].idl. Default False.
461        """
462        return reweight(weight, [self])[0]
463
464    def is_zero_within_error(self, sigma=1):
465        """Checks whether the observable is zero within 'sigma' standard errors.
466
467        Parameters
468        ----------
469        sigma : int
470            Number of standard errors used for the check.
471
472        Works only properly when the gamma method was run.
473        """
474        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
475
476    def is_zero(self, atol=1e-10):
477        """Checks whether the observable is zero within a given tolerance.
478
479        Parameters
480        ----------
481        atol : float
482            Absolute tolerance (for details see numpy documentation).
483        """
484        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
485
486    def plot_tauint(self, save=None):
487        """Plot integrated autocorrelation time for each ensemble.
488
489        Parameters
490        ----------
491        save : str
492            saves the figure to a file named 'save' if.
493        """
494        if not hasattr(self, 'e_dvalue'):
495            raise Exception('Run the gamma method first.')
496
497        for e, e_name in enumerate(self.mc_names):
498            fig = plt.figure()
499            plt.xlabel(r'$W$')
500            plt.ylabel(r'$\tau_\mathrm{int}$')
501            length = int(len(self.e_n_tauint[e_name]))
502            if self.tau_exp[e_name] > 0:
503                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
504                x_help = np.arange(2 * self.tau_exp[e_name])
505                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
506                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
507                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
508                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
509                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
510                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
511                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
512            else:
513                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
514                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
515
516            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
517            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
518            plt.legend()
519            plt.xlim(-0.5, xmax)
520            ylim = plt.ylim()
521            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
522            plt.draw()
523            if save:
524                fig.savefig(save + "_" + str(e))
525
526    def plot_rho(self, save=None):
527        """Plot normalized autocorrelation function time for each ensemble.
528
529        Parameters
530        ----------
531        save : str
532            saves the figure to a file named 'save' if.
533        """
534        if not hasattr(self, 'e_dvalue'):
535            raise Exception('Run the gamma method first.')
536        for e, e_name in enumerate(self.mc_names):
537            fig = plt.figure()
538            plt.xlabel('W')
539            plt.ylabel('rho')
540            length = int(len(self.e_drho[e_name]))
541            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
542            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
543            if self.tau_exp[e_name] > 0:
544                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
545                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
546                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
547                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
548            else:
549                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
550                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
551            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
552            plt.xlim(-0.5, xmax)
553            plt.draw()
554            if save:
555                fig.savefig(save + "_" + str(e))
556
557    def plot_rep_dist(self):
558        """Plot replica distribution for each ensemble with more than one replicum."""
559        if not hasattr(self, 'e_dvalue'):
560            raise Exception('Run the gamma method first.')
561        for e, e_name in enumerate(self.mc_names):
562            if len(self.e_content[e_name]) == 1:
563                print('No replica distribution for a single replicum (', e_name, ')')
564                continue
565            r_length = []
566            sub_r_mean = 0
567            for r, r_name in enumerate(self.e_content[e_name]):
568                r_length.append(len(self.deltas[r_name]))
569                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
570            e_N = np.sum(r_length)
571            sub_r_mean /= e_N
572            arr = np.zeros(len(self.e_content[e_name]))
573            for r, r_name in enumerate(self.e_content[e_name]):
574                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
575            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
576            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
577            plt.draw()
578
579    def plot_history(self, expand=True):
580        """Plot derived Monte Carlo history for each ensemble
581
582        Parameters
583        ----------
584        expand : bool
585            show expanded history for irregular Monte Carlo chains (default: True).
586        """
587        for e, e_name in enumerate(self.mc_names):
588            plt.figure()
589            r_length = []
590            tmp = []
591            tmp_expanded = []
592            for r, r_name in enumerate(self.e_content[e_name]):
593                tmp.append(self.deltas[r_name] + self.r_values[r_name])
594                if expand:
595                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
596                    r_length.append(len(tmp_expanded[-1]))
597                else:
598                    r_length.append(len(tmp[-1]))
599            e_N = np.sum(r_length)
600            x = np.arange(e_N)
601            y_test = np.concatenate(tmp, axis=0)
602            if expand:
603                y = np.concatenate(tmp_expanded, axis=0)
604            else:
605                y = y_test
606            plt.errorbar(x, y, fmt='.', markersize=3)
607            plt.xlim(-0.5, e_N - 0.5)
608            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
609            plt.draw()
610
611    def plot_piechart(self, save=None):
612        """Plot piechart which shows the fractional contribution of each
613        ensemble to the error and returns a dictionary containing the fractions.
614
615        Parameters
616        ----------
617        save : str
618            saves the figure to a file named 'save' if.
619        """
620        if not hasattr(self, 'e_dvalue'):
621            raise Exception('Run the gamma method first.')
622        if np.isclose(0.0, self._dvalue, atol=1e-15):
623            raise Exception('Error is 0.0')
624        labels = self.e_names
625        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
626        fig1, ax1 = plt.subplots()
627        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
628        ax1.axis('equal')
629        plt.draw()
630        if save:
631            fig1.savefig(save)
632
633        return dict(zip(self.e_names, sizes))
634
635    def dump(self, filename, datatype="json.gz", description="", **kwargs):
636        """Dump the Obs to a file 'name' of chosen format.
637
638        Parameters
639        ----------
640        filename : str
641            name of the file to be saved.
642        datatype : str
643            Format of the exported file. Supported formats include
644            "json.gz" and "pickle"
645        description : str
646            Description for output file, only relevant for json.gz format.
647        path : str
648            specifies a custom path for the file (default '.')
649        """
650        if 'path' in kwargs:
651            file_name = kwargs.get('path') + '/' + filename
652        else:
653            file_name = filename
654
655        if datatype == "json.gz":
656            from .input.json import dump_to_json
657            dump_to_json([self], file_name, description=description)
658        elif datatype == "pickle":
659            with open(file_name + '.p', 'wb') as fb:
660                pickle.dump(self, fb)
661        else:
662            raise Exception("Unknown datatype " + str(datatype))
663
664    def export_jackknife(self):
665        """Export jackknife samples from the Obs
666
667        Returns
668        -------
669        numpy.ndarray
670            Returns a numpy array of length N + 1 where N is the number of samples
671            for the given ensemble and replicum. The zeroth entry of the array contains
672            the mean value of the Obs, entries 1 to N contain the N jackknife samples
673            derived from the Obs. The current implementation only works for observables
674            defined on exactly one ensemble and replicum. The derived jackknife samples
675            should agree with samples from a full jackknife analysis up to O(1/N).
676        """
677
678        if len(self.names) != 1:
679            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
680
681        name = self.names[0]
682        full_data = self.deltas[name] + self.r_values[name]
683        n = full_data.size
684        mean = self.value
685        tmp_jacks = np.zeros(n + 1)
686        tmp_jacks[0] = mean
687        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
688        return tmp_jacks
689
690    def __float__(self):
691        return float(self.value)
692
693    def __repr__(self):
694        return 'Obs[' + str(self) + ']'
695
696    def __str__(self):
697        return _format_uncertainty(self.value, self._dvalue)
698
699    def __format__(self, format_type):
700        my_str = _format_uncertainty(self.value, self._dvalue,
701                                     significance=int(float(format_type.replace("+", "").replace("-", ""))))
702        for char in ["+", " "]:
703            if format_type.startswith(char):
704                if my_str[0] != "-":
705                    my_str = char + my_str
706        return my_str
707
708    def __hash__(self):
709        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
710        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
711        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
712        hash_tuple += tuple([o.encode() for o in self.names])
713        m = hashlib.md5()
714        [m.update(o) for o in hash_tuple]
715        return int(m.hexdigest(), 16) & 0xFFFFFFFF
716
717    # Overload comparisons
718    def __lt__(self, other):
719        return self.value < other
720
721    def __le__(self, other):
722        return self.value <= other
723
724    def __gt__(self, other):
725        return self.value > other
726
727    def __ge__(self, other):
728        return self.value >= other
729
730    def __eq__(self, other):
731        return (self - other).is_zero()
732
733    def __ne__(self, other):
734        return not (self - other).is_zero()
735
736    # Overload math operations
737    def __add__(self, y):
738        if isinstance(y, Obs):
739            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
740        else:
741            if isinstance(y, np.ndarray):
742                return np.array([self + o for o in y])
743            elif y.__class__.__name__ in ['Corr', 'CObs']:
744                return NotImplemented
745            else:
746                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
747
748    def __radd__(self, y):
749        return self + y
750
751    def __mul__(self, y):
752        if isinstance(y, Obs):
753            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
754        else:
755            if isinstance(y, np.ndarray):
756                return np.array([self * o for o in y])
757            elif isinstance(y, complex):
758                return CObs(self * y.real, self * y.imag)
759            elif y.__class__.__name__ in ['Corr', 'CObs']:
760                return NotImplemented
761            else:
762                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
763
764    def __rmul__(self, y):
765        return self * y
766
767    def __sub__(self, y):
768        if isinstance(y, Obs):
769            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
770        else:
771            if isinstance(y, np.ndarray):
772                return np.array([self - o for o in y])
773            elif y.__class__.__name__ in ['Corr', 'CObs']:
774                return NotImplemented
775            else:
776                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
777
778    def __rsub__(self, y):
779        return -1 * (self - y)
780
781    def __pos__(self):
782        return self
783
784    def __neg__(self):
785        return -1 * self
786
787    def __truediv__(self, y):
788        if isinstance(y, Obs):
789            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
790        else:
791            if isinstance(y, np.ndarray):
792                return np.array([self / o for o in y])
793            elif y.__class__.__name__ in ['Corr', 'CObs']:
794                return NotImplemented
795            else:
796                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
797
798    def __rtruediv__(self, y):
799        if isinstance(y, Obs):
800            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
801        else:
802            if isinstance(y, np.ndarray):
803                return np.array([o / self for o in y])
804            elif y.__class__.__name__ in ['Corr', 'CObs']:
805                return NotImplemented
806            else:
807                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
808
809    def __pow__(self, y):
810        if isinstance(y, Obs):
811            return derived_observable(lambda x: x[0] ** x[1], [self, y])
812        else:
813            return derived_observable(lambda x: x[0] ** y, [self])
814
815    def __rpow__(self, y):
816        if isinstance(y, Obs):
817            return derived_observable(lambda x: x[0] ** x[1], [y, self])
818        else:
819            return derived_observable(lambda x: y ** x[0], [self])
820
821    def __abs__(self):
822        return derived_observable(lambda x: anp.abs(x[0]), [self])
823
824    # Overload numpy functions
825    def sqrt(self):
826        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
827
828    def log(self):
829        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
830
831    def exp(self):
832        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
833
834    def sin(self):
835        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
836
837    def cos(self):
838        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
839
840    def tan(self):
841        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
842
843    def arcsin(self):
844        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
845
846    def arccos(self):
847        return derived_observable(lambda x: anp.arccos(x[0]), [self])
848
849    def arctan(self):
850        return derived_observable(lambda x: anp.arctan(x[0]), [self])
851
852    def sinh(self):
853        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
854
855    def cosh(self):
856        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
857
858    def tanh(self):
859        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
860
861    def arcsinh(self):
862        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
863
864    def arccosh(self):
865        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
866
867    def arctanh(self):
868        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 62    def __init__(self, samples, names, idl=None, **kwargs):
 63        """ Initialize Obs object.
 64
 65        Parameters
 66        ----------
 67        samples : list
 68            list of numpy arrays containing the Monte Carlo samples
 69        names : list
 70            list of strings labeling the individual samples
 71        idl : list, optional
 72            list of ranges or lists on which the samples are defined
 73        """
 74
 75        if kwargs.get("means") is None and len(samples):
 76            if len(samples) != len(names):
 77                raise ValueError('Length of samples and names incompatible.')
 78            if idl is not None:
 79                if len(idl) != len(names):
 80                    raise ValueError('Length of idl incompatible with samples and names.')
 81            name_length = len(names)
 82            if name_length > 1:
 83                if name_length != len(set(names)):
 84                    raise ValueError('Names are not unique.')
 85                if not all(isinstance(x, str) for x in names):
 86                    raise TypeError('All names have to be strings.')
 87            else:
 88                if not isinstance(names[0], str):
 89                    raise TypeError('All names have to be strings.')
 90            if min(len(x) for x in samples) <= 4:
 91                raise ValueError('Samples have to have at least 5 entries.')
 92
 93        self.names = sorted(names)
 94        self.shape = {}
 95        self.r_values = {}
 96        self.deltas = {}
 97        self._covobs = {}
 98
 99        self._value = 0
100        self.N = 0
101        self.idl = {}
102        if idl is not None:
103            for name, idx in sorted(zip(names, idl)):
104                if isinstance(idx, range):
105                    self.idl[name] = idx
106                elif isinstance(idx, (list, np.ndarray)):
107                    dc = np.unique(np.diff(idx))
108                    if np.any(dc < 0):
109                        raise ValueError("Unsorted idx for idl[%s]" % (name))
110                    if len(dc) == 1:
111                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
112                    else:
113                        self.idl[name] = list(idx)
114                else:
115                    raise TypeError('incompatible type for idl[%s].' % (name))
116        else:
117            for name, sample in sorted(zip(names, samples)):
118                self.idl[name] = range(1, len(sample) + 1)
119
120        if kwargs.get("means") is not None:
121            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
122                self.shape[name] = len(self.idl[name])
123                self.N += self.shape[name]
124                self.r_values[name] = mean
125                self.deltas[name] = sample
126        else:
127            for name, sample in sorted(zip(names, samples)):
128                self.shape[name] = len(self.idl[name])
129                self.N += self.shape[name]
130                if len(sample) != self.shape[name]:
131                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
132                self.r_values[name] = np.mean(sample)
133                self.deltas[name] = sample - self.r_values[name]
134                self._value += self.shape[name] * self.r_values[name]
135            self._value /= self.N
136
137        self._dvalue = 0.0
138        self.ddvalue = 0.0
139        self.reweighted = False
140
141        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
def gamma_method(self, **kwargs):
176    def gamma_method(self, **kwargs):
177        """Estimate the error and related properties of the Obs.
178
179        Parameters
180        ----------
181        S : float
182            specifies a custom value for the parameter S (default 2.0).
183            If set to 0 it is assumed that the data exhibits no
184            autocorrelation. In this case the error estimates coincides
185            with the sample standard error.
186        tau_exp : float
187            positive value triggers the critical slowing down analysis
188            (default 0.0).
189        N_sigma : float
190            number of standard deviations from zero until the tail is
191            attached to the autocorrelation function (default 1).
192        fft : bool
193            determines whether the fft algorithm is used for the computation
194            of the autocorrelation function (default True)
195        """
196
197        e_content = self.e_content
198        self.e_dvalue = {}
199        self.e_ddvalue = {}
200        self.e_tauint = {}
201        self.e_dtauint = {}
202        self.e_windowsize = {}
203        self.e_n_tauint = {}
204        self.e_n_dtauint = {}
205        e_gamma = {}
206        self.e_rho = {}
207        self.e_drho = {}
208        self._dvalue = 0
209        self.ddvalue = 0
210
211        self.S = {}
212        self.tau_exp = {}
213        self.N_sigma = {}
214
215        if kwargs.get('fft') is False:
216            fft = False
217        else:
218            fft = True
219
220        def _parse_kwarg(kwarg_name):
221            if kwarg_name in kwargs:
222                tmp = kwargs.get(kwarg_name)
223                if isinstance(tmp, (int, float)):
224                    if tmp < 0:
225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
226                    for e, e_name in enumerate(self.e_names):
227                        getattr(self, kwarg_name)[e_name] = tmp
228                else:
229                    raise TypeError(kwarg_name + ' is not in proper format.')
230            else:
231                for e, e_name in enumerate(self.e_names):
232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
234                    else:
235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
236
237        _parse_kwarg('S')
238        _parse_kwarg('tau_exp')
239        _parse_kwarg('N_sigma')
240
241        for e, e_name in enumerate(self.mc_names):
242            gapsize = _determine_gap(self, e_content, e_name)
243
244            r_length = []
245            for r_name in e_content[e_name]:
246                if isinstance(self.idl[r_name], range):
247                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
248                else:
249                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
250
251            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
252            w_max = max(r_length) // 2
253            e_gamma[e_name] = np.zeros(w_max)
254            self.e_rho[e_name] = np.zeros(w_max)
255            self.e_drho[e_name] = np.zeros(w_max)
256
257            for r_name in e_content[e_name]:
258                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
259
260            gamma_div = np.zeros(w_max)
261            for r_name in e_content[e_name]:
262                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
263            gamma_div[gamma_div < 1] = 1.0
264            e_gamma[e_name] /= gamma_div[:w_max]
265
266            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
267                self.e_tauint[e_name] = 0.5
268                self.e_dtauint[e_name] = 0.0
269                self.e_dvalue[e_name] = 0.0
270                self.e_ddvalue[e_name] = 0.0
271                self.e_windowsize[e_name] = 0
272                continue
273
274            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
275            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
276            # Make sure no entry of tauint is smaller than 0.5
277            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
278            # hep-lat/0306017 eq. (42)
279            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
280            self.e_n_dtauint[e_name][0] = 0.0
281
282            def _compute_drho(i):
283                tmp = (self.e_rho[e_name][i + 1:w_max]
284                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1],
285                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
286                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
287                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
288
289            if self.tau_exp[e_name] > 0:
290                _compute_drho(1)
291                texp = self.tau_exp[e_name]
292                # Critical slowing down analysis
293                if w_max // 2 <= 1:
294                    raise Exception("Need at least 8 samples for tau_exp error analysis")
295                for n in range(1, w_max // 2):
296                    _compute_drho(n + 1)
297                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
298                        # Bias correction hep-lat/0306017 eq. (49) included
299                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
300                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
301                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
302                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
303                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
304                        self.e_windowsize[e_name] = n
305                        break
306            else:
307                if self.S[e_name] == 0.0:
308                    self.e_tauint[e_name] = 0.5
309                    self.e_dtauint[e_name] = 0.0
310                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
311                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
312                    self.e_windowsize[e_name] = 0
313                else:
314                    # Standard automatic windowing procedure
315                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
316                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
317                    for n in range(1, w_max):
318                        if g_w[n - 1] < 0 or n >= w_max - 1:
319                            _compute_drho(n)
320                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
321                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
322                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
323                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
324                            self.e_windowsize[e_name] = n
325                            break
326
327            self._dvalue += self.e_dvalue[e_name] ** 2
328            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
329
330        for e_name in self.cov_names:
331            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
332            self.e_ddvalue[e_name] = 0
333            self._dvalue += self.e_dvalue[e_name]**2
334
335        self._dvalue = np.sqrt(self._dvalue)
336        if self._dvalue == 0.0:
337            self.ddvalue = 0.0
338        else:
339            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
340        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def gm(self, **kwargs):
176    def gamma_method(self, **kwargs):
177        """Estimate the error and related properties of the Obs.
178
179        Parameters
180        ----------
181        S : float
182            specifies a custom value for the parameter S (default 2.0).
183            If set to 0 it is assumed that the data exhibits no
184            autocorrelation. In this case the error estimates coincides
185            with the sample standard error.
186        tau_exp : float
187            positive value triggers the critical slowing down analysis
188            (default 0.0).
189        N_sigma : float
190            number of standard deviations from zero until the tail is
191            attached to the autocorrelation function (default 1).
192        fft : bool
193            determines whether the fft algorithm is used for the computation
194            of the autocorrelation function (default True)
195        """
196
197        e_content = self.e_content
198        self.e_dvalue = {}
199        self.e_ddvalue = {}
200        self.e_tauint = {}
201        self.e_dtauint = {}
202        self.e_windowsize = {}
203        self.e_n_tauint = {}
204        self.e_n_dtauint = {}
205        e_gamma = {}
206        self.e_rho = {}
207        self.e_drho = {}
208        self._dvalue = 0
209        self.ddvalue = 0
210
211        self.S = {}
212        self.tau_exp = {}
213        self.N_sigma = {}
214
215        if kwargs.get('fft') is False:
216            fft = False
217        else:
218            fft = True
219
220        def _parse_kwarg(kwarg_name):
221            if kwarg_name in kwargs:
222                tmp = kwargs.get(kwarg_name)
223                if isinstance(tmp, (int, float)):
224                    if tmp < 0:
225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
226                    for e, e_name in enumerate(self.e_names):
227                        getattr(self, kwarg_name)[e_name] = tmp
228                else:
229                    raise TypeError(kwarg_name + ' is not in proper format.')
230            else:
231                for e, e_name in enumerate(self.e_names):
232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
234                    else:
235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
236
237        _parse_kwarg('S')
238        _parse_kwarg('tau_exp')
239        _parse_kwarg('N_sigma')
240
241        for e, e_name in enumerate(self.mc_names):
242            gapsize = _determine_gap(self, e_content, e_name)
243
244            r_length = []
245            for r_name in e_content[e_name]:
246                if isinstance(self.idl[r_name], range):
247                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
248                else:
249                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
250
251            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
252            w_max = max(r_length) // 2
253            e_gamma[e_name] = np.zeros(w_max)
254            self.e_rho[e_name] = np.zeros(w_max)
255            self.e_drho[e_name] = np.zeros(w_max)
256
257            for r_name in e_content[e_name]:
258                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
259
260            gamma_div = np.zeros(w_max)
261            for r_name in e_content[e_name]:
262                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
263            gamma_div[gamma_div < 1] = 1.0
264            e_gamma[e_name] /= gamma_div[:w_max]
265
266            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
267                self.e_tauint[e_name] = 0.5
268                self.e_dtauint[e_name] = 0.0
269                self.e_dvalue[e_name] = 0.0
270                self.e_ddvalue[e_name] = 0.0
271                self.e_windowsize[e_name] = 0
272                continue
273
274            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
275            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
276            # Make sure no entry of tauint is smaller than 0.5
277            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
278            # hep-lat/0306017 eq. (42)
279            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
280            self.e_n_dtauint[e_name][0] = 0.0
281
282            def _compute_drho(i):
283                tmp = (self.e_rho[e_name][i + 1:w_max]
284                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1],
285                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
286                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
287                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
288
289            if self.tau_exp[e_name] > 0:
290                _compute_drho(1)
291                texp = self.tau_exp[e_name]
292                # Critical slowing down analysis
293                if w_max // 2 <= 1:
294                    raise Exception("Need at least 8 samples for tau_exp error analysis")
295                for n in range(1, w_max // 2):
296                    _compute_drho(n + 1)
297                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
298                        # Bias correction hep-lat/0306017 eq. (49) included
299                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
300                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
301                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
302                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
303                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
304                        self.e_windowsize[e_name] = n
305                        break
306            else:
307                if self.S[e_name] == 0.0:
308                    self.e_tauint[e_name] = 0.5
309                    self.e_dtauint[e_name] = 0.0
310                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
311                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
312                    self.e_windowsize[e_name] = 0
313                else:
314                    # Standard automatic windowing procedure
315                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
316                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
317                    for n in range(1, w_max):
318                        if g_w[n - 1] < 0 or n >= w_max - 1:
319                            _compute_drho(n)
320                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
321                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
322                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
323                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
324                            self.e_windowsize[e_name] = n
325                            break
326
327            self._dvalue += self.e_dvalue[e_name] ** 2
328            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
329
330        for e_name in self.cov_names:
331            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
332            self.e_ddvalue[e_name] = 0
333            self._dvalue += self.e_dvalue[e_name]**2
334
335        self._dvalue = np.sqrt(self._dvalue)
336        if self._dvalue == 0.0:
337            self.ddvalue = 0.0
338        else:
339            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
340        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True):
380    def details(self, ens_content=True):
381        """Output detailed properties of the Obs.
382
383        Parameters
384        ----------
385        ens_content : bool
386            print details about the ensembles and replica if true.
387        """
388        if self.tag is not None:
389            print("Description:", self.tag)
390        if not hasattr(self, 'e_dvalue'):
391            print('Result\t %3.8e' % (self.value))
392        else:
393            if self.value == 0.0:
394                percentage = np.nan
395            else:
396                percentage = np.abs(self._dvalue / self.value) * 100
397            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
398            if len(self.e_names) > 1:
399                print(' Ensemble errors:')
400            e_content = self.e_content
401            for e_name in self.mc_names:
402                gap = _determine_gap(self, e_content, e_name)
403
404                if len(self.e_names) > 1:
405                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
406                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
407                tau_string += f" in units of {gap} config"
408                if gap > 1:
409                    tau_string += "s"
410                if self.tau_exp[e_name] > 0:
411                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
412                else:
413                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
414                print(tau_string)
415            for e_name in self.cov_names:
416                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
417        if ens_content is True:
418            if len(self.e_names) == 1:
419                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
420            else:
421                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
422            my_string_list = []
423            for key, value in sorted(self.e_content.items()):
424                if key not in self.covobs:
425                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
426                    if len(value) == 1:
427                        my_string += f': {self.shape[value[0]]} configurations'
428                        if isinstance(self.idl[value[0]], range):
429                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
430                        else:
431                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
432                    else:
433                        sublist = []
434                        for v in value:
435                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
436                            my_substring += f': {self.shape[v]} configurations'
437                            if isinstance(self.idl[v], range):
438                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
439                            else:
440                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
441                            sublist.append(my_substring)
442
443                        my_string += '\n' + '\n'.join(sublist)
444                else:
445                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
446                my_string_list.append(my_string)
447            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight):
449    def reweight(self, weight):
450        """Reweight the obs with given rewighting factors.
451
452        Parameters
453        ----------
454        weight : Obs
455            Reweighting factor. An Observable that has to be defined on a superset of the
456            configurations in obs[i].idl for all i.
457        all_configs : bool
458            if True, the reweighted observables are normalized by the average of
459            the reweighting factor on all configurations in weight.idl and not
460            on the configurations in obs[i].idl. Default False.
461        """
462        return reweight(weight, [self])[0]

Reweight the obs with given rewighting factors.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def is_zero_within_error(self, sigma=1):
464    def is_zero_within_error(self, sigma=1):
465        """Checks whether the observable is zero within 'sigma' standard errors.
466
467        Parameters
468        ----------
469        sigma : int
470            Number of standard errors used for the check.
471
472        Works only properly when the gamma method was run.
473        """
474        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Works only properly when the gamma method was run.
def is_zero(self, atol=1e-10):
476    def is_zero(self, atol=1e-10):
477        """Checks whether the observable is zero within a given tolerance.
478
479        Parameters
480        ----------
481        atol : float
482            Absolute tolerance (for details see numpy documentation).
483        """
484        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
486    def plot_tauint(self, save=None):
487        """Plot integrated autocorrelation time for each ensemble.
488
489        Parameters
490        ----------
491        save : str
492            saves the figure to a file named 'save' if.
493        """
494        if not hasattr(self, 'e_dvalue'):
495            raise Exception('Run the gamma method first.')
496
497        for e, e_name in enumerate(self.mc_names):
498            fig = plt.figure()
499            plt.xlabel(r'$W$')
500            plt.ylabel(r'$\tau_\mathrm{int}$')
501            length = int(len(self.e_n_tauint[e_name]))
502            if self.tau_exp[e_name] > 0:
503                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
504                x_help = np.arange(2 * self.tau_exp[e_name])
505                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
506                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
507                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
508                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
509                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
510                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
511                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
512            else:
513                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
514                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
515
516            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
517            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
518            plt.legend()
519            plt.xlim(-0.5, xmax)
520            ylim = plt.ylim()
521            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
522            plt.draw()
523            if save:
524                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rho(self, save=None):
526    def plot_rho(self, save=None):
527        """Plot normalized autocorrelation function time for each ensemble.
528
529        Parameters
530        ----------
531        save : str
532            saves the figure to a file named 'save' if.
533        """
534        if not hasattr(self, 'e_dvalue'):
535            raise Exception('Run the gamma method first.')
536        for e, e_name in enumerate(self.mc_names):
537            fig = plt.figure()
538            plt.xlabel('W')
539            plt.ylabel('rho')
540            length = int(len(self.e_drho[e_name]))
541            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
542            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
543            if self.tau_exp[e_name] > 0:
544                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
545                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
546                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
547                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
548            else:
549                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
550                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
551            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
552            plt.xlim(-0.5, xmax)
553            plt.draw()
554            if save:
555                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rep_dist(self):
557    def plot_rep_dist(self):
558        """Plot replica distribution for each ensemble with more than one replicum."""
559        if not hasattr(self, 'e_dvalue'):
560            raise Exception('Run the gamma method first.')
561        for e, e_name in enumerate(self.mc_names):
562            if len(self.e_content[e_name]) == 1:
563                print('No replica distribution for a single replicum (', e_name, ')')
564                continue
565            r_length = []
566            sub_r_mean = 0
567            for r, r_name in enumerate(self.e_content[e_name]):
568                r_length.append(len(self.deltas[r_name]))
569                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
570            e_N = np.sum(r_length)
571            sub_r_mean /= e_N
572            arr = np.zeros(len(self.e_content[e_name]))
573            for r, r_name in enumerate(self.e_content[e_name]):
574                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
575            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
576            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
577            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
579    def plot_history(self, expand=True):
580        """Plot derived Monte Carlo history for each ensemble
581
582        Parameters
583        ----------
584        expand : bool
585            show expanded history for irregular Monte Carlo chains (default: True).
586        """
587        for e, e_name in enumerate(self.mc_names):
588            plt.figure()
589            r_length = []
590            tmp = []
591            tmp_expanded = []
592            for r, r_name in enumerate(self.e_content[e_name]):
593                tmp.append(self.deltas[r_name] + self.r_values[r_name])
594                if expand:
595                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
596                    r_length.append(len(tmp_expanded[-1]))
597                else:
598                    r_length.append(len(tmp[-1]))
599            e_N = np.sum(r_length)
600            x = np.arange(e_N)
601            y_test = np.concatenate(tmp, axis=0)
602            if expand:
603                y = np.concatenate(tmp_expanded, axis=0)
604            else:
605                y = y_test
606            plt.errorbar(x, y, fmt='.', markersize=3)
607            plt.xlim(-0.5, e_N - 0.5)
608            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
609            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
611    def plot_piechart(self, save=None):
612        """Plot piechart which shows the fractional contribution of each
613        ensemble to the error and returns a dictionary containing the fractions.
614
615        Parameters
616        ----------
617        save : str
618            saves the figure to a file named 'save' if.
619        """
620        if not hasattr(self, 'e_dvalue'):
621            raise Exception('Run the gamma method first.')
622        if np.isclose(0.0, self._dvalue, atol=1e-15):
623            raise Exception('Error is 0.0')
624        labels = self.e_names
625        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
626        fig1, ax1 = plt.subplots()
627        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
628        ax1.axis('equal')
629        plt.draw()
630        if save:
631            fig1.savefig(save)
632
633        return dict(zip(self.e_names, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
635    def dump(self, filename, datatype="json.gz", description="", **kwargs):
636        """Dump the Obs to a file 'name' of chosen format.
637
638        Parameters
639        ----------
640        filename : str
641            name of the file to be saved.
642        datatype : str
643            Format of the exported file. Supported formats include
644            "json.gz" and "pickle"
645        description : str
646            Description for output file, only relevant for json.gz format.
647        path : str
648            specifies a custom path for the file (default '.')
649        """
650        if 'path' in kwargs:
651            file_name = kwargs.get('path') + '/' + filename
652        else:
653            file_name = filename
654
655        if datatype == "json.gz":
656            from .input.json import dump_to_json
657            dump_to_json([self], file_name, description=description)
658        elif datatype == "pickle":
659            with open(file_name + '.p', 'wb') as fb:
660                pickle.dump(self, fb)
661        else:
662            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
def export_jackknife(self):
664    def export_jackknife(self):
665        """Export jackknife samples from the Obs
666
667        Returns
668        -------
669        numpy.ndarray
670            Returns a numpy array of length N + 1 where N is the number of samples
671            for the given ensemble and replicum. The zeroth entry of the array contains
672            the mean value of the Obs, entries 1 to N contain the N jackknife samples
673            derived from the Obs. The current implementation only works for observables
674            defined on exactly one ensemble and replicum. The derived jackknife samples
675            should agree with samples from a full jackknife analysis up to O(1/N).
676        """
677
678        if len(self.names) != 1:
679            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
680
681        name = self.names[0]
682        full_data = self.deltas[name] + self.r_values[name]
683        n = full_data.size
684        mean = self.value
685        tmp_jacks = np.zeros(n + 1)
686        tmp_jacks[0] = mean
687        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
688        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
def sqrt(self):
825    def sqrt(self):
826        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
828    def log(self):
829        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
831    def exp(self):
832        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
834    def sin(self):
835        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
837    def cos(self):
838        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
840    def tan(self):
841        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
843    def arcsin(self):
844        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
846    def arccos(self):
847        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
849    def arctan(self):
850        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
852    def sinh(self):
853        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
855    def cosh(self):
856        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
858    def tanh(self):
859        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
861    def arcsinh(self):
862        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
864    def arccosh(self):
865        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
867    def arctanh(self):
868        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
class CObs:
871class CObs:
872    """Class for a complex valued observable."""
873    __slots__ = ['_real', '_imag', 'tag']
874
875    def __init__(self, real, imag=0.0):
876        self._real = real
877        self._imag = imag
878        self.tag = None
879
880    @property
881    def real(self):
882        return self._real
883
884    @property
885    def imag(self):
886        return self._imag
887
888    def gamma_method(self, **kwargs):
889        """Executes the gamma_method for the real and the imaginary part."""
890        if isinstance(self.real, Obs):
891            self.real.gamma_method(**kwargs)
892        if isinstance(self.imag, Obs):
893            self.imag.gamma_method(**kwargs)
894
895    def is_zero(self):
896        """Checks whether both real and imaginary part are zero within machine precision."""
897        return self.real == 0.0 and self.imag == 0.0
898
899    def conjugate(self):
900        return CObs(self.real, -self.imag)
901
902    def __add__(self, other):
903        if isinstance(other, np.ndarray):
904            return other + self
905        elif hasattr(other, 'real') and hasattr(other, 'imag'):
906            return CObs(self.real + other.real,
907                        self.imag + other.imag)
908        else:
909            return CObs(self.real + other, self.imag)
910
911    def __radd__(self, y):
912        return self + y
913
914    def __sub__(self, other):
915        if isinstance(other, np.ndarray):
916            return -1 * (other - self)
917        elif hasattr(other, 'real') and hasattr(other, 'imag'):
918            return CObs(self.real - other.real, self.imag - other.imag)
919        else:
920            return CObs(self.real - other, self.imag)
921
922    def __rsub__(self, other):
923        return -1 * (self - other)
924
925    def __mul__(self, other):
926        if isinstance(other, np.ndarray):
927            return other * self
928        elif hasattr(other, 'real') and hasattr(other, 'imag'):
929            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
930                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
931                                               [self.real, other.real, self.imag, other.imag],
932                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
933                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
934                                               [self.real, other.real, self.imag, other.imag],
935                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
936            elif getattr(other, 'imag', 0) != 0:
937                return CObs(self.real * other.real - self.imag * other.imag,
938                            self.imag * other.real + self.real * other.imag)
939            else:
940                return CObs(self.real * other.real, self.imag * other.real)
941        else:
942            return CObs(self.real * other, self.imag * other)
943
944    def __rmul__(self, other):
945        return self * other
946
947    def __truediv__(self, other):
948        if isinstance(other, np.ndarray):
949            return 1 / (other / self)
950        elif hasattr(other, 'real') and hasattr(other, 'imag'):
951            r = other.real ** 2 + other.imag ** 2
952            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
953        else:
954            return CObs(self.real / other, self.imag / other)
955
956    def __rtruediv__(self, other):
957        r = self.real ** 2 + self.imag ** 2
958        if hasattr(other, 'real') and hasattr(other, 'imag'):
959            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
960        else:
961            return CObs(self.real * other / r, -self.imag * other / r)
962
963    def __abs__(self):
964        return np.sqrt(self.real**2 + self.imag**2)
965
966    def __pos__(self):
967        return self
968
969    def __neg__(self):
970        return -1 * self
971
972    def __eq__(self, other):
973        return self.real == other.real and self.imag == other.imag
974
975    def __str__(self):
976        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
977
978    def __repr__(self):
979        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
875    def __init__(self, real, imag=0.0):
876        self._real = real
877        self._imag = imag
878        self.tag = None
def gamma_method(self, **kwargs):
888    def gamma_method(self, **kwargs):
889        """Executes the gamma_method for the real and the imaginary part."""
890        if isinstance(self.real, Obs):
891            self.real.gamma_method(**kwargs)
892        if isinstance(self.imag, Obs):
893            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
895    def is_zero(self):
896        """Checks whether both real and imaginary part are zero within machine precision."""
897        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
899    def conjugate(self):
900        return CObs(self.real, -self.imag)
def derived_observable(func, data, array_mode=False, **kwargs):
1112def derived_observable(func, data, array_mode=False, **kwargs):
1113    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1114
1115    Parameters
1116    ----------
1117    func : object
1118        arbitrary function of the form func(data, **kwargs). For the
1119        automatic differentiation to work, all numpy functions have to have
1120        the autograd wrapper (use 'import autograd.numpy as anp').
1121    data : list
1122        list of Obs, e.g. [obs1, obs2, obs3].
1123    num_grad : bool
1124        if True, numerical derivatives are used instead of autograd
1125        (default False). To control the numerical differentiation the
1126        kwargs of numdifftools.step_generators.MaxStepGenerator
1127        can be used.
1128    man_grad : list
1129        manually supply a list or an array which contains the jacobian
1130        of func. Use cautiously, supplying the wrong derivative will
1131        not be intercepted.
1132
1133    Notes
1134    -----
1135    For simple mathematical operations it can be practical to use anonymous
1136    functions. For the ratio of two observables one can e.g. use
1137
1138    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1139    """
1140
1141    data = np.asarray(data)
1142    raveled_data = data.ravel()
1143
1144    # Workaround for matrix operations containing non Obs data
1145    if not all(isinstance(x, Obs) for x in raveled_data):
1146        for i in range(len(raveled_data)):
1147            if isinstance(raveled_data[i], (int, float)):
1148                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1149
1150    allcov = {}
1151    for o in raveled_data:
1152        for name in o.cov_names:
1153            if name in allcov:
1154                if not np.allclose(allcov[name], o.covobs[name].cov):
1155                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1156            else:
1157                allcov[name] = o.covobs[name].cov
1158
1159    n_obs = len(raveled_data)
1160    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1161    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1162    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1163
1164    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1165
1166    if data.ndim == 1:
1167        values = np.array([o.value for o in data])
1168    else:
1169        values = np.vectorize(lambda x: x.value)(data)
1170
1171    new_values = func(values, **kwargs)
1172
1173    multi = int(isinstance(new_values, np.ndarray))
1174
1175    new_r_values = {}
1176    new_idl_d = {}
1177    for name in new_sample_names:
1178        idl = []
1179        tmp_values = np.zeros(n_obs)
1180        for i, item in enumerate(raveled_data):
1181            tmp_values[i] = item.r_values.get(name, item.value)
1182            tmp_idl = item.idl.get(name)
1183            if tmp_idl is not None:
1184                idl.append(tmp_idl)
1185        if multi > 0:
1186            tmp_values = np.array(tmp_values).reshape(data.shape)
1187        new_r_values[name] = func(tmp_values, **kwargs)
1188        new_idl_d[name] = _merge_idx(idl)
1189
1190    if 'man_grad' in kwargs:
1191        deriv = np.asarray(kwargs.get('man_grad'))
1192        if new_values.shape + data.shape != deriv.shape:
1193            raise Exception('Manual derivative does not have correct shape.')
1194    elif kwargs.get('num_grad') is True:
1195        if multi > 0:
1196            raise Exception('Multi mode currently not supported for numerical derivative')
1197        options = {
1198            'base_step': 0.1,
1199            'step_ratio': 2.5}
1200        for key in options.keys():
1201            kwarg = kwargs.get(key)
1202            if kwarg is not None:
1203                options[key] = kwarg
1204        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1205        if tmp_df.size == 1:
1206            deriv = np.array([tmp_df.real])
1207        else:
1208            deriv = tmp_df.real
1209    else:
1210        deriv = jacobian(func)(values, **kwargs)
1211
1212    final_result = np.zeros(new_values.shape, dtype=object)
1213
1214    if array_mode is True:
1215
1216        class _Zero_grad():
1217            def __init__(self, N):
1218                self.grad = np.zeros((N, 1))
1219
1220        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1221        d_extracted = {}
1222        g_extracted = {}
1223        for name in new_sample_names:
1224            d_extracted[name] = []
1225            ens_length = len(new_idl_d[name])
1226            for i_dat, dat in enumerate(data):
1227                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1228        for name in new_cov_names:
1229            g_extracted[name] = []
1230            zero_grad = _Zero_grad(new_covobs_lengths[name])
1231            for i_dat, dat in enumerate(data):
1232                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1233
1234    for i_val, new_val in np.ndenumerate(new_values):
1235        new_deltas = {}
1236        new_grad = {}
1237        if array_mode is True:
1238            for name in new_sample_names:
1239                ens_length = d_extracted[name][0].shape[-1]
1240                new_deltas[name] = np.zeros(ens_length)
1241                for i_dat, dat in enumerate(d_extracted[name]):
1242                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1243            for name in new_cov_names:
1244                new_grad[name] = 0
1245                for i_dat, dat in enumerate(g_extracted[name]):
1246                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1247        else:
1248            for j_obs, obs in np.ndenumerate(data):
1249                for name in obs.names:
1250                    if name in obs.cov_names:
1251                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1252                    else:
1253                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1254
1255        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1256
1257        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1258            raise Exception('The same name has been used for deltas and covobs!')
1259        new_samples = []
1260        new_means = []
1261        new_idl = []
1262        new_names_obs = []
1263        for name in new_names:
1264            if name not in new_covobs:
1265                new_samples.append(new_deltas[name])
1266                new_idl.append(new_idl_d[name])
1267                new_means.append(new_r_values[name][i_val])
1268                new_names_obs.append(name)
1269        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1270        for name in new_covobs:
1271            final_result[i_val].names.append(name)
1272        final_result[i_val]._covobs = new_covobs
1273        final_result[i_val]._value = new_val
1274        final_result[i_val].reweighted = reweighted
1275
1276    if multi == 0:
1277        final_result = final_result.item()
1278
1279    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs):
1316def reweight(weight, obs, **kwargs):
1317    """Reweight a list of observables.
1318
1319    Parameters
1320    ----------
1321    weight : Obs
1322        Reweighting factor. An Observable that has to be defined on a superset of the
1323        configurations in obs[i].idl for all i.
1324    obs : list
1325        list of Obs, e.g. [obs1, obs2, obs3].
1326    all_configs : bool
1327        if True, the reweighted observables are normalized by the average of
1328        the reweighting factor on all configurations in weight.idl and not
1329        on the configurations in obs[i].idl. Default False.
1330    """
1331    result = []
1332    for i in range(len(obs)):
1333        if len(obs[i].cov_names):
1334            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1335        if not set(obs[i].names).issubset(weight.names):
1336            raise Exception('Error: Ensembles do not fit')
1337        for name in obs[i].names:
1338            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1339                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1340        new_samples = []
1341        w_deltas = {}
1342        for name in sorted(obs[i].names):
1343            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1344            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1345        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1346
1347        if kwargs.get('all_configs'):
1348            new_weight = weight
1349        else:
1350            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1351
1352        result.append(tmp_obs / new_weight)
1353        result[-1].reweighted = True
1354
1355    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def correlate(obs_a, obs_b):
1358def correlate(obs_a, obs_b):
1359    """Correlate two observables.
1360
1361    Parameters
1362    ----------
1363    obs_a : Obs
1364        First observable
1365    obs_b : Obs
1366        Second observable
1367
1368    Notes
1369    -----
1370    Keep in mind to only correlate primary observables which have not been reweighted
1371    yet. The reweighting has to be applied after correlating the observables.
1372    Currently only works if ensembles are identical (this is not strictly necessary).
1373    """
1374
1375    if sorted(obs_a.names) != sorted(obs_b.names):
1376        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1377    if len(obs_a.cov_names) or len(obs_b.cov_names):
1378        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1379    for name in obs_a.names:
1380        if obs_a.shape[name] != obs_b.shape[name]:
1381            raise Exception('Shapes of ensemble', name, 'do not fit')
1382        if obs_a.idl[name] != obs_b.idl[name]:
1383            raise Exception('idl of ensemble', name, 'do not fit')
1384
1385    if obs_a.reweighted is True:
1386        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1387    if obs_b.reweighted is True:
1388        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1389
1390    new_samples = []
1391    new_idl = []
1392    for name in sorted(obs_a.names):
1393        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1394        new_idl.append(obs_a.idl[name])
1395
1396    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1397    o.reweighted = obs_a.reweighted or obs_b.reweighted
1398    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1401def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1402    r'''Calculates the error covariance matrix of a set of observables.
1403
1404    WARNING: This function should be used with care, especially for observables with support on multiple
1405             ensembles with differing autocorrelations. See the notes below for details.
1406
1407    The gamma method has to be applied first to all observables.
1408
1409    Parameters
1410    ----------
1411    obs : list or numpy.ndarray
1412        List or one dimensional array of Obs
1413    visualize : bool
1414        If True plots the corresponding normalized correlation matrix (default False).
1415    correlation : bool
1416        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1417    smooth : None or int
1418        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1419        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1420        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1421        small ones.
1422
1423    Notes
1424    -----
1425    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1426    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1427    in the absence of autocorrelation.
1428    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1429    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1430    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1431    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1432    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1433    '''
1434
1435    length = len(obs)
1436
1437    max_samples = np.max([o.N for o in obs])
1438    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1439        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1440
1441    cov = np.zeros((length, length))
1442    for i in range(length):
1443        for j in range(i, length):
1444            cov[i, j] = _covariance_element(obs[i], obs[j])
1445    cov = cov + cov.T - np.diag(np.diag(cov))
1446
1447    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1448
1449    if isinstance(smooth, int):
1450        corr = _smooth_eigenvalues(corr, smooth)
1451
1452    if visualize:
1453        plt.matshow(corr, vmin=-1, vmax=1)
1454        plt.set_cmap('RdBu')
1455        plt.colorbar()
1456        plt.draw()
1457
1458    if correlation is True:
1459        return corr
1460
1461    errors = [o.dvalue for o in obs]
1462    cov = np.diag(errors) @ corr @ np.diag(errors)
1463
1464    eigenvalues = np.linalg.eigh(cov)[0]
1465    if not np.all(eigenvalues >= 0):
1466        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1467
1468    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None):
1548def import_jackknife(jacks, name, idl=None):
1549    """Imports jackknife samples and returns an Obs
1550
1551    Parameters
1552    ----------
1553    jacks : numpy.ndarray
1554        numpy array containing the mean value as zeroth entry and
1555        the N jackknife samples as first to Nth entry.
1556    name : str
1557        name of the ensemble the samples are defined on.
1558    """
1559    length = len(jacks) - 1
1560    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1561    samples = jacks[1:] @ prj
1562    mean = np.mean(samples)
1563    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1564    new_obs._value = jacks[0]
1565    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
def merge_obs(list_of_obs):
1568def merge_obs(list_of_obs):
1569    """Combine all observables in list_of_obs into one new observable
1570
1571    Parameters
1572    ----------
1573    list_of_obs : list
1574        list of the Obs object to be combined
1575
1576    Notes
1577    -----
1578    It is not possible to combine obs which are based on the same replicum
1579    """
1580    replist = [item for obs in list_of_obs for item in obs.names]
1581    if (len(replist) == len(set(replist))) is False:
1582        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1583    if any([len(o.cov_names) for o in list_of_obs]):
1584        raise Exception('Not possible to merge data that contains covobs!')
1585    new_dict = {}
1586    idl_dict = {}
1587    for o in list_of_obs:
1588        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1589                        for key in set(o.deltas) | set(o.r_values)})
1590        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1591
1592    names = sorted(new_dict.keys())
1593    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1594    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1595    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None):
1598def cov_Obs(means, cov, name, grad=None):
1599    """Create an Obs based on mean(s) and a covariance matrix
1600
1601    Parameters
1602    ----------
1603    mean : list of floats or float
1604        N mean value(s) of the new Obs
1605    cov : list or array
1606        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1607    name : str
1608        identifier for the covariance matrix
1609    grad : list or array
1610        Gradient of the Covobs wrt. the means belonging to cov.
1611    """
1612
1613    def covobs_to_obs(co):
1614        """Make an Obs out of a Covobs
1615
1616        Parameters
1617        ----------
1618        co : Covobs
1619            Covobs to be embedded into the Obs
1620        """
1621        o = Obs([], [], means=[])
1622        o._value = co.value
1623        o.names.append(co.name)
1624        o._covobs[co.name] = co
1625        o._dvalue = np.sqrt(co.errsq())
1626        return o
1627
1628    ol = []
1629    if isinstance(means, (float, int)):
1630        means = [means]
1631
1632    for i in range(len(means)):
1633        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1634    if ol[0].covobs[name].N != len(means):
1635        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1636    if len(ol) == 1:
1637        return ol[0]
1638    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • mean (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.