pyerrors.obs

import warnings
import hashlib
import pickle
from math import gcd
from functools import reduce
import numpy as np
import autograd.numpy as anp  # Thinly-wrapped numpy
from autograd import jacobian
import matplotlib.pyplot as plt
from scipy.stats import skew, skewtest, kurtosis, kurtosistest
import numdifftools as nd
from itertools import groupby
from .covobs import Covobs

# Improve print output of numpy.ndarrays containing Obs objects.
np.set_printoptions(formatter={'object': lambda x: str(x)})


class Obs:
    """Class for a general observable.

    Instances of Obs are the basic objects of a pyerrors error analysis.
    They are initialized with a list which contains arrays of samples for
    different ensembles/replica and another list of same length which contains
    the names of the ensembles/replica. Mathematical operations can be
    performed on instances. The result is another instance of Obs. The error
    of an instance can be computed with the gamma_method. The class also
    contains additional methods for output and visualization of the error
    calculation.

    Attributes
    ----------
    S_global : float
        Standard value for S (default 2.0)
    S_dict : dict
        Dictionary for S values. If an entry for a given ensemble
        exists, it overwrites the standard value for that ensemble.
    tau_exp_global : float
        Standard value for tau_exp (default 0.0)
    tau_exp_dict : dict
        Dictionary for tau_exp values. If an entry for a given ensemble
        exists, it overwrites the standard value for that ensemble.
    N_sigma_global : float
        Standard value for N_sigma (default 1.0)
    N_sigma_dict : dict
        Dictionary for N_sigma values. If an entry for a given ensemble
        exists, it overwrites the standard value for that ensemble.
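
    Examples
    --------
    A minimal sketch of typical usage; the ensemble name 'ensemble1' and the
    sample values are placeholders:

    import numpy as np
    import pyerrors as pe
    samples = np.random.normal(1.0, 0.1, 100)
    my_obs = pe.Obs([samples], ['ensemble1'])
    my_obs.gamma_method()  # estimate the error from the autocorrelation
    pe.Obs.S_dict['ensemble1'] = 3.0  # per-ensemble default for S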
    """
    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
                 'idl', 'tag', '_covobs', '__dict__']

    S_global = 2.0
    S_dict = {}
    tau_exp_global = 0.0
    tau_exp_dict = {}
    N_sigma_global = 1.0
    N_sigma_dict = {}

    def __init__(self, samples, names, idl=None, **kwargs):
        """ Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined
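
        Examples
        --------
        A sketch of the idl parameter; the sample array and the measurement
        range are placeholders:

        # Samples measured on every second configuration from 20 to 218:
        obs = Obs([samples], ['ensemble1'], idl=[range(20, 220, 2)])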
        """

        if kwargs.get("means") is None and len(samples):
            if len(samples) != len(names):
                raise ValueError('Length of samples and names incompatible.')
            if idl is not None:
                if len(idl) != len(names):
                    raise ValueError('Length of idl incompatible with samples and names.')
            name_length = len(names)
            if name_length > 1:
                if name_length != len(set(names)):
                    raise ValueError('Names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
            if min(len(x) for x in samples) <= 4:
                raise ValueError('Samples have to have at least 5 entries.')

        self.names = sorted(names)
        self.shape = {}
        self.r_values = {}
        self.deltas = {}
        self._covobs = {}

        self._value = 0
        self.N = 0
        self.idl = {}
        if idl is not None:
            for name, idx in sorted(zip(names, idl)):
                if isinstance(idx, range):
                    self.idl[name] = idx
                elif isinstance(idx, (list, np.ndarray)):
                    dc = np.unique(np.diff(idx))
                    if np.any(dc < 0):
                        raise ValueError("Unsorted idx for idl[%s]" % (name))
                    if len(dc) == 1:
                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
                    else:
                        self.idl[name] = list(idx)
                else:
                    raise TypeError('incompatible type for idl[%s].' % (name))
        else:
            for name, sample in sorted(zip(names, samples)):
                self.idl[name] = range(1, len(sample) + 1)

        if kwargs.get("means") is not None:
            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                self.r_values[name] = mean
                self.deltas[name] = sample
        else:
            for name, sample in sorted(zip(names, samples)):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                if len(sample) != self.shape[name]:
                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
                self.r_values[name] = np.mean(sample)
                self.deltas[name] = sample - self.r_values[name]
                self._value += self.shape[name] * self.r_values[name]
            self._value /= self.N

        self._dvalue = 0.0
        self.ddvalue = 0.0
        self.reweighted = False

        self.tag = None

    @property
    def value(self):
        return self._value

    @property
    def dvalue(self):
        return self._dvalue

    @property
    def e_names(self):
        return sorted(set([o.split('|')[0] for o in self.names]))

    @property
    def cov_names(self):
        return sorted(set([o for o in self.covobs.keys()]))

    @property
    def mc_names(self):
        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))

    @property
    def e_content(self):
        res = {}
        for e, e_name in enumerate(self.e_names):
            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
            if e_name in self.names:
                res[e_name].append(e_name)
        return res

    @property
    def covobs(self):
        return self._covobs

    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
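
        Examples
        --------
        A sketch of typical calls; my_obs and the ensemble name 'ensemble1'
        are placeholders:

        my_obs.gamma_method()           # automatic windowing with S=2.0
        my_obs.gamma_method(S=3.0)      # larger summation window
        my_obs.gamma_method(tau_exp=5)  # attach an exponential tail
        my_obs.e_tauint['ensemble1']    # integrated autocorrelation time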
        """

        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            gapsize = _determine_gap(self, e_content, e_name)

            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)

            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                tmp = (self.e_rho[e_name][i + 1:w_max]
                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1],
                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            if self.tau_exp[e_name] > 0:
                _compute_drho(1)
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute value makes sure that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            _compute_drho(n)
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

    gm = gamma_method

    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
           idx is assumed to be a contiguous range (possibly with a stepsize != 1)

        Parameters
        ----------
        deltas : list
            List of fluctuations
        idx : list
            List or range of configurations on which the deltas are defined.
        shape : int
            Number of configurations in idx.
        w_max : int
            Upper bound for the summation window.
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function.
        gapsize : int
            The target distance between two configurations. If longer distances
            are found in idx, the data is expanded.
        """
        gamma = np.zeros(w_max)
        deltas = _expand_deltas(deltas, idx, shape, gapsize)
        new_shape = len(deltas)
        if fft:
            max_gamma = min(new_shape, w_max)
            # The padding for the fft has to be even
            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
        else:
            for n in range(w_max):
                if new_shape - n >= 0:
                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])

        return gamma

    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if true.
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            e_content = self.e_content
            for e_name in self.mc_names:
                gap = _determine_gap(self, e_content, e_name)

                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
                tau_string += f" in units of {gap} config"
                if gap > 1:
                    tau_string += "s"
                if self.tau_exp[e_name] > 0:
                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
                else:
                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
                print(tau_string)
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
                    else:
                        sublist = []
                        for v in value:
                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))

    def reweight(self, weight):
        """Reweight the obs with given reweighting factors.

        Parameters
        ----------
        weight : Obs
            Reweighting factor. An Observable that has to be defined on a superset of the
            configurations in obs[i].idl for all i.
        all_configs : bool
            if True, the reweighted observables are normalized by the average of
            the reweighting factor on all configurations in weight.idl and not
            on the configurations in obs[i].idl. Default False.
        """
        return reweight(weight, [self])[0]

    def is_zero_within_error(self, sigma=1):
        """Checks whether the observable is zero within 'sigma' standard errors.

        Parameters
        ----------
        sigma : int
            Number of standard errors used for the check.

        Only works properly when the gamma method was run.
        """
        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

    def is_zero(self, atol=1e-10):
        """Checks whether the observable is zero within a given tolerance.

        Parameters
        ----------
        atol : float
            Absolute tolerance (for details see numpy documentation).
        """
        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

    def plot_tauint(self, save=None):
        """Plot integrated autocorrelation time for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if specified.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')

        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel(r'$W$')
            plt.ylabel(r'$\tau_\mathrm{int}$')
            length = int(len(self.e_n_tauint[e_name]))
            if self.tau_exp[e_name] > 0:
                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
                x_help = np.arange(2 * self.tau_exp[e_name])
                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
            else:
                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
            plt.legend()
            plt.xlim(-0.5, xmax)
            ylim = plt.ylim()
            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rho(self, save=None):
        """Plot the normalized autocorrelation function for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if specified.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel('W')
            plt.ylabel('rho')
            length = int(len(self.e_drho[e_name]))
            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
            if self.tau_exp[e_name] > 0:
                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
            else:
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
            plt.xlim(-0.5, xmax)
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rep_dist(self):
        """Plot replica distribution for each ensemble with more than one replicum."""
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            if len(self.e_content[e_name]) == 1:
                print('No replica distribution for a single replicum (', e_name, ')')
                continue
            r_length = []
            sub_r_mean = 0
            for r, r_name in enumerate(self.e_content[e_name]):
                r_length.append(len(self.deltas[r_name]))
                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
            e_N = np.sum(r_length)
            sub_r_mean /= e_N
            arr = np.zeros(len(self.e_content[e_name]))
            for r, r_name in enumerate(self.e_content[e_name]):
                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
            plt.draw()

    def plot_history(self, expand=True):
        """Plot derived Monte Carlo history for each ensemble

        Parameters
        ----------
        expand : bool
            show expanded history for irregular Monte Carlo chains (default: True).
        """
        for e, e_name in enumerate(self.mc_names):
            plt.figure()
            r_length = []
            tmp = []
            tmp_expanded = []
            for r, r_name in enumerate(self.e_content[e_name]):
                tmp.append(self.deltas[r_name] + self.r_values[r_name])
                if expand:
                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
                    r_length.append(len(tmp_expanded[-1]))
                else:
                    r_length.append(len(tmp[-1]))
            e_N = np.sum(r_length)
            x = np.arange(e_N)
            y_test = np.concatenate(tmp, axis=0)
            if expand:
                y = np.concatenate(tmp_expanded, axis=0)
            else:
                y = y_test
            plt.errorbar(x, y, fmt='.', markersize=3)
            plt.xlim(-0.5, e_N - 0.5)
            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
            plt.draw()

    def plot_piechart(self, save=None):
        """Plot a pie chart which shows the fractional contribution of each
        ensemble to the error and return a dictionary containing the fractions.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if specified.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        if np.isclose(0.0, self._dvalue, atol=1e-15):
            raise Exception('Error is 0.0')
        labels = self.e_names
        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
        fig1, ax1 = plt.subplots()
        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
        ax1.axis('equal')
        plt.draw()
        if save:
            fig1.savefig(save)

        return dict(zip(self.e_names, sizes))

    def dump(self, filename, datatype="json.gz", description="", **kwargs):
        """Dump the Obs to a file 'name' of chosen format.

        Parameters
        ----------
        filename : str
            name of the file to be saved.
        datatype : str
            Format of the exported file. Supported formats include
            "json.gz" and "pickle"
        description : str
            Description for output file, only relevant for json.gz format.
        path : str
            specifies a custom path for the file (default '.')
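
        Examples
        --------
        A sketch; the file name, description and path are placeholders:

        my_obs.dump("measurement", datatype="json.gz", description="test run")
        my_obs.dump("measurement", datatype="pickle", path="/tmp")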
        """
        if 'path' in kwargs:
            file_name = kwargs.get('path') + '/' + filename
        else:
            file_name = filename

        if datatype == "json.gz":
            from .input.json import dump_to_json
            dump_to_json([self], file_name, description=description)
        elif datatype == "pickle":
            with open(file_name + '.p', 'wb') as fb:
                pickle.dump(self, fb)
        else:
            raise Exception("Unknown datatype " + str(datatype))

    def export_jackknife(self):
        """Export jackknife samples from the Obs

        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length N + 1 where N is the number of samples
            for the given ensemble and replicum. The zeroth entry of the array contains
            the mean value of the Obs, entries 1 to N contain the N jackknife samples
            derived from the Obs. The current implementation only works for observables
            defined on exactly one ensemble and replicum. The derived jackknife samples
            should agree with samples from a full jackknife analysis up to O(1/N).
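
        Examples
        --------
        A sketch; my_obs is assumed to be defined on a single replicum with
        N samples:

        jacks = my_obs.export_jackknife()
        jacks[0]   # mean value of the Obs
        jacks[1:]  # the N jackknife samples (n * mean - data) / (n - 1)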
        """

        if len(self.names) != 1:
            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")

        name = self.names[0]
        full_data = self.deltas[name] + self.r_values[name]
        n = full_data.size
        mean = self.value
        tmp_jacks = np.zeros(n + 1)
        tmp_jacks[0] = mean
        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
        return tmp_jacks

    def __float__(self):
        return float(self.value)

    def __repr__(self):
        return 'Obs[' + str(self) + ']'

    def __str__(self):
        return _format_uncertainty(self.value, self._dvalue)

    def __hash__(self):
        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
        hash_tuple += tuple([o.encode() for o in self.names])
        m = hashlib.md5()
        [m.update(o) for o in hash_tuple]
        return int(m.hexdigest(), 16) & 0xFFFFFFFF

    # Overload comparisons
    def __lt__(self, other):
        return self.value < other

    def __le__(self, other):
        return self.value <= other

    def __gt__(self, other):
        return self.value > other

    def __ge__(self, other):
        return self.value >= other

    def __eq__(self, other):
        return (self - other).is_zero()

    def __ne__(self, other):
        return not (self - other).is_zero()

    # Overload math operations
    def __add__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self + o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])

    def __radd__(self, y):
        return self + y

    def __mul__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self * o for o in y])
            elif isinstance(y, complex):
                return CObs(self * y.real, self * y.imag)
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])

    def __rmul__(self, y):
        return self * y

    def __sub__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self - o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])

    def __rsub__(self, y):
        return -1 * (self - y)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __truediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self / o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])

    def __rtruediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([o / self for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])

    def __pow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [self, y])
        else:
            return derived_observable(lambda x: x[0] ** y, [self])

    def __rpow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [y, self])
        else:
            return derived_observable(lambda x: y ** x[0], [self])

    def __abs__(self):
        return derived_observable(lambda x: anp.abs(x[0]), [self])

    # Overload numpy functions
    def sqrt(self):
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])

    def log(self):
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])

    def exp(self):
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])

    def sin(self):
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])

    def cos(self):
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])

    def tan(self):
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])

    def arcsin(self):
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])

    def arccos(self):
        return derived_observable(lambda x: anp.arccos(x[0]), [self])

    def arctan(self):
        return derived_observable(lambda x: anp.arctan(x[0]), [self])

    def sinh(self):
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])

    def cosh(self):
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])

    def tanh(self):
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])

    def arcsinh(self):
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])

    def arccosh(self):
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])

    def arctanh(self):
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])


class CObs:
    """Class for a complex valued observable."""
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'


def _format_uncertainty(value, dvalue):
    """Creates a string of a value and its error in parenthesis notation, e.g., 13.02(45)
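
    Examples
    --------
    A doctest illustrating the notation implemented below:

    >>> _format_uncertainty(13.02, 0.45)
    '13.02(45)'
    """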
    if dvalue == 0.0:
        return str(value)
    fexp = np.floor(np.log10(dvalue))
    if fexp < 0.0:
        return '{:{form}}({:2.0f})'.format(value, dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
    elif fexp == 0.0:
        return '{:.1f}({:1.1f})'.format(value, dvalue)
    else:
        return '{:.0f}({:2.0f})'.format(value, dvalue)


def _expand_deltas(deltas, idx, shape, gapsize):
    """Expand deltas defined on idx to a regular range with spacing gapsize between two
       configurations and where holes are filled by 0.
       If idx is of type range, the deltas are returned unchanged if idx.step == gapsize.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
    shape : int
        Number of configs in idx.
    gapsize : int
        The target distance between two configurations. If longer distances
        are found in idx, the data is expanded.
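
    Examples
    --------
    A doctest sketching the expansion; holes are filled with zeros:

    >>> _expand_deltas(np.array([1., 2., 3.]), [1, 3, 5], 3, 1)
    array([1., 0., 2., 0., 3.])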
    """
    if isinstance(idx, range):
        if (idx.step == gapsize):
            return deltas
    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
    for i in range(shape):
        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
    return ret


def _merge_idx(idl):
    """Returns the union of all lists in idl as sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
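
    Examples
    --------
    Two illustrative doctests; lists fall back to a sorted union while
    compatible ranges stay ranges:

    >>> _merge_idx([[1, 2, 3], [3, 4, 5]])
    [1, 2, 3, 4, 5]
    >>> _merge_idx([range(1, 11, 2), range(1, 11, 1)])
    range(1, 11)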
    """

    # Use groupby to efficiently check whether all elements of idl are identical
    try:
        g = groupby(idl)
        if next(g, True) and not next(g, False):
            return idl[0]
    except Exception:
        pass

    if np.all([type(idx) is range for idx in idl]):
        if len(set([idx[0] for idx in idl])) == 1:
            idstart = min([idx.start for idx in idl])
            idstop = max([idx.stop for idx in idl])
            idstep = min([idx.step for idx in idl])
            return range(idstart, idstop, idstep)

    return sorted(set().union(*idl))


def _intersection_idx(idl):
    """Returns the intersection of all lists in idl as sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
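
    Examples
    --------
    >>> _intersection_idx([[1, 2, 3, 4], [2, 4, 6]])
    [2, 4]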
    """

    def _lcm(*args):
        """Returns the lowest common multiple of args.

        From Python 3.9 onwards the math library contains an lcm function."""
        return reduce(lambda a, b: a * b // gcd(a, b), args)

    # Use groupby to efficiently check whether all elements of idl are identical
    try:
        g = groupby(idl)
        if next(g, True) and not next(g, False):
            return idl[0]
    except Exception:
        pass

    if np.all([type(idx) is range for idx in idl]):
        if len(set([idx[0] for idx in idl])) == 1:
            idstart = max([idx.start for idx in idl])
            idstop = min([idx.stop for idx in idl])
            idstep = _lcm(*[idx.step for idx in idl])
            return range(idstart, idstop, idstep)

    return sorted(set.intersection(*[set(o) for o in idl]))


def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
       common divisor of the step sizes is used as new step size.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined.
        Has to be a subset of new_idx and has to be sorted in ascending order.
    shape : list
        Number of configs in idx.
    new_idx : list
        List of configs that defines the new range, has to be sorted in ascending order.
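
    Examples
    --------
    A doctest sketch; note the rescaling of the deltas by len(new_idx) / len(idx):

    >>> _expand_deltas_for_merge(np.array([1., 2.]), [1, 3], 2, [1, 2, 3])
    array([1.5, 0. , 3. ])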
    """

    if type(idx) is range and type(new_idx) is range:
        if idx == new_idx:
            return deltas
    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
    for i in range(shape):
        ret[idx[i] - new_idx[0]] = deltas[i]
    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)


def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
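
    A sketch of the man_grad variant for the same ratio; the two entries are
    the partial derivatives d(x0/x1)/dx0 and d(x0/x1)/dx1:

    new_obs = derived_observable(lambda x, **kwargs: x[0] / x[1], [obs1, obs2],
                                 man_grad=[1 / obs2.value, -obs1.value / obs2.value ** 2])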
1125    """
1126
1127    data = np.asarray(data)
1128    raveled_data = data.ravel()
1129
1130    # Workaround for matrix operations containing non Obs data
1131    if not all(isinstance(x, Obs) for x in raveled_data):
1132        for i in range(len(raveled_data)):
1133            if isinstance(raveled_data[i], (int, float)):
1134                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1135
1136    allcov = {}
1137    for o in raveled_data:
1138        for name in o.cov_names:
1139            if name in allcov:
1140                if not np.allclose(allcov[name], o.covobs[name].cov):
1141                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1142            else:
1143                allcov[name] = o.covobs[name].cov
1144
1145    n_obs = len(raveled_data)
1146    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1147    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1148    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1149
1150    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1151
1152    if data.ndim == 1:
1153        values = np.array([o.value for o in data])
1154    else:
1155        values = np.vectorize(lambda x: x.value)(data)
1156
1157    new_values = func(values, **kwargs)
1158
1159    multi = int(isinstance(new_values, np.ndarray))
1160
1161    new_r_values = {}
1162    new_idl_d = {}
1163    for name in new_sample_names:
1164        idl = []
1165        tmp_values = np.zeros(n_obs)
1166        for i, item in enumerate(raveled_data):
1167            tmp_values[i] = item.r_values.get(name, item.value)
1168            tmp_idl = item.idl.get(name)
1169            if tmp_idl is not None:
1170                idl.append(tmp_idl)
1171        if multi > 0:
1172            tmp_values = np.array(tmp_values).reshape(data.shape)
1173        new_r_values[name] = func(tmp_values, **kwargs)
1174        new_idl_d[name] = _merge_idx(idl)
1175
1176    if 'man_grad' in kwargs:
1177        deriv = np.asarray(kwargs.get('man_grad'))
1178        if new_values.shape + data.shape != deriv.shape:
1179            raise Exception('Manual derivative does not have correct shape.')
1180    elif kwargs.get('num_grad') is True:
1181        if multi > 0:
1182            raise Exception('Multi mode currently not supported for numerical derivative')
1183        options = {
1184            'base_step': 0.1,
1185            'step_ratio': 2.5}
1186        for key in options.keys():
1187            kwarg = kwargs.get(key)
1188            if kwarg is not None:
1189                options[key] = kwarg
1190        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1191        if tmp_df.size == 1:
1192            deriv = np.array([tmp_df.real])
1193        else:
1194            deriv = tmp_df.real
1195    else:
1196        deriv = jacobian(func)(values, **kwargs)
1197
1198    final_result = np.zeros(new_values.shape, dtype=object)
1199
1200    if array_mode is True:
1201
1202        class _Zero_grad():
1203            def __init__(self, N):
1204                self.grad = np.zeros((N, 1))
1205
1206        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1207        d_extracted = {}
1208        g_extracted = {}
1209        for name in new_sample_names:
1210            d_extracted[name] = []
1211            ens_length = len(new_idl_d[name])
1212            for i_dat, dat in enumerate(data):
1213                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1214        for name in new_cov_names:
1215            g_extracted[name] = []
1216            zero_grad = _Zero_grad(new_covobs_lengths[name])
1217            for i_dat, dat in enumerate(data):
1218                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1219
1220    for i_val, new_val in np.ndenumerate(new_values):
1221        new_deltas = {}
1222        new_grad = {}
1223        if array_mode is True:
1224            for name in new_sample_names:
1225                ens_length = d_extracted[name][0].shape[-1]
1226                new_deltas[name] = np.zeros(ens_length)
1227                for i_dat, dat in enumerate(d_extracted[name]):
1228                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1229            for name in new_cov_names:
1230                new_grad[name] = 0
1231                for i_dat, dat in enumerate(g_extracted[name]):
1232                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1233        else:
1234            for j_obs, obs in np.ndenumerate(data):
1235                for name in obs.names:
1236                    if name in obs.cov_names:
1237                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1238                    else:
1239                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1240
1241        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1242
1243        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1244            raise Exception('The same name has been used for deltas and covobs!')
1245        new_samples = []
1246        new_means = []
1247        new_idl = []
1248        new_names_obs = []
1249        for name in new_names:
1250            if name not in new_covobs:
1251                new_samples.append(new_deltas[name])
1252                new_idl.append(new_idl_d[name])
1253                new_means.append(new_r_values[name][i_val])
1254                new_names_obs.append(name)
1255        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1256        for name in new_covobs:
1257            final_result[i_val].names.append(name)
1258        final_result[i_val]._covobs = new_covobs
1259        final_result[i_val]._value = new_val
1260        final_result[i_val].reweighted = reweighted
1261
1262    if multi == 0:
1263        final_result = final_result.item()
1264
1265    return final_result
1266
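# A minimal usage sketch (obs_a and obs_b are assumed to be existing Obs;
# not part of the library source): derived_observable propagates errors
# through an arbitrary function of Obs. The arithmetic operators of Obs
# are built on top of it, e.g.
#
#     prod = derived_observable(lambda x, **kwargs: x[0] * x[1],
#                               [obs_a, obs_b],
#                               man_grad=[obs_b.value, obs_a.value])
#
# where man_grad supplies the analytic gradient and bypasses the automatic
# differentiation via autograd.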
1267
1268def _reduce_deltas(deltas, idx_old, idx_new):
1269    """Extract deltas defined on idx_old on all configs of idx_new.
1270
1271    Assumes that idx_old and idx_new are correctly defined idl, i.e., that
1272    they are ordered in ascending order.
1273
1274    Parameters
1275    ----------
1276    deltas : list
1277        List of fluctuations
1278    idx_old : list
1279        List or range of configs on which the deltas are defined
1280    idx_new : list
1281        List of configs for which we want to extract the deltas.
1282        Has to be a subset of idx_old.
1283    """
1284    if not len(deltas) == len(idx_old):
1285        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1286    if type(idx_old) is range and type(idx_new) is range:
1287        if idx_old == idx_new:
1288            return deltas
1289    # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical
1290    try:
1291        g = groupby([idx_old, idx_new])
1292        if next(g, True) and not next(g, False):
1293            return deltas
1294    except Exception:
1295        pass
1296    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1297    if len(indices) < len(idx_new):
1298        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1299    return np.array(deltas)[indices]
1300
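# An illustrative sketch (made-up numbers, not part of the library source):
#
#     _reduce_deltas([0.5, -0.5, 1.0, -1.0], [1, 2, 3, 4], [2, 4])
#     # -> array([-0.5, -1. ])
#
# The fluctuations attached to configs 2 and 4 are extracted; idx_new has
# to be a subset of idx_old, otherwise an exception is raised.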
1301
1302def reweight(weight, obs, **kwargs):
1303    """Reweight a list of observables.
1304
1305    Parameters
1306    ----------
1307    weight : Obs
1308        Reweighting factor. An Observable that has to be defined on a superset of the
1309        configurations in obs[i].idl for all i.
1310    obs : list
1311        list of Obs, e.g. [obs1, obs2, obs3].
1312    all_configs : bool
1313        if True, the reweighted observables are normalized by the average of
1314        the reweighting factor on all configurations in weight.idl and not
1315        on the configurations in obs[i].idl. Default False.
1316    """
1317    result = []
1318    for i in range(len(obs)):
1319        if len(obs[i].cov_names):
1320            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1321        if not set(obs[i].names).issubset(weight.names):
1322            raise Exception('Error: Ensembles do not fit')
1323        for name in obs[i].names:
1324            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1325                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1326        new_samples = []
1327        w_deltas = {}
1328        for name in sorted(obs[i].names):
1329            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1330            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1331        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1332
1333        if kwargs.get('all_configs'):
1334            new_weight = weight
1335        else:
1336            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1337
1338        result.append(tmp_obs / new_weight)
1339        result[-1].reweighted = True
1340
1341    return result
1342
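# A minimal usage sketch (w and o are assumed to be existing Obs on the
# same ensemble, with o.idl a subset of w.idl; not part of the library source):
#
#     o_rw = reweight(w, [o])[0]
#     o_rw.reweighted
#     # -> True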
1343
1344def correlate(obs_a, obs_b):
1345    """Correlate two observables.
1346
1347    Parameters
1348    ----------
1349    obs_a : Obs
1350        First observable
1351    obs_b : Obs
1352        Second observable
1353
1354    Notes
1355    -----
1356    Keep in mind that only primary observables which have not been reweighted should
1357    be correlated; the reweighting has to be applied after correlating the observables.
1358    Currently this only works if the ensembles are identical (this is not strictly necessary).
1359    """
1360
1361    if sorted(obs_a.names) != sorted(obs_b.names):
1362        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1363    if len(obs_a.cov_names) or len(obs_b.cov_names):
1364        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1365    for name in obs_a.names:
1366        if obs_a.shape[name] != obs_b.shape[name]:
1367            raise Exception(f'Shapes of ensemble {name} do not fit')
1368        if obs_a.idl[name] != obs_b.idl[name]:
1369            raise Exception(f'idl of ensemble {name} do not fit')
1370
1371    if obs_a.reweighted is True:
1372        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1373    if obs_b.reweighted is True:
1374        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1375
1376    new_samples = []
1377    new_idl = []
1378    for name in sorted(obs_a.names):
1379        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1380        new_idl.append(obs_a.idl[name])
1381
1382    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1383    o.reweighted = obs_a.reweighted or obs_b.reweighted
1384    return o
1385
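# A minimal usage sketch (obs_a and obs_b are assumed to be existing Obs
# defined on identical ensembles and idl; not part of the library source):
#
#     ab = correlate(obs_a, obs_b)
#
# ab contains the per-configuration products; as noted above, reweighting
# should only be applied after correlating.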
1386
1387def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1388    r'''Calculates the error covariance matrix of a set of observables.
1389
1390    WARNING: This function should be used with care, especially for observables with support on multiple
1391             ensembles with differing autocorrelations. See the notes below for details.
1392
1393    The gamma method has to be applied first to all observables.
1394
1395    Parameters
1396    ----------
1397    obs : list or numpy.ndarray
1398        List or one dimensional array of Obs
1399    visualize : bool
1400        If True plots the corresponding normalized correlation matrix (default False).
1401    correlation : bool
1402        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1403    smooth : None or int
1404        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1, the eigenvalue
1405        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix. It leaves the
1406        largest E eigenvalues essentially unchanged and smooths the smaller eigenvalues to avoid
1407        extremely small ones.
1408
1409    Notes
1410    -----
1411    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1412    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1413    in the absence of autocorrelation.
1414    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1415    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1416    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1417    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1418    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1419    '''
1420
1421    length = len(obs)
1422
1423    max_samples = np.max([o.N for o in obs])
1424    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1425        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1426
1427    cov = np.zeros((length, length))
1428    for i in range(length):
1429        for j in range(i, length):
1430            cov[i, j] = _covariance_element(obs[i], obs[j])
1431    cov = cov + cov.T - np.diag(np.diag(cov))
1432
1433    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1434
1435    if isinstance(smooth, int):
1436        corr = _smooth_eigenvalues(corr, smooth)
1437
1438    if visualize:
1439        plt.matshow(corr, vmin=-1, vmax=1)
1440        plt.set_cmap('RdBu')
1441        plt.colorbar()
1442        plt.draw()
1443
1444    if correlation is True:
1445        return corr
1446
1447    errors = [o.dvalue for o in obs]
1448    cov = np.diag(errors) @ corr @ np.diag(errors)
1449
1450    eigenvalues = np.linalg.eigh(cov)[0]
1451    if not np.all(eigenvalues >= 0):
1452        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1453
1454    return cov
1455
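# A minimal usage sketch (o1 and o2 are assumed to be existing Obs on which
# gamma_method has already been run; not part of the library source):
#
#     cov = covariance([o1, o2])                      # 2x2 error covariance
#     corr = covariance([o1, o2], correlation=True)   # correlation matrix
#
# Without smoothing, the diagonal of cov agrees with the squared errors
# o1.dvalue ** 2 and o2.dvalue ** 2 by construction.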
1456
1457def _smooth_eigenvalues(corr, E):
1458    """Eigenvalue smoothing as described in hep-lat/9412087
1459
1460    corr : np.ndarray
1461        correlation matrix
1462    E : integer
1463        Number of eigenvalues to be left substantially unchanged
1464    """
1465    if not (2 < E < corr.shape[0] - 1):
1466        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1467    vals, vec = np.linalg.eigh(corr)
1468    lambda_min = np.mean(vals[:-E])
1469    vals[vals < lambda_min] = lambda_min
1470    vals /= np.mean(vals)
1471    return vec @ np.diag(vals) @ vec.T
1472
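# An illustrative sketch (not part of the library source): for a 6x6
# correlation matrix, E has to satisfy 2 < E < 5, so
#
#     smoothed = _smooth_eigenvalues(corr, 4)
#
# raises all eigenvalues below the mean of the two smallest ones to that
# mean, rescales the spectrum to unit average and leaves the 4 largest
# eigenvalues essentially unchanged.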
1473
1474def _covariance_element(obs1, obs2):
1475    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1476
1477    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1478        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1479        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1480        return np.sum(deltas1 * deltas2)
1481
1482    if set(obs1.names).isdisjoint(set(obs2.names)):
1483        return 0.0
1484
1485    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1486        raise Exception('The gamma method has to be applied to both Obs first.')
1487
1488    dvalue = 0.0
1489
1490    for e_name in obs1.mc_names:
1491
1492        if e_name not in obs2.mc_names:
1493            continue
1494
1495        idl_d = {}
1496        for r_name in obs1.e_content[e_name]:
1497            if r_name not in obs2.e_content[e_name]:
1498                continue
1499            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1500
1501        gamma = 0.0
1502
1503        for r_name in obs1.e_content[e_name]:
1504            if r_name not in obs2.e_content[e_name]:
1505                continue
1506            if len(idl_d[r_name]) == 0:
1507                continue
1508            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1509
1510        if gamma == 0.0:
1511            continue
1512
1513        gamma_div = 0.0
1514        for r_name in obs1.e_content[e_name]:
1515            if r_name not in obs2.e_content[e_name]:
1516                continue
1517            if len(idl_d[r_name]) == 0:
1518                continue
1519            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1520        gamma /= gamma_div
1521
1522        dvalue += gamma
1523
1524    for e_name in obs1.cov_names:
1525
1526        if e_name not in obs2.cov_names:
1527            continue
1528
1529        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1530
1531    return dvalue
1532
1533
1534def import_jackknife(jacks, name, idl=None):
1535    """Imports jackknife samples and returns an Obs
1536
1537    Parameters
1538    ----------
1539    jacks : numpy.ndarray
1540        numpy array containing the mean value as zeroth entry and
1541        the N jackknife samples as first to Nth entry.
1542    name : str
1543        name of the ensemble the samples are defined on.
1544    """
1545    length = len(jacks) - 1
1546    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1547    samples = jacks[1:] @ prj
1548    mean = np.mean(samples)
1549    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1550    new_obs._value = jacks[0]
1551    return new_obs
1552
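# A round-trip sketch (my_obs is assumed to be an existing Obs defined on a
# single ensemble/replicum; not part of the library source):
#
#     jacks = my_obs.export_jackknife()
#     reconstructed = import_jackknife(jacks, my_obs.names[0])
#
# reconstructed agrees with my_obs up to the O(1/N) ambiguity of the
# derived jackknife samples.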
1553
1554def merge_obs(list_of_obs):
1555    """Combine all observables in list_of_obs into one new observable
1556
1557    Parameters
1558    ----------
1559    list_of_obs : list
1560        list of the Obs objects to be combined
1561
1562    Notes
1563    -----
1564    It is not possible to combine obs which are based on the same replicum
1565    """
1566    replist = [item for obs in list_of_obs for item in obs.names]
1567    if len(replist) != len(set(replist)):
1568        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1569    if any([len(o.cov_names) for o in list_of_obs]):
1570        raise Exception('Not possible to merge data that contains covobs!')
1571    new_dict = {}
1572    idl_dict = {}
1573    for o in list_of_obs:
1574        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1575                        for key in set(o.deltas) | set(o.r_values)})
1576        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1577
1578    names = sorted(new_dict.keys())
1579    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1580    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1581    return o
1582
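# A minimal usage sketch (obs_r1 and obs_r2 are assumed to be existing Obs
# defined on disjoint replica 'ens|r1' and 'ens|r2'; not part of the
# library source):
#
#     combined = merge_obs([obs_r1, obs_r2])
#
# combined carries the samples of both replica; passing Obs that share a
# replicum raises an exception.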
1583
1584def cov_Obs(means, cov, name, grad=None):
1585    """Create an Obs based on mean(s) and a covariance matrix
1586
1587    Parameters
1588    ----------
1589    means : list of floats or float
1590        N mean value(s) of the new Obs
1591    cov : list or array
1592        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1593    name : str
1594        identifier for the covariance matrix
1595    grad : list or array
1596        Gradient of the Covobs w.r.t. the means belonging to cov.
1597    """
1598
1599    def covobs_to_obs(co):
1600        """Make an Obs out of a Covobs
1601
1602        Parameters
1603        ----------
1604        co : Covobs
1605            Covobs to be embedded into the Obs
1606        """
1607        o = Obs([], [], means=[])
1608        o._value = co.value
1609        o.names.append(co.name)
1610        o._covobs[co.name] = co
1611        o._dvalue = np.sqrt(co.errsq())
1612        return o
1613
1614    ol = []
1615    if isinstance(means, (float, int)):
1616        means = [means]
1617
1618    for i in range(len(means)):
1619        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1620    if ol[0].covobs[name].N != len(means):
1621        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
1622    if len(ol) == 1:
1623        return ol[0]
1624    return ol
1625
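# A minimal usage sketch (illustrative numbers, not part of the library source):
#
#     renorm = cov_Obs(0.75, 0.1 ** 2, 'renormalization')   # single Obs
#     pair = cov_Obs([1.0, 2.0], [[0.0100, 0.0025],
#                                 [0.0025, 0.0400]], 'syst')  # list of two Obs
#
# The first call returns one Obs, the second a list of two correlated Obs
# that share the covariance matrix labeled 'syst'.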
1626
1627def _determine_gap(o, e_content, e_name):
1628    gaps = []
1629    for r_name in e_content[e_name]:
1630        if isinstance(o.idl[r_name], range):
1631            gaps.append(o.idl[r_name].step)
1632        else:
1633            gaps.append(np.min(np.diff(o.idl[r_name])))
1634
1635    gap = min(gaps)
1636    if not np.all([gi % gap == 0 for gi in gaps]):
1637        raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)
1638
1639    return gap
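# An illustrative sketch (not part of the library source): for an Obs with a
# single replicum measured on every 5th configuration, i.e.
# idl[r_name] = range(5, 500, 5), _determine_gap returns a gap of 5; replica
# whose spacings are not integer multiples of the smallest gap trigger the
# exception above.
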
class Obs:

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 62    def __init__(self, samples, names, idl=None, **kwargs):
 63        """ Initialize Obs object.
 64
 65        Parameters
 66        ----------
 67        samples : list
 68            list of numpy arrays containing the Monte Carlo samples
 69        names : list
 70            list of strings labeling the individual samples
 71        idl : list, optional
 72            list of ranges or lists on which the samples are defined
 73        """
 74
 75        if kwargs.get("means") is None and len(samples):
 76            if len(samples) != len(names):
 77                raise ValueError('Length of samples and names incompatible.')
 78            if idl is not None:
 79                if len(idl) != len(names):
 80                    raise ValueError('Length of idl incompatible with samples and names.')
 81            name_length = len(names)
 82            if name_length > 1:
 83                if name_length != len(set(names)):
 84                    raise ValueError('Names are not unique.')
 85                if not all(isinstance(x, str) for x in names):
 86                    raise TypeError('All names have to be strings.')
 87            else:
 88                if not isinstance(names[0], str):
 89                    raise TypeError('All names have to be strings.')
 90            if min(len(x) for x in samples) <= 4:
 91                raise ValueError('Samples have to have at least 5 entries.')
 92
 93        self.names = sorted(names)
 94        self.shape = {}
 95        self.r_values = {}
 96        self.deltas = {}
 97        self._covobs = {}
 98
 99        self._value = 0
100        self.N = 0
101        self.idl = {}
102        if idl is not None:
103            for name, idx in sorted(zip(names, idl)):
104                if isinstance(idx, range):
105                    self.idl[name] = idx
106                elif isinstance(idx, (list, np.ndarray)):
107                    dc = np.unique(np.diff(idx))
108                    if np.any(dc < 0):
109                        raise ValueError("Unsorted idx for idl[%s]" % (name))
110                    if len(dc) == 1:
111                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
112                    else:
113                        self.idl[name] = list(idx)
114                else:
115                    raise TypeError('incompatible type for idl[%s].' % (name))
116        else:
117            for name, sample in sorted(zip(names, samples)):
118                self.idl[name] = range(1, len(sample) + 1)
119
120        if kwargs.get("means") is not None:
121            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
122                self.shape[name] = len(self.idl[name])
123                self.N += self.shape[name]
124                self.r_values[name] = mean
125                self.deltas[name] = sample
126        else:
127            for name, sample in sorted(zip(names, samples)):
128                self.shape[name] = len(self.idl[name])
129                self.N += self.shape[name]
130                if len(sample) != self.shape[name]:
131                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
132                self.r_values[name] = np.mean(sample)
133                self.deltas[name] = sample - self.r_values[name]
134                self._value += self.shape[name] * self.r_values[name]
135            self._value /= self.N
136
137        self._dvalue = 0.0
138        self.ddvalue = 0.0
139        self.reweighted = False
140
141        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
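
For illustration, a minimal construction sketch with toy data (the pyerrors package is assumed to be importable as pe):

import numpy as np
import pyerrors as pe

samples = np.random.normal(1.0, 0.1, 1000)  # 1000 toy Monte Carlo samples
my_obs = pe.Obs([samples], ['ensemble1'])

# Samples measured only on every second configuration, specified via idl
thinned = pe.Obs([samples[:500]], ['ensemble1'], idl=[range(1, 1000, 2)])
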
def gamma_method(self, **kwargs):
176    def gamma_method(self, **kwargs):
177        """Estimate the error and related properties of the Obs.
178
179        Parameters
180        ----------
181        S : float
182            specifies a custom value for the parameter S (default 2.0).
183            If set to 0 it is assumed that the data exhibits no
 184            autocorrelation. In this case the error estimate coincides
185            with the sample standard error.
186        tau_exp : float
187            positive value triggers the critical slowing down analysis
188            (default 0.0).
189        N_sigma : float
190            number of standard deviations from zero until the tail is
191            attached to the autocorrelation function (default 1).
192        fft : bool
193            determines whether the fft algorithm is used for the computation
194            of the autocorrelation function (default True)
195        """
196
197        e_content = self.e_content
198        self.e_dvalue = {}
199        self.e_ddvalue = {}
200        self.e_tauint = {}
201        self.e_dtauint = {}
202        self.e_windowsize = {}
203        self.e_n_tauint = {}
204        self.e_n_dtauint = {}
205        e_gamma = {}
206        self.e_rho = {}
207        self.e_drho = {}
208        self._dvalue = 0
209        self.ddvalue = 0
210
211        self.S = {}
212        self.tau_exp = {}
213        self.N_sigma = {}
214
215        if kwargs.get('fft') is False:
216            fft = False
217        else:
218            fft = True
219
220        def _parse_kwarg(kwarg_name):
221            if kwarg_name in kwargs:
222                tmp = kwargs.get(kwarg_name)
223                if isinstance(tmp, (int, float)):
224                    if tmp < 0:
225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
226                    for e, e_name in enumerate(self.e_names):
227                        getattr(self, kwarg_name)[e_name] = tmp
228                else:
229                    raise TypeError(kwarg_name + ' is not in proper format.')
230            else:
231                for e, e_name in enumerate(self.e_names):
232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
234                    else:
235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
236
237        _parse_kwarg('S')
238        _parse_kwarg('tau_exp')
239        _parse_kwarg('N_sigma')
240
241        for e, e_name in enumerate(self.mc_names):
242            gapsize = _determine_gap(self, e_content, e_name)
243
244            r_length = []
245            for r_name in e_content[e_name]:
246                if isinstance(self.idl[r_name], range):
247                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
248                else:
249                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)
250
251            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
252            w_max = max(r_length) // 2
253            e_gamma[e_name] = np.zeros(w_max)
254            self.e_rho[e_name] = np.zeros(w_max)
255            self.e_drho[e_name] = np.zeros(w_max)
256
257            for r_name in e_content[e_name]:
258                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
259
260            gamma_div = np.zeros(w_max)
261            for r_name in e_content[e_name]:
262                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
263            gamma_div[gamma_div < 1] = 1.0
264            e_gamma[e_name] /= gamma_div[:w_max]
265
266            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
267                self.e_tauint[e_name] = 0.5
268                self.e_dtauint[e_name] = 0.0
269                self.e_dvalue[e_name] = 0.0
270                self.e_ddvalue[e_name] = 0.0
271                self.e_windowsize[e_name] = 0
272                continue
273
274            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
275            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
276            # Make sure no entry of tauint is smaller than 0.5
277            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
278            # hep-lat/0306017 eq. (42)
279            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
280            self.e_n_dtauint[e_name][0] = 0.0
281
282            def _compute_drho(i):
283                tmp = (self.e_rho[e_name][i + 1:w_max]
284                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1],
285                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
286                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
287                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
288
289            if self.tau_exp[e_name] > 0:
290                _compute_drho(1)
291                texp = self.tau_exp[e_name]
292                # Critical slowing down analysis
293                if w_max // 2 <= 1:
294                    raise Exception("Need at least 8 samples for tau_exp error analysis")
295                for n in range(1, w_max // 2):
296                    _compute_drho(n + 1)
297                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
298                        # Bias correction hep-lat/0306017 eq. (49) included
299                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
300                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
301                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
302                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
303                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
304                        self.e_windowsize[e_name] = n
305                        break
306            else:
307                if self.S[e_name] == 0.0:
308                    self.e_tauint[e_name] = 0.5
309                    self.e_dtauint[e_name] = 0.0
310                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
311                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
312                    self.e_windowsize[e_name] = 0
313                else:
314                    # Standard automatic windowing procedure
315                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
316                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
317                    for n in range(1, w_max):
318                        if g_w[n - 1] < 0 or n >= w_max - 1:
319                            _compute_drho(n)
320                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
321                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
322                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
323                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
324                            self.e_windowsize[e_name] = n
325                            break
326
327            self._dvalue += self.e_dvalue[e_name] ** 2
328            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
329
330        for e_name in self.cov_names:
331            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
332            self.e_ddvalue[e_name] = 0
333            self._dvalue += self.e_dvalue[e_name]**2
334
335        self._dvalue = np.sqrt(self._dvalue)
336        if self._dvalue == 0.0:
337            self.ddvalue = 0.0
338        else:
339            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
340        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
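
A short usage sketch, continuing the construction example above (value and dvalue hold the mean and the statistical error after the analysis):

my_obs.gamma_method()               # automatic windowing with S=2.0
print(my_obs.value, my_obs.dvalue)  # mean and statistical error

my_obs.gamma_method(S=3.0)       # more conservative automatic windowing
my_obs.gamma_method(tau_exp=10)  # critical slowing down analysis with exponential tail
my_obs.gamma_method(S=0)         # no autocorrelation: plain standard error

pe.Obs.S_dict['ensemble1'] = 3.0  # per-ensemble default that overrides S_global
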
def gm(self, **kwargs):
Alias for gamma_method. Source and parameter description are identical to gamma_method above.
def details(self, ens_content=True):
380    def details(self, ens_content=True):
381        """Output detailed properties of the Obs.
382
383        Parameters
384        ----------
385        ens_content : bool
386            print details about the ensembles and replica if true.
387        """
388        if self.tag is not None:
389            print("Description:", self.tag)
390        if not hasattr(self, 'e_dvalue'):
391            print('Result\t %3.8e' % (self.value))
392        else:
393            if self.value == 0.0:
394                percentage = np.nan
395            else:
396                percentage = np.abs(self._dvalue / self.value) * 100
397            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
398            if len(self.e_names) > 1:
399                print(' Ensemble errors:')
400            e_content = self.e_content
401            for e_name in self.mc_names:
402                gap = _determine_gap(self, e_content, e_name)
403
404                if len(self.e_names) > 1:
405                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
406                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
407                tau_string += f" in units of {gap} config"
408                if gap > 1:
409                    tau_string += "s"
410                if self.tau_exp[e_name] > 0:
411                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
412                else:
413                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
414                print(tau_string)
415            for e_name in self.cov_names:
416                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
417        if ens_content is True:
418            if len(self.e_names) == 1:
419                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
420            else:
421                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
422            my_string_list = []
423            for key, value in sorted(self.e_content.items()):
424                if key not in self.covobs:
425                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
426                    if len(value) == 1:
427                        my_string += f': {self.shape[value[0]]} configurations'
428                        if isinstance(self.idl[value[0]], range):
429                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
430                        else:
431                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
432                    else:
433                        sublist = []
434                        for v in value:
435                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
436                            my_substring += f': {self.shape[v]} configurations'
437                            if isinstance(self.idl[v], range):
438                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
439                            else:
440                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
441                            sublist.append(my_substring)
442
443                        my_string += '\n' + '\n'.join(sublist)
444                else:
445                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
446                my_string_list.append(my_string)
447            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
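
For example:

my_obs.gamma_method()
my_obs.details()  # prints value, error, tau_int and the ensemble/replica content
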
def reweight(self, weight):
449    def reweight(self, weight):
450        """Reweight the obs with a given reweighting factor.
451
452        Parameters
453        ----------
454        weight : Obs
455            Reweighting factor. An Observable that has to be defined on a superset of the
456            configurations in obs[i].idl for all i.
457        all_configs : bool
458            if True, the reweighted observables are normalized by the average of
459            the reweighting factor on all configurations in weight.idl and not
460            on the configurations in obs[i].idl. Default False.
461        """
462        return reweight(weight, [self])[0]

Reweight the obs with a given reweighting factor.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
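
A hypothetical sketch; rw_factor stands for an Obs holding the reweighting factor, defined on a superset of the configurations of my_obs:

reweighted = my_obs.reweight(rw_factor)
reweighted.gamma_method()  # the result carries reweighted=True
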
def is_zero_within_error(self, sigma=1):
464    def is_zero_within_error(self, sigma=1):
465        """Checks whether the observable is zero within 'sigma' standard errors.
466
467        Parameters
468        ----------
469        sigma : int
470            Number of standard errors used for the check.
471
472        Works only properly when the gamma method was run.
473        """
474        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
Works properly only after the gamma method has been run.
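
For example, to test whether two (assumed) observables obs_a and obs_b are compatible:

difference = obs_a - obs_b
difference.gamma_method()
print(difference.is_zero_within_error(sigma=2))  # True if zero within 2 standard errors
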
def is_zero(self, atol=1e-10):
476    def is_zero(self, atol=1e-10):
477        """Checks whether the observable is zero within a given tolerance.
478
479        Parameters
480        ----------
481        atol : float
482            Absolute tolerance (for details see numpy documentation).
483        """
484        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
486    def plot_tauint(self, save=None):
487        """Plot integrated autocorrelation time for each ensemble.
488
489        Parameters
490        ----------
491        save : str
492            saves the figure to a file named 'save' if set.
493        """
494        if not hasattr(self, 'e_dvalue'):
495            raise Exception('Run the gamma method first.')
496
497        for e, e_name in enumerate(self.mc_names):
498            fig = plt.figure()
499            plt.xlabel(r'$W$')
500            plt.ylabel(r'$\tau_\mathrm{int}$')
501            length = int(len(self.e_n_tauint[e_name]))
502            if self.tau_exp[e_name] > 0:
503                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
504                x_help = np.arange(2 * self.tau_exp[e_name])
505                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
506                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
507                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
508                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
509                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
510                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
511                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
512            else:
513                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
514                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
515
516            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
517            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
518            plt.legend()
519            plt.xlim(-0.5, xmax)
520            ylim = plt.ylim()
521            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
522            plt.draw()
523            if save:
524                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if set.
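
For example (the file names follow the savefig call in the source, one figure per ensemble index):

my_obs.gamma_method()
my_obs.plot_tauint()               # one interactive window per ensemble
my_obs.plot_tauint(save='tauint')  # writes tauint_0.png etc. (matplotlib default format)
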
def plot_rho(self, save=None):
526    def plot_rho(self, save=None):
527        """Plot normalized autocorrelation function for each ensemble.
528
529        Parameters
530        ----------
531        save : str
532            saves the figure to a file named 'save' if set.
533        """
534        if not hasattr(self, 'e_dvalue'):
535            raise Exception('Run the gamma method first.')
536        for e, e_name in enumerate(self.mc_names):
537            fig = plt.figure()
538            plt.xlabel('W')
539            plt.ylabel('rho')
540            length = int(len(self.e_drho[e_name]))
541            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
542            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
543            if self.tau_exp[e_name] > 0:
544                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
545                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
546                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
547                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
548            else:
549                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
550                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
551            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
552            plt.xlim(-0.5, xmax)
553            plt.draw()
554            if save:
555                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if set.
def plot_rep_dist(self):
557    def plot_rep_dist(self):
558        """Plot replica distribution for each ensemble with more than one replicum."""
559        if not hasattr(self, 'e_dvalue'):
560            raise Exception('Run the gamma method first.')
561        for e, e_name in enumerate(self.mc_names):
562            if len(self.e_content[e_name]) == 1:
563                print('No replica distribution for a single replicum (', e_name, ')')
564                continue
565            r_length = []
566            sub_r_mean = 0
567            for r, r_name in enumerate(self.e_content[e_name]):
568                r_length.append(len(self.deltas[r_name]))
569                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
570            e_N = np.sum(r_length)
571            sub_r_mean /= e_N
572            arr = np.zeros(len(self.e_content[e_name]))
573            for r, r_name in enumerate(self.e_content[e_name]):
574                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
575            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
576            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
577            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
579    def plot_history(self, expand=True):
580        """Plot derived Monte Carlo history for each ensemble
581
582        Parameters
583        ----------
584        expand : bool
585            show expanded history for irregular Monte Carlo chains (default: True).
586        """
587        for e, e_name in enumerate(self.mc_names):
588            plt.figure()
589            r_length = []
590            tmp = []
591            tmp_expanded = []
592            for r, r_name in enumerate(self.e_content[e_name]):
593                tmp.append(self.deltas[r_name] + self.r_values[r_name])
594                if expand:
595                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
596                    r_length.append(len(tmp_expanded[-1]))
597                else:
598                    r_length.append(len(tmp[-1]))
599            e_N = np.sum(r_length)
600            x = np.arange(e_N)
601            y_test = np.concatenate(tmp, axis=0)
602            if expand:
603                y = np.concatenate(tmp_expanded, axis=0)
604            else:
605                y = y_test
606            plt.errorbar(x, y, fmt='.', markersize=3)
607            plt.xlim(-0.5, e_N - 0.5)
608            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
609            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
611    def plot_piechart(self, save=None):
612        """Plot piechart which shows the fractional contribution of each
613        ensemble to the error and returns a dictionary containing the fractions.
614
615        Parameters
616        ----------
617        save : str
618            saves the figure to a file named 'save' if set.
619        """
620        if not hasattr(self, 'e_dvalue'):
621            raise Exception('Run the gamma method first.')
622        if np.isclose(0.0, self._dvalue, atol=1e-15):
623            raise Exception('Error is 0.0')
624        labels = self.e_names
625        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
626        fig1, ax1 = plt.subplots()
627        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
628        ax1.axis('equal')
629        plt.draw()
630        if save:
631            fig1.savefig(save)
632
633        return dict(zip(self.e_names, sizes))

Plot a pie chart showing the fractional contribution of each ensemble to the total error and return a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if set.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
635    def dump(self, filename, datatype="json.gz", description="", **kwargs):
636        """Dump the Obs to a file 'name' of chosen format.
637
638        Parameters
639        ----------
640        filename : str
641            name of the file to be saved.
642        datatype : str
643            Format of the exported file. Supported formats include
644            "json.gz" and "pickle"
645        description : str
646            Description for output file, only relevant for json.gz format.
647        path : str
648            specifies a custom path for the file (default '.')
649        """
650        if 'path' in kwargs:
651            file_name = kwargs.get('path') + '/' + filename
652        else:
653            file_name = filename
654
655        if datatype == "json.gz":
656            from .input.json import dump_to_json
657            dump_to_json([self], file_name, description=description)
658        elif datatype == "pickle":
659            with open(file_name + '.p', 'wb') as fb:
660                pickle.dump(self, fb)
661        else:
662            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
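
For example (file extensions are appended by the chosen backend):

my_obs.dump('my_analysis')                     # my_analysis.json.gz
my_obs.dump('my_analysis', datatype='pickle')  # my_analysis.p
my_obs.dump('my_analysis', path='results')     # writes into the directory 'results'
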
def export_jackknife(self):
664    def export_jackknife(self):
665        """Export jackknife samples from the Obs
666
667        Returns
668        -------
669        numpy.ndarray
670            Returns a numpy array of length N + 1 where N is the number of samples
671            for the given ensemble and replicum. The zeroth entry of the array contains
672            the mean value of the Obs, entries 1 to N contain the N jackknife samples
673            derived from the Obs. The current implementation only works for observables
674            defined on exactly one ensemble and replicum. The derived jackknife samples
675            should agree with samples from a full jackknife analysis up to O(1/N).
676        """
677
678        if len(self.names) != 1:
679            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
680
681        name = self.names[0]
682        full_data = self.deltas[name] + self.r_values[name]
683        n = full_data.size
684        mean = self.value
685        tmp_jacks = np.zeros(n + 1)
686        tmp_jacks[0] = mean
687        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
688        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
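
A sketch of a naive jackknife error computed from the exported samples; for autocorrelated data this generally underestimates the error compared to the gamma method:

jacks = my_obs.export_jackknife()
n = len(jacks) - 1
# standard jackknife variance: (n - 1) times the mean squared deviation of the samples
jack_err = np.sqrt((n - 1) * np.mean((jacks[1:] - np.mean(jacks[1:])) ** 2))
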
def sqrt(self):
816    def sqrt(self):
817        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
819    def log(self):
820        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
822    def exp(self):
823        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
825    def sin(self):
826        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
828    def cos(self):
829        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
831    def tan(self):
832        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
834    def arcsin(self):
835        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
837    def arccos(self):
838        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
840    def arctan(self):
841        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
843    def sinh(self):
844        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
846    def cosh(self):
847        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
849    def tanh(self):
850        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
852    def arcsinh(self):
853        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
855    def arccosh(self):
856        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
858    def arctanh(self):
859        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
class CObs:
862class CObs:
863    """Class for a complex valued observable."""
864    __slots__ = ['_real', '_imag', 'tag']
865
866    def __init__(self, real, imag=0.0):
867        self._real = real
868        self._imag = imag
869        self.tag = None
870
871    @property
872    def real(self):
873        return self._real
874
875    @property
876    def imag(self):
877        return self._imag
878
879    def gamma_method(self, **kwargs):
880        """Executes the gamma_method for the real and the imaginary part."""
881        if isinstance(self.real, Obs):
882            self.real.gamma_method(**kwargs)
883        if isinstance(self.imag, Obs):
884            self.imag.gamma_method(**kwargs)
885
886    def is_zero(self):
887        """Checks whether both real and imaginary part are zero within machine precision."""
888        return self.real == 0.0 and self.imag == 0.0
889
890    def conjugate(self):
891        return CObs(self.real, -self.imag)
892
893    def __add__(self, other):
894        if isinstance(other, np.ndarray):
895            return other + self
896        elif hasattr(other, 'real') and hasattr(other, 'imag'):
897            return CObs(self.real + other.real,
898                        self.imag + other.imag)
899        else:
900            return CObs(self.real + other, self.imag)
901
902    def __radd__(self, y):
903        return self + y
904
905    def __sub__(self, other):
906        if isinstance(other, np.ndarray):
907            return -1 * (other - self)
908        elif hasattr(other, 'real') and hasattr(other, 'imag'):
909            return CObs(self.real - other.real, self.imag - other.imag)
910        else:
911            return CObs(self.real - other, self.imag)
912
913    def __rsub__(self, other):
914        return -1 * (self - other)
915
916    def __mul__(self, other):
917        if isinstance(other, np.ndarray):
918            return other * self
919        elif hasattr(other, 'real') and hasattr(other, 'imag'):
920            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
921                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
922                                               [self.real, other.real, self.imag, other.imag],
923                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
924                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
925                                               [self.real, other.real, self.imag, other.imag],
926                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
927            elif getattr(other, 'imag', 0) != 0:
928                return CObs(self.real * other.real - self.imag * other.imag,
929                            self.imag * other.real + self.real * other.imag)
930            else:
931                return CObs(self.real * other.real, self.imag * other.real)
932        else:
933            return CObs(self.real * other, self.imag * other)
934
935    def __rmul__(self, other):
936        return self * other
937
938    def __truediv__(self, other):
939        if isinstance(other, np.ndarray):
940            return 1 / (other / self)
941        elif hasattr(other, 'real') and hasattr(other, 'imag'):
942            r = other.real ** 2 + other.imag ** 2
943            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
944        else:
945            return CObs(self.real / other, self.imag / other)
946
947    def __rtruediv__(self, other):
948        r = self.real ** 2 + self.imag ** 2
949        if hasattr(other, 'real') and hasattr(other, 'imag'):
950            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
951        else:
952            return CObs(self.real * other / r, -self.imag * other / r)
953
954    def __abs__(self):
955        return np.sqrt(self.real**2 + self.imag**2)
956
957    def __pos__(self):
958        return self
959
960    def __neg__(self):
961        return -1 * self
962
963    def __eq__(self, other):
964        return self.real == other.real and self.imag == other.imag
965
966    def __str__(self):
967        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
968
969    def __repr__(self):
970        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
866    def __init__(self, real, imag=0.0):
867        self._real = real
868        self._imag = imag
869        self.tag = None
def gamma_method(self, **kwargs):
879    def gamma_method(self, **kwargs):
880        """Executes the gamma_method for the real and the imaginary part."""
881        if isinstance(self.real, Obs):
882            self.real.gamma_method(**kwargs)
883        if isinstance(self.imag, Obs):
884            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
886    def is_zero(self):
887        """Checks whether both real and imaginary part are zero within machine precision."""
888        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
890    def conjugate(self):
891        return CObs(self.real, -self.imag)
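
A usage sketch, assuming real_part and imag_part are Obs instances:

c_obs = pe.CObs(real_part, imag_part)
abs_sq = c_obs * c_obs.conjugate()  # |c_obs|^2 as a CObs (imaginary part compatible with 0)
abs_sq.gamma_method()
print(abs_sq.real)
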
def derived_observable(func, data, array_mode=False, **kwargs):
1099def derived_observable(func, data, array_mode=False, **kwargs):
1100    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1101
1102    Parameters
1103    ----------
1104    func : object
1105        arbitrary function of the form func(data, **kwargs). For the
1106        automatic differentiation to work, all numpy functions have to have
1107        the autograd wrapper (use 'import autograd.numpy as anp').
1108    data : list
1109        list of Obs, e.g. [obs1, obs2, obs3].
1110    num_grad : bool
1111        if True, numerical derivatives are used instead of autograd
1112        (default False). To control the numerical differentiation the
1113        kwargs of numdifftools.step_generators.MaxStepGenerator
1114        can be used.
1115    man_grad : list
1116        manually supply a list or an array which contains the jacobian
1117        of func. Use cautiously, supplying the wrong derivative will
1118        not be intercepted.
1119
1120    Notes
1121    -----
1122    For simple mathematical operations it can be practical to use anonymous
1123    functions. For the ratio of two observables one can e.g. use
1124
1125    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1126    """
1127
1128    data = np.asarray(data)
1129    raveled_data = data.ravel()
1130
1131    # Workaround for matrix operations containing non Obs data
1132    if not all(isinstance(x, Obs) for x in raveled_data):
1133        for i in range(len(raveled_data)):
1134            if isinstance(raveled_data[i], (int, float)):
1135                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1136
1137    allcov = {}
1138    for o in raveled_data:
1139        for name in o.cov_names:
1140            if name in allcov:
1141                if not np.allclose(allcov[name], o.covobs[name].cov):
1142                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1143            else:
1144                allcov[name] = o.covobs[name].cov
1145
1146    n_obs = len(raveled_data)
1147    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1148    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1149    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1150
1151    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1152
1153    if data.ndim == 1:
1154        values = np.array([o.value for o in data])
1155    else:
1156        values = np.vectorize(lambda x: x.value)(data)
1157
1158    new_values = func(values, **kwargs)
1159
1160    multi = int(isinstance(new_values, np.ndarray))
1161
1162    new_r_values = {}
1163    new_idl_d = {}
1164    for name in new_sample_names:
1165        idl = []
1166        tmp_values = np.zeros(n_obs)
1167        for i, item in enumerate(raveled_data):
1168            tmp_values[i] = item.r_values.get(name, item.value)
1169            tmp_idl = item.idl.get(name)
1170            if tmp_idl is not None:
1171                idl.append(tmp_idl)
1172        if multi > 0:
1173            tmp_values = np.array(tmp_values).reshape(data.shape)
1174        new_r_values[name] = func(tmp_values, **kwargs)
1175        new_idl_d[name] = _merge_idx(idl)
1176
1177    if 'man_grad' in kwargs:
1178        deriv = np.asarray(kwargs.get('man_grad'))
1179        if new_values.shape + data.shape != deriv.shape:
1180            raise Exception('Manual derivative does not have correct shape.')
1181    elif kwargs.get('num_grad') is True:
1182        if multi > 0:
1183            raise Exception('Multi mode currently not supported for numerical derivative')
1184        options = {
1185            'base_step': 0.1,
1186            'step_ratio': 2.5}
1187        for key in options.keys():
1188            kwarg = kwargs.get(key)
1189            if kwarg is not None:
1190                options[key] = kwarg
1191        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1192        if tmp_df.size == 1:
1193            deriv = np.array([tmp_df.real])
1194        else:
1195            deriv = tmp_df.real
1196    else:
1197        deriv = jacobian(func)(values, **kwargs)
1198
1199    final_result = np.zeros(new_values.shape, dtype=object)
1200
1201    if array_mode is True:
1202
1203        class _Zero_grad():
1204            def __init__(self, N):
1205                self.grad = np.zeros((N, 1))
1206
1207        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1208        d_extracted = {}
1209        g_extracted = {}
1210        for name in new_sample_names:
1211            d_extracted[name] = []
1212            ens_length = len(new_idl_d[name])
1213            for i_dat, dat in enumerate(data):
1214                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1215        for name in new_cov_names:
1216            g_extracted[name] = []
1217            zero_grad = _Zero_grad(new_covobs_lengths[name])
1218            for i_dat, dat in enumerate(data):
1219                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1220
1221    for i_val, new_val in np.ndenumerate(new_values):
1222        new_deltas = {}
1223        new_grad = {}
1224        if array_mode is True:
1225            for name in new_sample_names:
1226                ens_length = d_extracted[name][0].shape[-1]
1227                new_deltas[name] = np.zeros(ens_length)
1228                for i_dat, dat in enumerate(d_extracted[name]):
1229                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1230            for name in new_cov_names:
1231                new_grad[name] = 0
1232                for i_dat, dat in enumerate(g_extracted[name]):
1233                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1234        else:
1235            for j_obs, obs in np.ndenumerate(data):
1236                for name in obs.names:
1237                    if name in obs.cov_names:
1238                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1239                    else:
1240                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1241
1242        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1243
1244        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1245            raise Exception('The same name has been used for deltas and covobs!')
1246        new_samples = []
1247        new_means = []
1248        new_idl = []
1249        new_names_obs = []
1250        for name in new_names:
1251            if name not in new_covobs:
1252                new_samples.append(new_deltas[name])
1253                new_idl.append(new_idl_d[name])
1254                new_means.append(new_r_values[name][i_val])
1255                new_names_obs.append(name)
1256        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1257        for name in new_covobs:
1258            final_result[i_val].names.append(name)
1259        final_result[i_val]._covobs = new_covobs
1260        final_result[i_val]._value = new_val
1261        final_result[i_val].reweighted = reweighted
1262
1263    if multi == 0:
1264        final_result = final_result.item()
1265
1266    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the Jacobian of func. Use with caution; supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous (lambda) functions. For the ratio of two observables one can, for example, use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
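
Numerical differentiation works the same way; a minimal sketch (obs1 and obs2 as above, with the step size controlled through the kwargs of numdifftools.step_generators.MaxStepGenerator):

import autograd.numpy as anp

# num_grad=True switches from autograd to numdifftools
new_obs_num = derived_observable(lambda x: anp.log(x[0] / x[1]), [obs1, obs2], num_grad=True)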

def reweight(weight, obs, **kwargs):
1303def reweight(weight, obs, **kwargs):
1304    """Reweight a list of observables.
1305
1306    Parameters
1307    ----------
1308    weight : Obs
1309        Reweighting factor. An Observable that has to be defined on a superset of the
1310        configurations in obs[i].idl for all i.
1311    obs : list
1312        list of Obs, e.g. [obs1, obs2, obs3].
1313    all_configs : bool
1314        if True, the reweighted observables are normalized by the average of
1315        the reweighting factor on all configurations in weight.idl and not
1316        on the configurations in obs[i].idl. Default False.
1317    """
1318    result = []
1319    for i in range(len(obs)):
1320        if len(obs[i].cov_names):
1321            raise Exception('Not possible to reweight an Obs that contains covobs!')
1322        if not set(obs[i].names).issubset(weight.names):
1323            raise Exception('Ensembles do not fit')
1324        for name in obs[i].names:
1325            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1326                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1327        new_samples = []
1328        w_deltas = {}
1329        for name in sorted(obs[i].names):
1330            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1331            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1332        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1333
1334        if kwargs.get('all_configs'):
1335            new_weight = weight
1336        else:
1337            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1338
1339        result.append(tmp_obs / new_weight)
1340        result[-1].reweighted = True
1341
1342    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
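
A minimal usage sketch (rw and my_obs are assumed to be existing Obs on the same ensemble, with my_obs.idl a subset of rw.idl):

# reweight returns a list with one entry per input Obs
reweighted = reweight(rw, [my_obs])[0]                        # normalized on the configs in my_obs.idl
reweighted_all = reweight(rw, [my_obs], all_configs=True)[0]  # normalized on all configs in rw.idl
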
def correlate(obs_a, obs_b):
1345def correlate(obs_a, obs_b):
1346    """Correlate two observables.
1347
1348    Parameters
1349    ----------
1350    obs_a : Obs
1351        First observable
1352    obs_b : Obs
1353        Second observable
1354
1355    Notes
1356    -----
1357    Keep in mind to correlate only primary observables which have not yet been
1358    reweighted. The reweighting has to be applied after correlating the observables.
1359    Currently this only works if the ensembles are identical (this restriction is not strictly necessary).
1360    """
1361
1362    if sorted(obs_a.names) != sorted(obs_b.names):
1363        raise Exception(f"Ensembles do not fit {set(obs_a.names) ^ set(obs_b.names)}")
1364    if len(obs_a.cov_names) or len(obs_b.cov_names):
1365        raise Exception('Not possible to correlate Obs that contain covobs!')
1366    for name in obs_a.names:
1367        if obs_a.shape[name] != obs_b.shape[name]:
1368            raise Exception(f'Shapes of ensemble {name} do not fit')
1369        if obs_a.idl[name] != obs_b.idl[name]:
1370            raise Exception(f'idl of ensemble {name} do not fit')
1371
1372    if obs_a.reweighted is True:
1373        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1374    if obs_b.reweighted is True:
1375        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1376
1377    new_samples = []
1378    new_idl = []
1379    for name in sorted(obs_a.names):
1380        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1381        new_idl.append(obs_a.idl[name])
1382
1383    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1384    o.reweighted = obs_a.reweighted or obs_b.reweighted
1385    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to correlate only primary observables which have not yet been reweighted. The reweighting has to be applied after correlating the observables. Currently this only works if the ensembles are identical (this restriction is not strictly necessary).
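
A minimal sketch (obs_x and obs_y are assumed to be primary, not yet reweighted, Obs with identical ensembles and idl):

# configuration-wise product of the two observables, an estimator for <x*y>
xy = correlate(obs_x, obs_y)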

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1388def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1389    r'''Calculates the error covariance matrix of a set of observables.
1390
1391    WARNING: This function should be used with care, especially for observables with support on multiple
1392             ensembles with differing autocorrelations. See the notes below for details.
1393
1394    The gamma method has to be applied first to all observables.
1395
1396    Parameters
1397    ----------
1398    obs : list or numpy.ndarray
1399        List or one dimensional array of Obs
1400    visualize : bool
1401        If True plots the corresponding normalized correlation matrix (default False).
1402    correlation : bool
1403        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1404    smooth : None or int
1405        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1406        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1407        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1408        small ones.
1409
1410    Notes
1411    -----
1412    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1413    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1414    in the absence of autocorrelation.
1415    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors, including the previous gamma method estimate for the autocorrelation of the observables. The covariance at window size 0 is guaranteed to be positive semi-definite
1416    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1417    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1418    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1419    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1420    '''
1421
1422    length = len(obs)
1423
1424    max_samples = np.max([o.N for o in obs])
1425    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1426        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank-deficient matrix.", RuntimeWarning)
1427
1428    cov = np.zeros((length, length))
1429    for i in range(length):
1430        for j in range(i, length):
1431            cov[i, j] = _covariance_element(obs[i], obs[j])
1432    cov = cov + cov.T - np.diag(np.diag(cov))
1433
1434    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1435
1436    if isinstance(smooth, int):
1437        corr = _smooth_eigenvalues(corr, smooth)
1438
1439    if visualize:
1440        plt.matshow(corr, vmin=-1, vmax=1)
1441        plt.set_cmap('RdBu')
1442        plt.colorbar()
1443        plt.draw()
1444
1445    if correlation is True:
1446        return corr
1447
1448    errors = [o.dvalue for o in obs]
1449    cov = np.diag(errors) @ corr @ np.diag(errors)
1450
1451    eigenvalues = np.linalg.eigh(cov)[0]
1452    if not np.all(eigenvalues >= 0):
1453        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1454
1455    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables
$$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
in the absence of autocorrelation.

The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors, including the previous gamma method estimate for the autocorrelation of the observables. The covariance at window size 0 is guaranteed to be positive semi-definite
$$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$
for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.

For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements,
$$\tau_{\mathrm{int},ij}=\sqrt{\tau_{\mathrm{int},i}\times\tau_{\mathrm{int},j}}\,.$$
This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
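
A minimal sketch of the intended workflow (obs1 and obs2 are assumed to be existing Obs):

# the gamma method has to be run on every observable first
obs1.gamma_method()
obs2.gamma_method()

cov = covariance([obs1, obs2])                     # 2x2 error covariance matrix
corr = covariance([obs1, obs2], correlation=True)  # corresponding correlation matrix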

def import_jackknife(jacks, name, idl=None):
1535def import_jackknife(jacks, name, idl=None):
1536    """Imports jackknife samples and returns an Obs
1537
1538    Parameters
1539    ----------
1540    jacks : numpy.ndarray
1541        numpy array containing the mean value as zeroth entry and
1542        the N jackknife samples as first to Nth entry.
1543    name : str
1544        name of the ensemble the samples are defined on.
1545    """
1546    length = len(jacks) - 1
1547    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1548    samples = jacks[1:] @ prj
1549    mean = np.mean(samples)
1550    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1551    new_obs._value = jacks[0]
1552    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
  • idl (list, optional): list of ranges or lists on which the reconstructed samples are defined.
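
The projection matrix prj undoes the jackknife resampling: with $S=\sum_i x_i$ and jackknife samples $\bar{x}_k=(S-x_k)/(N-1)$, the jackknife samples sum to $S$, so $x_k=\sum_j\bar{x}_j-(N-1)\bar{x}_k$, which is exactly jacks[1:] @ prj. A minimal round-trip sketch (my_obs is assumed to be an existing Obs defined on a single ensemble; export_jackknife is the corresponding Obs method producing the expected layout):

jacks = my_obs.export_jackknife()  # mean as zeroth entry, N jackknife samples after it
name = my_obs.names[0]
reconstructed = import_jackknife(jacks, name, idl=[my_obs.idl[name]])
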
def merge_obs(list_of_obs):
1555def merge_obs(list_of_obs):
1556    """Combine all observables in list_of_obs into one new observable
1557
1558    Parameters
1559    ----------
1560    list_of_obs : list
1561        list of the Obs object to be combined
1562
1563    Notes
1564    -----
1565    It is not possible to combine obs which are based on the same replicum
1566    """
1567    replist = [item for obs in list_of_obs for item in obs.names]
1568    if len(replist) != len(set(replist)):
1569        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1570    if any([len(o.cov_names) for o in list_of_obs]):
1571        raise Exception('Not possible to merge data that contains covobs!')
1572    new_dict = {}
1573    idl_dict = {}
1574    for o in list_of_obs:
1575        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1576                        for key in set(o.deltas) | set(o.r_values)})
1577        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1578
1579    names = sorted(new_dict.keys())
1580    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1581    o.reweighted = any(oi.reweighted for oi in list_of_obs)
1582    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum
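
A minimal sketch (obs_r1 and obs_r2 are assumed to be existing Obs defined on two disjoint replica of the same ensemble, e.g. named 'ens|r1' and 'ens|r2'):

combined = merge_obs([obs_r1, obs_r2])  # a single Obs carrying both replica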

def cov_Obs(means, cov, name, grad=None):
1585def cov_Obs(means, cov, name, grad=None):
1586    """Create an Obs based on mean(s) and a covariance matrix
1587
1588    Parameters
1589    ----------
1590    means : list of floats or float
1591        N mean value(s) of the new Obs
1592    cov : list or array
1593        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1594    name : str
1595        identifier for the covariance matrix
1596    grad : list or array
1597        Gradient of the Covobs wrt. the means belonging to cov.
1598    """
1599
1600    def covobs_to_obs(co):
1601        """Make an Obs out of a Covobs
1602
1603        Parameters
1604        ----------
1605        co : Covobs
1606            Covobs to be embedded into the Obs
1607        """
1608        o = Obs([], [], means=[])
1609        o._value = co.value
1610        o.names.append(co.name)
1611        o._covobs[co.name] = co
1612        o._dvalue = np.sqrt(co.errsq())
1613        return o
1614
1615    ol = []
1616    if isinstance(means, (float, int)):
1617        means = [means]
1618
1619    for i in range(len(means)):
1620        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1621    if ol[0].covobs[name].N != len(means):
1622        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
1623    if len(ol) == 1:
1624        return ol[0]
1625    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • means (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.
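
Two minimal sketches (all numbers and names purely illustrative): a single value with a plain variance, and two correlated values with a 2x2 covariance matrix:

renorm = cov_Obs(0.78, 0.0005 ** 2, 'renormalization')  # 0d covariance: one mean, one variance
pair = cov_Obs([1.0, 2.0], [[0.01, 0.002], [0.002, 0.02]], 'syst')  # list of two correlated Obs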