pyerrors.obs

View Source

   1import warnings
   2import hashlib
   3import pickle
   4import numpy as np
   5import autograd.numpy as anp  # Thinly-wrapped numpy
   6import scipy
   7from autograd import jacobian
   8import matplotlib.pyplot as plt
   9from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  10import numdifftools as nd
  11from itertools import groupby
  12from .covobs import Covobs
  13
  14# Improve print output of numpy.ndarrays containing Obs objects.
  15np.set_printoptions(formatter={'object': lambda x: str(x)})
  16
  17
  18class Obs:
  19    """Class for a general observable.
  20
  21    Instances of Obs are the basic objects of a pyerrors error analysis.
  22    They are initialized with a list which contains arrays of samples for
  23    different ensembles/replica and another list of same length which contains
  24    the names of the ensembles/replica. Mathematical operations can be
  25    performed on instances. The result is another instance of Obs. The error of
  26    an instance can be computed with the gamma_method. Also contains additional
  27    methods for output and visualization of the error calculation.
  28
  29    Attributes
  30    ----------
  31    S_global : float
  32        Standard value for S (default 2.0)
  33    S_dict : dict
  34        Dictionary for S values. If an entry for a given ensemble
  35        exists this overwrites the standard value for that ensemble.
  36    tau_exp_global : float
  37        Standard value for tau_exp (default 0.0)
  38    tau_exp_dict : dict
  39        Dictionary for tau_exp values. If an entry for a given ensemble exists
  40        this overwrites the standard value for that ensemble.
  41    N_sigma_global : float
  42        Standard value for N_sigma (default 1.0)
  43    N_sigma_dict : dict
  44        Dictionary for N_sigma values. If an entry for a given ensemble exists
  45        this overwrites the standard value for that ensemble.
  46    """
  47    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  48                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  49                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  50                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  51                 'idl', 'tag', '_covobs', '__dict__']
  52
  53    S_global = 2.0
  54    S_dict = {}
  55    tau_exp_global = 0.0
  56    tau_exp_dict = {}
  57    N_sigma_global = 1.0
  58    N_sigma_dict = {}
  59
  60    def __init__(self, samples, names, idl=None, **kwargs):
  61        """ Initialize Obs object.
  62
  63        Parameters
  64        ----------
  65        samples : list
  66            list of numpy arrays containing the Monte Carlo samples
  67        names : list
  68            list of strings labeling the individual samples
  69        idl : list, optional
  70            list of ranges or lists on which the samples are defined
  71        """
  72
  73        if kwargs.get("means") is None and len(samples):
  74            if len(samples) != len(names):
  75                raise ValueError('Length of samples and names incompatible.')
  76            if idl is not None:
  77                if len(idl) != len(names):
  78                    raise ValueError('Length of idl incompatible with samples and names.')
  79            name_length = len(names)
  80            if name_length > 1:
  81                if name_length != len(set(names)):
  82                    raise ValueError('Names are not unique.')
  83                if not all(isinstance(x, str) for x in names):
  84                    raise TypeError('All names have to be strings.')
  85            else:
  86                if not isinstance(names[0], str):
  87                    raise TypeError('All names have to be strings.')
  88            if min(len(x) for x in samples) <= 4:
  89                raise ValueError('Samples have to have at least 5 entries.')
  90
  91        self.names = sorted(names)
  92        self.shape = {}
  93        self.r_values = {}
  94        self.deltas = {}
  95        self._covobs = {}
  96
  97        self._value = 0
  98        self.N = 0
  99        self.idl = {}
 100        if idl is not None:
 101            for name, idx in sorted(zip(names, idl)):
 102                if isinstance(idx, range):
 103                    self.idl[name] = idx
 104                elif isinstance(idx, (list, np.ndarray)):
 105                    dc = np.unique(np.diff(idx))
 106                    if np.any(dc < 0):
 107                        raise ValueError("Unsorted idx for idl[%s]" % (name))
 108                    if len(dc) == 1:
 109                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 110                    else:
 111                        self.idl[name] = list(idx)
 112                else:
 113                    raise TypeError('incompatible type for idl[%s].' % (name))
 114        else:
 115            for name, sample in sorted(zip(names, samples)):
 116                self.idl[name] = range(1, len(sample) + 1)
 117
 118        if kwargs.get("means") is not None:
 119            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 120                self.shape[name] = len(self.idl[name])
 121                self.N += self.shape[name]
 122                self.r_values[name] = mean
 123                self.deltas[name] = sample
 124        else:
 125            for name, sample in sorted(zip(names, samples)):
 126                self.shape[name] = len(self.idl[name])
 127                self.N += self.shape[name]
 128                if len(sample) != self.shape[name]:
 129                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 130                self.r_values[name] = np.mean(sample)
 131                self.deltas[name] = sample - self.r_values[name]
 132                self._value += self.shape[name] * self.r_values[name]
 133            self._value /= self.N
 134
 135        self._dvalue = 0.0
 136        self.ddvalue = 0.0
 137        self.reweighted = False
 138
 139        self.tag = None
 140
 141    @property
 142    def value(self):
 143        return self._value
 144
 145    @property
 146    def dvalue(self):
 147        return self._dvalue
 148
 149    @property
 150    def e_names(self):
 151        return sorted(set([o.split('|')[0] for o in self.names]))
 152
 153    @property
 154    def cov_names(self):
 155        return sorted(set([o for o in self.covobs.keys()]))
 156
 157    @property
 158    def mc_names(self):
 159        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 160
 161    @property
 162    def e_content(self):
 163        res = {}
 164        for e, e_name in enumerate(self.e_names):
 165            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 166            if e_name in self.names:
 167                res[e_name].append(e_name)
 168        return res
 169
 170    @property
 171    def covobs(self):
 172        return self._covobs
 173
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        The results are stored on the instance: per-ensemble quantities in the
        dictionaries e_dvalue, e_ddvalue, e_tauint, e_dtauint, e_windowsize,
        e_rho, e_drho, e_n_tauint and e_n_dtauint, the combined error in
        _dvalue and its error in ddvalue. The method returns None.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)

        Raises
        ------
        Exception
            If S, tau_exp or N_sigma is negative, or if the tau_exp analysis
            is requested with too few samples.
        TypeError
            If S, tau_exp or N_sigma is passed but is not an int or float.
        """

        # Reset all results of a previous call.
        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        # The fft is only switched off when fft=False is passed explicitly.
        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # Fill self.<kwarg_name> for every ensemble: an explicitly passed
            # value is used for all ensembles, otherwise the entry of the class
            # level <kwarg_name>_dict and finally <kwarg_name>_global is used.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        # Monte Carlo ensembles: error from the autocorrelation function.
        for e, e_name in enumerate(self.mc_names):
            # NOTE(review): _determine_gap is defined outside this block; it is
            # used here as the common spacing in which all lengths are measured.
            gapsize = _determine_gap(self, e_content, e_name)

            # Extent of every replicum in units of gapsize.
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            # Largest window considered: half of the longest replicum.
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            # Sum the unnormalized autocorrelation function over all replica.
            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)

            # Normalization: the same computation applied to vectors of ones.
            # Entries below 1 are set to 1 so that the division is well defined.
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                # Vanishing variance: store a zero error. Note that e_n_tauint and
                # e_n_dtauint get no entry for this ensemble in this case.
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            # Normalized autocorrelation function and its running sum for all windows.
            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Store the error estimate of e_rho at separation i in e_drho.
                # Uses e_name, w_max and e_N of the current loop iteration.
                tmp = (self.e_rho[e_name][i + 1:w_max]
                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            if self.tau_exp[e_name] > 0:
                _compute_drho(1)
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    # Stop at the first n where rho is compatible with zero within
                    # N_sigma errors, or close to the end of the admissible range.
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # No autocorrelation assumed: tau_int is fixed to 0.5.
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        # The window is the first n with negative g_w, or the last admissible n.
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            _compute_drho(n)
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Contributions of the ensembles are added in quadrature.
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        # Covobs entries: the error is taken from errsq() and has no error itself.
        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return
 339
 340    gm = gamma_method
 341
 342    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
 343        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 344           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 345
 346        Parameters
 347        ----------
 348        deltas : list
 349            List of fluctuations
 350        idx : list
 351            List or range of configurations on which the deltas are defined.
 352        shape : int
 353            Number of configurations in idx.
 354        w_max : int
 355            Upper bound for the summation window.
 356        fft : bool
 357            determines whether the fft algorithm is used for the computation
 358            of the autocorrelation function.
 359        gapsize : int
 360            The target distance between two configurations. If longer distances
 361            are found in idx, the data is expanded.
 362        """
 363        gamma = np.zeros(w_max)
 364        deltas = _expand_deltas(deltas, idx, shape, gapsize)
 365        new_shape = len(deltas)
 366        if fft:
 367            max_gamma = min(new_shape, w_max)
 368            # The padding for the fft has to be even
 369            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 370            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 371        else:
 372            for n in range(w_max):
 373                if new_shape - n >= 0:
 374                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 375
 376        return gamma
 377
 378    def details(self, ens_content=True):
 379        """Output detailed properties of the Obs.
 380
 381        Parameters
 382        ----------
 383        ens_content : bool
 384            print details about the ensembles and replica if true.
 385        """
 386        if self.tag is not None:
 387            print("Description:", self.tag)
 388        if not hasattr(self, 'e_dvalue'):
 389            print('Result\t %3.8e' % (self.value))
 390        else:
 391            if self.value == 0.0:
 392                percentage = np.nan
 393            else:
 394                percentage = np.abs(self._dvalue / self.value) * 100
 395            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 396            if len(self.e_names) > 1:
 397                print(' Ensemble errors:')
 398            e_content = self.e_content
 399            for e_name in self.mc_names:
 400                gap = _determine_gap(self, e_content, e_name)
 401
 402                if len(self.e_names) > 1:
 403                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 404                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 405                tau_string += f" in units of {gap} config"
 406                if gap > 1:
 407                    tau_string += "s"
 408                if self.tau_exp[e_name] > 0:
 409                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 410                else:
 411                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 412                print(tau_string)
 413            for e_name in self.cov_names:
 414                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 415        if ens_content is True:
 416            if len(self.e_names) == 1:
 417                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 418            else:
 419                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 420            my_string_list = []
 421            for key, value in sorted(self.e_content.items()):
 422                if key not in self.covobs:
 423                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 424                    if len(value) == 1:
 425                        my_string += f': {self.shape[value[0]]} configurations'
 426                        if isinstance(self.idl[value[0]], range):
 427                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 428                        else:
 429                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 430                    else:
 431                        sublist = []
 432                        for v in value:
 433                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 434                            my_substring += f': {self.shape[v]} configurations'
 435                            if isinstance(self.idl[v], range):
 436                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 437                            else:
 438                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 439                            sublist.append(my_substring)
 440
 441                        my_string += '\n' + '\n'.join(sublist)
 442                else:
 443                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 444                my_string_list.append(my_string)
 445            print('\n'.join(my_string_list))
 446
 447    def reweight(self, weight):
 448        """Reweight the obs with given rewighting factors.
 449
 450        Parameters
 451        ----------
 452        weight : Obs
 453            Reweighting factor. An Observable that has to be defined on a superset of the
 454            configurations in obs[i].idl for all i.
 455        all_configs : bool
 456            if True, the reweighted observables are normalized by the average of
 457            the reweighting factor on all configurations in weight.idl and not
 458            on the configurations in obs[i].idl. Default False.
 459        """
 460        return reweight(weight, [self])[0]
 461
 462    def is_zero_within_error(self, sigma=1):
 463        """Checks whether the observable is zero within 'sigma' standard errors.
 464
 465        Parameters
 466        ----------
 467        sigma : int
 468            Number of standard errors used for the check.
 469
 470        Works only properly when the gamma method was run.
 471        """
 472        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 473
 474    def is_zero(self, atol=1e-10):
 475        """Checks whether the observable is zero within a given tolerance.
 476
 477        Parameters
 478        ----------
 479        atol : float
 480            Absolute tolerance (for details see numpy documentation).
 481        """
 482        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 483
 484    def plot_tauint(self, save=None):
 485        """Plot integrated autocorrelation time for each ensemble.
 486
 487        Parameters
 488        ----------
 489        save : str
 490            saves the figure to a file named 'save' if.
 491        """
 492        if not hasattr(self, 'e_dvalue'):
 493            raise Exception('Run the gamma method first.')
 494
 495        for e, e_name in enumerate(self.mc_names):
 496            fig = plt.figure()
 497            plt.xlabel(r'$W$')
 498            plt.ylabel(r'$\tau_\mathrm{int}$')
 499            length = int(len(self.e_n_tauint[e_name]))
 500            if self.tau_exp[e_name] > 0:
 501                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 502                x_help = np.arange(2 * self.tau_exp[e_name])
 503                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 504                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 505                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 506                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 507                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 508                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 509                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 510            else:
 511                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 512                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 513
 514            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 515            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 516            plt.legend()
 517            plt.xlim(-0.5, xmax)
 518            ylim = plt.ylim()
 519            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 520            plt.draw()
 521            if save:
 522                fig.savefig(save + "_" + str(e))
 523
 524    def plot_rho(self, save=None):
 525        """Plot normalized autocorrelation function time for each ensemble.
 526
 527        Parameters
 528        ----------
 529        save : str
 530            saves the figure to a file named 'save' if.
 531        """
 532        if not hasattr(self, 'e_dvalue'):
 533            raise Exception('Run the gamma method first.')
 534        for e, e_name in enumerate(self.mc_names):
 535            fig = plt.figure()
 536            plt.xlabel('W')
 537            plt.ylabel('rho')
 538            length = int(len(self.e_drho[e_name]))
 539            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 540            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 541            if self.tau_exp[e_name] > 0:
 542                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 543                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 544                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 545                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 546            else:
 547                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 548                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 549            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 550            plt.xlim(-0.5, xmax)
 551            plt.draw()
 552            if save:
 553                fig.savefig(save + "_" + str(e))
 554
 555    def plot_rep_dist(self):
 556        """Plot replica distribution for each ensemble with more than one replicum."""
 557        if not hasattr(self, 'e_dvalue'):
 558            raise Exception('Run the gamma method first.')
 559        for e, e_name in enumerate(self.mc_names):
 560            if len(self.e_content[e_name]) == 1:
 561                print('No replica distribution for a single replicum (', e_name, ')')
 562                continue
 563            r_length = []
 564            sub_r_mean = 0
 565            for r, r_name in enumerate(self.e_content[e_name]):
 566                r_length.append(len(self.deltas[r_name]))
 567                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 568            e_N = np.sum(r_length)
 569            sub_r_mean /= e_N
 570            arr = np.zeros(len(self.e_content[e_name]))
 571            for r, r_name in enumerate(self.e_content[e_name]):
 572                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 573            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 574            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 575            plt.draw()
 576
 577    def plot_history(self, expand=True):
 578        """Plot derived Monte Carlo history for each ensemble
 579
 580        Parameters
 581        ----------
 582        expand : bool
 583            show expanded history for irregular Monte Carlo chains (default: True).
 584        """
 585        for e, e_name in enumerate(self.mc_names):
 586            plt.figure()
 587            r_length = []
 588            tmp = []
 589            tmp_expanded = []
 590            for r, r_name in enumerate(self.e_content[e_name]):
 591                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 592                if expand:
 593                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
 594                    r_length.append(len(tmp_expanded[-1]))
 595                else:
 596                    r_length.append(len(tmp[-1]))
 597            e_N = np.sum(r_length)
 598            x = np.arange(e_N)
 599            y_test = np.concatenate(tmp, axis=0)
 600            if expand:
 601                y = np.concatenate(tmp_expanded, axis=0)
 602            else:
 603                y = y_test
 604            plt.errorbar(x, y, fmt='.', markersize=3)
 605            plt.xlim(-0.5, e_N - 0.5)
 606            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 607            plt.draw()
 608
 609    def plot_piechart(self, save=None):
 610        """Plot piechart which shows the fractional contribution of each
 611        ensemble to the error and returns a dictionary containing the fractions.
 612
 613        Parameters
 614        ----------
 615        save : str
 616            saves the figure to a file named 'save' if.
 617        """
 618        if not hasattr(self, 'e_dvalue'):
 619            raise Exception('Run the gamma method first.')
 620        if np.isclose(0.0, self._dvalue, atol=1e-15):
 621            raise Exception('Error is 0.0')
 622        labels = self.e_names
 623        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 624        fig1, ax1 = plt.subplots()
 625        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 626        ax1.axis('equal')
 627        plt.draw()
 628        if save:
 629            fig1.savefig(save)
 630
 631        return dict(zip(labels, sizes))
 632
 633    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 634        """Dump the Obs to a file 'name' of chosen format.
 635
 636        Parameters
 637        ----------
 638        filename : str
 639            name of the file to be saved.
 640        datatype : str
 641            Format of the exported file. Supported formats include
 642            "json.gz" and "pickle"
 643        description : str
 644            Description for output file, only relevant for json.gz format.
 645        path : str
 646            specifies a custom path for the file (default '.')
 647        """
 648        if 'path' in kwargs:
 649            file_name = kwargs.get('path') + '/' + filename
 650        else:
 651            file_name = filename
 652
 653        if datatype == "json.gz":
 654            from .input.json import dump_to_json
 655            dump_to_json([self], file_name, description=description)
 656        elif datatype == "pickle":
 657            with open(file_name + '.p', 'wb') as fb:
 658                pickle.dump(self, fb)
 659        else:
 660            raise Exception("Unknown datatype " + str(datatype))
 661
 662    def export_jackknife(self):
 663        """Export jackknife samples from the Obs
 664
 665        Returns
 666        -------
 667        numpy.ndarray
 668            Returns a numpy array of length N + 1 where N is the number of samples
 669            for the given ensemble and replicum. The zeroth entry of the array contains
 670            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 671            derived from the Obs. The current implementation only works for observables
 672            defined on exactly one ensemble and replicum. The derived jackknife samples
 673            should agree with samples from a full jackknife analysis up to O(1/N).
 674        """
 675
 676        if len(self.names) != 1:
 677            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 678
 679        name = self.names[0]
 680        full_data = self.deltas[name] + self.r_values[name]
 681        n = full_data.size
 682        mean = self.value
 683        tmp_jacks = np.zeros(n + 1)
 684        tmp_jacks[0] = mean
 685        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 686        return tmp_jacks
 687
 688    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
 689        """Export bootstrap samples from the Obs
 690
 691        Parameters
 692        ----------
 693        samples : int
 694            Number of bootstrap samples to generate.
 695        random_numbers : np.ndarray
 696            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
 697            If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
 698        save_rng : str
 699            Save the random numbers to a file if a path is specified.
 700
 701        Returns
 702        -------
 703        numpy.ndarray
 704            Returns a numpy array of length N + 1 where N is the number of samples
 705            for the given ensemble and replicum. The zeroth entry of the array contains
 706            the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
 707            derived from the Obs. The current implementation only works for observables
 708            defined on exactly one ensemble and replicum. The derived bootstrap samples
 709            should agree with samples from a full bootstrap analysis up to O(1/N).
 710        """
 711        if len(self.names) != 1:
 712            raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
 713
 714        name = self.names[0]
 715        length = self.N
 716
 717        if random_numbers is None:
 718            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
 719            rng = np.random.default_rng(seed)
 720            random_numbers = rng.integers(0, length, size=(samples, length))
 721
 722        if save_rng is not None:
 723            np.savetxt(save_rng, random_numbers, fmt='%i')
 724
 725        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
 726        ret = np.zeros(samples + 1)
 727        ret[0] = self.value
 728        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
 729        return ret
 730
 731    def __float__(self):
 732        return float(self.value)
 733
 734    def __repr__(self):
 735        return 'Obs[' + str(self) + ']'
 736
 737    def __str__(self):
 738        return _format_uncertainty(self.value, self._dvalue)
 739
 740    def __format__(self, format_type):
 741        if format_type == "":
 742            significance = 2
 743        else:
 744            significance = int(float(format_type.replace("+", "").replace("-", "")))
 745        my_str = _format_uncertainty(self.value, self._dvalue,
 746                                     significance=significance)
 747        for char in ["+", " "]:
 748            if format_type.startswith(char):
 749                if my_str[0] != "-":
 750                    my_str = char + my_str
 751        return my_str
 752
 753    def __hash__(self):
 754        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 755        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 756        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 757        hash_tuple += tuple([o.encode() for o in self.names])
 758        m = hashlib.md5()
 759        [m.update(o) for o in hash_tuple]
 760        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 761
 762    # Overload comparisons
 763    def __lt__(self, other):
 764        return self.value < other
 765
 766    def __le__(self, other):
 767        return self.value <= other
 768
 769    def __gt__(self, other):
 770        return self.value > other
 771
 772    def __ge__(self, other):
 773        return self.value >= other
 774
 775    def __eq__(self, other):
 776        if other is None:
 777            return False
 778        return (self - other).is_zero()
 779
 780    # Overload math operations
 781    def __add__(self, y):
 782        if isinstance(y, Obs):
 783            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 784        else:
 785            if isinstance(y, np.ndarray):
 786                return np.array([self + o for o in y])
 787            elif isinstance(y, complex):
 788                return CObs(self, 0) + y
 789            elif y.__class__.__name__ in ['Corr', 'CObs']:
 790                return NotImplemented
 791            else:
 792                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 793
 794    def __radd__(self, y):
 795        return self + y
 796
 797    def __mul__(self, y):
 798        if isinstance(y, Obs):
 799            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 800        else:
 801            if isinstance(y, np.ndarray):
 802                return np.array([self * o for o in y])
 803            elif isinstance(y, complex):
 804                return CObs(self * y.real, self * y.imag)
 805            elif y.__class__.__name__ in ['Corr', 'CObs']:
 806                return NotImplemented
 807            else:
 808                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 809
 810    def __rmul__(self, y):
 811        return self * y
 812
 813    def __sub__(self, y):
 814        if isinstance(y, Obs):
 815            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 816        else:
 817            if isinstance(y, np.ndarray):
 818                return np.array([self - o for o in y])
 819            elif y.__class__.__name__ in ['Corr', 'CObs']:
 820                return NotImplemented
 821            else:
 822                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 823
 824    def __rsub__(self, y):
 825        return -1 * (self - y)
 826
 827    def __pos__(self):
 828        return self
 829
 830    def __neg__(self):
 831        return -1 * self
 832
 833    def __truediv__(self, y):
 834        if isinstance(y, Obs):
 835            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 836        else:
 837            if isinstance(y, np.ndarray):
 838                return np.array([self / o for o in y])
 839            elif y.__class__.__name__ in ['Corr', 'CObs']:
 840                return NotImplemented
 841            else:
 842                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 843
 844    def __rtruediv__(self, y):
 845        if isinstance(y, Obs):
 846            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 847        else:
 848            if isinstance(y, np.ndarray):
 849                return np.array([o / self for o in y])
 850            elif y.__class__.__name__ in ['Corr', 'CObs']:
 851                return NotImplemented
 852            else:
 853                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 854
 855    def __pow__(self, y):
 856        if isinstance(y, Obs):
 857            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 858        else:
 859            return derived_observable(lambda x: x[0] ** y, [self])
 860
 861    def __rpow__(self, y):
 862        if isinstance(y, Obs):
 863            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 864        else:
 865            return derived_observable(lambda x: y ** x[0], [self])
 866
 867    def __abs__(self):
 868        return derived_observable(lambda x: anp.abs(x[0]), [self])
 869
 870    # Overload numpy functions
 871    def sqrt(self):
 872        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 873
 874    def log(self):
 875        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 876
 877    def exp(self):
 878        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 879
 880    def sin(self):
 881        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 882
 883    def cos(self):
 884        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 885
 886    def tan(self):
 887        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 888
 889    def arcsin(self):
 890        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 891
 892    def arccos(self):
 893        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 894
 895    def arctan(self):
 896        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 897
 898    def sinh(self):
 899        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 900
 901    def cosh(self):
 902        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 903
 904    def tanh(self):
 905        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 906
 907    def arcsinh(self):
 908        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 909
 910    def arccosh(self):
 911        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 912
 913    def arctanh(self):
 914        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 915
 916
 917class CObs:
 918    """Class for a complex valued observable."""
 919    __slots__ = ['_real', '_imag', 'tag']
 920
 921    def __init__(self, real, imag=0.0):
 922        self._real = real
 923        self._imag = imag
 924        self.tag = None
 925
 926    @property
 927    def real(self):
 928        return self._real
 929
 930    @property
 931    def imag(self):
 932        return self._imag
 933
 934    def gamma_method(self, **kwargs):
 935        """Executes the gamma_method for the real and the imaginary part."""
 936        if isinstance(self.real, Obs):
 937            self.real.gamma_method(**kwargs)
 938        if isinstance(self.imag, Obs):
 939            self.imag.gamma_method(**kwargs)
 940
 941    def is_zero(self):
 942        """Checks whether both real and imaginary part are zero within machine precision."""
 943        return self.real == 0.0 and self.imag == 0.0
 944
 945    def conjugate(self):
 946        return CObs(self.real, -self.imag)
 947
 948    def __add__(self, other):
 949        if isinstance(other, np.ndarray):
 950            return other + self
 951        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 952            return CObs(self.real + other.real,
 953                        self.imag + other.imag)
 954        else:
 955            return CObs(self.real + other, self.imag)
 956
 957    def __radd__(self, y):
 958        return self + y
 959
 960    def __sub__(self, other):
 961        if isinstance(other, np.ndarray):
 962            return -1 * (other - self)
 963        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 964            return CObs(self.real - other.real, self.imag - other.imag)
 965        else:
 966            return CObs(self.real - other, self.imag)
 967
 968    def __rsub__(self, other):
 969        return -1 * (self - other)
 970
 971    def __mul__(self, other):
 972        if isinstance(other, np.ndarray):
 973            return other * self
 974        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 975            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 976                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 977                                               [self.real, other.real, self.imag, other.imag],
 978                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 979                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 980                                               [self.real, other.real, self.imag, other.imag],
 981                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 982            elif getattr(other, 'imag', 0) != 0:
 983                return CObs(self.real * other.real - self.imag * other.imag,
 984                            self.imag * other.real + self.real * other.imag)
 985            else:
 986                return CObs(self.real * other.real, self.imag * other.real)
 987        else:
 988            return CObs(self.real * other, self.imag * other)
 989
 990    def __rmul__(self, other):
 991        return self * other
 992
 993    def __truediv__(self, other):
 994        if isinstance(other, np.ndarray):
 995            return 1 / (other / self)
 996        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 997            r = other.real ** 2 + other.imag ** 2
 998            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 999        else:
1000            return CObs(self.real / other, self.imag / other)
1001
1002    def __rtruediv__(self, other):
1003        r = self.real ** 2 + self.imag ** 2
1004        if hasattr(other, 'real') and hasattr(other, 'imag'):
1005            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1006        else:
1007            return CObs(self.real * other / r, -self.imag * other / r)
1008
1009    def __abs__(self):
1010        return np.sqrt(self.real**2 + self.imag**2)
1011
1012    def __pos__(self):
1013        return self
1014
1015    def __neg__(self):
1016        return -1 * self
1017
1018    def __eq__(self, other):
1019        return self.real == other.real and self.imag == other.imag
1020
1021    def __str__(self):
1022        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1023
1024    def __repr__(self):
1025        return 'CObs[' + str(self) + ']'
1026
1027    def __format__(self, format_type):
1028        if format_type == "":
1029            significance = 2
1030            format_type = "2"
1031        else:
1032            significance = int(float(format_type.replace("+", "").replace("-", "")))
1033        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"
1034
1035
def gamma_method(x, **kwargs):
    """Apply the gamma_method to every Obs in a list or array.

    All keyword arguments are forwarded to the `gm` method of each element.
    See docstring of pe.Obs.gamma_method for details.
    """
    def _run_single(single):
        return single.gm(**kwargs)

    return np.vectorize(_run_single)(x)


gm = gamma_method
1045
1046
1047def _format_uncertainty(value, dvalue, significance=2):
1048    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
1049    if dvalue == 0.0 or (not np.isfinite(dvalue)):
1050        return str(value)
1051    if not isinstance(significance, int):
1052        raise TypeError("significance needs to be an integer.")
1053    if significance < 1:
1054        raise ValueError("significance needs to be larger than zero.")
1055    fexp = np.floor(np.log10(dvalue))
1056    if fexp < 0.0:
1057        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
1058    elif fexp == 0.0:
1059        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
1060    else:
1061        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"
1062
1063
1064def _expand_deltas(deltas, idx, shape, gapsize):
1065    """Expand deltas defined on idx to a regular range with spacing gapsize between two
1066       configurations and where holes are filled by 0.
1067       If idx is of type range, the deltas are not changed if the idx.step == gapsize.
1068
1069    Parameters
1070    ----------
1071    deltas : list
1072        List of fluctuations
1073    idx : list
1074        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1075    shape : int
1076        Number of configs in idx.
1077    gapsize : int
1078        The target distance between two configurations. If longer distances
1079        are found in idx, the data is expanded.
1080    """
1081    if isinstance(idx, range):
1082        if (idx.step == gapsize):
1083            return deltas
1084    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
1085    for i in range(shape):
1086        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
1087    return ret
1088
1089
def _merge_idx(idl):
    """Returns the union of all lists in idl as range or sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """

    if _check_lists_equal(idl):
        return idl[0]

    idunion = sorted(set().union(*idl))

    # A step can only be derived from at least two entries; shorter unions are
    # returned as list (same fallback as in _intersection_idx) instead of
    # failing with an IndexError.
    if len(idunion) < 2:
        return idunion

    # Check whether idunion can be expressed as range
    idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0])
    idtest = [list(idrange), idunion]
    if _check_lists_equal(idtest):
        return idrange

    return idunion
1111
1112
def _intersection_idx(idl):
    """Returns the configurations common to all lists in idl as range or sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """
    if _check_lists_equal(idl):
        return idl[0]

    common = sorted(set.intersection(*[set(entry) for entry in idl]))

    # Try to express the result as a range; with fewer than two entries no
    # step can be derived and the plain list is returned.
    try:
        candidate = range(common[0], common[-1] + 1, common[1] - common[0])
        if _check_lists_equal([list(candidate), common]):
            return candidate
    except IndexError:
        pass

    return common
1137
1138
1139def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1140    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1141       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1142       common divisor of the step sizes is used as new step size.
1143
1144    Parameters
1145    ----------
1146    deltas : list
1147        List of fluctuations
1148    idx : list
1149        List or range of configs on which the deltas are defined.
1150        Has to be a subset of new_idx and has to be sorted in ascending order.
1151    shape : list
1152        Number of configs in idx.
1153    new_idx : list
1154        List of configs that defines the new range, has to be sorted in ascending order.
1155    """
1156
1157    if type(idx) is range and type(new_idx) is range:
1158        if idx == new_idx:
1159            return deltas
1160    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1161    for i in range(shape):
1162        ret[idx[i] - new_idx[0]] = deltas[i]
1163    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)
1164
1165
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    array_mode : bool
        if True, the fluctuations and covobs gradients are collected once per
        entry of data (each entry is treated as an array of Obs) and contracted
        with the derivative via np.tensordot (default False).
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Returns
    -------
    Obs or numpy.ndarray
        A single Obs if func does not return a numpy.ndarray, otherwise an
        object array of Obs with the shape of the return value of func.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

    All keyword arguments (including num_grad and man_grad) are also passed
    on to func, which therefore has to accept them.
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    # Collect the covariance matrix for every covobs name and make sure that
    # all inputs agree on it.
    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    # Names that are not covobs names, i.e. names that carry deltas.
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    # The result is flagged as reweighted as soon as one input is.
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    # Central values of the inputs in the shape of data.
    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    # 1 if func returns an array, 0 otherwise.
    multi = int(isinstance(new_values, np.ndarray))

    # Evaluate func on the per-name r_values; inputs that do not know a name
    # contribute their value. The idl of all inputs are merged per name.
    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)

    # Jacobian of func: user supplied, numerical (numdifftools) or autograd.
    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        # Defaults for the step generator, overridable via kwargs.
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        # Stand-in with a vanishing gradient for Obs that do not contain a
        # given covobs name.
        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        # Expanded deltas / covobs gradients, extracted once per entry of data.
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    # Build one Obs per entry of the output: deltas and covobs gradients of
    # the inputs are combined linearly, weighted with the derivative.
    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                new_samples.append(new_deltas[name])
                new_idl.append(new_idl_d[name])
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        # The covobs names are appended to the names after construction.
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        # The value is set to func evaluated at the central values.
        final_result[i_val]._value = new_val
        final_result[i_val].reweighted = reweighted

    # Non-array output: unwrap the 0-d object array.
    if multi == 0:
        final_result = final_result.item()

    return final_result
1334
1335
def _reduce_deltas(deltas, idx_old, idx_new):
    """Restrict deltas defined on idx_old to the configs of idx_new.

    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
    are ordered in an ascending order.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx_old : list
        List or range of configs on which the deltas are defined
    idx_new : list
        List of configs for which we want to extract the deltas.
        Has to be a subset of idx_old.
    """
    if len(deltas) != len(idx_old):
        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))

    # Nothing to do if both index sets coincide.
    both_ranges = type(idx_old) is range and type(idx_new) is range
    if both_ranges and idx_old == idx_new:
        return deltas
    if _check_lists_equal([idx_old, idx_new]):
        return deltas

    # Positions in idx_old of the configs that are also part of idx_new.
    _, positions, _ = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)
    if len(positions) < len(idx_new):
        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
    return np.array(deltas)[positions]
1363
1364
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.

    Returns
    -------
    list
        The reweighted observables, each flagged with reweighted = True.
    """
    reweighted = []
    for pos, single in enumerate(obs):
        if len(single.cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(single.names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in single.names:
            if not set(single.idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (pos, name))

        ordered = sorted(single.names)

        # Weight fluctuations restricted to the configs of the observable.
        w_deltas = {}
        products = []
        for name in ordered:
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], single.idl[name])
            products.append((w_deltas[name] + weight.r_values[name]) * (single.deltas[name] + single.r_values[name]))
        numerator = Obs(products, list(ordered), idl=[single.idl[name] for name in ordered])

        if kwargs.get('all_configs'):
            denominator = weight
        else:
            restricted = [w_deltas[name] + weight.r_values[name] for name in ordered]
            denominator = Obs(restricted, list(ordered), idl=[single.idl[name] for name in ordered])

        ratio = numerator / denominator
        ratio.reweighted = True
        reweighted.append(ratio)

    return reweighted
1405
1406
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Builds a new Obs whose samples are the configuration-wise product of the
    samples of obs_a and obs_b.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Returns
    -------
    Obs
        Observable built from the products of the samples. Its reweighted flag
        is obs_a.reweighted or obs_b.reweighted.

    Raises
    ------
    Exception
        If the ensembles, shapes or idl of the two observables differ or if
        one of them contains covobs.

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """
    names = sorted(obs_a.names)

    if names != sorted(obs_b.names):
        raise Exception(f"Ensembles do not fit {set(obs_a.names) ^ set(obs_b.names)}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        # The messages are formatted into a single string; passing the pieces as
        # separate arguments would render the exception as a tuple.
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception(f'Shapes of ensemble {name} do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception(f'idl of ensemble {name} do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = [(obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])
                   for name in names]
    new_idl = [obs_a.idl[name] for name in names]

    o = Obs(new_samples, names, idl=new_idl)
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o
1448
1449
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
             ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    numpy.ndarray
        The correlation matrix if correlation is True, otherwise the error covariance matrix.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    dim = len(obs)

    largest_sample_count = np.max([o.N for o in obs])
    # Only warn about rank deficiency when no observable carries covobs.
    if largest_sample_count <= dim and not any(len(o.cov_names) for o in obs):
        warnings.warn(f"The dimension of the covariance matrix ({dim}) is larger or equal to the number of samples ({largest_sample_count}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Fill the upper triangle only and mirror it afterwards.
    upper = np.zeros((dim, dim))
    for row, first in enumerate(obs):
        for col in range(row, dim):
            upper[row, col] = _covariance_element(first, obs[col])
    raw_cov = upper + upper.T - np.diag(np.diag(upper))

    inverse_sigma = np.diag(1 / np.sqrt(np.diag(raw_cov)))
    corr = inverse_sigma @ raw_cov @ inverse_sigma

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    # Rescale the correlation matrix with the full errors of the observables.
    sigma = np.diag([o.dvalue for o in obs])
    rescaled_cov = sigma @ corr @ sigma

    eigenvalues = np.linalg.eigh(rescaled_cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return rescaled_cov
1518
1519
1520def _smooth_eigenvalues(corr, E):
1521    """Eigenvalue smoothing as described in hep-lat/9412087
1522
1523    corr : np.ndarray
1524        correlation matrix
1525    E : integer
1526        Number of eigenvalues to be left substantially unchanged
1527    """
1528    if not (2 < E < corr.shape[0] - 1):
1529        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1530    vals, vec = np.linalg.eigh(corr)
1531    lambda_min = np.mean(vals[:-E])
1532    vals[vals < lambda_min] = lambda_min
1533    vals /= np.mean(vals)
1534    return vec @ np.diag(vals) @ vec.T
1535
1536
1537def _covariance_element(obs1, obs2):
1538    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1539
1540    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1541        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1542        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1543        return np.sum(deltas1 * deltas2)
1544
1545    if set(obs1.names).isdisjoint(set(obs2.names)):
1546        return 0.0
1547
1548    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1549        raise Exception('The gamma method has to be applied to both Obs first.')
1550
1551    dvalue = 0.0
1552
1553    for e_name in obs1.mc_names:
1554
1555        if e_name not in obs2.mc_names:
1556            continue
1557
1558        idl_d = {}
1559        for r_name in obs1.e_content[e_name]:
1560            if r_name not in obs2.e_content[e_name]:
1561                continue
1562            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1563
1564        gamma = 0.0
1565
1566        for r_name in obs1.e_content[e_name]:
1567            if r_name not in obs2.e_content[e_name]:
1568                continue
1569            if len(idl_d[r_name]) == 0:
1570                continue
1571            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1572
1573        if gamma == 0.0:
1574            continue
1575
1576        gamma_div = 0.0
1577        for r_name in obs1.e_content[e_name]:
1578            if r_name not in obs2.e_content[e_name]:
1579                continue
1580            if len(idl_d[r_name]) == 0:
1581                continue
1582            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1583        gamma /= gamma_div
1584
1585        dvalue += gamma
1586
1587    for e_name in obs1.cov_names:
1588
1589        if e_name not in obs2.cov_names:
1590            continue
1591
1592        dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item()
1593
1594    return dvalue
1595
1596
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : optional
        Passed on unchanged as the idl argument of the Obs constructor
        (default None).

    Returns
    -------
    Obs
        Observable reconstructed from the jackknife samples; its value is
        set to jacks[0].
    """
    n_samples = len(jacks) - 1
    # Projector that maps the jackknife samples onto the reconstructed samples.
    projector = np.ones((n_samples, n_samples)) - (n_samples - 1) * np.identity(n_samples)
    reconstructed = jacks[1:] @ projector
    central = np.mean(reconstructed)
    fluctuations = reconstructed - central

    result = Obs([fluctuations], [name], idl=idl, means=[central])
    result._value = jacks[0]
    return result
1615
1616
def import_bootstrap(boots, name, random_numbers):
    """Imports bootstrap samples and returns an Obs

    Parameters
    ----------
    boots : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N bootstrap samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    random_numbers : np.ndarray
        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
        chain to be reconstructed.

    Returns
    -------
    Obs
        Observable built from the least-squares reconstruction of the chain;
        its value is set to boots[0].

    Raises
    ------
    ValueError
        If the number of rows of random_numbers differs from len(boots) - 1 or
        if there are fewer bootstrap samples than Monte Carlo data points.
    """
    n_boot, chain_length = random_numbers.shape
    if len(boots) - 1 != n_boot:
        raise ValueError("Random numbers do not have the correct shape.")

    if n_boot < chain_length:
        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")

    # Row i holds how often each configuration was drawn for bootstrap sample i,
    # divided by the chain length.
    draw_counts = [np.bincount(draw, minlength=chain_length) for draw in random_numbers]
    projector = np.vstack(draw_counts) / chain_length

    solution = scipy.linalg.lstsq(projector, boots[1:])
    result = Obs([solution[0]], [name])
    result._value = boots[0]
    return result
1645
1646
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Returns
    -------
    Obs
        Observable containing the samples of all replica of the input. Its
        reweighted flag is True if any of the inputs is reweighted.

    Raises
    ------
    Exception
        If a replicum appears more than once or if an input contains covobs.

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [name for obs in list_of_obs for name in obs.names]
    if len(replist) != len(set(replist)):
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any(len(o.cov_names) for o in list_of_obs):
        raise Exception('Not possible to merge data that contains covobs!')

    samples_by_name = {}
    idl_by_name = {}
    for o in list_of_obs:
        # Full samples are the fluctuations plus the replica mean.
        for key in set(o.deltas) | set(o.r_values):
            samples_by_name[key] = o.deltas.get(key, 0) + o.r_values.get(key, 0)
        for key in o.deltas:
            idl_by_name[key] = o.idl.get(key, 0)

    names = sorted(samples_by_name)
    merged = Obs([samples_by_name[name] for name in names], names, idl=[idl_by_name[name] for name in names])
    # np.max returns a numpy.bool_, for which `reweighted is True` is always
    # False; store a plain bool so that identity checks on the flag work.
    merged.reweighted = bool(np.max([oi.reweighted for oi in list_of_obs]))
    return merged
1675
1676
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.

    Returns
    -------
    Obs or list of Obs
        A single Obs if exactly one mean value is given, otherwise a list
        with one Obs per mean value.

    Raises
    ------
    Exception
        If the number of mean values does not match the dimension reported
        by the Covobs.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    if isinstance(means, (float, int)):
        means = [means]

    ol = [covobs_to_obs(Covobs(mean, cov, name, pos=i, grad=grad)) for i, mean in enumerate(means)]

    expected_length = ol[0].covobs[name].N
    if expected_length != len(means):
        # Report the dimension that was actually compared, not the sample
        # count of the Obs.
        raise Exception('You have to provide %d mean values!' % (expected_length))
    if len(ol) == 1:
        return ol[0]
    return ol
1718
1719
1720def _determine_gap(o, e_content, e_name):
1721    gaps = []
1722    for r_name in e_content[e_name]:
1723        if isinstance(o.idl[r_name], range):
1724            gaps.append(o.idl[r_name].step)
1725        else:
1726            gaps.append(np.min(np.diff(o.idl[r_name])))
1727
1728    gap = min(gaps)
1729    if not np.all([gi % gap == 0 for gi in gaps]):
1730        raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)
1731
1732    return gap
1733
1734
1735def _check_lists_equal(idl):
1736    '''
1737    Use groupby to efficiently check whether all elements of idl are identical.
1738    Returns True if all elements are equal, otherwise False.
1739
1740    Parameters
1741    ----------
1742    idl : list of lists, ranges or np.ndarrays
1743    '''
1744    g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl])
1745    if next(g, True) and not next(g, False):
1746        return True
1747    return False

class CObs: View Source

 918class CObs:
 919    """Class for a complex valued observable."""
 920    __slots__ = ['_real', '_imag', 'tag']
 921
 922    def __init__(self, real, imag=0.0):
 923        self._real = real
 924        self._imag = imag
 925        self.tag = None
 926
 927    @property
 928    def real(self):
 929        return self._real
 930
 931    @property
 932    def imag(self):
 933        return self._imag
 934
 935    def gamma_method(self, **kwargs):
 936        """Executes the gamma_method for the real and the imaginary part."""
 937        if isinstance(self.real, Obs):
 938            self.real.gamma_method(**kwargs)
 939        if isinstance(self.imag, Obs):
 940            self.imag.gamma_method(**kwargs)
 941
 942    def is_zero(self):
 943        """Checks whether both real and imaginary part are zero within machine precision."""
 944        return self.real == 0.0 and self.imag == 0.0
 945
 946    def conjugate(self):
 947        return CObs(self.real, -self.imag)
 948
 949    def __add__(self, other):
 950        if isinstance(other, np.ndarray):
 951            return other + self
 952        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 953            return CObs(self.real + other.real,
 954                        self.imag + other.imag)
 955        else:
 956            return CObs(self.real + other, self.imag)
 957
 958    def __radd__(self, y):
 959        return self + y
 960
 961    def __sub__(self, other):
 962        if isinstance(other, np.ndarray):
 963            return -1 * (other - self)
 964        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 965            return CObs(self.real - other.real, self.imag - other.imag)
 966        else:
 967            return CObs(self.real - other, self.imag)
 968
 969    def __rsub__(self, other):
 970        return -1 * (self - other)
 971
 972    def __mul__(self, other):
 973        if isinstance(other, np.ndarray):
 974            return other * self
 975        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 976            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 977                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 978                                               [self.real, other.real, self.imag, other.imag],
 979                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 980                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 981                                               [self.real, other.real, self.imag, other.imag],
 982                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 983            elif getattr(other, 'imag', 0) != 0:
 984                return CObs(self.real * other.real - self.imag * other.imag,
 985                            self.imag * other.real + self.real * other.imag)
 986            else:
 987                return CObs(self.real * other.real, self.imag * other.real)
 988        else:
 989            return CObs(self.real * other, self.imag * other)
 990
 991    def __rmul__(self, other):
 992        return self * other
 993
 994    def __truediv__(self, other):
 995        if isinstance(other, np.ndarray):
 996            return 1 / (other / self)
 997        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 998            r = other.real ** 2 + other.imag ** 2
 999            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
1000        else:
1001            return CObs(self.real / other, self.imag / other)
1002
1003    def __rtruediv__(self, other):
1004        r = self.real ** 2 + self.imag ** 2
1005        if hasattr(other, 'real') and hasattr(other, 'imag'):
1006            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
1007        else:
1008            return CObs(self.real * other / r, -self.imag * other / r)
1009
1010    def __abs__(self):
1011        return np.sqrt(self.real**2 + self.imag**2)
1012
1013    def __pos__(self):
1014        return self
1015
1016    def __neg__(self):
1017        return -1 * self
1018
1019    def __eq__(self, other):
1020        return self.real == other.real and self.imag == other.imag
1021
1022    def __str__(self):
1023        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
1024
1025    def __repr__(self):
1026        return 'CObs[' + str(self) + ']'
1027
1028    def __format__(self, format_type):
1029        if format_type == "":
1030            significance = 2
1031            format_type = "2"
1032        else:
1033            significance = int(float(format_type.replace("+", "").replace("-", "")))
1034        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"

Class for a complex valued observable.

CObs(real, imag=0.0) View Source

922    def __init__(self, real, imag=0.0):
923        self._real = real
924        self._imag = imag
925        self.tag = None

tag

real

imag

def gamma_method(self, **kwargs): View Source

935    def gamma_method(self, **kwargs):
936        """Executes the gamma_method for the real and the imaginary part."""
937        if isinstance(self.real, Obs):
938            self.real.gamma_method(**kwargs)
939        if isinstance(self.imag, Obs):
940            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self): View Source

942    def is_zero(self):
943        """Checks whether both real and imaginary part are zero within machine precision."""
944        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self): View Source

946    def conjugate(self):
947        return CObs(self.real, -self.imag)

def gamma_method(x, **kwargs): View Source

def gamma_method(x, **kwargs):
    """Apply the gamma method element-wise to a list or an array of Obs.

    The keyword arguments are forwarded to the gm method of every element.
    See docstring of pe.Obs.gamma_method for details.
    """
    def _apply(o):
        return o.gm(**kwargs)

    vectorized = np.vectorize(_apply)
    return vectorized(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def gm(x, **kwargs): View Source

def gamma_method(x, **kwargs):
    """Apply the gamma method element-wise to a list or an array of Obs.

    The keyword arguments are forwarded to the gm method of every element.
    See docstring of pe.Obs.gamma_method for details.
    """
    def _apply(o):
        return o.gm(**kwargs)

    vectorized = np.vectorize(_apply)
    return vectorized(x)

Vectorized version of the gamma_method applicable to lists or arrays of Obs.

See docstring of pe.Obs.gamma_method for details.

def derived_observable(func, data, array_mode=False, **kwargs): View Source

1167def derived_observable(func, data, array_mode=False, **kwargs):
1168    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1169
1170    Parameters
1171    ----------
1172    func : object
1173        arbitrary function of the form func(data, **kwargs). For the
1174        automatic differentiation to work, all numpy functions have to have
1175        the autograd wrapper (use 'import autograd.numpy as anp').
1176    data : list
1177        list of Obs, e.g. [obs1, obs2, obs3].
1178    num_grad : bool
1179        if True, numerical derivatives are used instead of autograd
1180        (default False). To control the numerical differentiation the
1181        kwargs of numdifftools.step_generators.MaxStepGenerator
1182        can be used.
1183    man_grad : list
1184        manually supply a list or an array which contains the jacobian
1185        of func. Use cautiously, supplying the wrong derivative will
1186        not be intercepted.
1187
1188    Notes
1189    -----
1190    For simple mathematical operations it can be practical to use anonymous
1191    functions. For the ratio of two observables one can e.g. use
1192
1193    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1194    """
1195
        # Work on an ndarray of the input and on its flattened form.
1196    data = np.asarray(data)
1197    raveled_data = data.ravel()
1198
1199    # Workaround for matrix operations containing non Obs data
1200    if not all(isinstance(x, Obs) for x in raveled_data):
1201        for i in range(len(raveled_data)):
1202            if isinstance(raveled_data[i], (int, float)):
                    # Plain numbers are replaced by an Obs built from a covobs with covariance 0.0.
1203                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1204
        # Collect one covariance matrix per covobs name and check that all inputs agree on it.
1205    allcov = {}
1206    for o in raveled_data:
1207        for name in o.cov_names:
1208            if name in allcov:
1209                if not np.allclose(allcov[name], o.covobs[name].cov):
1210                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1211            else:
1212                allcov[name] = o.covobs[name].cov
1213
        # Sorted union of the names of all inputs, split into covobs names and the remaining sample names.
1214    n_obs = len(raveled_data)
1215    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1216    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1217    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1218
        # True as soon as at least one input has its reweighted flag set to True.
1219    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1220
        # Evaluate func on the values of the inputs.
1221    if data.ndim == 1:
1222        values = np.array([o.value for o in data])
1223    else:
1224        values = np.vectorize(lambda x: x.value)(data)
1225
1226    new_values = func(values, **kwargs)
1227
        # multi is 1 if func returned an ndarray and 0 otherwise.
1228    multi = int(isinstance(new_values, np.ndarray))
1229
        # For every sample name evaluate func on the r_values of the inputs (falling back to
        # an input's value where it has no entry for that name) and merge the idls of the inputs.
1230    new_r_values = {}
1231    new_idl_d = {}
1232    for name in new_sample_names:
1233        idl = []
1234        tmp_values = np.zeros(n_obs)
1235        for i, item in enumerate(raveled_data):
1236            tmp_values[i] = item.r_values.get(name, item.value)
1237            tmp_idl = item.idl.get(name)
1238            if tmp_idl is not None:
1239                idl.append(tmp_idl)
1240        if multi > 0:
1241            tmp_values = np.array(tmp_values).reshape(data.shape)
1242        new_r_values[name] = func(tmp_values, **kwargs)
1243        new_idl_d[name] = _merge_idx(idl)
1244
        # Derivative of func: supplied by the caller (man_grad), numerical via
        # numdifftools (num_grad) or, by default, from autograd's jacobian.
1245    if 'man_grad' in kwargs:
1246        deriv = np.asarray(kwargs.get('man_grad'))
1247        if new_values.shape + data.shape != deriv.shape:
1248            raise Exception('Manual derivative does not have correct shape.')
1249    elif kwargs.get('num_grad') is True:
1250        if multi > 0:
1251            raise Exception('Multi mode currently not supported for numerical derivative')
1252        options = {
1253            'base_step': 0.1,
1254            'step_ratio': 2.5}
            # Defaults above can be overridden through kwargs of the same name.
1255        for key in options.keys():
1256            kwarg = kwargs.get(key)
1257            if kwarg is not None:
1258                options[key] = kwarg
1259        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1260        if tmp_df.size == 1:
1261            deriv = np.array([tmp_df.real])
1262        else:
1263            deriv = tmp_df.real
1264    else:
1265        deriv = jacobian(func)(values, **kwargs)
1266
        # Object array which receives one Obs per entry of new_values.
1267    final_result = np.zeros(new_values.shape, dtype=object)
1268
1269    if array_mode is True:
1270
            # Stand-in with a zero gradient for inputs which do not carry a given covobs.
1271        class _Zero_grad():
1272            def __init__(self, N):
1273                self.grad = np.zeros((N, 1))
1274
            # Extract the expanded deltas and the covobs gradients once, per first-axis entry of data.
1275        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1276        d_extracted = {}
1277        g_extracted = {}
1278        for name in new_sample_names:
1279            d_extracted[name] = []
1280            ens_length = len(new_idl_d[name])
1281            for i_dat, dat in enumerate(data):
1282                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1283        for name in new_cov_names:
1284            g_extracted[name] = []
1285            zero_grad = _Zero_grad(new_covobs_lengths[name])
1286            for i_dat, dat in enumerate(data):
1287                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1288
        # Build one Obs per entry of new_values from the deltas and covobs gradients of the
        # inputs, weighted with the corresponding entries of deriv.
1289    for i_val, new_val in np.ndenumerate(new_values):
1290        new_deltas = {}
1291        new_grad = {}
1292        if array_mode is True:
1293            for name in new_sample_names:
1294                ens_length = d_extracted[name][0].shape[-1]
1295                new_deltas[name] = np.zeros(ens_length)
1296                for i_dat, dat in enumerate(d_extracted[name]):
1297                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1298            for name in new_cov_names:
1299                new_grad[name] = 0
1300                for i_dat, dat in enumerate(g_extracted[name]):
1301                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1302        else:
1303            for j_obs, obs in np.ndenumerate(data):
1304                for name in obs.names:
1305                    if name in obs.cov_names:
1306                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1307                    else:
1308                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1309
1310        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1311
1312        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1313            raise Exception('The same name has been used for deltas and covobs!')
1314        new_samples = []
1315        new_means = []
1316        new_idl = []
1317        new_names_obs = []
1318        for name in new_names:
1319            if name not in new_covobs:
1320                new_samples.append(new_deltas[name])
1321                new_idl.append(new_idl_d[name])
1322                new_means.append(new_r_values[name][i_val])
1323                new_names_obs.append(name)
            # The Obs is created from the sample names only; covobs names, covobs,
            # value and reweighted flag are attached afterwards.
1324        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1325        for name in new_covobs:
1326            final_result[i_val].names.append(name)
1327        final_result[i_val]._covobs = new_covobs
1328        final_result[i_val]._value = new_val
1329        final_result[i_val].reweighted = reweighted
1330
        # If func did not return an ndarray, hand back the single Obs instead of the object array.
1331    if multi == 0:
1332        final_result = final_result.item()
1333
1334    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters

func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
data (list): list of Obs, e.g. [obs1, obs2, obs3].
num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.

Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs): View Source

def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.

    Returns
    -------
    list
        One Obs per entry of obs, each with its reweighted flag set to True.
    """
    normalize_on_all_configs = kwargs.get('all_configs')
    reweighted_obs = []

    for i, current in enumerate(obs):
        # Validate the input before touching any samples.
        if len(current.cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(current.names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in current.names:
            if not set(current.idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))

        ordered_names = sorted(current.names)
        ordered_idl = [current.idl[name] for name in ordered_names]

        # Weight samples restricted to the configurations of the current observable.
        weight_samples = [_reduce_deltas(weight.deltas[name], weight.idl[name], current.idl[name]) + weight.r_values[name]
                          for name in ordered_names]
        product_samples = [w * (current.deltas[name] + current.r_values[name])
                           for w, name in zip(weight_samples, ordered_names)]
        numerator = Obs(product_samples, list(ordered_names), idl=list(ordered_idl))

        if normalize_on_all_configs:
            denominator = weight
        else:
            denominator = Obs(weight_samples, list(ordered_names), idl=list(ordered_idl))

        ratio = numerator / denominator
        ratio.reweighted = True
        reweighted_obs.append(ratio)

    return reweighted_obs

Reweight a list of observables.

Parameters

weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
obs (list): list of Obs, e.g. [obs1, obs2, obs3].
all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.

def correlate(obs_a, obs_b): View Source

def correlate(obs_a, obs_b):
    """Correlate two observables.

    For every ensemble the full samples (deltas + r_values) of obs_a and obs_b
    are multiplied element by element and a new Obs is built from the products.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Returns
    -------
    Obs
        Observable built from the sample-wise products, defined on the idl of
        obs_a. Its reweighted flag is set if either input is flagged as reweighted.

    Raises
    ------
    Exception
        If the two observables are not defined on the same ensembles, if one of
        them contains covobs, or if shape or idl differ for any ensemble.

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """
    names = sorted(obs_a.names)

    if names != sorted(obs_b.names):
        raise Exception(f"Ensembles do not fit {set(obs_a.names) ^ set(obs_b.names)}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        # A single formatted string is passed so that str(exception) is a readable
        # sentence instead of the repr of an argument tuple.
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception(f"Shapes of ensemble {name} do not fit")
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception(f"idl of ensemble {name} do not fit")

    # Truthiness instead of `is True`: the flag may be a numpy bool, for which
    # an identity comparison with True is always False.
    if obs_a.reweighted:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = [(obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])
                   for name in names]
    new_idl = [obs_a.idl[name] for name in names]

    o = Obs(new_samples, names, idl=new_idl)
    # Normalise to a plain bool so later identity checks behave as expected.
    o.reweighted = bool(obs_a.reweighted or obs_b.reweighted)
    return o

Correlate two observables.

Parameters

obs_a (Obs): First observable
obs_b (Obs): Second observable

Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): View Source

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
             ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Returns
    -------
    numpy.ndarray
        The correlation matrix if correlation is True, otherwise the correlation matrix
        rescaled by the dvalue of each observable.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    dim = len(obs)

    # With no more samples than observables the matrix cannot have full rank;
    # the warning is only issued when none of the observables carries covobs.
    most_samples = np.max([o.N for o in obs])
    if most_samples <= dim and not any(True for o in obs for _ in o.cov_names):
        warnings.warn(f"The dimension of the covariance matrix ({dim}) is larger or equal to the number of samples ({most_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Only the upper triangle is computed, the lower one follows by symmetry.
    cov = np.zeros((dim, dim))
    for row, o_row in enumerate(obs):
        for col in range(row, dim):
            cov[row, col] = _covariance_element(o_row, obs[col])
    cov = cov + cov.T - np.diag(np.diag(cov))

    # Normalise to unit diagonal.
    inv_sigma = np.diag(1 / np.sqrt(np.diag(cov)))
    corr = inv_sigma @ cov @ inv_sigma

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    # Rescale the correlation matrix with the errors stored on the observables.
    sigma = np.diag([o.dvalue for o in obs])
    cov = sigma @ corr @ sigma

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn(f"Covariance matrix is not positive semi-definite (Eigenvalues: {eigenvalues!s})", RuntimeWarning)

    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters

obs (list or numpy.ndarray): List or one dimensional array of Obs
visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.

Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None): View Source

def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list, optional
        Passed on unchanged to the Obs constructor as its idl argument
        (default None).

    Returns
    -------
    Obs
        Observable built from the reconstructed samples; its value is set to jacks[0].

    Raises
    ------
    ValueError
        If jacks is not one dimensional or contains no jackknife sample
        besides the mean value.
    """
    jack_samples = np.asarray(jacks[1:])
    if jack_samples.ndim != 1:
        raise ValueError("jacks has to be a one dimensional array.")
    length = len(jack_samples)
    if length < 1:
        # Without this check the mean of an empty array (NaN) would be used below.
        raise ValueError("jacks has to contain the mean value and at least one jackknife sample.")

    # Equivalent to jack_samples @ (ones((N, N)) - (N - 1) * identity(N)), but
    # without building the dense N x N projector:
    # sample_i = sum_j jack_j - (N - 1) * jack_i.
    # Results can differ from the matrix product at the level of rounding errors.
    samples = np.sum(jack_samples) - (length - 1) * jack_samples
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    new_obs._value = jacks[0]
    return new_obs

Imports jackknife samples and returns an Obs

Parameters

jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
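name (str): name of the ensemble the samples are defined on.
idl (list, optional): passed on unchanged to the Obs constructor as its idl argument (default None).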

def import_bootstrap(boots, name, random_numbers): View Source

def import_bootstrap(boots, name, random_numbers):
    """Imports bootstrap samples and returns an Obs

    Parameters
    ----------
    boots : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N bootstrap samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    random_numbers : np.ndarray
        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
        chain to be reconstructed.

    Raises
    ------
    ValueError
        If the number of rows of random_numbers differs from the number of bootstrap
        samples in boots, or if there are fewer bootstrap samples than chain entries.
    """
    n_boot, chain_length = random_numbers.shape
    if len(boots) - 1 != n_boot:
        raise ValueError("Random numbers do not have the correct shape.")

    if chain_length > n_boot:
        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")

    # One row per bootstrap sample: how often each chain position was drawn,
    # divided by the chain length.
    draw_counts = [np.bincount(draw, minlength=chain_length) for draw in random_numbers]
    proj = np.vstack(draw_counts) / chain_length

    # Least-squares solution of proj @ x = boots[1:].
    solution = scipy.linalg.lstsq(proj, boots[1:])
    reconstructed = solution[0]

    ret = Obs([reconstructed], [name])
    ret._value = boots[0]
    return ret

Imports bootstrap samples and returns an Obs

Parameters

boots (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N bootstrap samples as first to Nth entry.
name (str): name of the ensemble the samples are defined on.
random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, where samples is the number of bootstrap samples and length is the length of the original Monte Carlo chain to be reconstructed.

def merge_obs(list_of_obs): View Source

def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Returns
    -------
    Obs
        Observable defined on the union of all replica of the inputs. Its
        reweighted flag is True if any of the inputs is flagged as reweighted.

    Raises
    ------
    Exception
        If a replicum name occurs more than once in list_of_obs or if any
        of the observables contains covobs.

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if len(replist) != len(set(replist)):
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any(len(o.cov_names) for o in list_of_obs):
        raise Exception('Not possible to merge data that contains covobs!')

    samples_by_name = {}
    idl_by_name = {}
    for o in list_of_obs:
        # Full samples are the fluctuations plus the replicum mean.
        for key in set(o.deltas) | set(o.r_values):
            samples_by_name[key] = o.deltas.get(key, 0) + o.r_values.get(key, 0)
        for key in o.deltas:
            idl_by_name[key] = o.idl.get(key, 0)

    names = sorted(samples_by_name)
    merged = Obs([samples_by_name[name] for name in names], names, idl=[idl_by_name[name] for name in names])
    # any() yields a plain Python bool. np.max over the flags would store a
    # numpy bool, for which identity checks such as `reweighted is True` fail.
    merged.reweighted = any(oi.reweighted for oi in list_of_obs)
    return merged

Combine all observables in list_of_obs into one new observable

Parameters

list_of_obs (list): list of the Obs object to be combined

Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None): View Source

def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs. A single Python or numpy scalar is
        treated as a list with one entry.
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.

    Returns
    -------
    Obs or list of Obs
        A single Obs if exactly one mean value was given, otherwise a list
        with one Obs per mean value.

    Raises
    ------
    Exception
        If the number of mean values does not match the N attribute of the
        Covobs created from cov.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    # numpy scalar types are accepted in addition to the builtin ones so that
    # e.g. a np.float32 mean does not fail on len() below.
    if isinstance(means, (float, int, np.floating, np.integer)):
        means = [means]

    ol = [covobs_to_obs(Covobs(mean, cov, name, pos=i, grad=grad)) for i, mean in enumerate(means)]

    # Report the count that is actually checked (N of the Covobs), not the N
    # attribute of the wrapping Obs.
    n_required = ol[0].covobs[name].N
    if n_required != len(means):
        raise Exception('You have to provide %d mean values!' % n_required)
    if len(ol) == 1:
        return ol[0]
    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters

means (list of floats or float): N mean value(s) of the new Obs
cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
name (str): identifier for the covariance matrix
grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.