pyerrors.obs

   1import warnings
   2import hashlib
   3import pickle
   4from math import gcd
   5from functools import reduce
   6import numpy as np
   7import autograd.numpy as anp  # Thinly-wrapped numpy
   8from autograd import jacobian
   9import matplotlib.pyplot as plt
  10from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  11import numdifftools as nd
  12from itertools import groupby
  13from .covobs import Covobs
  14
  15# Improve print output of numpy.ndarrays containing Obs objects.
  16np.set_printoptions(formatter={'object': lambda x: str(x)})
  17
  18
  19class Obs:
  20    """Class for a general observable.
  21
  22    Instances of Obs are the basic objects of a pyerrors error analysis.
  23    They are initialized with a list which contains arrays of samples for
  24    different ensembles/replica and another list of same length which contains
  25    the names of the ensembles/replica. Mathematical operations can be
  26    performed on instances. The result is another instance of Obs. The error of
  27    an instance can be computed with the gamma_method. Also contains additional
  28    methods for output and visualization of the error calculation.
  29
  30    Attributes
  31    ----------
  32    S_global : float
  33        Standard value for S (default 2.0)
  34    S_dict : dict
  35        Dictionary for S values. If an entry for a given ensemble
  36        exists this overwrites the standard value for that ensemble.
  37    tau_exp_global : float
  38        Standard value for tau_exp (default 0.0)
  39    tau_exp_dict : dict
  40        Dictionary for tau_exp values. If an entry for a given ensemble exists
  41        this overwrites the standard value for that ensemble.
  42    N_sigma_global : float
  43        Standard value for N_sigma (default 1.0)
  44    N_sigma_dict : dict
  45        Dictionary for N_sigma values. If an entry for a given ensemble exists
  46        this overwrites the standard value for that ensemble.
  47    """
  48    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  49                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  50                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  51                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  52                 'idl', 'tag', '_covobs', '__dict__']
  53
  54    S_global = 2.0
  55    S_dict = {}
  56    tau_exp_global = 0.0
  57    tau_exp_dict = {}
  58    N_sigma_global = 1.0
  59    N_sigma_dict = {}
  60
  61    def __init__(self, samples, names, idl=None, **kwargs):
  62        """ Initialize Obs object.
  63
  64        Parameters
  65        ----------
  66        samples : list
  67            list of numpy arrays containing the Monte Carlo samples
  68        names : list
  69            list of strings labeling the individual samples
  70        idl : list, optional
  71            list of ranges or lists on which the samples are defined
  72        """
  73
  74        if kwargs.get("means") is None and len(samples):
  75            if len(samples) != len(names):
  76                raise Exception('Length of samples and names incompatible.')
  77            if idl is not None:
  78                if len(idl) != len(names):
  79                    raise Exception('Length of idl incompatible with samples and names.')
  80            name_length = len(names)
  81            if name_length > 1:
  82                if name_length != len(set(names)):
  83                    raise Exception('names are not unique.')
  84                if not all(isinstance(x, str) for x in names):
  85                    raise TypeError('All names have to be strings.')
  86            else:
  87                if not isinstance(names[0], str):
  88                    raise TypeError('All names have to be strings.')
  89            if min(len(x) for x in samples) <= 4:
  90                raise Exception('Samples have to have at least 5 entries.')
  91
  92        self.names = sorted(names)
  93        self.shape = {}
  94        self.r_values = {}
  95        self.deltas = {}
  96        self._covobs = {}
  97
  98        self._value = 0
  99        self.N = 0
 100        self.idl = {}
 101        if idl is not None:
 102            for name, idx in sorted(zip(names, idl)):
 103                if isinstance(idx, range):
 104                    self.idl[name] = idx
 105                elif isinstance(idx, (list, np.ndarray)):
 106                    dc = np.unique(np.diff(idx))
 107                    if np.any(dc < 0):
 108                        raise Exception("Unsorted idx for idl[%s]" % (name))
 109                    if len(dc) == 1:
 110                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 111                    else:
 112                        self.idl[name] = list(idx)
 113                else:
 114                    raise Exception('incompatible type for idl[%s].' % (name))
 115        else:
 116            for name, sample in sorted(zip(names, samples)):
 117                self.idl[name] = range(1, len(sample) + 1)
 118
 119        if kwargs.get("means") is not None:
 120            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 121                self.shape[name] = len(self.idl[name])
 122                self.N += self.shape[name]
 123                self.r_values[name] = mean
 124                self.deltas[name] = sample
 125        else:
 126            for name, sample in sorted(zip(names, samples)):
 127                self.shape[name] = len(self.idl[name])
 128                self.N += self.shape[name]
 129                if len(sample) != self.shape[name]:
 130                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 131                self.r_values[name] = np.mean(sample)
 132                self.deltas[name] = sample - self.r_values[name]
 133                self._value += self.shape[name] * self.r_values[name]
 134            self._value /= self.N
 135
 136        self._dvalue = 0.0
 137        self.ddvalue = 0.0
 138        self.reweighted = False
 139
 140        self.tag = None
 141
 142    @property
 143    def value(self):
 144        return self._value
 145
 146    @property
 147    def dvalue(self):
 148        return self._dvalue
 149
 150    @property
 151    def e_names(self):
 152        return sorted(set([o.split('|')[0] for o in self.names]))
 153
 154    @property
 155    def cov_names(self):
 156        return sorted(set([o for o in self.covobs.keys()]))
 157
 158    @property
 159    def mc_names(self):
 160        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 161
 162    @property
 163    def e_content(self):
 164        res = {}
 165        for e, e_name in enumerate(self.e_names):
 166            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 167            if e_name in self.names:
 168                res[e_name].append(e_name)
 169        return res
 170
 171    @property
 172    def covobs(self):
 173        return self._covobs
 174
 175    def gamma_method(self, **kwargs):
 176        """Estimate the error and related properties of the Obs.
 177
 178        Parameters
 179        ----------
 180        S : float
 181            specifies a custom value for the parameter S (default 2.0).
 182            If set to 0 it is assumed that the data exhibits no
 183            autocorrelation. In this case the error estimates coincides
 184            with the sample standard error.
 185        tau_exp : float
 186            positive value triggers the critical slowing down analysis
 187            (default 0.0).
 188        N_sigma : float
 189            number of standard deviations from zero until the tail is
 190            attached to the autocorrelation function (default 1).
 191        fft : bool
 192            determines whether the fft algorithm is used for the computation
 193            of the autocorrelation function (default True)
 194        """
 195
 196        e_content = self.e_content
 197        self.e_dvalue = {}
 198        self.e_ddvalue = {}
 199        self.e_tauint = {}
 200        self.e_dtauint = {}
 201        self.e_windowsize = {}
 202        self.e_n_tauint = {}
 203        self.e_n_dtauint = {}
 204        e_gamma = {}
 205        self.e_rho = {}
 206        self.e_drho = {}
 207        self._dvalue = 0
 208        self.ddvalue = 0
 209
 210        self.S = {}
 211        self.tau_exp = {}
 212        self.N_sigma = {}
 213
 214        if kwargs.get('fft') is False:
 215            fft = False
 216        else:
 217            fft = True
 218
 219        def _parse_kwarg(kwarg_name):
 220            if kwarg_name in kwargs:
 221                tmp = kwargs.get(kwarg_name)
 222                if isinstance(tmp, (int, float)):
 223                    if tmp < 0:
 224                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 225                    for e, e_name in enumerate(self.e_names):
 226                        getattr(self, kwarg_name)[e_name] = tmp
 227                else:
 228                    raise TypeError(kwarg_name + ' is not in proper format.')
 229            else:
 230                for e, e_name in enumerate(self.e_names):
 231                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 232                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 233                    else:
 234                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 235
 236        _parse_kwarg('S')
 237        _parse_kwarg('tau_exp')
 238        _parse_kwarg('N_sigma')
 239
 240        for e, e_name in enumerate(self.mc_names):
 241            r_length = []
 242            for r_name in e_content[e_name]:
 243                if isinstance(self.idl[r_name], range):
 244                    r_length.append(len(self.idl[r_name]))
 245                else:
 246                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
 247
 248            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 249            w_max = max(r_length) // 2
 250            e_gamma[e_name] = np.zeros(w_max)
 251            self.e_rho[e_name] = np.zeros(w_max)
 252            self.e_drho[e_name] = np.zeros(w_max)
 253
 254            for r_name in e_content[e_name]:
 255                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
 256
 257            gamma_div = np.zeros(w_max)
 258            for r_name in e_content[e_name]:
 259                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
 260            gamma_div[gamma_div < 1] = 1.0
 261            e_gamma[e_name] /= gamma_div[:w_max]
 262
 263            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 264                self.e_tauint[e_name] = 0.5
 265                self.e_dtauint[e_name] = 0.0
 266                self.e_dvalue[e_name] = 0.0
 267                self.e_ddvalue[e_name] = 0.0
 268                self.e_windowsize[e_name] = 0
 269                continue
 270
 271            gaps = []
 272            for r_name in e_content[e_name]:
 273                if isinstance(self.idl[r_name], range):
 274                    gaps.append(1)
 275                else:
 276                    gaps.append(np.min(np.diff(self.idl[r_name])))
 277
 278            if not np.all([gi == gaps[0] for gi in gaps]):
 279                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
 280            else:
 281                gapsize = gaps[0]
 282
 283            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 284            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 285            # Make sure no entry of tauint is smaller than 0.5
 286            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 287            # hep-lat/0306017 eq. (42)
 288            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
 289            self.e_n_dtauint[e_name][0] = 0.0
 290
 291            def _compute_drho(i):
 292                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
 293                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 294
 295            if self.tau_exp[e_name] > 0:
 296                _compute_drho(gapsize)
 297                texp = self.tau_exp[e_name]
 298                # Critical slowing down analysis
 299                if w_max // 2 <= 1:
 300                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 301                for n in range(gapsize, w_max // 2, gapsize):
 302                    _compute_drho(n + gapsize)
 303                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 304                        # Bias correction hep-lat/0306017 eq. (49) included
 305                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 306                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 307                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 308                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 309                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
 310                        self.e_windowsize[e_name] = n
 311                        break
 312            else:
 313                if self.S[e_name] == 0.0:
 314                    self.e_tauint[e_name] = 0.5
 315                    self.e_dtauint[e_name] = 0.0
 316                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 317                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 318                    self.e_windowsize[e_name] = 0
 319                else:
 320                    # Standard automatic windowing procedure
 321                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
 322                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
 323                    for n in range(1, w_max):
 324                        if g_w[n - 1] < 0 or n >= w_max - 1:
 325                            _compute_drho(gapsize * n)
 326                            n *= gapsize
 327                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 328                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 329                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 330                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
 331                            self.e_windowsize[e_name] = n
 332                            break
 333
 334            self._dvalue += self.e_dvalue[e_name] ** 2
 335            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 336
 337        for e_name in self.cov_names:
 338            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 339            self.e_ddvalue[e_name] = 0
 340            self._dvalue += self.e_dvalue[e_name]**2
 341
 342        self._dvalue = np.sqrt(self._dvalue)
 343        if self._dvalue == 0.0:
 344            self.ddvalue = 0.0
 345        else:
 346            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 347        return
 348
 349    gm = gamma_method
 350
 351    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
 352        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 353           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 354
 355        Parameters
 356        ----------
 357        deltas : list
 358            List of fluctuations
 359        idx : list
 360            List or range of configurations on which the deltas are defined.
 361        shape : int
 362            Number of configurations in idx.
 363        w_max : int
 364            Upper bound for the summation window.
 365        fft : bool
 366            determines whether the fft algorithm is used for the computation
 367            of the autocorrelation function.
 368        """
 369        gamma = np.zeros(w_max)
 370        deltas = _expand_deltas(deltas, idx, shape)
 371        new_shape = len(deltas)
 372        if fft:
 373            max_gamma = min(new_shape, w_max)
 374            # The padding for the fft has to be even
 375            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 376            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 377        else:
 378            for n in range(w_max):
 379                if new_shape - n >= 0:
 380                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 381
 382        return gamma
 383
 384    def details(self, ens_content=True):
 385        """Output detailed properties of the Obs.
 386
 387        Parameters
 388        ----------
 389        ens_content : bool
 390            print details about the ensembles and replica if true.
 391        """
 392        if self.tag is not None:
 393            print("Description:", self.tag)
 394        if not hasattr(self, 'e_dvalue'):
 395            print('Result\t %3.8e' % (self.value))
 396        else:
 397            if self.value == 0.0:
 398                percentage = np.nan
 399            else:
 400                percentage = np.abs(self._dvalue / self.value) * 100
 401            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 402            if len(self.e_names) > 1:
 403                print(' Ensemble errors:')
 404            e_content = self.e_content
 405            for e_name in self.mc_names:
 406                if isinstance(self.idl[e_content[e_name][0]], range):
 407                    gap = self.idl[e_content[e_name][0]].step
 408                else:
 409                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
 410
 411                if len(self.e_names) > 1:
 412                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 413                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 414                tau_string += f" in units of {gap} config"
 415                if gap > 1:
 416                    tau_string += "s"
 417                if self.tau_exp[e_name] > 0:
 418                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 419                else:
 420                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 421                print(tau_string)
 422            for e_name in self.cov_names:
 423                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 424        if ens_content is True:
 425            if len(self.e_names) == 1:
 426                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 427            else:
 428                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 429            my_string_list = []
 430            for key, value in sorted(self.e_content.items()):
 431                if key not in self.covobs:
 432                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 433                    if len(value) == 1:
 434                        my_string += f': {self.shape[value[0]]} configurations'
 435                        if isinstance(self.idl[value[0]], range):
 436                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 437                        else:
 438                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 439                    else:
 440                        sublist = []
 441                        for v in value:
 442                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 443                            my_substring += f': {self.shape[v]} configurations'
 444                            if isinstance(self.idl[v], range):
 445                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 446                            else:
 447                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 448                            sublist.append(my_substring)
 449
 450                        my_string += '\n' + '\n'.join(sublist)
 451                else:
 452                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 453                my_string_list.append(my_string)
 454            print('\n'.join(my_string_list))
 455
 456    def reweight(self, weight):
 457        """Reweight the obs with given rewighting factors.
 458
 459        Parameters
 460        ----------
 461        weight : Obs
 462            Reweighting factor. An Observable that has to be defined on a superset of the
 463            configurations in obs[i].idl for all i.
 464        all_configs : bool
 465            if True, the reweighted observables are normalized by the average of
 466            the reweighting factor on all configurations in weight.idl and not
 467            on the configurations in obs[i].idl. Default False.
 468        """
 469        return reweight(weight, [self])[0]
 470
 471    def is_zero_within_error(self, sigma=1):
 472        """Checks whether the observable is zero within 'sigma' standard errors.
 473
 474        Parameters
 475        ----------
 476        sigma : int
 477            Number of standard errors used for the check.
 478
 479        Works only properly when the gamma method was run.
 480        """
 481        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 482
 483    def is_zero(self, atol=1e-10):
 484        """Checks whether the observable is zero within a given tolerance.
 485
 486        Parameters
 487        ----------
 488        atol : float
 489            Absolute tolerance (for details see numpy documentation).
 490        """
 491        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 492
 493    def plot_tauint(self, save=None):
 494        """Plot integrated autocorrelation time for each ensemble.
 495
 496        Parameters
 497        ----------
 498        save : str
 499            saves the figure to a file named 'save' if.
 500        """
 501        if not hasattr(self, 'e_dvalue'):
 502            raise Exception('Run the gamma method first.')
 503
 504        for e, e_name in enumerate(self.mc_names):
 505            fig = plt.figure()
 506            plt.xlabel(r'$W$')
 507            plt.ylabel(r'$\tau_\mathrm{int}$')
 508            length = int(len(self.e_n_tauint[e_name]))
 509            if self.tau_exp[e_name] > 0:
 510                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 511                x_help = np.arange(2 * self.tau_exp[e_name])
 512                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 513                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 514                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 515                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 516                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 517                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 518                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 519            else:
 520                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 521                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 522
 523            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 524            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 525            plt.legend()
 526            plt.xlim(-0.5, xmax)
 527            ylim = plt.ylim()
 528            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 529            plt.draw()
 530            if save:
 531                fig.savefig(save + "_" + str(e))
 532
 533    def plot_rho(self, save=None):
 534        """Plot normalized autocorrelation function time for each ensemble.
 535
 536        Parameters
 537        ----------
 538        save : str
 539            saves the figure to a file named 'save' if.
 540        """
 541        if not hasattr(self, 'e_dvalue'):
 542            raise Exception('Run the gamma method first.')
 543        for e, e_name in enumerate(self.mc_names):
 544            fig = plt.figure()
 545            plt.xlabel('W')
 546            plt.ylabel('rho')
 547            length = int(len(self.e_drho[e_name]))
 548            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 549            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 550            if self.tau_exp[e_name] > 0:
 551                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 552                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 553                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 554                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 555            else:
 556                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 557                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 558            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 559            plt.xlim(-0.5, xmax)
 560            plt.draw()
 561            if save:
 562                fig.savefig(save + "_" + str(e))
 563
 564    def plot_rep_dist(self):
 565        """Plot replica distribution for each ensemble with more than one replicum."""
 566        if not hasattr(self, 'e_dvalue'):
 567            raise Exception('Run the gamma method first.')
 568        for e, e_name in enumerate(self.mc_names):
 569            if len(self.e_content[e_name]) == 1:
 570                print('No replica distribution for a single replicum (', e_name, ')')
 571                continue
 572            r_length = []
 573            sub_r_mean = 0
 574            for r, r_name in enumerate(self.e_content[e_name]):
 575                r_length.append(len(self.deltas[r_name]))
 576                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 577            e_N = np.sum(r_length)
 578            sub_r_mean /= e_N
 579            arr = np.zeros(len(self.e_content[e_name]))
 580            for r, r_name in enumerate(self.e_content[e_name]):
 581                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 582            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 583            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 584            plt.draw()
 585
 586    def plot_history(self, expand=True):
 587        """Plot derived Monte Carlo history for each ensemble
 588
 589        Parameters
 590        ----------
 591        expand : bool
 592            show expanded history for irregular Monte Carlo chains (default: True).
 593        """
 594        for e, e_name in enumerate(self.mc_names):
 595            plt.figure()
 596            r_length = []
 597            tmp = []
 598            tmp_expanded = []
 599            for r, r_name in enumerate(self.e_content[e_name]):
 600                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 601                if expand:
 602                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
 603                    r_length.append(len(tmp_expanded[-1]))
 604                else:
 605                    r_length.append(len(tmp[-1]))
 606            e_N = np.sum(r_length)
 607            x = np.arange(e_N)
 608            y_test = np.concatenate(tmp, axis=0)
 609            if expand:
 610                y = np.concatenate(tmp_expanded, axis=0)
 611            else:
 612                y = y_test
 613            plt.errorbar(x, y, fmt='.', markersize=3)
 614            plt.xlim(-0.5, e_N - 0.5)
 615            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 616            plt.draw()
 617
 618    def plot_piechart(self, save=None):
 619        """Plot piechart which shows the fractional contribution of each
 620        ensemble to the error and returns a dictionary containing the fractions.
 621
 622        Parameters
 623        ----------
 624        save : str
 625            saves the figure to a file named 'save' if.
 626        """
 627        if not hasattr(self, 'e_dvalue'):
 628            raise Exception('Run the gamma method first.')
 629        if np.isclose(0.0, self._dvalue, atol=1e-15):
 630            raise Exception('Error is 0.0')
 631        labels = self.e_names
 632        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 633        fig1, ax1 = plt.subplots()
 634        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 635        ax1.axis('equal')
 636        plt.draw()
 637        if save:
 638            fig1.savefig(save)
 639
 640        return dict(zip(self.e_names, sizes))
 641
 642    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 643        """Dump the Obs to a file 'name' of chosen format.
 644
 645        Parameters
 646        ----------
 647        filename : str
 648            name of the file to be saved.
 649        datatype : str
 650            Format of the exported file. Supported formats include
 651            "json.gz" and "pickle"
 652        description : str
 653            Description for output file, only relevant for json.gz format.
 654        path : str
 655            specifies a custom path for the file (default '.')
 656        """
 657        if 'path' in kwargs:
 658            file_name = kwargs.get('path') + '/' + filename
 659        else:
 660            file_name = filename
 661
 662        if datatype == "json.gz":
 663            from .input.json import dump_to_json
 664            dump_to_json([self], file_name, description=description)
 665        elif datatype == "pickle":
 666            with open(file_name + '.p', 'wb') as fb:
 667                pickle.dump(self, fb)
 668        else:
 669            raise Exception("Unknown datatype " + str(datatype))
 670
 671    def export_jackknife(self):
 672        """Export jackknife samples from the Obs
 673
 674        Returns
 675        -------
 676        numpy.ndarray
 677            Returns a numpy array of length N + 1 where N is the number of samples
 678            for the given ensemble and replicum. The zeroth entry of the array contains
 679            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 680            derived from the Obs. The current implementation only works for observables
 681            defined on exactly one ensemble and replicum. The derived jackknife samples
 682            should agree with samples from a full jackknife analysis up to O(1/N).
 683        """
 684
 685        if len(self.names) != 1:
 686            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 687
 688        name = self.names[0]
 689        full_data = self.deltas[name] + self.r_values[name]
 690        n = full_data.size
 691        mean = self.value
 692        tmp_jacks = np.zeros(n + 1)
 693        tmp_jacks[0] = mean
 694        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 695        return tmp_jacks
 696
 697    def __float__(self):
 698        return float(self.value)
 699
 700    def __repr__(self):
 701        return 'Obs[' + str(self) + ']'
 702
 703    def __str__(self):
 704        return _format_uncertainty(self.value, self._dvalue)
 705
 706    def __hash__(self):
 707        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 708        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 709        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 710        hash_tuple += tuple([o.encode() for o in self.names])
 711        m = hashlib.md5()
 712        [m.update(o) for o in hash_tuple]
 713        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 714
 715    # Overload comparisons
 716    def __lt__(self, other):
 717        return self.value < other
 718
 719    def __le__(self, other):
 720        return self.value <= other
 721
 722    def __gt__(self, other):
 723        return self.value > other
 724
 725    def __ge__(self, other):
 726        return self.value >= other
 727
 728    def __eq__(self, other):
 729        return (self - other).is_zero()
 730
 731    def __ne__(self, other):
 732        return not (self - other).is_zero()
 733
 734    # Overload math operations
 735    def __add__(self, y):
 736        if isinstance(y, Obs):
 737            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 738        else:
 739            if isinstance(y, np.ndarray):
 740                return np.array([self + o for o in y])
 741            elif y.__class__.__name__ in ['Corr', 'CObs']:
 742                return NotImplemented
 743            else:
 744                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 745
 746    def __radd__(self, y):
 747        return self + y
 748
 749    def __mul__(self, y):
 750        if isinstance(y, Obs):
 751            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 752        else:
 753            if isinstance(y, np.ndarray):
 754                return np.array([self * o for o in y])
 755            elif isinstance(y, complex):
 756                return CObs(self * y.real, self * y.imag)
 757            elif y.__class__.__name__ in ['Corr', 'CObs']:
 758                return NotImplemented
 759            else:
 760                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 761
 762    def __rmul__(self, y):
 763        return self * y
 764
 765    def __sub__(self, y):
 766        if isinstance(y, Obs):
 767            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 768        else:
 769            if isinstance(y, np.ndarray):
 770                return np.array([self - o for o in y])
 771            elif y.__class__.__name__ in ['Corr', 'CObs']:
 772                return NotImplemented
 773            else:
 774                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 775
 776    def __rsub__(self, y):
 777        return -1 * (self - y)
 778
 779    def __pos__(self):
 780        return self
 781
 782    def __neg__(self):
 783        return -1 * self
 784
 785    def __truediv__(self, y):
 786        if isinstance(y, Obs):
 787            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 788        else:
 789            if isinstance(y, np.ndarray):
 790                return np.array([self / o for o in y])
 791            elif y.__class__.__name__ in ['Corr', 'CObs']:
 792                return NotImplemented
 793            else:
 794                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 795
 796    def __rtruediv__(self, y):
 797        if isinstance(y, Obs):
 798            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 799        else:
 800            if isinstance(y, np.ndarray):
 801                return np.array([o / self for o in y])
 802            elif y.__class__.__name__ in ['Corr', 'CObs']:
 803                return NotImplemented
 804            else:
 805                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 806
 807    def __pow__(self, y):
 808        if isinstance(y, Obs):
 809            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 810        else:
 811            return derived_observable(lambda x: x[0] ** y, [self])
 812
 813    def __rpow__(self, y):
 814        if isinstance(y, Obs):
 815            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 816        else:
 817            return derived_observable(lambda x: y ** x[0], [self])
 818
 819    def __abs__(self):
 820        return derived_observable(lambda x: anp.abs(x[0]), [self])
 821
 822    # Overload numpy functions
 823    def sqrt(self):
 824        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 825
 826    def log(self):
 827        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 828
 829    def exp(self):
 830        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 831
 832    def sin(self):
 833        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 834
 835    def cos(self):
 836        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 837
 838    def tan(self):
 839        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 840
 841    def arcsin(self):
 842        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 843
 844    def arccos(self):
 845        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 846
 847    def arctan(self):
 848        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 849
 850    def sinh(self):
 851        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 852
 853    def cosh(self):
 854        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 855
 856    def tanh(self):
 857        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 858
 859    def arcsinh(self):
 860        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 861
 862    def arccosh(self):
 863        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 864
 865    def arctanh(self):
 866        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 867
 868
 869class CObs:
 870    """Class for a complex valued observable."""
 871    __slots__ = ['_real', '_imag', 'tag']
 872
 873    def __init__(self, real, imag=0.0):
 874        self._real = real
 875        self._imag = imag
 876        self.tag = None
 877
 878    @property
 879    def real(self):
 880        return self._real
 881
 882    @property
 883    def imag(self):
 884        return self._imag
 885
 886    def gamma_method(self, **kwargs):
 887        """Executes the gamma_method for the real and the imaginary part."""
 888        if isinstance(self.real, Obs):
 889            self.real.gamma_method(**kwargs)
 890        if isinstance(self.imag, Obs):
 891            self.imag.gamma_method(**kwargs)
 892
 893    def is_zero(self):
 894        """Checks whether both real and imaginary part are zero within machine precision."""
 895        return self.real == 0.0 and self.imag == 0.0
 896
 897    def conjugate(self):
 898        return CObs(self.real, -self.imag)
 899
 900    def __add__(self, other):
 901        if isinstance(other, np.ndarray):
 902            return other + self
 903        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 904            return CObs(self.real + other.real,
 905                        self.imag + other.imag)
 906        else:
 907            return CObs(self.real + other, self.imag)
 908
 909    def __radd__(self, y):
 910        return self + y
 911
 912    def __sub__(self, other):
 913        if isinstance(other, np.ndarray):
 914            return -1 * (other - self)
 915        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 916            return CObs(self.real - other.real, self.imag - other.imag)
 917        else:
 918            return CObs(self.real - other, self.imag)
 919
 920    def __rsub__(self, other):
 921        return -1 * (self - other)
 922
 923    def __mul__(self, other):
 924        if isinstance(other, np.ndarray):
 925            return other * self
 926        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 927            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 928                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 929                                               [self.real, other.real, self.imag, other.imag],
 930                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 931                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 932                                               [self.real, other.real, self.imag, other.imag],
 933                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 934            elif getattr(other, 'imag', 0) != 0:
 935                return CObs(self.real * other.real - self.imag * other.imag,
 936                            self.imag * other.real + self.real * other.imag)
 937            else:
 938                return CObs(self.real * other.real, self.imag * other.real)
 939        else:
 940            return CObs(self.real * other, self.imag * other)
 941
 942    def __rmul__(self, other):
 943        return self * other
 944
 945    def __truediv__(self, other):
 946        if isinstance(other, np.ndarray):
 947            return 1 / (other / self)
 948        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 949            r = other.real ** 2 + other.imag ** 2
 950            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 951        else:
 952            return CObs(self.real / other, self.imag / other)
 953
 954    def __rtruediv__(self, other):
 955        r = self.real ** 2 + self.imag ** 2
 956        if hasattr(other, 'real') and hasattr(other, 'imag'):
 957            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 958        else:
 959            return CObs(self.real * other / r, -self.imag * other / r)
 960
 961    def __abs__(self):
 962        return np.sqrt(self.real**2 + self.imag**2)
 963
 964    def __pos__(self):
 965        return self
 966
 967    def __neg__(self):
 968        return -1 * self
 969
 970    def __eq__(self, other):
 971        return self.real == other.real and self.imag == other.imag
 972
 973    def __str__(self):
 974        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 975
 976    def __repr__(self):
 977        return 'CObs[' + str(self) + ']'
 978
 979
 980def _format_uncertainty(value, dvalue):
 981    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
 982    if dvalue == 0.0:
 983        return str(value)
 984    fexp = np.floor(np.log10(dvalue))
 985    if fexp < 0.0:
 986        return '{:{form}}({:2.0f})'.format(value, dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
 987    elif fexp == 0.0:
 988        return '{:.1f}({:1.1f})'.format(value, dvalue)
 989    else:
 990        return '{:.0f}({:2.0f})'.format(value, dvalue)
 991
 992
 993def _expand_deltas(deltas, idx, shape):
 994    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
 995       If idx is of type range, the deltas are not changed
 996
 997    Parameters
 998    ----------
 999    deltas : list
1000        List of fluctuations
1001    idx : list
1002        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1003    shape : int
1004        Number of configs in idx.
1005    """
1006    if isinstance(idx, range):
1007        return deltas
1008    else:
1009        ret = np.zeros(idx[-1] - idx[0] + 1)
1010        for i in range(shape):
1011            ret[idx[i] - idx[0]] = deltas[i]
1012        return ret
1013
1014
1015def _merge_idx(idl):
1016    """Returns the union of all lists in idl as sorted list
1017
1018    Parameters
1019    ----------
1020    idl : list
1021        List of lists or ranges.
1022    """
1023
1024    # Use groupby to efficiently check whether all elements of idl are identical
1025    try:
1026        g = groupby(idl)
1027        if next(g, True) and not next(g, False):
1028            return idl[0]
1029    except Exception:
1030        pass
1031
1032    if np.all([type(idx) is range for idx in idl]):
1033        if len(set([idx[0] for idx in idl])) == 1:
1034            idstart = min([idx.start for idx in idl])
1035            idstop = max([idx.stop for idx in idl])
1036            idstep = min([idx.step for idx in idl])
1037            return range(idstart, idstop, idstep)
1038
1039    return sorted(set().union(*idl))
1040
1041
1042def _intersection_idx(idl):
1043    """Returns the intersection of all lists in idl as sorted list
1044
1045    Parameters
1046    ----------
1047    idl : list
1048        List of lists or ranges.
1049    """
1050
1051    def _lcm(*args):
1052        """Returns the lowest common multiple of args.
1053
1054        From python 3.9 onwards the math library contains an lcm function."""
1055        return reduce(lambda a, b: a * b // gcd(a, b), args)
1056
1057    # Use groupby to efficiently check whether all elements of idl are identical
1058    try:
1059        g = groupby(idl)
1060        if next(g, True) and not next(g, False):
1061            return idl[0]
1062    except Exception:
1063        pass
1064
1065    if np.all([type(idx) is range for idx in idl]):
1066        if len(set([idx[0] for idx in idl])) == 1:
1067            idstart = max([idx.start for idx in idl])
1068            idstop = min([idx.stop for idx in idl])
1069            idstep = _lcm(*[idx.step for idx in idl])
1070            return range(idstart, idstop, idstep)
1071
1072    return sorted(set.intersection(*[set(o) for o in idl]))
1073
1074
1075def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1076    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1077       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1078       common divisor of the step sizes is used as new step size.
1079
1080    Parameters
1081    ----------
1082    deltas : list
1083        List of fluctuations
1084    idx : list
1085        List or range of configs on which the deltas are defined.
1086        Has to be a subset of new_idx and has to be sorted in ascending order.
1087    shape : list
1088        Number of configs in idx.
1089    new_idx : list
1090        List of configs that defines the new range, has to be sorted in ascending order.
1091    """
1092
1093    if type(idx) is range and type(new_idx) is range:
1094        if idx == new_idx:
1095            return deltas
1096    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1097    for i in range(shape):
1098        ret[idx[i] - new_idx[0]] = deltas[i]
1099    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)
1100
1101
1102def derived_observable(func, data, array_mode=False, **kwargs):
1103    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1104
1105    Parameters
1106    ----------
1107    func : object
1108        arbitrary function of the form func(data, **kwargs). For the
1109        automatic differentiation to work, all numpy functions have to have
1110        the autograd wrapper (use 'import autograd.numpy as anp').
1111    data : list
1112        list of Obs, e.g. [obs1, obs2, obs3].
1113    num_grad : bool
1114        if True, numerical derivatives are used instead of autograd
1115        (default False). To control the numerical differentiation the
1116        kwargs of numdifftools.step_generators.MaxStepGenerator
1117        can be used.
1118    man_grad : list
1119        manually supply a list or an array which contains the jacobian
1120        of func. Use cautiously, supplying the wrong derivative will
1121        not be intercepted.
1122
1123    Notes
1124    -----
1125    For simple mathematical operations it can be practical to use anonymous
1126    functions. For the ratio of two observables one can e.g. use
1127
1128    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1129    """
1130
1131    data = np.asarray(data)
1132    raveled_data = data.ravel()
1133
1134    # Workaround for matrix operations containing non Obs data
1135    if not all(isinstance(x, Obs) for x in raveled_data):
1136        for i in range(len(raveled_data)):
1137            if isinstance(raveled_data[i], (int, float)):
1138                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1139
1140    allcov = {}
1141    for o in raveled_data:
1142        for name in o.cov_names:
1143            if name in allcov:
1144                if not np.allclose(allcov[name], o.covobs[name].cov):
1145                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1146            else:
1147                allcov[name] = o.covobs[name].cov
1148
1149    n_obs = len(raveled_data)
1150    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1151    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1152    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1153
1154    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1155
1156    if data.ndim == 1:
1157        values = np.array([o.value for o in data])
1158    else:
1159        values = np.vectorize(lambda x: x.value)(data)
1160
1161    new_values = func(values, **kwargs)
1162
1163    multi = int(isinstance(new_values, np.ndarray))
1164
1165    new_r_values = {}
1166    new_idl_d = {}
1167    for name in new_sample_names:
1168        idl = []
1169        tmp_values = np.zeros(n_obs)
1170        for i, item in enumerate(raveled_data):
1171            tmp_values[i] = item.r_values.get(name, item.value)
1172            tmp_idl = item.idl.get(name)
1173            if tmp_idl is not None:
1174                idl.append(tmp_idl)
1175        if multi > 0:
1176            tmp_values = np.array(tmp_values).reshape(data.shape)
1177        new_r_values[name] = func(tmp_values, **kwargs)
1178        new_idl_d[name] = _merge_idx(idl)
1179
1180    if 'man_grad' in kwargs:
1181        deriv = np.asarray(kwargs.get('man_grad'))
1182        if new_values.shape + data.shape != deriv.shape:
1183            raise Exception('Manual derivative does not have correct shape.')
1184    elif kwargs.get('num_grad') is True:
1185        if multi > 0:
1186            raise Exception('Multi mode currently not supported for numerical derivative')
1187        options = {
1188            'base_step': 0.1,
1189            'step_ratio': 2.5}
1190        for key in options.keys():
1191            kwarg = kwargs.get(key)
1192            if kwarg is not None:
1193                options[key] = kwarg
1194        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1195        if tmp_df.size == 1:
1196            deriv = np.array([tmp_df.real])
1197        else:
1198            deriv = tmp_df.real
1199    else:
1200        deriv = jacobian(func)(values, **kwargs)
1201
1202    final_result = np.zeros(new_values.shape, dtype=object)
1203
1204    if array_mode is True:
1205
1206        class _Zero_grad():
1207            def __init__(self, N):
1208                self.grad = np.zeros((N, 1))
1209
1210        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1211        d_extracted = {}
1212        g_extracted = {}
1213        for name in new_sample_names:
1214            d_extracted[name] = []
1215            ens_length = len(new_idl_d[name])
1216            for i_dat, dat in enumerate(data):
1217                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1218        for name in new_cov_names:
1219            g_extracted[name] = []
1220            zero_grad = _Zero_grad(new_covobs_lengths[name])
1221            for i_dat, dat in enumerate(data):
1222                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1223
1224    for i_val, new_val in np.ndenumerate(new_values):
1225        new_deltas = {}
1226        new_grad = {}
1227        if array_mode is True:
1228            for name in new_sample_names:
1229                ens_length = d_extracted[name][0].shape[-1]
1230                new_deltas[name] = np.zeros(ens_length)
1231                for i_dat, dat in enumerate(d_extracted[name]):
1232                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1233            for name in new_cov_names:
1234                new_grad[name] = 0
1235                for i_dat, dat in enumerate(g_extracted[name]):
1236                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1237        else:
1238            for j_obs, obs in np.ndenumerate(data):
1239                for name in obs.names:
1240                    if name in obs.cov_names:
1241                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1242                    else:
1243                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1244
1245        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1246
1247        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1248            raise Exception('The same name has been used for deltas and covobs!')
1249        new_samples = []
1250        new_means = []
1251        new_idl = []
1252        new_names_obs = []
1253        for name in new_names:
1254            if name not in new_covobs:
1255                new_samples.append(new_deltas[name])
1256                new_idl.append(new_idl_d[name])
1257                new_means.append(new_r_values[name][i_val])
1258                new_names_obs.append(name)
1259        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1260        for name in new_covobs:
1261            final_result[i_val].names.append(name)
1262        final_result[i_val]._covobs = new_covobs
1263        final_result[i_val]._value = new_val
1264        final_result[i_val].reweighted = reweighted
1265
1266    if multi == 0:
1267        final_result = final_result.item()
1268
1269    return final_result
1270
1271
1272def _reduce_deltas(deltas, idx_old, idx_new):
1273    """Extract deltas defined on idx_old on all configs of idx_new.
1274
1275    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1276    are ordered in an ascending order.
1277
1278    Parameters
1279    ----------
1280    deltas : list
1281        List of fluctuations
1282    idx_old : list
1283        List or range of configs on which the deltas are defined
1284    idx_new : list
1285        List of configs for which we want to extract the deltas.
1286        Has to be a subset of idx_old.
1287    """
1288    if not len(deltas) == len(idx_old):
1289        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1290    if type(idx_old) is range and type(idx_new) is range:
1291        if idx_old == idx_new:
1292            return deltas
1293    # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical
1294    try:
1295        g = groupby([idx_old, idx_new])
1296        if next(g, True) and not next(g, False):
1297            return deltas
1298    except Exception:
1299        pass
1300    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1301    if len(indices) < len(idx_new):
1302        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1303    return np.array(deltas)[indices]
1304
1305
1306def reweight(weight, obs, **kwargs):
1307    """Reweight a list of observables.
1308
1309    Parameters
1310    ----------
1311    weight : Obs
1312        Reweighting factor. An Observable that has to be defined on a superset of the
1313        configurations in obs[i].idl for all i.
1314    obs : list
1315        list of Obs, e.g. [obs1, obs2, obs3].
1316    all_configs : bool
1317        if True, the reweighted observables are normalized by the average of
1318        the reweighting factor on all configurations in weight.idl and not
1319        on the configurations in obs[i].idl. Default False.
1320    """
1321    result = []
1322    for i in range(len(obs)):
1323        if len(obs[i].cov_names):
1324            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1325        if not set(obs[i].names).issubset(weight.names):
1326            raise Exception('Error: Ensembles do not fit')
1327        for name in obs[i].names:
1328            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1329                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1330        new_samples = []
1331        w_deltas = {}
1332        for name in sorted(obs[i].names):
1333            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1334            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1335        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1336
1337        if kwargs.get('all_configs'):
1338            new_weight = weight
1339        else:
1340            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1341
1342        result.append(tmp_obs / new_weight)
1343        result[-1].reweighted = True
1344
1345    return result
1346
1347
1348def correlate(obs_a, obs_b):
1349    """Correlate two observables.
1350
1351    Parameters
1352    ----------
1353    obs_a : Obs
1354        First observable
1355    obs_b : Obs
1356        Second observable
1357
1358    Notes
1359    -----
1360    Keep in mind to only correlate primary observables which have not been reweighted
1361    yet. The reweighting has to be applied after correlating the observables.
1362    Currently only works if ensembles are identical (this is not strictly necessary).
1363    """
1364
1365    if sorted(obs_a.names) != sorted(obs_b.names):
1366        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1367    if len(obs_a.cov_names) or len(obs_b.cov_names):
1368        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1369    for name in obs_a.names:
1370        if obs_a.shape[name] != obs_b.shape[name]:
1371            raise Exception('Shapes of ensemble', name, 'do not fit')
1372        if obs_a.idl[name] != obs_b.idl[name]:
1373            raise Exception('idl of ensemble', name, 'do not fit')
1374
1375    if obs_a.reweighted is True:
1376        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1377    if obs_b.reweighted is True:
1378        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1379
1380    new_samples = []
1381    new_idl = []
1382    for name in sorted(obs_a.names):
1383        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1384        new_idl.append(obs_a.idl[name])
1385
1386    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1387    o.reweighted = obs_a.reweighted or obs_b.reweighted
1388    return o
1389
1390
1391def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1392    r'''Calculates the error covariance matrix of a set of observables.
1393
1394    WARNING: This function should be used with care, especially for observables with support on multiple
1395             ensembles with differing autocorrelations. See the notes below for details.
1396
1397    The gamma method has to be applied first to all observables.
1398
1399    Parameters
1400    ----------
1401    obs : list or numpy.ndarray
1402        List or one dimensional array of Obs
1403    visualize : bool
1404        If True plots the corresponding normalized correlation matrix (default False).
1405    correlation : bool
1406        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1407    smooth : None or int
1408        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1409        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1410        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1411        small ones.
1412
1413    Notes
1414    -----
1415    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1416    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1417    in the absence of autocorrelation.
1418    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1419    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1420    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1421    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1422    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1423    '''
1424
1425    length = len(obs)
1426
1427    max_samples = np.max([o.N for o in obs])
1428    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1429        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1430
1431    cov = np.zeros((length, length))
1432    for i in range(length):
1433        for j in range(i, length):
1434            cov[i, j] = _covariance_element(obs[i], obs[j])
1435    cov = cov + cov.T - np.diag(np.diag(cov))
1436
1437    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1438
1439    if isinstance(smooth, int):
1440        corr = _smooth_eigenvalues(corr, smooth)
1441
1442    if visualize:
1443        plt.matshow(corr, vmin=-1, vmax=1)
1444        plt.set_cmap('RdBu')
1445        plt.colorbar()
1446        plt.draw()
1447
1448    if correlation is True:
1449        return corr
1450
1451    errors = [o.dvalue for o in obs]
1452    cov = np.diag(errors) @ corr @ np.diag(errors)
1453
1454    eigenvalues = np.linalg.eigh(cov)[0]
1455    if not np.all(eigenvalues >= 0):
1456        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1457
1458    return cov
1459
1460
1461def _smooth_eigenvalues(corr, E):
1462    """Eigenvalue smoothing as described in hep-lat/9412087
1463
1464    corr : np.ndarray
1465        correlation matrix
1466    E : integer
1467        Number of eigenvalues to be left substantially unchanged
1468    """
1469    if not (2 < E < corr.shape[0] - 1):
1470        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1471    vals, vec = np.linalg.eigh(corr)
1472    lambda_min = np.mean(vals[:-E])
1473    vals[vals < lambda_min] = lambda_min
1474    vals /= np.mean(vals)
1475    return vec @ np.diag(vals) @ vec.T
1476
1477
1478def _covariance_element(obs1, obs2):
1479    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1480
1481    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1482        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1483        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1484        return np.sum(deltas1 * deltas2)
1485
1486    if set(obs1.names).isdisjoint(set(obs2.names)):
1487        return 0.0
1488
1489    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1490        raise Exception('The gamma method has to be applied to both Obs first.')
1491
1492    dvalue = 0.0
1493
1494    for e_name in obs1.mc_names:
1495
1496        if e_name not in obs2.mc_names:
1497            continue
1498
1499        idl_d = {}
1500        for r_name in obs1.e_content[e_name]:
1501            if r_name not in obs2.e_content[e_name]:
1502                continue
1503            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1504
1505        gamma = 0.0
1506
1507        for r_name in obs1.e_content[e_name]:
1508            if r_name not in obs2.e_content[e_name]:
1509                continue
1510            if len(idl_d[r_name]) == 0:
1511                continue
1512            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1513
1514        if gamma == 0.0:
1515            continue
1516
1517        gamma_div = 0.0
1518        for r_name in obs1.e_content[e_name]:
1519            if r_name not in obs2.e_content[e_name]:
1520                continue
1521            if len(idl_d[r_name]) == 0:
1522                continue
1523            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1524        gamma /= gamma_div
1525
1526        dvalue += gamma
1527
1528    for e_name in obs1.cov_names:
1529
1530        if e_name not in obs2.cov_names:
1531            continue
1532
1533        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1534
1535    return dvalue
1536
1537
1538def import_jackknife(jacks, name, idl=None):
1539    """Imports jackknife samples and returns an Obs
1540
1541    Parameters
1542    ----------
1543    jacks : numpy.ndarray
1544        numpy array containing the mean value as zeroth entry and
1545        the N jackknife samples as first to Nth entry.
1546    name : str
1547        name of the ensemble the samples are defined on.
1548    """
1549    length = len(jacks) - 1
1550    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1551    samples = jacks[1:] @ prj
1552    mean = np.mean(samples)
1553    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1554    new_obs._value = jacks[0]
1555    return new_obs
1556
1557
1558def merge_obs(list_of_obs):
1559    """Combine all observables in list_of_obs into one new observable
1560
1561    Parameters
1562    ----------
1563    list_of_obs : list
1564        list of the Obs object to be combined
1565
1566    Notes
1567    -----
1568    It is not possible to combine obs which are based on the same replicum
1569    """
1570    replist = [item for obs in list_of_obs for item in obs.names]
1571    if (len(replist) == len(set(replist))) is False:
1572        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1573    if any([len(o.cov_names) for o in list_of_obs]):
1574        raise Exception('Not possible to merge data that contains covobs!')
1575    new_dict = {}
1576    idl_dict = {}
1577    for o in list_of_obs:
1578        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1579                        for key in set(o.deltas) | set(o.r_values)})
1580        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1581
1582    names = sorted(new_dict.keys())
1583    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1584    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1585    return o
1586
1587
1588def cov_Obs(means, cov, name, grad=None):
1589    """Create an Obs based on mean(s) and a covariance matrix
1590
1591    Parameters
1592    ----------
1593    mean : list of floats or float
1594        N mean value(s) of the new Obs
1595    cov : list or array
1596        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1597    name : str
1598        identifier for the covariance matrix
1599    grad : list or array
1600        Gradient of the Covobs wrt. the means belonging to cov.
1601    """
1602
1603    def covobs_to_obs(co):
1604        """Make an Obs out of a Covobs
1605
1606        Parameters
1607        ----------
1608        co : Covobs
1609            Covobs to be embedded into the Obs
1610        """
1611        o = Obs([], [], means=[])
1612        o._value = co.value
1613        o.names.append(co.name)
1614        o._covobs[co.name] = co
1615        o._dvalue = np.sqrt(co.errsq())
1616        return o
1617
1618    ol = []
1619    if isinstance(means, (float, int)):
1620        means = [means]
1621
1622    for i in range(len(means)):
1623        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1624    if ol[0].covobs[name].N != len(means):
1625        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1626    if len(ol) == 1:
1627        return ol[0]
1628    return ol
class Obs:
 20class Obs:
 21    """Class for a general observable.
 22
 23    Instances of Obs are the basic objects of a pyerrors error analysis.
 24    They are initialized with a list which contains arrays of samples for
 25    different ensembles/replica and another list of same length which contains
 26    the names of the ensembles/replica. Mathematical operations can be
 27    performed on instances. The result is another instance of Obs. The error of
 28    an instance can be computed with the gamma_method. Also contains additional
 29    methods for output and visualization of the error calculation.
 30
 31    Attributes
 32    ----------
 33    S_global : float
 34        Standard value for S (default 2.0)
 35    S_dict : dict
 36        Dictionary for S values. If an entry for a given ensemble
 37        exists this overwrites the standard value for that ensemble.
 38    tau_exp_global : float
 39        Standard value for tau_exp (default 0.0)
 40    tau_exp_dict : dict
 41        Dictionary for tau_exp values. If an entry for a given ensemble exists
 42        this overwrites the standard value for that ensemble.
 43    N_sigma_global : float
 44        Standard value for N_sigma (default 1.0)
 45    N_sigma_dict : dict
 46        Dictionary for N_sigma values. If an entry for a given ensemble exists
 47        this overwrites the standard value for that ensemble.
 48    """
 49    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 50                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 51                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 52                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 53                 'idl', 'tag', '_covobs', '__dict__']
 54
 55    S_global = 2.0
 56    S_dict = {}
 57    tau_exp_global = 0.0
 58    tau_exp_dict = {}
 59    N_sigma_global = 1.0
 60    N_sigma_dict = {}
 61
 62    def __init__(self, samples, names, idl=None, **kwargs):
 63        """ Initialize Obs object.
 64
 65        Parameters
 66        ----------
 67        samples : list
 68            list of numpy arrays containing the Monte Carlo samples
 69        names : list
 70            list of strings labeling the individual samples
 71        idl : list, optional
 72            list of ranges or lists on which the samples are defined
 73        """
 74
 75        if kwargs.get("means") is None and len(samples):
 76            if len(samples) != len(names):
 77                raise Exception('Length of samples and names incompatible.')
 78            if idl is not None:
 79                if len(idl) != len(names):
 80                    raise Exception('Length of idl incompatible with samples and names.')
 81            name_length = len(names)
 82            if name_length > 1:
 83                if name_length != len(set(names)):
 84                    raise Exception('names are not unique.')
 85                if not all(isinstance(x, str) for x in names):
 86                    raise TypeError('All names have to be strings.')
 87            else:
 88                if not isinstance(names[0], str):
 89                    raise TypeError('All names have to be strings.')
 90            if min(len(x) for x in samples) <= 4:
 91                raise Exception('Samples have to have at least 5 entries.')
 92
 93        self.names = sorted(names)
 94        self.shape = {}
 95        self.r_values = {}
 96        self.deltas = {}
 97        self._covobs = {}
 98
 99        self._value = 0
100        self.N = 0
101        self.idl = {}
102        if idl is not None:
103            for name, idx in sorted(zip(names, idl)):
104                if isinstance(idx, range):
105                    self.idl[name] = idx
106                elif isinstance(idx, (list, np.ndarray)):
107                    dc = np.unique(np.diff(idx))
108                    if np.any(dc < 0):
109                        raise Exception("Unsorted idx for idl[%s]" % (name))
110                    if len(dc) == 1:
111                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
112                    else:
113                        self.idl[name] = list(idx)
114                else:
115                    raise Exception('incompatible type for idl[%s].' % (name))
116        else:
117            for name, sample in sorted(zip(names, samples)):
118                self.idl[name] = range(1, len(sample) + 1)
119
120        if kwargs.get("means") is not None:
121            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
122                self.shape[name] = len(self.idl[name])
123                self.N += self.shape[name]
124                self.r_values[name] = mean
125                self.deltas[name] = sample
126        else:
127            for name, sample in sorted(zip(names, samples)):
128                self.shape[name] = len(self.idl[name])
129                self.N += self.shape[name]
130                if len(sample) != self.shape[name]:
131                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
132                self.r_values[name] = np.mean(sample)
133                self.deltas[name] = sample - self.r_values[name]
134                self._value += self.shape[name] * self.r_values[name]
135            self._value /= self.N
136
137        self._dvalue = 0.0
138        self.ddvalue = 0.0
139        self.reweighted = False
140
141        self.tag = None
142
143    @property
144    def value(self):
145        return self._value
146
147    @property
148    def dvalue(self):
149        return self._dvalue
150
151    @property
152    def e_names(self):
153        return sorted(set([o.split('|')[0] for o in self.names]))
154
155    @property
156    def cov_names(self):
157        return sorted(set([o for o in self.covobs.keys()]))
158
159    @property
160    def mc_names(self):
161        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
162
163    @property
164    def e_content(self):
165        res = {}
166        for e, e_name in enumerate(self.e_names):
167            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
168            if e_name in self.names:
169                res[e_name].append(e_name)
170        return res
171
172    @property
173    def covobs(self):
174        return self._covobs
175
176    def gamma_method(self, **kwargs):
177        """Estimate the error and related properties of the Obs.
178
179        Parameters
180        ----------
181        S : float
182            specifies a custom value for the parameter S (default 2.0).
183            If set to 0 it is assumed that the data exhibits no
184            autocorrelation. In this case the error estimates coincides
185            with the sample standard error.
186        tau_exp : float
187            positive value triggers the critical slowing down analysis
188            (default 0.0).
189        N_sigma : float
190            number of standard deviations from zero until the tail is
191            attached to the autocorrelation function (default 1).
192        fft : bool
193            determines whether the fft algorithm is used for the computation
194            of the autocorrelation function (default True)
195        """
196
197        e_content = self.e_content
198        self.e_dvalue = {}
199        self.e_ddvalue = {}
200        self.e_tauint = {}
201        self.e_dtauint = {}
202        self.e_windowsize = {}
203        self.e_n_tauint = {}
204        self.e_n_dtauint = {}
205        e_gamma = {}
206        self.e_rho = {}
207        self.e_drho = {}
208        self._dvalue = 0
209        self.ddvalue = 0
210
211        self.S = {}
212        self.tau_exp = {}
213        self.N_sigma = {}
214
215        if kwargs.get('fft') is False:
216            fft = False
217        else:
218            fft = True
219
220        def _parse_kwarg(kwarg_name):
221            if kwarg_name in kwargs:
222                tmp = kwargs.get(kwarg_name)
223                if isinstance(tmp, (int, float)):
224                    if tmp < 0:
225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
226                    for e, e_name in enumerate(self.e_names):
227                        getattr(self, kwarg_name)[e_name] = tmp
228                else:
229                    raise TypeError(kwarg_name + ' is not in proper format.')
230            else:
231                for e, e_name in enumerate(self.e_names):
232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
234                    else:
235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
236
237        _parse_kwarg('S')
238        _parse_kwarg('tau_exp')
239        _parse_kwarg('N_sigma')
240
241        for e, e_name in enumerate(self.mc_names):
242            r_length = []
243            for r_name in e_content[e_name]:
244                if isinstance(self.idl[r_name], range):
245                    r_length.append(len(self.idl[r_name]))
246                else:
247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
248
249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
250            w_max = max(r_length) // 2
251            e_gamma[e_name] = np.zeros(w_max)
252            self.e_rho[e_name] = np.zeros(w_max)
253            self.e_drho[e_name] = np.zeros(w_max)
254
255            for r_name in e_content[e_name]:
256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
257
258            gamma_div = np.zeros(w_max)
259            for r_name in e_content[e_name]:
260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
261            gamma_div[gamma_div < 1] = 1.0
262            e_gamma[e_name] /= gamma_div[:w_max]
263
264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
265                self.e_tauint[e_name] = 0.5
266                self.e_dtauint[e_name] = 0.0
267                self.e_dvalue[e_name] = 0.0
268                self.e_ddvalue[e_name] = 0.0
269                self.e_windowsize[e_name] = 0
270                continue
271
272            gaps = []
273            for r_name in e_content[e_name]:
274                if isinstance(self.idl[r_name], range):
275                    gaps.append(1)
276                else:
277                    gaps.append(np.min(np.diff(self.idl[r_name])))
278
279            if not np.all([gi == gaps[0] for gi in gaps]):
280                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
281            else:
282                gapsize = gaps[0]
283
284            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
285            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
286            # Make sure no entry of tauint is smaller than 0.5
287            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
288            # hep-lat/0306017 eq. (42)
289            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
290            self.e_n_dtauint[e_name][0] = 0.0
291
292            def _compute_drho(i):
293                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
294                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
295
296            if self.tau_exp[e_name] > 0:
297                _compute_drho(gapsize)
298                texp = self.tau_exp[e_name]
299                # Critical slowing down analysis
300                if w_max // 2 <= 1:
301                    raise Exception("Need at least 8 samples for tau_exp error analysis")
302                for n in range(gapsize, w_max // 2, gapsize):
303                    _compute_drho(n + gapsize)
304                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
305                        # Bias correction hep-lat/0306017 eq. (49) included
306                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
307                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
308                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
309                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
310                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
311                        self.e_windowsize[e_name] = n
312                        break
313            else:
314                if self.S[e_name] == 0.0:
315                    self.e_tauint[e_name] = 0.5
316                    self.e_dtauint[e_name] = 0.0
317                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
318                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
319                    self.e_windowsize[e_name] = 0
320                else:
321                    # Standard automatic windowing procedure
322                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
323                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
324                    for n in range(1, w_max):
325                        if g_w[n - 1] < 0 or n >= w_max - 1:
326                            _compute_drho(gapsize * n)
327                            n *= gapsize
328                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
329                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
330                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
331                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
332                            self.e_windowsize[e_name] = n
333                            break
334
335            self._dvalue += self.e_dvalue[e_name] ** 2
336            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
337
338        for e_name in self.cov_names:
339            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
340            self.e_ddvalue[e_name] = 0
341            self._dvalue += self.e_dvalue[e_name]**2
342
343        self._dvalue = np.sqrt(self._dvalue)
344        if self._dvalue == 0.0:
345            self.ddvalue = 0.0
346        else:
347            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
348        return
349
350    gm = gamma_method
351
352    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
353        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
354           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
355
356        Parameters
357        ----------
358        deltas : list
359            List of fluctuations
360        idx : list
361            List or range of configurations on which the deltas are defined.
362        shape : int
363            Number of configurations in idx.
364        w_max : int
365            Upper bound for the summation window.
366        fft : bool
367            determines whether the fft algorithm is used for the computation
368            of the autocorrelation function.
369        """
370        gamma = np.zeros(w_max)
371        deltas = _expand_deltas(deltas, idx, shape)
372        new_shape = len(deltas)
373        if fft:
374            max_gamma = min(new_shape, w_max)
375            # The padding for the fft has to be even
376            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
377            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
378        else:
379            for n in range(w_max):
380                if new_shape - n >= 0:
381                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
382
383        return gamma
384
385    def details(self, ens_content=True):
386        """Output detailed properties of the Obs.
387
388        Parameters
389        ----------
390        ens_content : bool
391            print details about the ensembles and replica if true.
392        """
393        if self.tag is not None:
394            print("Description:", self.tag)
395        if not hasattr(self, 'e_dvalue'):
396            print('Result\t %3.8e' % (self.value))
397        else:
398            if self.value == 0.0:
399                percentage = np.nan
400            else:
401                percentage = np.abs(self._dvalue / self.value) * 100
402            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
403            if len(self.e_names) > 1:
404                print(' Ensemble errors:')
405            e_content = self.e_content
406            for e_name in self.mc_names:
407                if isinstance(self.idl[e_content[e_name][0]], range):
408                    gap = self.idl[e_content[e_name][0]].step
409                else:
410                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
411
412                if len(self.e_names) > 1:
413                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
414                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
415                tau_string += f" in units of {gap} config"
416                if gap > 1:
417                    tau_string += "s"
418                if self.tau_exp[e_name] > 0:
419                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
420                else:
421                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
422                print(tau_string)
423            for e_name in self.cov_names:
424                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
425        if ens_content is True:
426            if len(self.e_names) == 1:
427                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
428            else:
429                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
430            my_string_list = []
431            for key, value in sorted(self.e_content.items()):
432                if key not in self.covobs:
433                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
434                    if len(value) == 1:
435                        my_string += f': {self.shape[value[0]]} configurations'
436                        if isinstance(self.idl[value[0]], range):
437                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
438                        else:
439                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
440                    else:
441                        sublist = []
442                        for v in value:
443                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
444                            my_substring += f': {self.shape[v]} configurations'
445                            if isinstance(self.idl[v], range):
446                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
447                            else:
448                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
449                            sublist.append(my_substring)
450
451                        my_string += '\n' + '\n'.join(sublist)
452                else:
453                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
454                my_string_list.append(my_string)
455            print('\n'.join(my_string_list))
456
457    def reweight(self, weight):
458        """Reweight the obs with given rewighting factors.
459
460        Parameters
461        ----------
462        weight : Obs
463            Reweighting factor. An Observable that has to be defined on a superset of the
464            configurations in obs[i].idl for all i.
465        all_configs : bool
466            if True, the reweighted observables are normalized by the average of
467            the reweighting factor on all configurations in weight.idl and not
468            on the configurations in obs[i].idl. Default False.
469        """
470        return reweight(weight, [self])[0]
471
472    def is_zero_within_error(self, sigma=1):
473        """Checks whether the observable is zero within 'sigma' standard errors.
474
475        Parameters
476        ----------
477        sigma : int
478            Number of standard errors used for the check.
479
480        Works only properly when the gamma method was run.
481        """
482        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
483
484    def is_zero(self, atol=1e-10):
485        """Checks whether the observable is zero within a given tolerance.
486
487        Parameters
488        ----------
489        atol : float
490            Absolute tolerance (for details see numpy documentation).
491        """
492        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
493
494    def plot_tauint(self, save=None):
495        """Plot integrated autocorrelation time for each ensemble.
496
497        Parameters
498        ----------
499        save : str
500            saves the figure to a file named 'save' if.
501        """
502        if not hasattr(self, 'e_dvalue'):
503            raise Exception('Run the gamma method first.')
504
505        for e, e_name in enumerate(self.mc_names):
506            fig = plt.figure()
507            plt.xlabel(r'$W$')
508            plt.ylabel(r'$\tau_\mathrm{int}$')
509            length = int(len(self.e_n_tauint[e_name]))
510            if self.tau_exp[e_name] > 0:
511                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
512                x_help = np.arange(2 * self.tau_exp[e_name])
513                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
514                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
515                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
516                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
517                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
518                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
519                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
520            else:
521                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
522                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
523
524            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
525            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
526            plt.legend()
527            plt.xlim(-0.5, xmax)
528            ylim = plt.ylim()
529            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
530            plt.draw()
531            if save:
532                fig.savefig(save + "_" + str(e))
533
534    def plot_rho(self, save=None):
535        """Plot normalized autocorrelation function time for each ensemble.
536
537        Parameters
538        ----------
539        save : str
540            saves the figure to a file named 'save' if.
541        """
542        if not hasattr(self, 'e_dvalue'):
543            raise Exception('Run the gamma method first.')
544        for e, e_name in enumerate(self.mc_names):
545            fig = plt.figure()
546            plt.xlabel('W')
547            plt.ylabel('rho')
548            length = int(len(self.e_drho[e_name]))
549            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
550            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
551            if self.tau_exp[e_name] > 0:
552                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
553                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
554                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
555                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
556            else:
557                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
558                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
559            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
560            plt.xlim(-0.5, xmax)
561            plt.draw()
562            if save:
563                fig.savefig(save + "_" + str(e))
564
565    def plot_rep_dist(self):
566        """Plot replica distribution for each ensemble with more than one replicum."""
567        if not hasattr(self, 'e_dvalue'):
568            raise Exception('Run the gamma method first.')
569        for e, e_name in enumerate(self.mc_names):
570            if len(self.e_content[e_name]) == 1:
571                print('No replica distribution for a single replicum (', e_name, ')')
572                continue
573            r_length = []
574            sub_r_mean = 0
575            for r, r_name in enumerate(self.e_content[e_name]):
576                r_length.append(len(self.deltas[r_name]))
577                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
578            e_N = np.sum(r_length)
579            sub_r_mean /= e_N
580            arr = np.zeros(len(self.e_content[e_name]))
581            for r, r_name in enumerate(self.e_content[e_name]):
582                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
583            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
584            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
585            plt.draw()
586
587    def plot_history(self, expand=True):
588        """Plot derived Monte Carlo history for each ensemble
589
590        Parameters
591        ----------
592        expand : bool
593            show expanded history for irregular Monte Carlo chains (default: True).
594        """
595        for e, e_name in enumerate(self.mc_names):
596            plt.figure()
597            r_length = []
598            tmp = []
599            tmp_expanded = []
600            for r, r_name in enumerate(self.e_content[e_name]):
601                tmp.append(self.deltas[r_name] + self.r_values[r_name])
602                if expand:
603                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
604                    r_length.append(len(tmp_expanded[-1]))
605                else:
606                    r_length.append(len(tmp[-1]))
607            e_N = np.sum(r_length)
608            x = np.arange(e_N)
609            y_test = np.concatenate(tmp, axis=0)
610            if expand:
611                y = np.concatenate(tmp_expanded, axis=0)
612            else:
613                y = y_test
614            plt.errorbar(x, y, fmt='.', markersize=3)
615            plt.xlim(-0.5, e_N - 0.5)
616            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
617            plt.draw()
618
619    def plot_piechart(self, save=None):
620        """Plot piechart which shows the fractional contribution of each
621        ensemble to the error and returns a dictionary containing the fractions.
622
623        Parameters
624        ----------
625        save : str
626            saves the figure to a file named 'save' if.
627        """
628        if not hasattr(self, 'e_dvalue'):
629            raise Exception('Run the gamma method first.')
630        if np.isclose(0.0, self._dvalue, atol=1e-15):
631            raise Exception('Error is 0.0')
632        labels = self.e_names
633        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
634        fig1, ax1 = plt.subplots()
635        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
636        ax1.axis('equal')
637        plt.draw()
638        if save:
639            fig1.savefig(save)
640
641        return dict(zip(self.e_names, sizes))
642
643    def dump(self, filename, datatype="json.gz", description="", **kwargs):
644        """Dump the Obs to a file 'name' of chosen format.
645
646        Parameters
647        ----------
648        filename : str
649            name of the file to be saved.
650        datatype : str
651            Format of the exported file. Supported formats include
652            "json.gz" and "pickle"
653        description : str
654            Description for output file, only relevant for json.gz format.
655        path : str
656            specifies a custom path for the file (default '.')
657        """
658        if 'path' in kwargs:
659            file_name = kwargs.get('path') + '/' + filename
660        else:
661            file_name = filename
662
663        if datatype == "json.gz":
664            from .input.json import dump_to_json
665            dump_to_json([self], file_name, description=description)
666        elif datatype == "pickle":
667            with open(file_name + '.p', 'wb') as fb:
668                pickle.dump(self, fb)
669        else:
670            raise Exception("Unknown datatype " + str(datatype))
671
672    def export_jackknife(self):
673        """Export jackknife samples from the Obs
674
675        Returns
676        -------
677        numpy.ndarray
678            Returns a numpy array of length N + 1 where N is the number of samples
679            for the given ensemble and replicum. The zeroth entry of the array contains
680            the mean value of the Obs, entries 1 to N contain the N jackknife samples
681            derived from the Obs. The current implementation only works for observables
682            defined on exactly one ensemble and replicum. The derived jackknife samples
683            should agree with samples from a full jackknife analysis up to O(1/N).
684        """
685
686        if len(self.names) != 1:
687            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
688
689        name = self.names[0]
690        full_data = self.deltas[name] + self.r_values[name]
691        n = full_data.size
692        mean = self.value
693        tmp_jacks = np.zeros(n + 1)
694        tmp_jacks[0] = mean
695        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
696        return tmp_jacks
697
698    def __float__(self):
699        return float(self.value)
700
701    def __repr__(self):
702        return 'Obs[' + str(self) + ']'
703
704    def __str__(self):
705        return _format_uncertainty(self.value, self._dvalue)
706
707    def __hash__(self):
708        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
709        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
710        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
711        hash_tuple += tuple([o.encode() for o in self.names])
712        m = hashlib.md5()
713        [m.update(o) for o in hash_tuple]
714        return int(m.hexdigest(), 16) & 0xFFFFFFFF
715
716    # Overload comparisons
717    def __lt__(self, other):
718        return self.value < other
719
720    def __le__(self, other):
721        return self.value <= other
722
723    def __gt__(self, other):
724        return self.value > other
725
726    def __ge__(self, other):
727        return self.value >= other
728
729    def __eq__(self, other):
730        return (self - other).is_zero()
731
732    def __ne__(self, other):
733        return not (self - other).is_zero()
734
735    # Overload math operations
736    def __add__(self, y):
737        if isinstance(y, Obs):
738            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
739        else:
740            if isinstance(y, np.ndarray):
741                return np.array([self + o for o in y])
742            elif y.__class__.__name__ in ['Corr', 'CObs']:
743                return NotImplemented
744            else:
745                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
746
747    def __radd__(self, y):
748        return self + y
749
750    def __mul__(self, y):
751        if isinstance(y, Obs):
752            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
753        else:
754            if isinstance(y, np.ndarray):
755                return np.array([self * o for o in y])
756            elif isinstance(y, complex):
757                return CObs(self * y.real, self * y.imag)
758            elif y.__class__.__name__ in ['Corr', 'CObs']:
759                return NotImplemented
760            else:
761                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
762
763    def __rmul__(self, y):
764        return self * y
765
766    def __sub__(self, y):
767        if isinstance(y, Obs):
768            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
769        else:
770            if isinstance(y, np.ndarray):
771                return np.array([self - o for o in y])
772            elif y.__class__.__name__ in ['Corr', 'CObs']:
773                return NotImplemented
774            else:
775                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
776
777    def __rsub__(self, y):
778        return -1 * (self - y)
779
780    def __pos__(self):
781        return self
782
783    def __neg__(self):
784        return -1 * self
785
786    def __truediv__(self, y):
787        if isinstance(y, Obs):
788            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
789        else:
790            if isinstance(y, np.ndarray):
791                return np.array([self / o for o in y])
792            elif y.__class__.__name__ in ['Corr', 'CObs']:
793                return NotImplemented
794            else:
795                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
796
797    def __rtruediv__(self, y):
798        if isinstance(y, Obs):
799            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
800        else:
801            if isinstance(y, np.ndarray):
802                return np.array([o / self for o in y])
803            elif y.__class__.__name__ in ['Corr', 'CObs']:
804                return NotImplemented
805            else:
806                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
807
808    def __pow__(self, y):
809        if isinstance(y, Obs):
810            return derived_observable(lambda x: x[0] ** x[1], [self, y])
811        else:
812            return derived_observable(lambda x: x[0] ** y, [self])
813
814    def __rpow__(self, y):
815        if isinstance(y, Obs):
816            return derived_observable(lambda x: x[0] ** x[1], [y, self])
817        else:
818            return derived_observable(lambda x: y ** x[0], [self])
819
820    def __abs__(self):
821        return derived_observable(lambda x: anp.abs(x[0]), [self])
822
823    # Overload numpy functions
824    def sqrt(self):
825        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
826
827    def log(self):
828        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
829
830    def exp(self):
831        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
832
833    def sin(self):
834        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
835
836    def cos(self):
837        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
838
839    def tan(self):
840        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
841
842    def arcsin(self):
843        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
844
845    def arccos(self):
846        return derived_observable(lambda x: anp.arccos(x[0]), [self])
847
848    def arctan(self):
849        return derived_observable(lambda x: anp.arctan(x[0]), [self])
850
851    def sinh(self):
852        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
853
854    def cosh(self):
855        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
856
857    def tanh(self):
858        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
859
860    def arcsinh(self):
861        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
862
863    def arccosh(self):
864        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
865
866    def arctanh(self):
867        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 62    def __init__(self, samples, names, idl=None, **kwargs):
 63        """ Initialize Obs object.
 64
 65        Parameters
 66        ----------
 67        samples : list
 68            list of numpy arrays containing the Monte Carlo samples
 69        names : list
 70            list of strings labeling the individual samples
 71        idl : list, optional
 72            list of ranges or lists on which the samples are defined
 73        """
 74
 75        if kwargs.get("means") is None and len(samples):
 76            if len(samples) != len(names):
 77                raise Exception('Length of samples and names incompatible.')
 78            if idl is not None:
 79                if len(idl) != len(names):
 80                    raise Exception('Length of idl incompatible with samples and names.')
 81            name_length = len(names)
 82            if name_length > 1:
 83                if name_length != len(set(names)):
 84                    raise Exception('names are not unique.')
 85                if not all(isinstance(x, str) for x in names):
 86                    raise TypeError('All names have to be strings.')
 87            else:
 88                if not isinstance(names[0], str):
 89                    raise TypeError('All names have to be strings.')
 90            if min(len(x) for x in samples) <= 4:
 91                raise Exception('Samples have to have at least 5 entries.')
 92
 93        self.names = sorted(names)
 94        self.shape = {}
 95        self.r_values = {}
 96        self.deltas = {}
 97        self._covobs = {}
 98
 99        self._value = 0
100        self.N = 0
101        self.idl = {}
102        if idl is not None:
103            for name, idx in sorted(zip(names, idl)):
104                if isinstance(idx, range):
105                    self.idl[name] = idx
106                elif isinstance(idx, (list, np.ndarray)):
107                    dc = np.unique(np.diff(idx))
108                    if np.any(dc < 0):
109                        raise Exception("Unsorted idx for idl[%s]" % (name))
110                    if len(dc) == 1:
111                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
112                    else:
113                        self.idl[name] = list(idx)
114                else:
115                    raise Exception('incompatible type for idl[%s].' % (name))
116        else:
117            for name, sample in sorted(zip(names, samples)):
118                self.idl[name] = range(1, len(sample) + 1)
119
120        if kwargs.get("means") is not None:
121            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
122                self.shape[name] = len(self.idl[name])
123                self.N += self.shape[name]
124                self.r_values[name] = mean
125                self.deltas[name] = sample
126        else:
127            for name, sample in sorted(zip(names, samples)):
128                self.shape[name] = len(self.idl[name])
129                self.N += self.shape[name]
130                if len(sample) != self.shape[name]:
131                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
132                self.r_values[name] = np.mean(sample)
133                self.deltas[name] = sample - self.r_values[name]
134                self._value += self.shape[name] * self.r_values[name]
135            self._value /= self.N
136
137        self._dvalue = 0.0
138        self.ddvalue = 0.0
139        self.reweighted = False
140
141        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
def gamma_method(self, **kwargs):
176    def gamma_method(self, **kwargs):
177        """Estimate the error and related properties of the Obs.
178
179        Parameters
180        ----------
181        S : float
182            specifies a custom value for the parameter S (default 2.0).
183            If set to 0 it is assumed that the data exhibits no
184            autocorrelation. In this case the error estimates coincides
185            with the sample standard error.
186        tau_exp : float
187            positive value triggers the critical slowing down analysis
188            (default 0.0).
189        N_sigma : float
190            number of standard deviations from zero until the tail is
191            attached to the autocorrelation function (default 1).
192        fft : bool
193            determines whether the fft algorithm is used for the computation
194            of the autocorrelation function (default True)
195        """
196
197        e_content = self.e_content
198        self.e_dvalue = {}
199        self.e_ddvalue = {}
200        self.e_tauint = {}
201        self.e_dtauint = {}
202        self.e_windowsize = {}
203        self.e_n_tauint = {}
204        self.e_n_dtauint = {}
205        e_gamma = {}
206        self.e_rho = {}
207        self.e_drho = {}
208        self._dvalue = 0
209        self.ddvalue = 0
210
211        self.S = {}
212        self.tau_exp = {}
213        self.N_sigma = {}
214
215        if kwargs.get('fft') is False:
216            fft = False
217        else:
218            fft = True
219
220        def _parse_kwarg(kwarg_name):
221            if kwarg_name in kwargs:
222                tmp = kwargs.get(kwarg_name)
223                if isinstance(tmp, (int, float)):
224                    if tmp < 0:
225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
226                    for e, e_name in enumerate(self.e_names):
227                        getattr(self, kwarg_name)[e_name] = tmp
228                else:
229                    raise TypeError(kwarg_name + ' is not in proper format.')
230            else:
231                for e, e_name in enumerate(self.e_names):
232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
234                    else:
235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
236
237        _parse_kwarg('S')
238        _parse_kwarg('tau_exp')
239        _parse_kwarg('N_sigma')
240
241        for e, e_name in enumerate(self.mc_names):
242            r_length = []
243            for r_name in e_content[e_name]:
244                if isinstance(self.idl[r_name], range):
245                    r_length.append(len(self.idl[r_name]))
246                else:
247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
248
249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
250            w_max = max(r_length) // 2
251            e_gamma[e_name] = np.zeros(w_max)
252            self.e_rho[e_name] = np.zeros(w_max)
253            self.e_drho[e_name] = np.zeros(w_max)
254
255            for r_name in e_content[e_name]:
256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
257
258            gamma_div = np.zeros(w_max)
259            for r_name in e_content[e_name]:
260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
261            gamma_div[gamma_div < 1] = 1.0
262            e_gamma[e_name] /= gamma_div[:w_max]
263
264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
265                self.e_tauint[e_name] = 0.5
266                self.e_dtauint[e_name] = 0.0
267                self.e_dvalue[e_name] = 0.0
268                self.e_ddvalue[e_name] = 0.0
269                self.e_windowsize[e_name] = 0
270                continue
271
272            gaps = []
273            for r_name in e_content[e_name]:
274                if isinstance(self.idl[r_name], range):
275                    gaps.append(1)
276                else:
277                    gaps.append(np.min(np.diff(self.idl[r_name])))
278
279            if not np.all([gi == gaps[0] for gi in gaps]):
280                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
281            else:
282                gapsize = gaps[0]
283
284            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
285            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
286            # Make sure no entry of tauint is smaller than 0.5
287            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
288            # hep-lat/0306017 eq. (42)
289            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
290            self.e_n_dtauint[e_name][0] = 0.0
291
292            def _compute_drho(i):
293                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
294                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
295
296            if self.tau_exp[e_name] > 0:
297                _compute_drho(gapsize)
298                texp = self.tau_exp[e_name]
299                # Critical slowing down analysis
300                if w_max // 2 <= 1:
301                    raise Exception("Need at least 8 samples for tau_exp error analysis")
302                for n in range(gapsize, w_max // 2, gapsize):
303                    _compute_drho(n + gapsize)
304                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
305                        # Bias correction hep-lat/0306017 eq. (49) included
306                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
307                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
308                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
309                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
310                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
311                        self.e_windowsize[e_name] = n
312                        break
313            else:
314                if self.S[e_name] == 0.0:
315                    self.e_tauint[e_name] = 0.5
316                    self.e_dtauint[e_name] = 0.0
317                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
318                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
319                    self.e_windowsize[e_name] = 0
320                else:
321                    # Standard automatic windowing procedure
322                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
323                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
324                    for n in range(1, w_max):
325                        if g_w[n - 1] < 0 or n >= w_max - 1:
326                            _compute_drho(gapsize * n)
327                            n *= gapsize
328                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
329                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
330                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
331                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
332                            self.e_windowsize[e_name] = n
333                            break
334
335            self._dvalue += self.e_dvalue[e_name] ** 2
336            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
337
338        for e_name in self.cov_names:
339            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
340            self.e_ddvalue[e_name] = 0
341            self._dvalue += self.e_dvalue[e_name]**2
342
343        self._dvalue = np.sqrt(self._dvalue)
344        if self._dvalue == 0.0:
345            self.ddvalue = 0.0
346        else:
347            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
348        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def gm(self, **kwargs):
176    def gamma_method(self, **kwargs):
177        """Estimate the error and related properties of the Obs.
178
179        Parameters
180        ----------
181        S : float
182            specifies a custom value for the parameter S (default 2.0).
183            If set to 0 it is assumed that the data exhibits no
184            autocorrelation. In this case the error estimates coincides
185            with the sample standard error.
186        tau_exp : float
187            positive value triggers the critical slowing down analysis
188            (default 0.0).
189        N_sigma : float
190            number of standard deviations from zero until the tail is
191            attached to the autocorrelation function (default 1).
192        fft : bool
193            determines whether the fft algorithm is used for the computation
194            of the autocorrelation function (default True)
195        """
196
197        e_content = self.e_content
198        self.e_dvalue = {}
199        self.e_ddvalue = {}
200        self.e_tauint = {}
201        self.e_dtauint = {}
202        self.e_windowsize = {}
203        self.e_n_tauint = {}
204        self.e_n_dtauint = {}
205        e_gamma = {}
206        self.e_rho = {}
207        self.e_drho = {}
208        self._dvalue = 0
209        self.ddvalue = 0
210
211        self.S = {}
212        self.tau_exp = {}
213        self.N_sigma = {}
214
215        if kwargs.get('fft') is False:
216            fft = False
217        else:
218            fft = True
219
220        def _parse_kwarg(kwarg_name):
221            if kwarg_name in kwargs:
222                tmp = kwargs.get(kwarg_name)
223                if isinstance(tmp, (int, float)):
224                    if tmp < 0:
225                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
226                    for e, e_name in enumerate(self.e_names):
227                        getattr(self, kwarg_name)[e_name] = tmp
228                else:
229                    raise TypeError(kwarg_name + ' is not in proper format.')
230            else:
231                for e, e_name in enumerate(self.e_names):
232                    if e_name in getattr(Obs, kwarg_name + '_dict'):
233                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
234                    else:
235                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
236
237        _parse_kwarg('S')
238        _parse_kwarg('tau_exp')
239        _parse_kwarg('N_sigma')
240
241        for e, e_name in enumerate(self.mc_names):
242            r_length = []
243            for r_name in e_content[e_name]:
244                if isinstance(self.idl[r_name], range):
245                    r_length.append(len(self.idl[r_name]))
246                else:
247                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
248
249            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
250            w_max = max(r_length) // 2
251            e_gamma[e_name] = np.zeros(w_max)
252            self.e_rho[e_name] = np.zeros(w_max)
253            self.e_drho[e_name] = np.zeros(w_max)
254
255            for r_name in e_content[e_name]:
256                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
257
258            gamma_div = np.zeros(w_max)
259            for r_name in e_content[e_name]:
260                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
261            gamma_div[gamma_div < 1] = 1.0
262            e_gamma[e_name] /= gamma_div[:w_max]
263
264            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
265                self.e_tauint[e_name] = 0.5
266                self.e_dtauint[e_name] = 0.0
267                self.e_dvalue[e_name] = 0.0
268                self.e_ddvalue[e_name] = 0.0
269                self.e_windowsize[e_name] = 0
270                continue
271
272            gaps = []
273            for r_name in e_content[e_name]:
274                if isinstance(self.idl[r_name], range):
275                    gaps.append(1)
276                else:
277                    gaps.append(np.min(np.diff(self.idl[r_name])))
278
279            if not np.all([gi == gaps[0] for gi in gaps]):
280                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
281            else:
282                gapsize = gaps[0]
283
284            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
285            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
286            # Make sure no entry of tauint is smaller than 0.5
287            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
288            # hep-lat/0306017 eq. (42)
289            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
290            self.e_n_dtauint[e_name][0] = 0.0
291
292            def _compute_drho(i):
293                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
294                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
295
296            if self.tau_exp[e_name] > 0:
297                _compute_drho(gapsize)
298                texp = self.tau_exp[e_name]
299                # Critical slowing down analysis
300                if w_max // 2 <= 1:
301                    raise Exception("Need at least 8 samples for tau_exp error analysis")
302                for n in range(gapsize, w_max // 2, gapsize):
303                    _compute_drho(n + gapsize)
304                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
305                        # Bias correction hep-lat/0306017 eq. (49) included
306                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
307                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
308                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
309                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
310                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
311                        self.e_windowsize[e_name] = n
312                        break
313            else:
314                if self.S[e_name] == 0.0:
315                    self.e_tauint[e_name] = 0.5
316                    self.e_dtauint[e_name] = 0.0
317                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
318                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
319                    self.e_windowsize[e_name] = 0
320                else:
321                    # Standard automatic windowing procedure
322                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
323                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
324                    for n in range(1, w_max):
325                        if g_w[n - 1] < 0 or n >= w_max - 1:
326                            _compute_drho(gapsize * n)
327                            n *= gapsize
328                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
329                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
330                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
331                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
332                            self.e_windowsize[e_name] = n
333                            break
334
335            self._dvalue += self.e_dvalue[e_name] ** 2
336            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
337
338        for e_name in self.cov_names:
339            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
340            self.e_ddvalue[e_name] = 0
341            self._dvalue += self.e_dvalue[e_name]**2
342
343        self._dvalue = np.sqrt(self._dvalue)
344        if self._dvalue == 0.0:
345            self.ddvalue = 0.0
346        else:
347            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
348        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True):
385    def details(self, ens_content=True):
386        """Output detailed properties of the Obs.
387
388        Parameters
389        ----------
390        ens_content : bool
391            print details about the ensembles and replica if true.
392        """
393        if self.tag is not None:
394            print("Description:", self.tag)
395        if not hasattr(self, 'e_dvalue'):
396            print('Result\t %3.8e' % (self.value))
397        else:
398            if self.value == 0.0:
399                percentage = np.nan
400            else:
401                percentage = np.abs(self._dvalue / self.value) * 100
402            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
403            if len(self.e_names) > 1:
404                print(' Ensemble errors:')
405            e_content = self.e_content
406            for e_name in self.mc_names:
407                if isinstance(self.idl[e_content[e_name][0]], range):
408                    gap = self.idl[e_content[e_name][0]].step
409                else:
410                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
411
412                if len(self.e_names) > 1:
413                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
414                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
415                tau_string += f" in units of {gap} config"
416                if gap > 1:
417                    tau_string += "s"
418                if self.tau_exp[e_name] > 0:
419                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
420                else:
421                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
422                print(tau_string)
423            for e_name in self.cov_names:
424                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
425        if ens_content is True:
426            if len(self.e_names) == 1:
427                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
428            else:
429                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
430            my_string_list = []
431            for key, value in sorted(self.e_content.items()):
432                if key not in self.covobs:
433                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
434                    if len(value) == 1:
435                        my_string += f': {self.shape[value[0]]} configurations'
436                        if isinstance(self.idl[value[0]], range):
437                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
438                        else:
439                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
440                    else:
441                        sublist = []
442                        for v in value:
443                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
444                            my_substring += f': {self.shape[v]} configurations'
445                            if isinstance(self.idl[v], range):
446                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
447                            else:
448                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
449                            sublist.append(my_substring)
450
451                        my_string += '\n' + '\n'.join(sublist)
452                else:
453                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
454                my_string_list.append(my_string)
455            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight):
457    def reweight(self, weight):
458        """Reweight the obs with given rewighting factors.
459
460        Parameters
461        ----------
462        weight : Obs
463            Reweighting factor. An Observable that has to be defined on a superset of the
464            configurations in obs[i].idl for all i.
465        all_configs : bool
466            if True, the reweighted observables are normalized by the average of
467            the reweighting factor on all configurations in weight.idl and not
468            on the configurations in obs[i].idl. Default False.
469        """
470        return reweight(weight, [self])[0]

Reweight the obs with given rewighting factors.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def is_zero_within_error(self, sigma=1):
472    def is_zero_within_error(self, sigma=1):
473        """Checks whether the observable is zero within 'sigma' standard errors.
474
475        Parameters
476        ----------
477        sigma : int
478            Number of standard errors used for the check.
479
480        Works only properly when the gamma method was run.
481        """
482        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Works only properly when the gamma method was run.
def is_zero(self, atol=1e-10):
484    def is_zero(self, atol=1e-10):
485        """Checks whether the observable is zero within a given tolerance.
486
487        Parameters
488        ----------
489        atol : float
490            Absolute tolerance (for details see numpy documentation).
491        """
492        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
494    def plot_tauint(self, save=None):
495        """Plot integrated autocorrelation time for each ensemble.
496
497        Parameters
498        ----------
499        save : str
500            saves the figure to a file named 'save' if.
501        """
502        if not hasattr(self, 'e_dvalue'):
503            raise Exception('Run the gamma method first.')
504
505        for e, e_name in enumerate(self.mc_names):
506            fig = plt.figure()
507            plt.xlabel(r'$W$')
508            plt.ylabel(r'$\tau_\mathrm{int}$')
509            length = int(len(self.e_n_tauint[e_name]))
510            if self.tau_exp[e_name] > 0:
511                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
512                x_help = np.arange(2 * self.tau_exp[e_name])
513                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
514                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
515                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
516                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
517                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
518                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
519                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
520            else:
521                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
522                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
523
524            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
525            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
526            plt.legend()
527            plt.xlim(-0.5, xmax)
528            ylim = plt.ylim()
529            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
530            plt.draw()
531            if save:
532                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rho(self, save=None):
534    def plot_rho(self, save=None):
535        """Plot normalized autocorrelation function time for each ensemble.
536
537        Parameters
538        ----------
539        save : str
540            saves the figure to a file named 'save' if.
541        """
542        if not hasattr(self, 'e_dvalue'):
543            raise Exception('Run the gamma method first.')
544        for e, e_name in enumerate(self.mc_names):
545            fig = plt.figure()
546            plt.xlabel('W')
547            plt.ylabel('rho')
548            length = int(len(self.e_drho[e_name]))
549            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
550            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
551            if self.tau_exp[e_name] > 0:
552                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
553                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
554                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
555                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
556            else:
557                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
558                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
559            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
560            plt.xlim(-0.5, xmax)
561            plt.draw()
562            if save:
563                fig.savefig(save + "_" + str(e))

Plot normalized autocorrelation function time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def plot_rep_dist(self):
565    def plot_rep_dist(self):
566        """Plot replica distribution for each ensemble with more than one replicum."""
567        if not hasattr(self, 'e_dvalue'):
568            raise Exception('Run the gamma method first.')
569        for e, e_name in enumerate(self.mc_names):
570            if len(self.e_content[e_name]) == 1:
571                print('No replica distribution for a single replicum (', e_name, ')')
572                continue
573            r_length = []
574            sub_r_mean = 0
575            for r, r_name in enumerate(self.e_content[e_name]):
576                r_length.append(len(self.deltas[r_name]))
577                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
578            e_N = np.sum(r_length)
579            sub_r_mean /= e_N
580            arr = np.zeros(len(self.e_content[e_name]))
581            for r, r_name in enumerate(self.e_content[e_name]):
582                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
583            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
584            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
585            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
587    def plot_history(self, expand=True):
588        """Plot derived Monte Carlo history for each ensemble
589
590        Parameters
591        ----------
592        expand : bool
593            show expanded history for irregular Monte Carlo chains (default: True).
594        """
595        for e, e_name in enumerate(self.mc_names):
596            plt.figure()
597            r_length = []
598            tmp = []
599            tmp_expanded = []
600            for r, r_name in enumerate(self.e_content[e_name]):
601                tmp.append(self.deltas[r_name] + self.r_values[r_name])
602                if expand:
603                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
604                    r_length.append(len(tmp_expanded[-1]))
605                else:
606                    r_length.append(len(tmp[-1]))
607            e_N = np.sum(r_length)
608            x = np.arange(e_N)
609            y_test = np.concatenate(tmp, axis=0)
610            if expand:
611                y = np.concatenate(tmp_expanded, axis=0)
612            else:
613                y = y_test
614            plt.errorbar(x, y, fmt='.', markersize=3)
615            plt.xlim(-0.5, e_N - 0.5)
616            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
617            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
619    def plot_piechart(self, save=None):
620        """Plot piechart which shows the fractional contribution of each
621        ensemble to the error and returns a dictionary containing the fractions.
622
623        Parameters
624        ----------
625        save : str
626            saves the figure to a file named 'save' if.
627        """
628        if not hasattr(self, 'e_dvalue'):
629            raise Exception('Run the gamma method first.')
630        if np.isclose(0.0, self._dvalue, atol=1e-15):
631            raise Exception('Error is 0.0')
632        labels = self.e_names
633        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
634        fig1, ax1 = plt.subplots()
635        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
636        ax1.axis('equal')
637        plt.draw()
638        if save:
639            fig1.savefig(save)
640
641        return dict(zip(self.e_names, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
643    def dump(self, filename, datatype="json.gz", description="", **kwargs):
644        """Dump the Obs to a file 'name' of chosen format.
645
646        Parameters
647        ----------
648        filename : str
649            name of the file to be saved.
650        datatype : str
651            Format of the exported file. Supported formats include
652            "json.gz" and "pickle"
653        description : str
654            Description for output file, only relevant for json.gz format.
655        path : str
656            specifies a custom path for the file (default '.')
657        """
658        if 'path' in kwargs:
659            file_name = kwargs.get('path') + '/' + filename
660        else:
661            file_name = filename
662
663        if datatype == "json.gz":
664            from .input.json import dump_to_json
665            dump_to_json([self], file_name, description=description)
666        elif datatype == "pickle":
667            with open(file_name + '.p', 'wb') as fb:
668                pickle.dump(self, fb)
669        else:
670            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'name' of chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
def export_jackknife(self):
672    def export_jackknife(self):
673        """Export jackknife samples from the Obs
674
675        Returns
676        -------
677        numpy.ndarray
678            Returns a numpy array of length N + 1 where N is the number of samples
679            for the given ensemble and replicum. The zeroth entry of the array contains
680            the mean value of the Obs, entries 1 to N contain the N jackknife samples
681            derived from the Obs. The current implementation only works for observables
682            defined on exactly one ensemble and replicum. The derived jackknife samples
683            should agree with samples from a full jackknife analysis up to O(1/N).
684        """
685
686        if len(self.names) != 1:
687            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
688
689        name = self.names[0]
690        full_data = self.deltas[name] + self.r_values[name]
691        n = full_data.size
692        mean = self.value
693        tmp_jacks = np.zeros(n + 1)
694        tmp_jacks[0] = mean
695        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
696        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
def sqrt(self):
824    def sqrt(self):
825        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
827    def log(self):
828        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
830    def exp(self):
831        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
833    def sin(self):
834        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
836    def cos(self):
837        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
839    def tan(self):
840        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
842    def arcsin(self):
843        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
845    def arccos(self):
846        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
848    def arctan(self):
849        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
851    def sinh(self):
852        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
854    def cosh(self):
855        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
857    def tanh(self):
858        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
860    def arcsinh(self):
861        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
863    def arccosh(self):
864        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
866    def arctanh(self):
867        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
class CObs:
870class CObs:
871    """Class for a complex valued observable."""
872    __slots__ = ['_real', '_imag', 'tag']
873
874    def __init__(self, real, imag=0.0):
875        self._real = real
876        self._imag = imag
877        self.tag = None
878
879    @property
880    def real(self):
881        return self._real
882
883    @property
884    def imag(self):
885        return self._imag
886
887    def gamma_method(self, **kwargs):
888        """Executes the gamma_method for the real and the imaginary part."""
889        if isinstance(self.real, Obs):
890            self.real.gamma_method(**kwargs)
891        if isinstance(self.imag, Obs):
892            self.imag.gamma_method(**kwargs)
893
894    def is_zero(self):
895        """Checks whether both real and imaginary part are zero within machine precision."""
896        return self.real == 0.0 and self.imag == 0.0
897
898    def conjugate(self):
899        return CObs(self.real, -self.imag)
900
901    def __add__(self, other):
902        if isinstance(other, np.ndarray):
903            return other + self
904        elif hasattr(other, 'real') and hasattr(other, 'imag'):
905            return CObs(self.real + other.real,
906                        self.imag + other.imag)
907        else:
908            return CObs(self.real + other, self.imag)
909
910    def __radd__(self, y):
911        return self + y
912
913    def __sub__(self, other):
914        if isinstance(other, np.ndarray):
915            return -1 * (other - self)
916        elif hasattr(other, 'real') and hasattr(other, 'imag'):
917            return CObs(self.real - other.real, self.imag - other.imag)
918        else:
919            return CObs(self.real - other, self.imag)
920
921    def __rsub__(self, other):
922        return -1 * (self - other)
923
924    def __mul__(self, other):
925        if isinstance(other, np.ndarray):
926            return other * self
927        elif hasattr(other, 'real') and hasattr(other, 'imag'):
928            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
929                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
930                                               [self.real, other.real, self.imag, other.imag],
931                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
932                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
933                                               [self.real, other.real, self.imag, other.imag],
934                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
935            elif getattr(other, 'imag', 0) != 0:
936                return CObs(self.real * other.real - self.imag * other.imag,
937                            self.imag * other.real + self.real * other.imag)
938            else:
939                return CObs(self.real * other.real, self.imag * other.real)
940        else:
941            return CObs(self.real * other, self.imag * other)
942
943    def __rmul__(self, other):
944        return self * other
945
946    def __truediv__(self, other):
947        if isinstance(other, np.ndarray):
948            return 1 / (other / self)
949        elif hasattr(other, 'real') and hasattr(other, 'imag'):
950            r = other.real ** 2 + other.imag ** 2
951            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
952        else:
953            return CObs(self.real / other, self.imag / other)
954
955    def __rtruediv__(self, other):
956        r = self.real ** 2 + self.imag ** 2
957        if hasattr(other, 'real') and hasattr(other, 'imag'):
958            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
959        else:
960            return CObs(self.real * other / r, -self.imag * other / r)
961
962    def __abs__(self):
963        return np.sqrt(self.real**2 + self.imag**2)
964
965    def __pos__(self):
966        return self
967
968    def __neg__(self):
969        return -1 * self
970
971    def __eq__(self, other):
972        return self.real == other.real and self.imag == other.imag
973
974    def __str__(self):
975        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
976
977    def __repr__(self):
978        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
874    def __init__(self, real, imag=0.0):
875        self._real = real
876        self._imag = imag
877        self.tag = None
def gamma_method(self, **kwargs):
887    def gamma_method(self, **kwargs):
888        """Executes the gamma_method for the real and the imaginary part."""
889        if isinstance(self.real, Obs):
890            self.real.gamma_method(**kwargs)
891        if isinstance(self.imag, Obs):
892            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
894    def is_zero(self):
895        """Checks whether both real and imaginary part are zero within machine precision."""
896        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
898    def conjugate(self):
899        return CObs(self.real, -self.imag)
def derived_observable(func, data, array_mode=False, **kwargs):
1103def derived_observable(func, data, array_mode=False, **kwargs):
1104    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1105
1106    Parameters
1107    ----------
1108    func : object
1109        arbitrary function of the form func(data, **kwargs). For the
1110        automatic differentiation to work, all numpy functions have to have
1111        the autograd wrapper (use 'import autograd.numpy as anp').
1112    data : list
1113        list of Obs, e.g. [obs1, obs2, obs3].
1114    num_grad : bool
1115        if True, numerical derivatives are used instead of autograd
1116        (default False). To control the numerical differentiation the
1117        kwargs of numdifftools.step_generators.MaxStepGenerator
1118        can be used.
1119    man_grad : list
1120        manually supply a list or an array which contains the jacobian
1121        of func. Use cautiously, supplying the wrong derivative will
1122        not be intercepted.
1123
1124    Notes
1125    -----
1126    For simple mathematical operations it can be practical to use anonymous
1127    functions. For the ratio of two observables one can e.g. use
1128
1129    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1130    """
1131
1132    data = np.asarray(data)
1133    raveled_data = data.ravel()
1134
1135    # Workaround for matrix operations containing non Obs data
1136    if not all(isinstance(x, Obs) for x in raveled_data):
1137        for i in range(len(raveled_data)):
1138            if isinstance(raveled_data[i], (int, float)):
1139                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1140
1141    allcov = {}
1142    for o in raveled_data:
1143        for name in o.cov_names:
1144            if name in allcov:
1145                if not np.allclose(allcov[name], o.covobs[name].cov):
1146                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1147            else:
1148                allcov[name] = o.covobs[name].cov
1149
1150    n_obs = len(raveled_data)
1151    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1152    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1153    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1154
1155    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1156
1157    if data.ndim == 1:
1158        values = np.array([o.value for o in data])
1159    else:
1160        values = np.vectorize(lambda x: x.value)(data)
1161
1162    new_values = func(values, **kwargs)
1163
1164    multi = int(isinstance(new_values, np.ndarray))
1165
1166    new_r_values = {}
1167    new_idl_d = {}
1168    for name in new_sample_names:
1169        idl = []
1170        tmp_values = np.zeros(n_obs)
1171        for i, item in enumerate(raveled_data):
1172            tmp_values[i] = item.r_values.get(name, item.value)
1173            tmp_idl = item.idl.get(name)
1174            if tmp_idl is not None:
1175                idl.append(tmp_idl)
1176        if multi > 0:
1177            tmp_values = np.array(tmp_values).reshape(data.shape)
1178        new_r_values[name] = func(tmp_values, **kwargs)
1179        new_idl_d[name] = _merge_idx(idl)
1180
1181    if 'man_grad' in kwargs:
1182        deriv = np.asarray(kwargs.get('man_grad'))
1183        if new_values.shape + data.shape != deriv.shape:
1184            raise Exception('Manual derivative does not have correct shape.')
1185    elif kwargs.get('num_grad') is True:
1186        if multi > 0:
1187            raise Exception('Multi mode currently not supported for numerical derivative')
1188        options = {
1189            'base_step': 0.1,
1190            'step_ratio': 2.5}
1191        for key in options.keys():
1192            kwarg = kwargs.get(key)
1193            if kwarg is not None:
1194                options[key] = kwarg
1195        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1196        if tmp_df.size == 1:
1197            deriv = np.array([tmp_df.real])
1198        else:
1199            deriv = tmp_df.real
1200    else:
1201        deriv = jacobian(func)(values, **kwargs)
1202
1203    final_result = np.zeros(new_values.shape, dtype=object)
1204
1205    if array_mode is True:
1206
1207        class _Zero_grad():
1208            def __init__(self, N):
1209                self.grad = np.zeros((N, 1))
1210
1211        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1212        d_extracted = {}
1213        g_extracted = {}
1214        for name in new_sample_names:
1215            d_extracted[name] = []
1216            ens_length = len(new_idl_d[name])
1217            for i_dat, dat in enumerate(data):
1218                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1219        for name in new_cov_names:
1220            g_extracted[name] = []
1221            zero_grad = _Zero_grad(new_covobs_lengths[name])
1222            for i_dat, dat in enumerate(data):
1223                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1224
1225    for i_val, new_val in np.ndenumerate(new_values):
1226        new_deltas = {}
1227        new_grad = {}
1228        if array_mode is True:
1229            for name in new_sample_names:
1230                ens_length = d_extracted[name][0].shape[-1]
1231                new_deltas[name] = np.zeros(ens_length)
1232                for i_dat, dat in enumerate(d_extracted[name]):
1233                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1234            for name in new_cov_names:
1235                new_grad[name] = 0
1236                for i_dat, dat in enumerate(g_extracted[name]):
1237                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1238        else:
1239            for j_obs, obs in np.ndenumerate(data):
1240                for name in obs.names:
1241                    if name in obs.cov_names:
1242                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1243                    else:
1244                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1245
1246        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1247
1248        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1249            raise Exception('The same name has been used for deltas and covobs!')
1250        new_samples = []
1251        new_means = []
1252        new_idl = []
1253        new_names_obs = []
1254        for name in new_names:
1255            if name not in new_covobs:
1256                new_samples.append(new_deltas[name])
1257                new_idl.append(new_idl_d[name])
1258                new_means.append(new_r_values[name][i_val])
1259                new_names_obs.append(name)
1260        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1261        for name in new_covobs:
1262            final_result[i_val].names.append(name)
1263        final_result[i_val]._covobs = new_covobs
1264        final_result[i_val]._value = new_val
1265        final_result[i_val].reweighted = reweighted
1266
1267    if multi == 0:
1268        final_result = final_result.item()
1269
1270    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs):
1307def reweight(weight, obs, **kwargs):
1308    """Reweight a list of observables.
1309
1310    Parameters
1311    ----------
1312    weight : Obs
1313        Reweighting factor. An Observable that has to be defined on a superset of the
1314        configurations in obs[i].idl for all i.
1315    obs : list
1316        list of Obs, e.g. [obs1, obs2, obs3].
1317    all_configs : bool
1318        if True, the reweighted observables are normalized by the average of
1319        the reweighting factor on all configurations in weight.idl and not
1320        on the configurations in obs[i].idl. Default False.
1321    """
1322    result = []
1323    for i in range(len(obs)):
1324        if len(obs[i].cov_names):
1325            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1326        if not set(obs[i].names).issubset(weight.names):
1327            raise Exception('Error: Ensembles do not fit')
1328        for name in obs[i].names:
1329            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1330                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1331        new_samples = []
1332        w_deltas = {}
1333        for name in sorted(obs[i].names):
1334            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1335            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1336        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1337
1338        if kwargs.get('all_configs'):
1339            new_weight = weight
1340        else:
1341            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1342
1343        result.append(tmp_obs / new_weight)
1344        result[-1].reweighted = True
1345
1346    return result

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def correlate(obs_a, obs_b):
1349def correlate(obs_a, obs_b):
1350    """Correlate two observables.
1351
1352    Parameters
1353    ----------
1354    obs_a : Obs
1355        First observable
1356    obs_b : Obs
1357        Second observable
1358
1359    Notes
1360    -----
1361    Keep in mind to only correlate primary observables which have not been reweighted
1362    yet. The reweighting has to be applied after correlating the observables.
1363    Currently only works if ensembles are identical (this is not strictly necessary).
1364    """
1365
1366    if sorted(obs_a.names) != sorted(obs_b.names):
1367        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1368    if len(obs_a.cov_names) or len(obs_b.cov_names):
1369        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1370    for name in obs_a.names:
1371        if obs_a.shape[name] != obs_b.shape[name]:
1372            raise Exception('Shapes of ensemble', name, 'do not fit')
1373        if obs_a.idl[name] != obs_b.idl[name]:
1374            raise Exception('idl of ensemble', name, 'do not fit')
1375
1376    if obs_a.reweighted is True:
1377        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1378    if obs_b.reweighted is True:
1379        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1380
1381    new_samples = []
1382    new_idl = []
1383    for name in sorted(obs_a.names):
1384        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1385        new_idl.append(obs_a.idl[name])
1386
1387    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1388    o.reweighted = obs_a.reweighted or obs_b.reweighted
1389    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1392def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1393    r'''Calculates the error covariance matrix of a set of observables.
1394
1395    WARNING: This function should be used with care, especially for observables with support on multiple
1396             ensembles with differing autocorrelations. See the notes below for details.
1397
1398    The gamma method has to be applied first to all observables.
1399
1400    Parameters
1401    ----------
1402    obs : list or numpy.ndarray
1403        List or one dimensional array of Obs
1404    visualize : bool
1405        If True plots the corresponding normalized correlation matrix (default False).
1406    correlation : bool
1407        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1408    smooth : None or int
1409        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1410        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1411        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1412        small ones.
1413
1414    Notes
1415    -----
1416    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1417    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1418    in the absence of autocorrelation.
1419    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1420    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1421    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1422    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1423    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1424    '''
1425
1426    length = len(obs)
1427
1428    max_samples = np.max([o.N for o in obs])
1429    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1430        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1431
1432    cov = np.zeros((length, length))
1433    for i in range(length):
1434        for j in range(i, length):
1435            cov[i, j] = _covariance_element(obs[i], obs[j])
1436    cov = cov + cov.T - np.diag(np.diag(cov))
1437
1438    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1439
1440    if isinstance(smooth, int):
1441        corr = _smooth_eigenvalues(corr, smooth)
1442
1443    if visualize:
1444        plt.matshow(corr, vmin=-1, vmax=1)
1445        plt.set_cmap('RdBu')
1446        plt.colorbar()
1447        plt.draw()
1448
1449    if correlation is True:
1450        return corr
1451
1452    errors = [o.dvalue for o in obs]
1453    cov = np.diag(errors) @ corr @ np.diag(errors)
1454
1455    eigenvalues = np.linalg.eigh(cov)[0]
1456    if not np.all(eigenvalues >= 0):
1457        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1458
1459    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None):
1539def import_jackknife(jacks, name, idl=None):
1540    """Imports jackknife samples and returns an Obs
1541
1542    Parameters
1543    ----------
1544    jacks : numpy.ndarray
1545        numpy array containing the mean value as zeroth entry and
1546        the N jackknife samples as first to Nth entry.
1547    name : str
1548        name of the ensemble the samples are defined on.
1549    """
1550    length = len(jacks) - 1
1551    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1552    samples = jacks[1:] @ prj
1553    mean = np.mean(samples)
1554    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1555    new_obs._value = jacks[0]
1556    return new_obs

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
def merge_obs(list_of_obs):
1559def merge_obs(list_of_obs):
1560    """Combine all observables in list_of_obs into one new observable
1561
1562    Parameters
1563    ----------
1564    list_of_obs : list
1565        list of the Obs object to be combined
1566
1567    Notes
1568    -----
1569    It is not possible to combine obs which are based on the same replicum
1570    """
1571    replist = [item for obs in list_of_obs for item in obs.names]
1572    if (len(replist) == len(set(replist))) is False:
1573        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1574    if any([len(o.cov_names) for o in list_of_obs]):
1575        raise Exception('Not possible to merge data that contains covobs!')
1576    new_dict = {}
1577    idl_dict = {}
1578    for o in list_of_obs:
1579        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1580                        for key in set(o.deltas) | set(o.r_values)})
1581        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1582
1583    names = sorted(new_dict.keys())
1584    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1585    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1586    return o

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None):
1589def cov_Obs(means, cov, name, grad=None):
1590    """Create an Obs based on mean(s) and a covariance matrix
1591
1592    Parameters
1593    ----------
1594    mean : list of floats or float
1595        N mean value(s) of the new Obs
1596    cov : list or array
1597        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1598    name : str
1599        identifier for the covariance matrix
1600    grad : list or array
1601        Gradient of the Covobs wrt. the means belonging to cov.
1602    """
1603
1604    def covobs_to_obs(co):
1605        """Make an Obs out of a Covobs
1606
1607        Parameters
1608        ----------
1609        co : Covobs
1610            Covobs to be embedded into the Obs
1611        """
1612        o = Obs([], [], means=[])
1613        o._value = co.value
1614        o.names.append(co.name)
1615        o._covobs[co.name] = co
1616        o._dvalue = np.sqrt(co.errsq())
1617        return o
1618
1619    ol = []
1620    if isinstance(means, (float, int)):
1621        means = [means]
1622
1623    for i in range(len(means)):
1624        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1625    if ol[0].covobs[name].N != len(means):
1626        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1627    if len(ol) == 1:
1628        return ol[0]
1629    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • mean (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.