pyerrors.obs

   1import warnings
   2import hashlib
   3import pickle
   4from math import gcd
   5from functools import reduce
   6import numpy as np
   7import autograd.numpy as anp  # Thinly-wrapped numpy
   8from autograd import jacobian
   9import matplotlib.pyplot as plt
  10from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  11import numdifftools as nd
  12from itertools import groupby
  13from .covobs import Covobs
  14
  15# Improve print output of numpy.ndarrays containing Obs objects.
  16np.set_printoptions(formatter={'object': lambda x: str(x)})
  17
  18
  19class Obs:
  20    """Class for a general observable.
  21
  22    Instances of Obs are the basic objects of a pyerrors error analysis.
  23    They are initialized with a list which contains arrays of samples for
  24    different ensembles/replica and another list of same length which contains
  25    the names of the ensembles/replica. Mathematical operations can be
  26    performed on instances. The result is another instance of Obs. The error of
  27    an instance can be computed with the gamma_method. Also contains additional
  28    methods for output and visualization of the error calculation.
  29
  30    Attributes
  31    ----------
  32    S_global : float
  33        Standard value for S (default 2.0)
  34    S_dict : dict
  35        Dictionary for S values. If an entry for a given ensemble
  36        exists this overwrites the standard value for that ensemble.
  37    tau_exp_global : float
  38        Standard value for tau_exp (default 0.0)
  39    tau_exp_dict : dict
  40        Dictionary for tau_exp values. If an entry for a given ensemble exists
  41        this overwrites the standard value for that ensemble.
  42    N_sigma_global : float
  43        Standard value for N_sigma (default 1.0)
  44    N_sigma_dict : dict
  45        Dictionary for N_sigma values. If an entry for a given ensemble exists
  46        this overwrites the standard value for that ensemble.
  47    """
  48    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  49                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  50                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  51                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  52                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
  53
  54    S_global = 2.0
  55    S_dict = {}
  56    tau_exp_global = 0.0
  57    tau_exp_dict = {}
  58    N_sigma_global = 1.0
  59    N_sigma_dict = {}
  60
  61    def __init__(self, samples, names, idl=None, **kwargs):
  62        """ Initialize Obs object.
  63
  64        Parameters
  65        ----------
  66        samples : list
  67            list of numpy arrays containing the Monte Carlo samples
  68        names : list
  69            list of strings labeling the individual samples
  70        idl : list, optional
  71            list of ranges or lists on which the samples are defined
  72        """
  73
  74        if kwargs.get("means") is None and len(samples):
  75            if len(samples) != len(names):
  76                raise Exception('Length of samples and names incompatible.')
  77            if idl is not None:
  78                if len(idl) != len(names):
  79                    raise Exception('Length of idl incompatible with samples and names.')
  80            name_length = len(names)
  81            if name_length > 1:
  82                if name_length != len(set(names)):
  83                    raise Exception('names are not unique.')
  84                if not all(isinstance(x, str) for x in names):
  85                    raise TypeError('All names have to be strings.')
  86            else:
  87                if not isinstance(names[0], str):
  88                    raise TypeError('All names have to be strings.')
  89            if min(len(x) for x in samples) <= 4:
  90                raise Exception('Samples have to have at least 5 entries.')
  91
  92        self.names = sorted(names)
  93        self.shape = {}
  94        self.r_values = {}
  95        self.deltas = {}
  96        self._covobs = {}
  97
  98        self._value = 0
  99        self.N = 0
 100        self.is_merged = {}
 101        self.idl = {}
 102        if idl is not None:
 103            for name, idx in sorted(zip(names, idl)):
 104                if isinstance(idx, range):
 105                    self.idl[name] = idx
 106                elif isinstance(idx, (list, np.ndarray)):
 107                    dc = np.unique(np.diff(idx))
 108                    if np.any(dc < 0):
 109                        raise Exception("Unsorted idx for idl[%s]" % (name))
 110                    if len(dc) == 1:
 111                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 112                    else:
 113                        self.idl[name] = list(idx)
 114                else:
 115                    raise Exception('incompatible type for idl[%s].' % (name))
 116        else:
 117            for name, sample in sorted(zip(names, samples)):
 118                self.idl[name] = range(1, len(sample) + 1)
 119
 120        if kwargs.get("means") is not None:
 121            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 122                self.shape[name] = len(self.idl[name])
 123                self.N += self.shape[name]
 124                self.r_values[name] = mean
 125                self.deltas[name] = sample
 126        else:
 127            for name, sample in sorted(zip(names, samples)):
 128                self.shape[name] = len(self.idl[name])
 129                self.N += self.shape[name]
 130                if len(sample) != self.shape[name]:
 131                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 132                self.r_values[name] = np.mean(sample)
 133                self.deltas[name] = sample - self.r_values[name]
 134                self._value += self.shape[name] * self.r_values[name]
 135            self._value /= self.N
 136
 137        self._dvalue = 0.0
 138        self.ddvalue = 0.0
 139        self.reweighted = False
 140
 141        self.tag = None
 142
    @property
    def value(self):
        """Read-only access to the stored central value `_value`."""
        return self._value
 146
    @property
    def dvalue(self):
        """Read-only access to the stored error `_dvalue`.

        The value is 0.0 after construction and is filled by `gamma_method`.
        """
        return self._dvalue
 150
 151    @property
 152    def e_names(self):
 153        return sorted(set([o.split('|')[0] for o in self.names]))
 154
 155    @property
 156    def cov_names(self):
 157        return sorted(set([o for o in self.covobs.keys()]))
 158
 159    @property
 160    def mc_names(self):
 161        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 162
 163    @property
 164    def e_content(self):
 165        res = {}
 166        for e, e_name in enumerate(self.e_names):
 167            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 168            if e_name in self.names:
 169                res[e_name].append(e_name)
 170        return res
 171
    @property
    def covobs(self):
        """Read-only access to the `_covobs` dictionary."""
        return self._covobs
 175
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        The results are stored on the instance: the total error in `_dvalue`
        (exposed as `dvalue`), its error in `ddvalue`, and per-ensemble
        results in the `e_*` dictionaries. Nothing is returned.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimates coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)

        Raises
        ------
        Exception
            If S, tau_exp or N_sigma is negative, if the replica of an
            ensemble are not equally spaced, or if tau_exp > 0 and the
            summation window is too short.
        TypeError
            If S, tau_exp or N_sigma is passed but is not an int or float.
        """

        e_content = self.e_content
        # Reset all results of a previous run.
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        # fft is only disabled when it is explicitly passed as False.
        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # Fill self.<kwarg_name>[e_name] for every ensemble: an explicitly
            # passed value applies to all ensembles, otherwise the class-level
            # <kwarg_name>_dict entry or the <kwarg_name>_global default is used.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            # Length of each replicum: number of entries for ranges, covered
            # span of configuration numbers for irregular lists.
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            # The summation window is limited to half of the longest replicum.
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            # Sum the unnormalized autocorrelation function over all replica.
            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # Normalization: the same computation on arrays of ones; values
            # below 1 are set to 1 before dividing.
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            # Vanishing variance: store trivial results for this ensemble and
            # skip the rest of the analysis (no contribution to the total error).
            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            # Spacing of each replicum: 1 for ranges, the smallest difference
            # between consecutive configurations for irregular lists.
            gaps = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    gaps.append(1)
                else:
                    gaps.append(np.min(np.diff(self.idl[r_name])))

            if not np.all([gi == gaps[0] for gi in gaps]):
                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
            else:
                gapsize = gaps[0]

            # Normalized autocorrelation function and the integrated
            # autocorrelation time as a function of the window size.
            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Fill self.e_drho[e_name][i] from the current e_rho values.
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(gapsize)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(gapsize, w_max // 2, gapsize):
                    _compute_drho(n + gapsize)
                    # Stop at the first window where rho is compatible with
                    # zero within N_sigma errors, or near the end of the scan.
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # S == 0: no autocorrelation assumed, window size 0.
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(gapsize * n + gapsize)
                        # The window is fixed at the first sign change of g_w
                        # or at the last admissible n.
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            # Convert the window index to units of configurations.
                            n *= gapsize
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Ensemble errors are accumulated in quadrature.
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        # Covobs contribute their squared error; their error of the error is 0.
        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

    # Short alias for gamma_method.
    gm = gamma_method
 352
 353    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
 354        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 355           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 356
 357        Parameters
 358        ----------
 359        deltas : list
 360            List of fluctuations
 361        idx : list
 362            List or range of configurations on which the deltas are defined.
 363        shape : int
 364            Number of configurations in idx.
 365        w_max : int
 366            Upper bound for the summation window.
 367        fft : bool
 368            determines whether the fft algorithm is used for the computation
 369            of the autocorrelation function.
 370        """
 371        gamma = np.zeros(w_max)
 372        deltas = _expand_deltas(deltas, idx, shape)
 373        new_shape = len(deltas)
 374        if fft:
 375            max_gamma = min(new_shape, w_max)
 376            # The padding for the fft has to be even
 377            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 378            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 379        else:
 380            for n in range(w_max):
 381                if new_shape - n >= 0:
 382                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 383
 384        return gamma
 385
 386    def details(self, ens_content=True):
 387        """Output detailed properties of the Obs.
 388
 389        Parameters
 390        ----------
 391        ens_content : bool
 392            print details about the ensembles and replica if true.
 393        """
 394        if self.tag is not None:
 395            print("Description:", self.tag)
 396        if not hasattr(self, 'e_dvalue'):
 397            print('Result\t %3.8e' % (self.value))
 398        else:
 399            if self.value == 0.0:
 400                percentage = np.nan
 401            else:
 402                percentage = np.abs(self._dvalue / self.value) * 100
 403            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 404            if len(self.e_names) > 1:
 405                print(' Ensemble errors:')
 406            e_content = self.e_content
 407            for e_name in self.mc_names:
 408                if isinstance(self.idl[e_content[e_name][0]], range):
 409                    gap = self.idl[e_content[e_name][0]].step
 410                else:
 411                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
 412
 413                if len(self.e_names) > 1:
 414                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 415                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 416                tau_string += f" in units of {gap} config"
 417                if gap > 1:
 418                    tau_string += "s"
 419                if self.tau_exp[e_name] > 0:
 420                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 421                else:
 422                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 423                print(tau_string)
 424            for e_name in self.cov_names:
 425                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 426        if ens_content is True:
 427            if len(self.e_names) == 1:
 428                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 429            else:
 430                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 431            my_string_list = []
 432            for key, value in sorted(self.e_content.items()):
 433                if key not in self.covobs:
 434                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 435                    if len(value) == 1:
 436                        my_string += f': {self.shape[value[0]]} configurations'
 437                        if isinstance(self.idl[value[0]], range):
 438                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 439                        else:
 440                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 441                    else:
 442                        sublist = []
 443                        for v in value:
 444                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 445                            my_substring += f': {self.shape[v]} configurations'
 446                            if isinstance(self.idl[v], range):
 447                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 448                            else:
 449                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 450                            sublist.append(my_substring)
 451
 452                        my_string += '\n' + '\n'.join(sublist)
 453                else:
 454                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 455                my_string_list.append(my_string)
 456            print('\n'.join(my_string_list))
 457
    def reweight(self, weight):
        """Reweight the obs with given reweighting factors.

        Calls the module-level `reweight` function with this Obs as the only
        observable and returns the first entry of its result.

        Parameters
        ----------
        weight : Obs
            Reweighting factor. An Observable that has to be defined on a superset of the
            configurations on which this Obs is defined.

        Notes
        -----
        This method does not accept an `all_configs` argument; only `weight`
        is forwarded to the module-level `reweight` function.
        """
        return reweight(weight, [self])[0]
 472
 473    def is_zero_within_error(self, sigma=1):
 474        """Checks whether the observable is zero within 'sigma' standard errors.
 475
 476        Parameters
 477        ----------
 478        sigma : int
 479            Number of standard errors used for the check.
 480
 481        Works only properly when the gamma method was run.
 482        """
 483        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 484
 485    def is_zero(self, atol=1e-10):
 486        """Checks whether the observable is zero within a given tolerance.
 487
 488        Parameters
 489        ----------
 490        atol : float
 491            Absolute tolerance (for details see numpy documentation).
 492        """
 493        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 494
 495    def plot_tauint(self, save=None):
 496        """Plot integrated autocorrelation time for each ensemble.
 497
 498        Parameters
 499        ----------
 500        save : str
 501            saves the figure to a file named 'save' if.
 502        """
 503        if not hasattr(self, 'e_dvalue'):
 504            raise Exception('Run the gamma method first.')
 505
 506        for e, e_name in enumerate(self.mc_names):
 507            fig = plt.figure()
 508            plt.xlabel(r'$W$')
 509            plt.ylabel(r'$\tau_\mathrm{int}$')
 510            length = int(len(self.e_n_tauint[e_name]))
 511            if self.tau_exp[e_name] > 0:
 512                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 513                x_help = np.arange(2 * self.tau_exp[e_name])
 514                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 515                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 516                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 517                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 518                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 519                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 520                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 521            else:
 522                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 523                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 524
 525            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 526            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 527            plt.legend()
 528            plt.xlim(-0.5, xmax)
 529            ylim = plt.ylim()
 530            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 531            plt.draw()
 532            if save:
 533                fig.savefig(save + "_" + str(e))
 534
 535    def plot_rho(self, save=None):
 536        """Plot normalized autocorrelation function time for each ensemble.
 537
 538        Parameters
 539        ----------
 540        save : str
 541            saves the figure to a file named 'save' if.
 542        """
 543        if not hasattr(self, 'e_dvalue'):
 544            raise Exception('Run the gamma method first.')
 545        for e, e_name in enumerate(self.mc_names):
 546            fig = plt.figure()
 547            plt.xlabel('W')
 548            plt.ylabel('rho')
 549            length = int(len(self.e_drho[e_name]))
 550            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 551            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 552            if self.tau_exp[e_name] > 0:
 553                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 554                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 555                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 556                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 557            else:
 558                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 559                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 560            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 561            plt.xlim(-0.5, xmax)
 562            plt.draw()
 563            if save:
 564                fig.savefig(save + "_" + str(e))
 565
 566    def plot_rep_dist(self):
 567        """Plot replica distribution for each ensemble with more than one replicum."""
 568        if not hasattr(self, 'e_dvalue'):
 569            raise Exception('Run the gamma method first.')
 570        for e, e_name in enumerate(self.mc_names):
 571            if len(self.e_content[e_name]) == 1:
 572                print('No replica distribution for a single replicum (', e_name, ')')
 573                continue
 574            r_length = []
 575            sub_r_mean = 0
 576            for r, r_name in enumerate(self.e_content[e_name]):
 577                r_length.append(len(self.deltas[r_name]))
 578                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 579            e_N = np.sum(r_length)
 580            sub_r_mean /= e_N
 581            arr = np.zeros(len(self.e_content[e_name]))
 582            for r, r_name in enumerate(self.e_content[e_name]):
 583                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 584            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 585            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 586            plt.draw()
 587
 588    def plot_history(self, expand=True):
 589        """Plot derived Monte Carlo history for each ensemble
 590
 591        Parameters
 592        ----------
 593        expand : bool
 594            show expanded history for irregular Monte Carlo chains (default: True).
 595        """
 596        for e, e_name in enumerate(self.mc_names):
 597            plt.figure()
 598            r_length = []
 599            tmp = []
 600            tmp_expanded = []
 601            for r, r_name in enumerate(self.e_content[e_name]):
 602                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 603                if expand:
 604                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
 605                    r_length.append(len(tmp_expanded[-1]))
 606                else:
 607                    r_length.append(len(tmp[-1]))
 608            e_N = np.sum(r_length)
 609            x = np.arange(e_N)
 610            y_test = np.concatenate(tmp, axis=0)
 611            if expand:
 612                y = np.concatenate(tmp_expanded, axis=0)
 613            else:
 614                y = y_test
 615            plt.errorbar(x, y, fmt='.', markersize=3)
 616            plt.xlim(-0.5, e_N - 0.5)
 617            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 618            plt.draw()
 619
 620    def plot_piechart(self, save=None):
 621        """Plot piechart which shows the fractional contribution of each
 622        ensemble to the error and returns a dictionary containing the fractions.
 623
 624        Parameters
 625        ----------
 626        save : str
 627            saves the figure to a file named 'save' if.
 628        """
 629        if not hasattr(self, 'e_dvalue'):
 630            raise Exception('Run the gamma method first.')
 631        if np.isclose(0.0, self._dvalue, atol=1e-15):
 632            raise Exception('Error is 0.0')
 633        labels = self.e_names
 634        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 635        fig1, ax1 = plt.subplots()
 636        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 637        ax1.axis('equal')
 638        plt.draw()
 639        if save:
 640            fig1.savefig(save)
 641
 642        return dict(zip(self.e_names, sizes))
 643
 644    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 645        """Dump the Obs to a file 'name' of chosen format.
 646
 647        Parameters
 648        ----------
 649        filename : str
 650            name of the file to be saved.
 651        datatype : str
 652            Format of the exported file. Supported formats include
 653            "json.gz" and "pickle"
 654        description : str
 655            Description for output file, only relevant for json.gz format.
 656        path : str
 657            specifies a custom path for the file (default '.')
 658        """
 659        if 'path' in kwargs:
 660            file_name = kwargs.get('path') + '/' + filename
 661        else:
 662            file_name = filename
 663
 664        if datatype == "json.gz":
 665            from .input.json import dump_to_json
 666            dump_to_json([self], file_name, description=description)
 667        elif datatype == "pickle":
 668            with open(file_name + '.p', 'wb') as fb:
 669                pickle.dump(self, fb)
 670        else:
 671            raise Exception("Unknown datatype " + str(datatype))
 672
 673    def export_jackknife(self):
 674        """Export jackknife samples from the Obs
 675
 676        Returns
 677        -------
 678        numpy.ndarray
 679            Returns a numpy array of length N + 1 where N is the number of samples
 680            for the given ensemble and replicum. The zeroth entry of the array contains
 681            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 682            derived from the Obs. The current implementation only works for observables
 683            defined on exactly one ensemble and replicum. The derived jackknife samples
 684            should agree with samples from a full jackknife analysis up to O(1/N).
 685        """
 686
 687        if len(self.names) != 1:
 688            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 689
 690        name = self.names[0]
 691        full_data = self.deltas[name] + self.r_values[name]
 692        n = full_data.size
 693        mean = self.value
 694        tmp_jacks = np.zeros(n + 1)
 695        tmp_jacks[0] = mean
 696        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 697        return tmp_jacks
 698
 699    def __float__(self):
 700        return float(self.value)
 701
 702    def __repr__(self):
 703        return 'Obs[' + str(self) + ']'
 704
 705    def __str__(self):
 706        return _format_uncertainty(self.value, self._dvalue)
 707
 708    def __hash__(self):
 709        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 710        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 711        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 712        hash_tuple += tuple([o.encode() for o in self.names])
 713        m = hashlib.md5()
 714        [m.update(o) for o in hash_tuple]
 715        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 716
 717    # Overload comparisons
 718    def __lt__(self, other):
 719        return self.value < other
 720
 721    def __le__(self, other):
 722        return self.value <= other
 723
 724    def __gt__(self, other):
 725        return self.value > other
 726
 727    def __ge__(self, other):
 728        return self.value >= other
 729
 730    def __eq__(self, other):
 731        return (self - other).is_zero()
 732
 733    def __ne__(self, other):
 734        return not (self - other).is_zero()
 735
 736    # Overload math operations
 737    def __add__(self, y):
 738        if isinstance(y, Obs):
 739            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 740        else:
 741            if isinstance(y, np.ndarray):
 742                return np.array([self + o for o in y])
 743            elif y.__class__.__name__ in ['Corr', 'CObs']:
 744                return NotImplemented
 745            else:
 746                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 747
 748    def __radd__(self, y):
 749        return self + y
 750
 751    def __mul__(self, y):
 752        if isinstance(y, Obs):
 753            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 754        else:
 755            if isinstance(y, np.ndarray):
 756                return np.array([self * o for o in y])
 757            elif isinstance(y, complex):
 758                return CObs(self * y.real, self * y.imag)
 759            elif y.__class__.__name__ in ['Corr', 'CObs']:
 760                return NotImplemented
 761            else:
 762                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 763
 764    def __rmul__(self, y):
 765        return self * y
 766
 767    def __sub__(self, y):
 768        if isinstance(y, Obs):
 769            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 770        else:
 771            if isinstance(y, np.ndarray):
 772                return np.array([self - o for o in y])
 773            elif y.__class__.__name__ in ['Corr', 'CObs']:
 774                return NotImplemented
 775            else:
 776                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 777
 778    def __rsub__(self, y):
 779        return -1 * (self - y)
 780
    def __pos__(self):
        """Unary plus: returns the very same object, no copy is made."""
        return self
 783
 784    def __neg__(self):
 785        return -1 * self
 786
 787    def __truediv__(self, y):
 788        if isinstance(y, Obs):
 789            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 790        else:
 791            if isinstance(y, np.ndarray):
 792                return np.array([self / o for o in y])
 793            elif y.__class__.__name__ in ['Corr', 'CObs']:
 794                return NotImplemented
 795            else:
 796                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 797
 798    def __rtruediv__(self, y):
 799        if isinstance(y, Obs):
 800            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 801        else:
 802            if isinstance(y, np.ndarray):
 803                return np.array([o / self for o in y])
 804            elif y.__class__.__name__ in ['Corr', 'CObs']:
 805                return NotImplemented
 806            else:
 807                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 808
 809    def __pow__(self, y):
 810        if isinstance(y, Obs):
 811            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 812        else:
 813            return derived_observable(lambda x: x[0] ** y, [self])
 814
 815    def __rpow__(self, y):
 816        if isinstance(y, Obs):
 817            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 818        else:
 819            return derived_observable(lambda x: y ** x[0], [self])
 820
 821    def __abs__(self):
 822        return derived_observable(lambda x: anp.abs(x[0]), [self])
 823
 824    # Overload numpy functions
 825    def sqrt(self):
 826        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 827
 828    def log(self):
 829        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 830
 831    def exp(self):
 832        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 833
 834    def sin(self):
 835        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 836
 837    def cos(self):
 838        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 839
 840    def tan(self):
 841        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 842
 843    def arcsin(self):
 844        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 845
 846    def arccos(self):
 847        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 848
 849    def arctan(self):
 850        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 851
 852    def sinh(self):
 853        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 854
 855    def cosh(self):
 856        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 857
 858    def tanh(self):
 859        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 860
 861    def arcsinh(self):
 862        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 863
 864    def arccosh(self):
 865        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 866
 867    def arctanh(self):
 868        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 869
 870
 871class CObs:
 872    """Class for a complex valued observable."""
 873    __slots__ = ['_real', '_imag', 'tag']
 874
 875    def __init__(self, real, imag=0.0):
 876        self._real = real
 877        self._imag = imag
 878        self.tag = None
 879
 880    @property
 881    def real(self):
 882        return self._real
 883
 884    @property
 885    def imag(self):
 886        return self._imag
 887
 888    def gamma_method(self, **kwargs):
 889        """Executes the gamma_method for the real and the imaginary part."""
 890        if isinstance(self.real, Obs):
 891            self.real.gamma_method(**kwargs)
 892        if isinstance(self.imag, Obs):
 893            self.imag.gamma_method(**kwargs)
 894
 895    def is_zero(self):
 896        """Checks whether both real and imaginary part are zero within machine precision."""
 897        return self.real == 0.0 and self.imag == 0.0
 898
 899    def conjugate(self):
 900        return CObs(self.real, -self.imag)
 901
 902    def __add__(self, other):
 903        if isinstance(other, np.ndarray):
 904            return other + self
 905        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 906            return CObs(self.real + other.real,
 907                        self.imag + other.imag)
 908        else:
 909            return CObs(self.real + other, self.imag)
 910
 911    def __radd__(self, y):
 912        return self + y
 913
 914    def __sub__(self, other):
 915        if isinstance(other, np.ndarray):
 916            return -1 * (other - self)
 917        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 918            return CObs(self.real - other.real, self.imag - other.imag)
 919        else:
 920            return CObs(self.real - other, self.imag)
 921
 922    def __rsub__(self, other):
 923        return -1 * (self - other)
 924
 925    def __mul__(self, other):
 926        if isinstance(other, np.ndarray):
 927            return other * self
 928        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 929            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 930                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 931                                               [self.real, other.real, self.imag, other.imag],
 932                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 933                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 934                                               [self.real, other.real, self.imag, other.imag],
 935                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 936            elif getattr(other, 'imag', 0) != 0:
 937                return CObs(self.real * other.real - self.imag * other.imag,
 938                            self.imag * other.real + self.real * other.imag)
 939            else:
 940                return CObs(self.real * other.real, self.imag * other.real)
 941        else:
 942            return CObs(self.real * other, self.imag * other)
 943
 944    def __rmul__(self, other):
 945        return self * other
 946
 947    def __truediv__(self, other):
 948        if isinstance(other, np.ndarray):
 949            return 1 / (other / self)
 950        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 951            r = other.real ** 2 + other.imag ** 2
 952            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 953        else:
 954            return CObs(self.real / other, self.imag / other)
 955
 956    def __rtruediv__(self, other):
 957        r = self.real ** 2 + self.imag ** 2
 958        if hasattr(other, 'real') and hasattr(other, 'imag'):
 959            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 960        else:
 961            return CObs(self.real * other / r, -self.imag * other / r)
 962
 963    def __abs__(self):
 964        return np.sqrt(self.real**2 + self.imag**2)
 965
 966    def __pos__(self):
 967        return self
 968
 969    def __neg__(self):
 970        return -1 * self
 971
 972    def __eq__(self, other):
 973        return self.real == other.real and self.imag == other.imag
 974
 975    def __str__(self):
 976        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 977
 978    def __repr__(self):
 979        return 'CObs[' + str(self) + ']'
 980
 981
 982def _format_uncertainty(value, dvalue):
 983    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
 984    if dvalue == 0.0:
 985        return str(value)
 986    fexp = np.floor(np.log10(dvalue))
 987    if fexp < 0.0:
 988        return '{:{form}}({:2.0f})'.format(value, dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
 989    elif fexp == 0.0:
 990        return '{:.1f}({:1.1f})'.format(value, dvalue)
 991    else:
 992        return '{:.0f}({:2.0f})'.format(value, dvalue)
 993
 994
 995def _expand_deltas(deltas, idx, shape):
 996    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
 997       If idx is of type range, the deltas are not changed
 998
 999    Parameters
1000    ----------
1001    deltas : list
1002        List of fluctuations
1003    idx : list
1004        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1005    shape : int
1006        Number of configs in idx.
1007    """
1008    if isinstance(idx, range):
1009        return deltas
1010    else:
1011        ret = np.zeros(idx[-1] - idx[0] + 1)
1012        for i in range(shape):
1013            ret[idx[i] - idx[0]] = deltas[i]
1014        return ret
1015
1016
1017def _merge_idx(idl):
1018    """Returns the union of all lists in idl as sorted list
1019
1020    Parameters
1021    ----------
1022    idl : list
1023        List of lists or ranges.
1024    """
1025
1026    # Use groupby to efficiently check whether all elements of idl are identical
1027    try:
1028        g = groupby(idl)
1029        if next(g, True) and not next(g, False):
1030            return idl[0]
1031    except Exception:
1032        pass
1033
1034    if np.all([type(idx) is range for idx in idl]):
1035        if len(set([idx[0] for idx in idl])) == 1:
1036            idstart = min([idx.start for idx in idl])
1037            idstop = max([idx.stop for idx in idl])
1038            idstep = min([idx.step for idx in idl])
1039            return range(idstart, idstop, idstep)
1040
1041    return sorted(set().union(*idl))
1042
1043
1044def _intersection_idx(idl):
1045    """Returns the intersection of all lists in idl as sorted list
1046
1047    Parameters
1048    ----------
1049    idl : list
1050        List of lists or ranges.
1051    """
1052
1053    def _lcm(*args):
1054        """Returns the lowest common multiple of args.
1055
1056        From python 3.9 onwards the math library contains an lcm function."""
1057        return reduce(lambda a, b: a * b // gcd(a, b), args)
1058
1059    # Use groupby to efficiently check whether all elements of idl are identical
1060    try:
1061        g = groupby(idl)
1062        if next(g, True) and not next(g, False):
1063            return idl[0]
1064    except Exception:
1065        pass
1066
1067    if np.all([type(idx) is range for idx in idl]):
1068        if len(set([idx[0] for idx in idl])) == 1:
1069            idstart = max([idx.start for idx in idl])
1070            idstop = min([idx.stop for idx in idl])
1071            idstep = _lcm(*[idx.step for idx in idl])
1072            return range(idstart, idstop, idstep)
1073
1074    return sorted(set.intersection(*[set(o) for o in idl]))
1075
1076
1077def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1078    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1079       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1080       common divisor of the step sizes is used as new step size.
1081
1082    Parameters
1083    ----------
1084    deltas : list
1085        List of fluctuations
1086    idx : list
1087        List or range of configs on which the deltas are defined.
1088        Has to be a subset of new_idx and has to be sorted in ascending order.
1089    shape : list
1090        Number of configs in idx.
1091    new_idx : list
1092        List of configs that defines the new range, has to be sorted in ascending order.
1093    """
1094
1095    if type(idx) is range and type(new_idx) is range:
1096        if idx == new_idx:
1097            return deltas
1098    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1099    for i in range(shape):
1100        ret[idx[i] - new_idx[0]] = deltas[i]
1101    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1102
1103
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    array_mode : bool
        if True, the fluctuations and covobs gradients of all entries of
        data are extracted once up front and contracted with the derivative
        via np.tensordot instead of being accumulated Obs by Obs
        (default False).
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Returns
    -------
    Obs or numpy.ndarray
        A single Obs if func returns a scalar, otherwise an object array of
        Obs with the shape of the return value of func.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

    All kwargs (including man_grad and num_grad) are also forwarded to func,
    so func has to accept them when they are used.
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                # Plain numbers are wrapped into a dummy covobs with zero error.
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    # Collect the covariance matrix of every covobs name and make sure that
    # all inputs agree on it.
    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    # Names that carry Monte Carlo samples (everything that is not a covobs).
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    # A name counts as merged/reweighted as soon as one input is flagged accordingly.
    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    # Central values of the inputs in the shape of data.
    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    # multi is 1 if func returned an array and 0 otherwise.
    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            # Inputs without this name enter with their central value.
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        # func evaluated on the per-name means of the inputs.
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            # Also flag the name as merged if the idls differ in length.
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    # Determine the jacobian of func: manual, numerical or via autograd.
    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        # Defaults can be overwritten by kwargs of the same name.
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        # Stand-in for inputs that do not carry a given covobs.
        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        # Expanded fluctuations per name, one array per entry along the first axis of data.
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        # Same for the gradients of the covobs.
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    # Build one Obs for every element of the return value of func.
    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            # Linear error propagation: sum derivative times fluctuation
            # (or covobs gradient) over all inputs.
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                new_samples.append(new_deltas[name])
                new_idl.append(new_idl_d[name])
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        # The Obs is created from the sample names only; covobs names and
        # bookkeeping attributes are attached afterwards.
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    # Scalar result: unwrap the zero-dimensional object array.
    if multi == 0:
        final_result = final_result.item()

    return final_result
1276
1277
1278def _reduce_deltas(deltas, idx_old, idx_new):
1279    """Extract deltas defined on idx_old on all configs of idx_new.
1280
1281    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1282    are ordered in an ascending order.
1283
1284    Parameters
1285    ----------
1286    deltas : list
1287        List of fluctuations
1288    idx_old : list
1289        List or range of configs on which the deltas are defined
1290    idx_new : list
1291        List of configs for which we want to extract the deltas.
1292        Has to be a subset of idx_old.
1293    """
1294    if not len(deltas) == len(idx_old):
1295        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1296    if type(idx_old) is range and type(idx_new) is range:
1297        if idx_old == idx_new:
1298            return deltas
1299    # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical
1300    try:
1301        g = groupby([idx_old, idx_new])
1302        if next(g, True) and not next(g, False):
1303            return deltas
1304    except Exception:
1305        pass
1306    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1307    if len(indices) < len(idx_new):
1308        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1309    return np.array(deltas)[indices]
1310
1311
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.

    Returns
    -------
    list
        One reweighted Obs per entry of obs, flagged with reweighted = True.
    """
    result = []
    for i, current in enumerate(obs):
        if len(current.cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(current.names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in current.names:
            if not set(current.idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))

        ordered_names = sorted(current.names)
        # Fluctuations of the weight restricted to the configs of the observable.
        w_deltas = {name: _reduce_deltas(weight.deltas[name], weight.idl[name], current.idl[name])
                    for name in ordered_names}
        # Sample-wise product of weight and observable.
        new_samples = [(w_deltas[name] + weight.r_values[name]) * (current.deltas[name] + current.r_values[name])
                       for name in ordered_names]
        tmp_obs = Obs(new_samples, list(ordered_names), idl=[current.idl[name] for name in ordered_names])

        if kwargs.get('all_configs'):
            new_weight = weight
        else:
            restricted_samples = [w_deltas[name] + weight.r_values[name] for name in ordered_names]
            new_weight = Obs(restricted_samples, list(ordered_names), idl=[current.idl[name] for name in ordered_names])

        reweighted_obs = tmp_obs / new_weight
        reweighted_obs.reweighted = True
        reweighted_obs.is_merged = current.is_merged
        result.append(reweighted_obs)

    return result
1353
1354
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Returns
    -------
    Obs
        Observable built from the sample-wise product of obs_a and obs_b.

    Raises
    ------
    Exception
        If the ensembles, shapes or idl of the two observables differ or
        if one of them contains covobs.

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        # Format the message into one string; several positional arguments
        # would be rendered as a tuple.
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception(f'Shapes of ensemble {name} do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception(f'idl of ensemble {name} do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        # Sample-wise product of the reconstructed original samples.
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o
1397
1398
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Compute the error covariance matrix for a collection of observables.

    WARNING: Use this function with care, in particular for observables with support on several
             ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied to all observables beforehand.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True the normalized correlation matrix is plotted (default False).
    correlation : bool
        If True the correlation matrix is returned instead of the error covariance matrix (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix. It leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid
        extremely small ones.
    **kwargs
        Accepted but not used by this function.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by first computing the correlation matrix under the assumption of no autocorrelation and then rescaling it by the full errors, which include the autocorrelation estimate of the preceding gamma method. The covariance at window size 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big(\sum_{i}v_i\delta_i^s\Big)^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble this approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element equals the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    n_obs = len(obs)

    max_samples = np.max([o.N for o in obs])
    if max_samples <= n_obs and not any(o.cov_names for o in obs):
        warnings.warn(f"The dimension of the covariance matrix ({n_obs}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Only the upper triangle is evaluated explicitly, the lower one follows from symmetry.
    upper = np.zeros((n_obs, n_obs))
    for row in range(n_obs):
        for col in range(row, n_obs):
            upper[row, col] = _covariance_element(obs[row], obs[col])
    cov = upper + upper.T - np.diag(np.diag(upper))

    # Normalize by the autocorrelation-free standard deviations.
    inv_sigma = np.diag(1 / np.sqrt(np.diag(cov)))
    corr = inv_sigma @ cov @ inv_sigma

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    # Rescale the correlation matrix with the full errors of the observables.
    sigma = np.diag([o.dvalue for o in obs])
    cov = sigma @ corr @ sigma

    spectrum = np.linalg.eigh(cov)[0]
    if not np.all(spectrum >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(spectrum) + ")", RuntimeWarning)

    return cov
1467
1468
1469def _smooth_eigenvalues(corr, E):
1470    """Eigenvalue smoothing as described in hep-lat/9412087
1471
1472    corr : np.ndarray
1473        correlation matrix
1474    E : integer
1475        Number of eigenvalues to be left substantially unchanged
1476    """
1477    if not (2 < E < corr.shape[0] - 1):
1478        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1479    vals, vec = np.linalg.eigh(corr)
1480    lambda_min = np.mean(vals[:-E])
1481    vals[vals < lambda_min] = lambda_min
1482    vals /= np.mean(vals)
1483    return vec @ np.diag(vals) @ vec.T
1484
1485
def _covariance_element(obs1, obs2):
    """Estimates the covariance of two Obs objects, neglecting autocorrelations.

    Returns 0.0 if the two Obs do not share any name. Otherwise the gamma
    method has to have been applied to both Obs. For every Monte Carlo
    ensemble present in both Obs the summed product of the fluctuations on the
    common configurations is divided by the corresponding normalization and
    added to the result. For every covobs present in both Obs the term
    grad1^T cov grad2 is added.
    """

    def _overlap_sum(deltas_a, deltas_b, idx_a, idx_b, common_idx):
        # Sum of the products of both fluctuation sets restricted to common_idx.
        reduced_a = _reduce_deltas(deltas_a, idx_a, common_idx)
        reduced_b = _reduce_deltas(deltas_b, idx_b, common_idx)
        return np.sum(reduced_a * reduced_b)

    if set(obs1.names).isdisjoint(set(obs2.names)):
        return 0.0

    if not (hasattr(obs1, 'e_dvalue') and hasattr(obs2, 'e_dvalue')):
        raise Exception('The gamma method has to be applied to both Obs first.')

    total = 0.0

    content1 = obs1.e_content
    content2 = obs2.e_content
    mc_names2 = obs2.mc_names

    for e_name in obs1.mc_names:
        if e_name not in mc_names2:
            continue

        # Replica of this ensemble which are present in both observables.
        shared = [r_name for r_name in content1[e_name] if r_name in content2[e_name]]
        common_idl = {r_name: _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) for r_name in shared}
        overlapping = [r_name for r_name in shared if len(common_idl[r_name]) != 0]

        gamma = 0.0
        for r_name in overlapping:
            gamma += _overlap_sum(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], common_idl[r_name])

        if gamma == 0.0:
            continue

        normalization = 0.0
        for r_name in overlapping:
            var1 = _overlap_sum(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], common_idl[r_name])
            var2 = _overlap_sum(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], common_idl[r_name])
            normalization += np.sqrt(var1 * var2)

        total += gamma / normalization

    cov_names2 = obs2.cov_names
    for e_name in obs1.cov_names:
        if e_name not in cov_names2:
            continue

        grad1 = obs1.covobs[e_name].grad
        grad2 = obs2.covobs[e_name].grad
        total += float(np.dot(np.transpose(grad1), np.dot(obs1.covobs[e_name].cov, grad2)))

    return total
1544
1545
def import_jackknife(jacks, name, idl=None):
    """Build an Obs from a set of jackknife samples

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list, optional
        passed on unchanged to the Obs constructor.
    """
    n_jack = len(jacks) - 1
    # Linear map applied to the jackknife samples: 1 off the diagonal, 2 - N on it.
    projector = np.ones((n_jack, n_jack)) - (n_jack - 1) * np.eye(n_jack)
    samples = jacks[1:] @ projector
    sample_mean = np.mean(samples)
    result = Obs([samples - sample_mean], [name], idl=idl, means=[sample_mean])
    # The central value is taken from the zeroth entry of jacks.
    result._value = jacks[0]
    return result
1564
1565
def merge_obs(list_of_obs):
    """Merge all observables in list_of_obs into a single new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    or obs which contain covobs.
    """
    all_names = [name for obs in list_of_obs for name in obs.names]
    if len(all_names) != len(set(all_names)):
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(all_names)))
    if any(len(o.cov_names) for o in list_of_obs):
        raise Exception('Not possible to merge data that contains covobs!')

    samples_by_name = {}
    idl_by_name = {}
    for o in list_of_obs:
        # Samples are restored as fluctuation plus replicum mean.
        for key in set(o.deltas) | set(o.r_values):
            samples_by_name[key] = o.deltas.get(key, 0) + o.r_values.get(key, 0)
        for key in set(o.deltas):
            idl_by_name[key] = o.idl.get(key, 0)

    names = sorted(samples_by_name.keys())
    merged = Obs([samples_by_name[name] for name in names], names, idl=[idl_by_name[name] for name in names])
    merged.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in merged.names}
    merged.reweighted = np.max([oi.reweighted for oi in list_of_obs])
    return merged
1595
1596
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.

    Returns
    -------
    Obs or list of Obs
        A single Obs if exactly one mean value was provided, otherwise a list
        containing one Obs per mean value.

    Raises
    ------
    Exception
        If the number of mean values does not agree with the attribute N of
        the created Covobs.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    if isinstance(means, (float, int)):
        means = [means]

    ol = [covobs_to_obs(Covobs(mean, cov, name, pos=i, grad=grad)) for i, mean in enumerate(means)]

    # The expected number of means is the N of the Covobs, not the N of the
    # Obs (which counts Monte Carlo samples and is 0 for an Obs built here).
    expected = ol[0].covobs[name].N
    if expected != len(means):
        raise Exception('You have to provide %d mean values!' % (expected))
    if len(ol) == 1:
        return ol[0]
    return ol
class Obs:
 20class Obs:
 21    """Class for a general observable.
 22
 23    Instances of Obs are the basic objects of a pyerrors error analysis.
 24    They are initialized with a list which contains arrays of samples for
 25    different ensembles/replica and another list of same length which contains
 26    the names of the ensembles/replica. Mathematical operations can be
 27    performed on instances. The result is another instance of Obs. The error of
 28    an instance can be computed with the gamma_method. Also contains additional
 29    methods for output and visualization of the error calculation.
 30
 31    Attributes
 32    ----------
 33    S_global : float
 34        Standard value for S (default 2.0)
 35    S_dict : dict
 36        Dictionary for S values. If an entry for a given ensemble
 37        exists this overwrites the standard value for that ensemble.
 38    tau_exp_global : float
 39        Standard value for tau_exp (default 0.0)
 40    tau_exp_dict : dict
 41        Dictionary for tau_exp values. If an entry for a given ensemble exists
 42        this overwrites the standard value for that ensemble.
 43    N_sigma_global : float
 44        Standard value for N_sigma (default 1.0)
 45    N_sigma_dict : dict
 46        Dictionary for N_sigma values. If an entry for a given ensemble exists
 47        this overwrites the standard value for that ensemble.
 48    """
 49    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
 50                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
 51                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
 52                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
 53                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
 54
 55    S_global = 2.0
 56    S_dict = {}
 57    tau_exp_global = 0.0
 58    tau_exp_dict = {}
 59    N_sigma_global = 1.0
 60    N_sigma_dict = {}
 61
 62    def __init__(self, samples, names, idl=None, **kwargs):
 63        """ Initialize Obs object.
 64
 65        Parameters
 66        ----------
 67        samples : list
 68            list of numpy arrays containing the Monte Carlo samples
 69        names : list
 70            list of strings labeling the individual samples
 71        idl : list, optional
 72            list of ranges or lists on which the samples are defined
 73        """
 74
 75        if kwargs.get("means") is None and len(samples):
 76            if len(samples) != len(names):
 77                raise Exception('Length of samples and names incompatible.')
 78            if idl is not None:
 79                if len(idl) != len(names):
 80                    raise Exception('Length of idl incompatible with samples and names.')
 81            name_length = len(names)
 82            if name_length > 1:
 83                if name_length != len(set(names)):
 84                    raise Exception('names are not unique.')
 85                if not all(isinstance(x, str) for x in names):
 86                    raise TypeError('All names have to be strings.')
 87            else:
 88                if not isinstance(names[0], str):
 89                    raise TypeError('All names have to be strings.')
 90            if min(len(x) for x in samples) <= 4:
 91                raise Exception('Samples have to have at least 5 entries.')
 92
 93        self.names = sorted(names)
 94        self.shape = {}
 95        self.r_values = {}
 96        self.deltas = {}
 97        self._covobs = {}
 98
 99        self._value = 0
100        self.N = 0
101        self.is_merged = {}
102        self.idl = {}
103        if idl is not None:
104            for name, idx in sorted(zip(names, idl)):
105                if isinstance(idx, range):
106                    self.idl[name] = idx
107                elif isinstance(idx, (list, np.ndarray)):
108                    dc = np.unique(np.diff(idx))
109                    if np.any(dc < 0):
110                        raise Exception("Unsorted idx for idl[%s]" % (name))
111                    if len(dc) == 1:
112                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
113                    else:
114                        self.idl[name] = list(idx)
115                else:
116                    raise Exception('incompatible type for idl[%s].' % (name))
117        else:
118            for name, sample in sorted(zip(names, samples)):
119                self.idl[name] = range(1, len(sample) + 1)
120
121        if kwargs.get("means") is not None:
122            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
123                self.shape[name] = len(self.idl[name])
124                self.N += self.shape[name]
125                self.r_values[name] = mean
126                self.deltas[name] = sample
127        else:
128            for name, sample in sorted(zip(names, samples)):
129                self.shape[name] = len(self.idl[name])
130                self.N += self.shape[name]
131                if len(sample) != self.shape[name]:
132                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
133                self.r_values[name] = np.mean(sample)
134                self.deltas[name] = sample - self.r_values[name]
135                self._value += self.shape[name] * self.r_values[name]
136            self._value /= self.N
137
138        self._dvalue = 0.0
139        self.ddvalue = 0.0
140        self.reweighted = False
141
142        self.tag = None
143
144    @property
145    def value(self):
146        return self._value
147
148    @property
149    def dvalue(self):
150        return self._dvalue
151
152    @property
153    def e_names(self):
154        return sorted(set([o.split('|')[0] for o in self.names]))
155
156    @property
157    def cov_names(self):
158        return sorted(set([o for o in self.covobs.keys()]))
159
160    @property
161    def mc_names(self):
162        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
163
164    @property
165    def e_content(self):
166        res = {}
167        for e, e_name in enumerate(self.e_names):
168            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
169            if e_name in self.names:
170                res[e_name].append(e_name)
171        return res
172
173    @property
174    def covobs(self):
175        return self._covobs
176
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        The results are stored on the instance: the total error in _dvalue
        (accessible as dvalue), its error in ddvalue and the per ensemble
        quantities in the e_* dictionaries, which are reset on every call.
        The method itself returns None.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimates coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)

        Raises
        ------
        Exception
            If S, tau_exp or N_sigma is negative, if the replica of an
            ensemble are not equally spaced or if there are too few samples
            for the tau_exp analysis.
        TypeError
            If S, tau_exp or N_sigma is passed but is neither int nor float.
        """

        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        # The fft is used unless fft=False is passed explicitly.
        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # Fill self.<kwarg_name> for every ensemble. A value passed as keyword
            # argument applies to all ensembles, otherwise the entry of the class
            # level <kwarg_name>_dict or the class level <kwarg_name>_global is used.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            # Length of every replicum: number of entries for a range, otherwise
            # the span between the first and the last configuration.
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            # Accumulate the unnormalized autocorrelation function over all replica.
            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # The normalization is obtained by applying the same procedure to arrays of ones.
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            # Spacing of the configurations: 1 for a range, otherwise the smallest
            # difference between neighbouring entries. It has to agree for all replica.
            gaps = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    gaps.append(1)
                else:
                    gaps.append(np.min(np.diff(self.idl[r_name])))

            if not np.all([gi == gaps[0] for gi in gaps]):
                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
            else:
                gapsize = gaps[0]

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Fill self.e_drho[e_name][i] from the normalized autocorrelation function.
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(gapsize)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(gapsize, w_max // 2, gapsize):
                    _compute_drho(n + gapsize)
                    # The tail is attached at the first n where rho is compatible with zero
                    # within N_sigma errors or when the end of the scanned range is reached.
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # No autocorrelation assumed: tauint is fixed to 0.5 and the window to 0.
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(gapsize * n + gapsize)
                        # The window is fixed at the first n with negative g_w or at the last n of the loop.
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            n *= gapsize
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # The squared errors of the individual ensembles are summed up.
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        # Contributions of the covobs enter without an error of the error.
        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return
351
352    gm = gamma_method
353
354    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
355        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
356           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
357
358        Parameters
359        ----------
360        deltas : list
361            List of fluctuations
362        idx : list
363            List or range of configurations on which the deltas are defined.
364        shape : int
365            Number of configurations in idx.
366        w_max : int
367            Upper bound for the summation window.
368        fft : bool
369            determines whether the fft algorithm is used for the computation
370            of the autocorrelation function.
371        """
372        gamma = np.zeros(w_max)
373        deltas = _expand_deltas(deltas, idx, shape)
374        new_shape = len(deltas)
375        if fft:
376            max_gamma = min(new_shape, w_max)
377            # The padding for the fft has to be even
378            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
379            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
380        else:
381            for n in range(w_max):
382                if new_shape - n >= 0:
383                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
384
385        return gamma
386
387    def details(self, ens_content=True):
388        """Output detailed properties of the Obs.
389
390        Parameters
391        ----------
392        ens_content : bool
393            print details about the ensembles and replica if true.
394        """
395        if self.tag is not None:
396            print("Description:", self.tag)
397        if not hasattr(self, 'e_dvalue'):
398            print('Result\t %3.8e' % (self.value))
399        else:
400            if self.value == 0.0:
401                percentage = np.nan
402            else:
403                percentage = np.abs(self._dvalue / self.value) * 100
404            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
405            if len(self.e_names) > 1:
406                print(' Ensemble errors:')
407            e_content = self.e_content
408            for e_name in self.mc_names:
409                if isinstance(self.idl[e_content[e_name][0]], range):
410                    gap = self.idl[e_content[e_name][0]].step
411                else:
412                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
413
414                if len(self.e_names) > 1:
415                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
416                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
417                tau_string += f" in units of {gap} config"
418                if gap > 1:
419                    tau_string += "s"
420                if self.tau_exp[e_name] > 0:
421                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
422                else:
423                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
424                print(tau_string)
425            for e_name in self.cov_names:
426                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
427        if ens_content is True:
428            if len(self.e_names) == 1:
429                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
430            else:
431                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
432            my_string_list = []
433            for key, value in sorted(self.e_content.items()):
434                if key not in self.covobs:
435                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
436                    if len(value) == 1:
437                        my_string += f': {self.shape[value[0]]} configurations'
438                        if isinstance(self.idl[value[0]], range):
439                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
440                        else:
441                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
442                    else:
443                        sublist = []
444                        for v in value:
445                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
446                            my_substring += f': {self.shape[v]} configurations'
447                            if isinstance(self.idl[v], range):
448                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
449                            else:
450                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
451                            sublist.append(my_substring)
452
453                        my_string += '\n' + '\n'.join(sublist)
454                else:
455                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
456                my_string_list.append(my_string)
457            print('\n'.join(my_string_list))
458
459    def reweight(self, weight):
460        """Reweight the obs with given rewighting factors.
461
462        Parameters
463        ----------
464        weight : Obs
465            Reweighting factor. An Observable that has to be defined on a superset of the
466            configurations in obs[i].idl for all i.
467        all_configs : bool
468            if True, the reweighted observables are normalized by the average of
469            the reweighting factor on all configurations in weight.idl and not
470            on the configurations in obs[i].idl. Default False.
471        """
472        return reweight(weight, [self])[0]
473
474    def is_zero_within_error(self, sigma=1):
475        """Checks whether the observable is zero within 'sigma' standard errors.
476
477        Parameters
478        ----------
479        sigma : int
480            Number of standard errors used for the check.
481
482        Works only properly when the gamma method was run.
483        """
484        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
485
486    def is_zero(self, atol=1e-10):
487        """Checks whether the observable is zero within a given tolerance.
488
489        Parameters
490        ----------
491        atol : float
492            Absolute tolerance (for details see numpy documentation).
493        """
494        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
495
496    def plot_tauint(self, save=None):
497        """Plot integrated autocorrelation time for each ensemble.
498
499        Parameters
500        ----------
501        save : str
502            saves the figure to a file named 'save' if.
503        """
504        if not hasattr(self, 'e_dvalue'):
505            raise Exception('Run the gamma method first.')
506
507        for e, e_name in enumerate(self.mc_names):
508            fig = plt.figure()
509            plt.xlabel(r'$W$')
510            plt.ylabel(r'$\tau_\mathrm{int}$')
511            length = int(len(self.e_n_tauint[e_name]))
512            if self.tau_exp[e_name] > 0:
513                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
514                x_help = np.arange(2 * self.tau_exp[e_name])
515                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
516                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
517                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
518                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
519                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
520                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
521                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
522            else:
523                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
524                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
525
526            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
527            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
528            plt.legend()
529            plt.xlim(-0.5, xmax)
530            ylim = plt.ylim()
531            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
532            plt.draw()
533            if save:
534                fig.savefig(save + "_" + str(e))
535
536    def plot_rho(self, save=None):
537        """Plot normalized autocorrelation function time for each ensemble.
538
539        Parameters
540        ----------
541        save : str
542            saves the figure to a file named 'save' if.
543        """
544        if not hasattr(self, 'e_dvalue'):
545            raise Exception('Run the gamma method first.')
546        for e, e_name in enumerate(self.mc_names):
547            fig = plt.figure()
548            plt.xlabel('W')
549            plt.ylabel('rho')
550            length = int(len(self.e_drho[e_name]))
551            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
552            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
553            if self.tau_exp[e_name] > 0:
554                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
555                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
556                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
557                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
558            else:
559                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
560                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
561            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
562            plt.xlim(-0.5, xmax)
563            plt.draw()
564            if save:
565                fig.savefig(save + "_" + str(e))
566
567    def plot_rep_dist(self):
568        """Plot replica distribution for each ensemble with more than one replicum."""
569        if not hasattr(self, 'e_dvalue'):
570            raise Exception('Run the gamma method first.')
571        for e, e_name in enumerate(self.mc_names):
572            if len(self.e_content[e_name]) == 1:
573                print('No replica distribution for a single replicum (', e_name, ')')
574                continue
575            r_length = []
576            sub_r_mean = 0
577            for r, r_name in enumerate(self.e_content[e_name]):
578                r_length.append(len(self.deltas[r_name]))
579                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
580            e_N = np.sum(r_length)
581            sub_r_mean /= e_N
582            arr = np.zeros(len(self.e_content[e_name]))
583            for r, r_name in enumerate(self.e_content[e_name]):
584                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
585            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
586            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
587            plt.draw()
588
589    def plot_history(self, expand=True):
590        """Plot derived Monte Carlo history for each ensemble
591
592        Parameters
593        ----------
594        expand : bool
595            show expanded history for irregular Monte Carlo chains (default: True).
596        """
597        for e, e_name in enumerate(self.mc_names):
598            plt.figure()
599            r_length = []
600            tmp = []
601            tmp_expanded = []
602            for r, r_name in enumerate(self.e_content[e_name]):
603                tmp.append(self.deltas[r_name] + self.r_values[r_name])
604                if expand:
605                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
606                    r_length.append(len(tmp_expanded[-1]))
607                else:
608                    r_length.append(len(tmp[-1]))
609            e_N = np.sum(r_length)
610            x = np.arange(e_N)
611            y_test = np.concatenate(tmp, axis=0)
612            if expand:
613                y = np.concatenate(tmp_expanded, axis=0)
614            else:
615                y = y_test
616            plt.errorbar(x, y, fmt='.', markersize=3)
617            plt.xlim(-0.5, e_N - 0.5)
618            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
619            plt.draw()
620
621    def plot_piechart(self, save=None):
622        """Plot piechart which shows the fractional contribution of each
623        ensemble to the error and returns a dictionary containing the fractions.
624
625        Parameters
626        ----------
627        save : str
628            saves the figure to a file named 'save' if.
629        """
630        if not hasattr(self, 'e_dvalue'):
631            raise Exception('Run the gamma method first.')
632        if np.isclose(0.0, self._dvalue, atol=1e-15):
633            raise Exception('Error is 0.0')
634        labels = self.e_names
635        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
636        fig1, ax1 = plt.subplots()
637        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
638        ax1.axis('equal')
639        plt.draw()
640        if save:
641            fig1.savefig(save)
642
643        return dict(zip(self.e_names, sizes))
644
645    def dump(self, filename, datatype="json.gz", description="", **kwargs):
646        """Dump the Obs to a file 'name' of chosen format.
647
648        Parameters
649        ----------
650        filename : str
651            name of the file to be saved.
652        datatype : str
653            Format of the exported file. Supported formats include
654            "json.gz" and "pickle"
655        description : str
656            Description for output file, only relevant for json.gz format.
657        path : str
658            specifies a custom path for the file (default '.')
659        """
660        if 'path' in kwargs:
661            file_name = kwargs.get('path') + '/' + filename
662        else:
663            file_name = filename
664
665        if datatype == "json.gz":
666            from .input.json import dump_to_json
667            dump_to_json([self], file_name, description=description)
668        elif datatype == "pickle":
669            with open(file_name + '.p', 'wb') as fb:
670                pickle.dump(self, fb)
671        else:
672            raise Exception("Unknown datatype " + str(datatype))
673
674    def export_jackknife(self):
675        """Export jackknife samples from the Obs
676
677        Returns
678        -------
679        numpy.ndarray
680            Returns a numpy array of length N + 1 where N is the number of samples
681            for the given ensemble and replicum. The zeroth entry of the array contains
682            the mean value of the Obs, entries 1 to N contain the N jackknife samples
683            derived from the Obs. The current implementation only works for observables
684            defined on exactly one ensemble and replicum. The derived jackknife samples
685            should agree with samples from a full jackknife analysis up to O(1/N).
686        """
687
688        if len(self.names) != 1:
689            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
690
691        name = self.names[0]
692        full_data = self.deltas[name] + self.r_values[name]
693        n = full_data.size
694        mean = self.value
695        tmp_jacks = np.zeros(n + 1)
696        tmp_jacks[0] = mean
697        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
698        return tmp_jacks
699
700    def __float__(self):
701        return float(self.value)
702
703    def __repr__(self):
704        return 'Obs[' + str(self) + ']'
705
706    def __str__(self):
707        return _format_uncertainty(self.value, self._dvalue)
708
709    def __hash__(self):
710        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
711        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
712        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
713        hash_tuple += tuple([o.encode() for o in self.names])
714        m = hashlib.md5()
715        [m.update(o) for o in hash_tuple]
716        return int(m.hexdigest(), 16) & 0xFFFFFFFF
717
718    # Overload comparisons
719    def __lt__(self, other):
720        return self.value < other
721
722    def __le__(self, other):
723        return self.value <= other
724
725    def __gt__(self, other):
726        return self.value > other
727
728    def __ge__(self, other):
729        return self.value >= other
730
731    def __eq__(self, other):
732        return (self - other).is_zero()
733
734    def __ne__(self, other):
735        return not (self - other).is_zero()
736
737    # Overload math operations
738    def __add__(self, y):
739        if isinstance(y, Obs):
740            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
741        else:
742            if isinstance(y, np.ndarray):
743                return np.array([self + o for o in y])
744            elif y.__class__.__name__ in ['Corr', 'CObs']:
745                return NotImplemented
746            else:
747                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
748
749    def __radd__(self, y):
750        return self + y
751
752    def __mul__(self, y):
753        if isinstance(y, Obs):
754            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
755        else:
756            if isinstance(y, np.ndarray):
757                return np.array([self * o for o in y])
758            elif isinstance(y, complex):
759                return CObs(self * y.real, self * y.imag)
760            elif y.__class__.__name__ in ['Corr', 'CObs']:
761                return NotImplemented
762            else:
763                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
764
765    def __rmul__(self, y):
766        return self * y
767
768    def __sub__(self, y):
769        if isinstance(y, Obs):
770            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
771        else:
772            if isinstance(y, np.ndarray):
773                return np.array([self - o for o in y])
774            elif y.__class__.__name__ in ['Corr', 'CObs']:
775                return NotImplemented
776            else:
777                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
778
779    def __rsub__(self, y):
780        return -1 * (self - y)
781
782    def __pos__(self):
783        return self
784
785    def __neg__(self):
786        return -1 * self
787
788    def __truediv__(self, y):
789        if isinstance(y, Obs):
790            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
791        else:
792            if isinstance(y, np.ndarray):
793                return np.array([self / o for o in y])
794            elif y.__class__.__name__ in ['Corr', 'CObs']:
795                return NotImplemented
796            else:
797                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
798
799    def __rtruediv__(self, y):
800        if isinstance(y, Obs):
801            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
802        else:
803            if isinstance(y, np.ndarray):
804                return np.array([o / self for o in y])
805            elif y.__class__.__name__ in ['Corr', 'CObs']:
806                return NotImplemented
807            else:
808                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
809
810    def __pow__(self, y):
811        if isinstance(y, Obs):
812            return derived_observable(lambda x: x[0] ** x[1], [self, y])
813        else:
814            return derived_observable(lambda x: x[0] ** y, [self])
815
816    def __rpow__(self, y):
817        if isinstance(y, Obs):
818            return derived_observable(lambda x: x[0] ** x[1], [y, self])
819        else:
820            return derived_observable(lambda x: y ** x[0], [self])
821
822    def __abs__(self):
823        return derived_observable(lambda x: anp.abs(x[0]), [self])
824
825    # Overload numpy functions
826    def sqrt(self):
827        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
828
829    def log(self):
830        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
831
832    def exp(self):
833        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
834
835    def sin(self):
836        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
837
838    def cos(self):
839        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
840
841    def tan(self):
842        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
843
844    def arcsin(self):
845        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
846
847    def arccos(self):
848        return derived_observable(lambda x: anp.arccos(x[0]), [self])
849
850    def arctan(self):
851        return derived_observable(lambda x: anp.arctan(x[0]), [self])
852
853    def sinh(self):
854        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
855
856    def cosh(self):
857        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
858
859    def tanh(self):
860        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
861
862    def arcsinh(self):
863        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
864
865    def arccosh(self):
866        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
867
868    def arctanh(self):
869        return derived_observable(lambda x: anp.arctanh(x[0]), [self])

Class for a general observable.

Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.

Attributes
  • S_global (float): Standard value for S (default 2.0)
  • S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • tau_exp_global (float): Standard value for tau_exp (default 0.0)
  • tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
  • N_sigma_global (float): Standard value for N_sigma (default 1.0)
  • N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
Obs(samples, names, idl=None, **kwargs)
 62    def __init__(self, samples, names, idl=None, **kwargs):
 63        """ Initialize Obs object.
 64
 65        Parameters
 66        ----------
 67        samples : list
 68            list of numpy arrays containing the Monte Carlo samples
 69        names : list
 70            list of strings labeling the individual samples
 71        idl : list, optional
 72            list of ranges or lists on which the samples are defined
 73        """
 74
 75        if kwargs.get("means") is None and len(samples):
 76            if len(samples) != len(names):
 77                raise Exception('Length of samples and names incompatible.')
 78            if idl is not None:
 79                if len(idl) != len(names):
 80                    raise Exception('Length of idl incompatible with samples and names.')
 81            name_length = len(names)
 82            if name_length > 1:
 83                if name_length != len(set(names)):
 84                    raise Exception('names are not unique.')
 85                if not all(isinstance(x, str) for x in names):
 86                    raise TypeError('All names have to be strings.')
 87            else:
 88                if not isinstance(names[0], str):
 89                    raise TypeError('All names have to be strings.')
 90            if min(len(x) for x in samples) <= 4:
 91                raise Exception('Samples have to have at least 5 entries.')
 92
 93        self.names = sorted(names)
 94        self.shape = {}
 95        self.r_values = {}
 96        self.deltas = {}
 97        self._covobs = {}
 98
 99        self._value = 0
100        self.N = 0
101        self.is_merged = {}
102        self.idl = {}
103        if idl is not None:
104            for name, idx in sorted(zip(names, idl)):
105                if isinstance(idx, range):
106                    self.idl[name] = idx
107                elif isinstance(idx, (list, np.ndarray)):
108                    dc = np.unique(np.diff(idx))
109                    if np.any(dc < 0):
110                        raise Exception("Unsorted idx for idl[%s]" % (name))
111                    if len(dc) == 1:
112                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
113                    else:
114                        self.idl[name] = list(idx)
115                else:
116                    raise Exception('incompatible type for idl[%s].' % (name))
117        else:
118            for name, sample in sorted(zip(names, samples)):
119                self.idl[name] = range(1, len(sample) + 1)
120
121        if kwargs.get("means") is not None:
122            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
123                self.shape[name] = len(self.idl[name])
124                self.N += self.shape[name]
125                self.r_values[name] = mean
126                self.deltas[name] = sample
127        else:
128            for name, sample in sorted(zip(names, samples)):
129                self.shape[name] = len(self.idl[name])
130                self.N += self.shape[name]
131                if len(sample) != self.shape[name]:
132                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
133                self.r_values[name] = np.mean(sample)
134                self.deltas[name] = sample - self.r_values[name]
135                self._value += self.shape[name] * self.r_values[name]
136            self._value /= self.N
137
138        self._dvalue = 0.0
139        self.ddvalue = 0.0
140        self.reweighted = False
141
142        self.tag = None

Initialize Obs object.

Parameters
  • samples (list): list of numpy arrays containing the Monte Carlo samples
  • names (list): list of strings labeling the individual samples
  • idl (list, optional): list of ranges or lists on which the samples are defined
def gamma_method(self, **kwargs):
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        The results are stored on the instance: per-ensemble quantities in the
        `e_*` dictionaries, the total error in `_dvalue` and its uncertainty
        in `ddvalue`. Nothing is returned.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)

        Raises
        ------
        Exception
            If S, tau_exp or N_sigma is negative, if the replica of an
            ensemble are not equally spaced, or if there are too few samples
            for the tau_exp analysis.
        TypeError
            If S, tau_exp or N_sigma is given but is neither int nor float.
        """

        # Discard the results of any previous call.
        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        # fft is switched off only by an explicit `fft=False`.
        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # Fill self.<kwarg_name> for every ensemble. An explicit keyword
            # argument applies to all ensembles; otherwise the class-level
            # <kwarg_name>_dict entry is used if present, else the class-level
            # <kwarg_name>_global value.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            # Extent of every replicum: number of entries for a range,
            # last index - first index + 1 for an irregular list.
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            # Total number of configurations of the ensemble and largest lag
            # considered (half the extent of the longest replicum).
            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            # Accumulate the output of _calc_gamma over all replica.
            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # Normalisation: the same accumulation for arrays of ones.
            # Entries below 1 are set to 1 before dividing.
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                # Vanishing variance: store trivial results and skip the rest
                # of the analysis for this ensemble.
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            # Spacing per replicum: 1 is recorded for a range, the smallest
            # index difference for an irregular list. All replica of an
            # ensemble must agree.
            gaps = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    gaps.append(1)
                else:
                    gaps.append(np.min(np.diff(self.idl[r_name])))

            if not np.all([gi == gaps[0] for gi in gaps]):
                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
            else:
                gapsize = gaps[0]

            # Normalized autocorrelation function and its running sum.
            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Store the error estimate of rho at lag i in self.e_drho.
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(gapsize)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(gapsize, w_max // 2, gapsize):
                    _compute_drho(n + gapsize)
                    # Stop at the first lag where rho is compatible with zero
                    # within N_sigma errors, or close to the end of the range.
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # No autocorrelation assumed: tau_int fixed to 0.5.
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(gapsize * n + gapsize)
                        # The window is the first n with negative g_w (or the
                        # last admissible n); n is converted to a lag below.
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            n *= gapsize
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Errors of different ensembles are added in quadrature.
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        # Contributions of the entries in cov_names, taken from their errsq().
        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        # Up to here _dvalue and ddvalue hold sums of squares.
        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def gm(self, **kwargs):
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        The results are stored on the instance: the total error in
        ``_dvalue``, its uncertainty in ``ddvalue`` and per-ensemble
        quantities in the ``e_*`` dictionaries (``e_dvalue``, ``e_ddvalue``,
        ``e_tauint``, ``e_dtauint``, ``e_windowsize``, ``e_rho``, ``e_drho``,
        ``e_n_tauint``, ``e_n_dtauint``). The method itself returns None.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)

        Raises
        ------
        Exception
            If S, tau_exp or N_sigma is negative, if the replica of an
            ensemble are not equally spaced, or if the tau_exp analysis is
            requested with too few samples.
        TypeError
            If S, tau_exp or N_sigma is passed but is not an int or float.
        """

        e_content = self.e_content
        # Discard the results of any previous run before recomputing.
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        # Both accumulate squared contributions first; the square roots are taken at the end.
        self._dvalue = 0
        self.ddvalue = 0

        # Per-ensemble analysis parameters, filled by _parse_kwarg below.
        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        # Only an explicit fft=False disables the FFT; a missing or any other value keeps it on.
        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            # Fill self.<kwarg_name> with one value per ensemble. A keyword
            # argument applies to every ensemble; otherwise the class-level
            # <kwarg_name>_dict entry is used, falling back to <kwarg_name>_global.
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        # Autocorrelation analysis, one Monte Carlo ensemble at a time.
        for e, e_name in enumerate(self.mc_names):
            # Extent of every replicum in configuration numbers
            # (last index - first index + 1 for non-range index lists).
            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]))
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))

            # Sum of self.shape over the replica of this ensemble.
            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            # The autocorrelation function is evaluated up to half the longest replicum.
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)

            # Normalisation: the same estimator applied to a series of ones.
            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
            # Clamp small normalisation entries so the division below stays finite.
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                # Vanishing variance: record a zero error and skip the rest of the analysis.
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            # Spacing between consecutive configurations of every replicum.
            # NOTE(review): a range idl is always assigned gap 1, regardless of
            # its step — confirm this is intended.
            gaps = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    gaps.append(1)
                else:
                    gaps.append(np.min(np.diff(self.idl[r_name])))

            if not np.all([gi == gaps[0] for gi in gaps]):
                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
            else:
                gapsize = gaps[0]

            # Normalised autocorrelation function and its running sum.
            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                # Store the error estimate of rho at lag i in self.e_drho[e_name][i].
                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            _compute_drho(gapsize)
            if self.tau_exp[e_name] > 0:
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                # NOTE(review): if this loop ends without reaching `break`,
                # e_dvalue[e_name] stays unset and the accumulation after the
                # branch raises KeyError — confirm the stop condition always
                # triggers for gapsize > 1.
                for n in range(gapsize, w_max // 2, gapsize):
                    _compute_drho(n + gapsize)
                    # Stop where rho is compatible with zero within N_sigma errors, or near the end of the range.
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    # S == 0: no autocorrelation assumed, tau_int fixed to 0.5 and window 0.
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    # NOTE(review): n runs up to w_max - 1 while g_w has one
                    # entry per gapsize-strided lag; for gapsize > 1 the index
                    # n - 1 (and n * gapsize below) may run past the arrays —
                    # confirm against data with gaps.
                    for n in range(1, w_max):
                        if n < w_max // 2 - 2:
                            _compute_drho(gapsize * n + gapsize)
                        # The window is fixed at the first sign change of g_w, or at the end of the range.
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            n *= gapsize
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            # Ensemble contributions are added in quadrature.
            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        # Covobs contributions: error taken from errsq(), its uncertainty is set to zero.
        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            # Avoid dividing by a vanishing total error.
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

Estimate the error and related properties of the Obs.

Parameters
  • S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
  • tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
  • N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
  • fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
def details(self, ens_content=True):
387    def details(self, ens_content=True):
388        """Output detailed properties of the Obs.
389
390        Parameters
391        ----------
392        ens_content : bool
393            print details about the ensembles and replica if true.
394        """
395        if self.tag is not None:
396            print("Description:", self.tag)
397        if not hasattr(self, 'e_dvalue'):
398            print('Result\t %3.8e' % (self.value))
399        else:
400            if self.value == 0.0:
401                percentage = np.nan
402            else:
403                percentage = np.abs(self._dvalue / self.value) * 100
404            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
405            if len(self.e_names) > 1:
406                print(' Ensemble errors:')
407            e_content = self.e_content
408            for e_name in self.mc_names:
409                if isinstance(self.idl[e_content[e_name][0]], range):
410                    gap = self.idl[e_content[e_name][0]].step
411                else:
412                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
413
414                if len(self.e_names) > 1:
415                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
416                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
417                tau_string += f" in units of {gap} config"
418                if gap > 1:
419                    tau_string += "s"
420                if self.tau_exp[e_name] > 0:
421                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
422                else:
423                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
424                print(tau_string)
425            for e_name in self.cov_names:
426                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
427        if ens_content is True:
428            if len(self.e_names) == 1:
429                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
430            else:
431                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
432            my_string_list = []
433            for key, value in sorted(self.e_content.items()):
434                if key not in self.covobs:
435                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
436                    if len(value) == 1:
437                        my_string += f': {self.shape[value[0]]} configurations'
438                        if isinstance(self.idl[value[0]], range):
439                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
440                        else:
441                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
442                    else:
443                        sublist = []
444                        for v in value:
445                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
446                            my_substring += f': {self.shape[v]} configurations'
447                            if isinstance(self.idl[v], range):
448                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
449                            else:
450                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
451                            sublist.append(my_substring)
452
453                        my_string += '\n' + '\n'.join(sublist)
454                else:
455                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
456                my_string_list.append(my_string)
457            print('\n'.join(my_string_list))

Output detailed properties of the Obs.

Parameters
  • ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight):
459    def reweight(self, weight):
460        """Reweight the obs with given rewighting factors.
461
462        Parameters
463        ----------
464        weight : Obs
465            Reweighting factor. An Observable that has to be defined on a superset of the
466            configurations in obs[i].idl for all i.
467        all_configs : bool
468            if True, the reweighted observables are normalized by the average of
469            the reweighting factor on all configurations in weight.idl and not
470            on the configurations in obs[i].idl. Default False.
471        """
472        return reweight(weight, [self])[0]

Reweight the obs with given reweighting factors.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def is_zero_within_error(self, sigma=1):
474    def is_zero_within_error(self, sigma=1):
475        """Checks whether the observable is zero within 'sigma' standard errors.
476
477        Parameters
478        ----------
479        sigma : int
480            Number of standard errors used for the check.
481
482        Works only properly when the gamma method was run.
483        """
484        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

Checks whether the observable is zero within 'sigma' standard errors.

Parameters
  • sigma (int): Number of standard errors used for the check.
  • Note: this check only works properly after the gamma method has been run.
def is_zero(self, atol=1e-10):
486    def is_zero(self, atol=1e-10):
487        """Checks whether the observable is zero within a given tolerance.
488
489        Parameters
490        ----------
491        atol : float
492            Absolute tolerance (for details see numpy documentation).
493        """
494        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

Checks whether the observable is zero within a given tolerance.

Parameters
  • atol (float): Absolute tolerance (for details see numpy documentation).
def plot_tauint(self, save=None):
496    def plot_tauint(self, save=None):
497        """Plot integrated autocorrelation time for each ensemble.
498
499        Parameters
500        ----------
501        save : str
502            saves the figure to a file named 'save' if.
503        """
504        if not hasattr(self, 'e_dvalue'):
505            raise Exception('Run the gamma method first.')
506
507        for e, e_name in enumerate(self.mc_names):
508            fig = plt.figure()
509            plt.xlabel(r'$W$')
510            plt.ylabel(r'$\tau_\mathrm{int}$')
511            length = int(len(self.e_n_tauint[e_name]))
512            if self.tau_exp[e_name] > 0:
513                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
514                x_help = np.arange(2 * self.tau_exp[e_name])
515                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
516                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
517                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
518                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
519                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
520                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
521                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
522            else:
523                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
524                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
525
526            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
527            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
528            plt.legend()
529            plt.xlim(-0.5, xmax)
530            ylim = plt.ylim()
531            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
532            plt.draw()
533            if save:
534                fig.savefig(save + "_" + str(e))

Plot integrated autocorrelation time for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if specified.
def plot_rho(self, save=None):
536    def plot_rho(self, save=None):
537        """Plot normalized autocorrelation function time for each ensemble.
538
539        Parameters
540        ----------
541        save : str
542            saves the figure to a file named 'save' if.
543        """
544        if not hasattr(self, 'e_dvalue'):
545            raise Exception('Run the gamma method first.')
546        for e, e_name in enumerate(self.mc_names):
547            fig = plt.figure()
548            plt.xlabel('W')
549            plt.ylabel('rho')
550            length = int(len(self.e_drho[e_name]))
551            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
552            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
553            if self.tau_exp[e_name] > 0:
554                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
555                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
556                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
557                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
558            else:
559                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
560                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
561            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
562            plt.xlim(-0.5, xmax)
563            plt.draw()
564            if save:
565                fig.savefig(save + "_" + str(e))

Plot the normalized autocorrelation function for each ensemble.

Parameters
  • save (str): saves the figure to a file named 'save' if specified.
def plot_rep_dist(self):
567    def plot_rep_dist(self):
568        """Plot replica distribution for each ensemble with more than one replicum."""
569        if not hasattr(self, 'e_dvalue'):
570            raise Exception('Run the gamma method first.')
571        for e, e_name in enumerate(self.mc_names):
572            if len(self.e_content[e_name]) == 1:
573                print('No replica distribution for a single replicum (', e_name, ')')
574                continue
575            r_length = []
576            sub_r_mean = 0
577            for r, r_name in enumerate(self.e_content[e_name]):
578                r_length.append(len(self.deltas[r_name]))
579                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
580            e_N = np.sum(r_length)
581            sub_r_mean /= e_N
582            arr = np.zeros(len(self.e_content[e_name]))
583            for r, r_name in enumerate(self.e_content[e_name]):
584                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
585            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
586            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
587            plt.draw()

Plot replica distribution for each ensemble with more than one replicum.

def plot_history(self, expand=True):
589    def plot_history(self, expand=True):
590        """Plot derived Monte Carlo history for each ensemble
591
592        Parameters
593        ----------
594        expand : bool
595            show expanded history for irregular Monte Carlo chains (default: True).
596        """
597        for e, e_name in enumerate(self.mc_names):
598            plt.figure()
599            r_length = []
600            tmp = []
601            tmp_expanded = []
602            for r, r_name in enumerate(self.e_content[e_name]):
603                tmp.append(self.deltas[r_name] + self.r_values[r_name])
604                if expand:
605                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
606                    r_length.append(len(tmp_expanded[-1]))
607                else:
608                    r_length.append(len(tmp[-1]))
609            e_N = np.sum(r_length)
610            x = np.arange(e_N)
611            y_test = np.concatenate(tmp, axis=0)
612            if expand:
613                y = np.concatenate(tmp_expanded, axis=0)
614            else:
615                y = y_test
616            plt.errorbar(x, y, fmt='.', markersize=3)
617            plt.xlim(-0.5, e_N - 0.5)
618            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
619            plt.draw()

Plot derived Monte Carlo history for each ensemble

Parameters
  • expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
621    def plot_piechart(self, save=None):
622        """Plot piechart which shows the fractional contribution of each
623        ensemble to the error and returns a dictionary containing the fractions.
624
625        Parameters
626        ----------
627        save : str
628            saves the figure to a file named 'save' if.
629        """
630        if not hasattr(self, 'e_dvalue'):
631            raise Exception('Run the gamma method first.')
632        if np.isclose(0.0, self._dvalue, atol=1e-15):
633            raise Exception('Error is 0.0')
634        labels = self.e_names
635        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
636        fig1, ax1 = plt.subplots()
637        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
638        ax1.axis('equal')
639        plt.draw()
640        if save:
641            fig1.savefig(save)
642
643        return dict(zip(self.e_names, sizes))

Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.

Parameters
  • save (str): saves the figure to a file named 'save' if specified.
def dump(self, filename, datatype='json.gz', description='', **kwargs):
645    def dump(self, filename, datatype="json.gz", description="", **kwargs):
646        """Dump the Obs to a file 'name' of chosen format.
647
648        Parameters
649        ----------
650        filename : str
651            name of the file to be saved.
652        datatype : str
653            Format of the exported file. Supported formats include
654            "json.gz" and "pickle"
655        description : str
656            Description for output file, only relevant for json.gz format.
657        path : str
658            specifies a custom path for the file (default '.')
659        """
660        if 'path' in kwargs:
661            file_name = kwargs.get('path') + '/' + filename
662        else:
663            file_name = filename
664
665        if datatype == "json.gz":
666            from .input.json import dump_to_json
667            dump_to_json([self], file_name, description=description)
668        elif datatype == "pickle":
669            with open(file_name + '.p', 'wb') as fb:
670                pickle.dump(self, fb)
671        else:
672            raise Exception("Unknown datatype " + str(datatype))

Dump the Obs to a file 'filename' of the chosen format.

Parameters
  • filename (str): name of the file to be saved.
  • datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
  • description (str): Description for output file, only relevant for json.gz format.
  • path (str): specifies a custom path for the file (default '.')
def export_jackknife(self):
674    def export_jackknife(self):
675        """Export jackknife samples from the Obs
676
677        Returns
678        -------
679        numpy.ndarray
680            Returns a numpy array of length N + 1 where N is the number of samples
681            for the given ensemble and replicum. The zeroth entry of the array contains
682            the mean value of the Obs, entries 1 to N contain the N jackknife samples
683            derived from the Obs. The current implementation only works for observables
684            defined on exactly one ensemble and replicum. The derived jackknife samples
685            should agree with samples from a full jackknife analysis up to O(1/N).
686        """
687
688        if len(self.names) != 1:
689            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
690
691        name = self.names[0]
692        full_data = self.deltas[name] + self.r_values[name]
693        n = full_data.size
694        mean = self.value
695        tmp_jacks = np.zeros(n + 1)
696        tmp_jacks[0] = mean
697        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
698        return tmp_jacks

Export jackknife samples from the Obs

Returns
  • numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
def sqrt(self):
826    def sqrt(self):
827        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
def log(self):
829    def log(self):
830        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
def exp(self):
832    def exp(self):
833        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
def sin(self):
835    def sin(self):
836        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
def cos(self):
838    def cos(self):
839        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
def tan(self):
841    def tan(self):
842        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
def arcsin(self):
844    def arcsin(self):
845        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
def arccos(self):
847    def arccos(self):
848        return derived_observable(lambda x: anp.arccos(x[0]), [self])
def arctan(self):
850    def arctan(self):
851        return derived_observable(lambda x: anp.arctan(x[0]), [self])
def sinh(self):
853    def sinh(self):
854        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
def cosh(self):
856    def cosh(self):
857        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
def tanh(self):
859    def tanh(self):
860        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
def arcsinh(self):
862    def arcsinh(self):
863        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
def arccosh(self):
865    def arccosh(self):
866        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
def arctanh(self):
868    def arctanh(self):
869        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
class CObs:
class CObs:
    """Class for a complex valued observable.

    The real and the imaginary part are stored separately. gamma_method only
    acts on parts that are instances of Obs, so either part may also be a
    plain number (imag defaults to 0.0).
    """
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        """Real part as passed to the constructor."""
        return self._real

    @property
    def imag(self):
        """Imaginary part as passed to the constructor."""
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part compare equal to 0.0."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        """Returns a new CObs with the sign of the imaginary part flipped."""
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            # Let numpy broadcast the addition over the array elements
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                # All four components are Obs: build real and imaginary part of the
                # product in one derived_observable call each, with analytic gradients.
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                # Purely real factor: scale both parts
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            # self / other = self * conj(other) / |other|^2
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        # other / self = other * conj(self) / |self|^2
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        # Operands without real and imaginary part (e.g. None or a str) cannot be
        # compared component-wise. Returning NotImplemented lets Python fall back
        # to its default comparison instead of raising an AttributeError.
        if not (hasattr(other, 'real') and hasattr(other, 'imag')):
            return NotImplemented
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        # The explicit '+' is only inserted for a non-negative imaginary part
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0)
876    def __init__(self, real, imag=0.0):
877        self._real = real
878        self._imag = imag
879        self.tag = None
def gamma_method(self, **kwargs):
889    def gamma_method(self, **kwargs):
890        """Executes the gamma_method for the real and the imaginary part."""
891        if isinstance(self.real, Obs):
892            self.real.gamma_method(**kwargs)
893        if isinstance(self.imag, Obs):
894            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self):
896    def is_zero(self):
897        """Checks whether both real and imaginary part are zero within machine precision."""
898        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self):
900    def conjugate(self):
901        return CObs(self.real, -self.imag)
def derived_observable(func, data, array_mode=False, **kwargs):
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
        All kwargs given to derived_observable (including man_grad and
        num_grad) are forwarded to func, so func has to accept them.
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    array_mode : bool
        if True, the deltas and covobs gradients of the inputs are first
        collected into arrays (one per entry along the first axis of data)
        and contracted with the jacobian via np.tensordot. Otherwise the
        contributions are accumulated Obs by Obs (default False).
        NOTE(review): the extraction uses dat.shape / dat.reshape and
        np.tensordot with its default axes, so every entry of data
        presumably has to be a 2d array of Obs - confirm.
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Returns
    -------
    Obs or numpy.ndarray
        a single Obs if func returns a scalar, otherwise an object array of
        Obs with the shape of the return value of func.

    Raises
    ------
    Exception
        if covariance matrices with the same name are inconsistent, if
        man_grad does not have the shape of the jacobian, if num_grad is
        requested for an array valued func or if the same name is used for
        deltas and covobs.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data:
    # plain ints/floats are replaced by an Obs with zero covariance under a dummy name.
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    # Collect one covariance matrix per covobs name and make sure that all
    # inputs sharing a name agree on it.
    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    # Names that carry Monte Carlo samples, i.e. all names that are not covobs names
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    # The flags are inherited if at least one of the inputs carries them
    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    # Array of the plain values with the same shape as data
    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    # 1 if func returned an array, 0 for a scalar result
    multi = int(isinstance(new_values, np.ndarray))

    # Evaluate func on the r_values of every sample name and merge the idl of all inputs
    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            # Inputs that are not defined on this name contribute their value
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)
        if not is_merged[name]:
            # Flag the name as merged if the input idl and the merged idl differ in length
            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))

    # Jacobian of func: supplied manually, numerically via numdifftools or via autograd
    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        # Defaults can be overwritten by kwargs of the same name
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        # Stand-in with a vanishing gradient for inputs without the respective covobs
        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        # Expand the deltas of every input onto the merged idl, one array per entry of data
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        # Same for the gradients of the covobs
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    # Build one Obs per element of the result of func by linear error propagation
    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                new_samples.append(new_deltas[name])
                new_idl.append(new_idl_d[name])
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        # The covobs are attached after construction, the Obs constructor only receives the sample data
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].is_merged = is_merged
        final_result[i_val].reweighted = reweighted

    # Scalar result: unwrap the zero dimensional object array
    if multi == 0:
        final_result = final_result.item()

    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters
  • func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
  • data (list): list of Obs, e.g. [obs1, obs2, obs3].
  • num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
  • man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs):
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.

    Returns
    -------
    list
        one reweighted Obs per entry of obs, each flagged with reweighted = True.
    """
    use_all_configs = kwargs.get('all_configs')
    reweighted_obs = []
    for idx, current in enumerate(obs):
        if len(current.cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(current.names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in current.names:
            if not set(current.idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (idx, name))

        ordered_names = sorted(current.names)
        ordered_idl = [current.idl[name] for name in ordered_names]

        # Restrict the weight to the configurations of the current observable
        # and multiply the full samples (deltas + replica means) of both.
        w_deltas = {}
        products = []
        for name in ordered_names:
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], current.idl[name])
            products.append((w_deltas[name] + weight.r_values[name]) * (current.deltas[name] + current.r_values[name]))
        numerator = Obs(products, list(ordered_names), idl=list(ordered_idl))

        if use_all_configs:
            denominator = weight
        else:
            denominator = Obs([w_deltas[name] + weight.r_values[name] for name in ordered_names], list(ordered_names), idl=list(ordered_idl))

        quotient = numerator / denominator
        quotient.reweighted = True
        quotient.is_merged = current.is_merged
        reweighted_obs.append(quotient)

    return reweighted_obs

Reweight a list of observables.

Parameters
  • weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
  • obs (list): list of Obs, e.g. [obs1, obs2, obs3].
  • all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
def correlate(obs_a, obs_b):
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Returns
    -------
    Obs
        Observable built from the sample-wise product of obs_a and obs_b.

    Raises
    ------
    Exception
        if the ensembles, the shapes or the idl of the two observables differ
        or if one of them contains covobs.

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        # Build a single message string; passing several positional arguments
        # to Exception would render the message as a tuple.
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception(f'Shapes of ensemble {name} do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception(f'idl of ensemble {name} do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        # Product of the full samples (deltas + replica mean) of both observables
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o

Correlate two observables.

Parameters
  • obs_a (Obs): First observable
  • obs_b (Obs): Second observable
Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
             ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1, the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix. It leaves the
        largest E eigenvalues essentially unchanged and smooths the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at window size 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    dim = len(obs)

    max_samples = np.max([o.N for o in obs])
    # The rank warning is only issued if none of the observables contains covobs
    contains_covobs = any(True for o in obs for _ in o.cov_names)
    if max_samples <= dim and not contains_covobs:
        warnings.warn(f"The dimension of the covariance matrix ({dim}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    # Fill the upper triangle and mirror it afterwards
    cov = np.zeros((dim, dim))
    for row, obs_row in enumerate(obs):
        for col in range(row, dim):
            cov[row, col] = _covariance_element(obs_row, obs[col])
    cov = cov + cov.T - np.diag(np.diag(cov))

    # Normalize to unit diagonal
    inv_sigma = np.diag(1 / np.sqrt(np.diag(cov)))
    corr = inv_sigma @ cov @ inv_sigma

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    # Rescale the correlation matrix with the errors of the observables
    error_matrix = np.diag([o.dvalue for o in obs])
    cov = error_matrix @ corr @ error_matrix

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters
  • obs (list or numpy.ndarray): List or one dimensional array of Obs
  • visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
  • correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
  • smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1, the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix. It leaves the largest E eigenvalues essentially unchanged and smooths the smaller eigenvalues to avoid extremely small ones.
Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at window size 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None):
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : object
        passed on unchanged to the Obs constructor (default None).
    """
    n_jack = len(jacks) - 1
    # For standard leave-one-out jackknife samples this matrix inverts the
    # jackknife construction and recovers the underlying samples.
    projector = np.ones((n_jack, n_jack)) - (n_jack - 1) * np.identity(n_jack)
    recovered = jacks[1:] @ projector
    recovered_mean = np.mean(recovered)
    result = Obs([recovered - recovered_mean], [name], idl=idl, means=[recovered_mean])
    # The central value is taken from the zeroth entry, not from the recovered samples
    result._value = jacks[0]
    return result

Imports jackknife samples and returns an Obs

Parameters
  • jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
  • name (str): name of the ensemble the samples are defined on.
def merge_obs(list_of_obs):
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    all_names = []
    for obs in list_of_obs:
        all_names.extend(obs.names)
    if len(all_names) != len(set(all_names)):
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(all_names)))
    if any([len(o.cov_names) for o in list_of_obs]):
        raise Exception('Not possible to merge data that contains covobs!')

    # Full samples (deltas + replica mean) and idl of every replicum, keyed by name
    samples_by_name = {}
    idl_by_name = {}
    for o in list_of_obs:
        for key in set(o.deltas) | set(o.r_values):
            samples_by_name[key] = o.deltas.get(key, 0) + o.r_values.get(key, 0)
        for key in set(o.deltas):
            idl_by_name[key] = o.idl.get(key, 0)

    names = sorted(samples_by_name.keys())
    merged = Obs([samples_by_name[name] for name in names], names, idl=[idl_by_name[name] for name in names])
    # The flags are inherited if at least one of the inputs carries them
    merged.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in merged.names}
    merged.reweighted = np.max([oi.reweighted for oi in list_of_obs])
    return merged

Combine all observables in list_of_obs into one new observable

Parameters
  • list_of_obs (list): list of the Obs object to be combined
Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None):
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.

    Returns
    -------
    Obs or list of Obs
        a single Obs if exactly one mean value is given, otherwise a list
        with one Obs per mean value.

    Raises
    ------
    Exception
        if the number of mean values does not match the dimension N of the
        Covobs constructed from cov.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        # Start from an empty Obs and attach the Covobs by hand
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    if isinstance(means, (float, int)):
        means = [means]

    ol = [covobs_to_obs(Covobs(mean, cov, name, pos=i, grad=grad)) for i, mean in enumerate(means)]

    # Report the dimension of the Covobs that is actually compared against,
    # not the sample count N of the wrapping Obs.
    expected = ol[0].covobs[name].N
    if expected != len(means):
        raise Exception('You have to provide %d mean values!' % (expected))
    if len(ol) == 1:
        return ol[0]
    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters
  • means (list of floats or float): N mean value(s) of the new Obs
  • cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
  • name (str): identifier for the covariance matrix
  • grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.