pyerrors.obs

View Source

   1import warnings
   2import hashlib
   3import pickle
   4from math import gcd
   5from functools import reduce
   6import numpy as np
   7import autograd.numpy as anp  # Thinly-wrapped numpy
   8from autograd import jacobian
   9import matplotlib.pyplot as plt
  10from scipy.stats import skew, skewtest, kurtosis, kurtosistest
  11import numdifftools as nd
  12from itertools import groupby
  13from .covobs import Covobs
  14
  15# Improve print output of numpy.ndarrays containing Obs objects.
  16np.set_printoptions(formatter={'object': lambda x: str(x)})
  17
  18
  19class Obs:
  20    """Class for a general observable.
  21
  22    Instances of Obs are the basic objects of a pyerrors error analysis.
  23    They are initialized with a list which contains arrays of samples for
  24    different ensembles/replica and another list of same length which contains
  25    the names of the ensembles/replica. Mathematical operations can be
  26    performed on instances. The result is another instance of Obs. The error of
  27    an instance can be computed with the gamma_method. Also contains additional
  28    methods for output and visualization of the error calculation.
  29
  30    Attributes
  31    ----------
  32    S_global : float
  33        Standard value for S (default 2.0)
  34    S_dict : dict
  35        Dictionary for S values. If an entry for a given ensemble
  36        exists this overwrites the standard value for that ensemble.
  37    tau_exp_global : float
  38        Standard value for tau_exp (default 0.0)
  39    tau_exp_dict : dict
  40        Dictionary for tau_exp values. If an entry for a given ensemble exists
  41        this overwrites the standard value for that ensemble.
  42    N_sigma_global : float
  43        Standard value for N_sigma (default 1.0)
  44    N_sigma_dict : dict
  45        Dictionary for N_sigma values. If an entry for a given ensemble exists
  46        this overwrites the standard value for that ensemble.
  47    """
  48    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
  49                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
  50                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
  51                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
  52                 'idl', 'is_merged', 'tag', '_covobs', '__dict__']
  53
  54    S_global = 2.0
  55    S_dict = {}
  56    tau_exp_global = 0.0
  57    tau_exp_dict = {}
  58    N_sigma_global = 1.0
  59    N_sigma_dict = {}
  60    filter_eps = 1e-10
  61
  62    def __init__(self, samples, names, idl=None, **kwargs):
  63        """ Initialize Obs object.
  64
  65        Parameters
  66        ----------
  67        samples : list
  68            list of numpy arrays containing the Monte Carlo samples
  69        names : list
  70            list of strings labeling the individual samples
  71        idl : list, optional
  72            list of ranges or lists on which the samples are defined
  73        """
  74
  75        if kwargs.get("means") is None and len(samples):
  76            if len(samples) != len(names):
  77                raise Exception('Length of samples and names incompatible.')
  78            if idl is not None:
  79                if len(idl) != len(names):
  80                    raise Exception('Length of idl incompatible with samples and names.')
  81            name_length = len(names)
  82            if name_length > 1:
  83                if name_length != len(set(names)):
  84                    raise Exception('names are not unique.')
  85                if not all(isinstance(x, str) for x in names):
  86                    raise TypeError('All names have to be strings.')
  87            else:
  88                if not isinstance(names[0], str):
  89                    raise TypeError('All names have to be strings.')
  90            if min(len(x) for x in samples) <= 4:
  91                raise Exception('Samples have to have at least 5 entries.')
  92
  93        self.names = sorted(names)
  94        self.shape = {}
  95        self.r_values = {}
  96        self.deltas = {}
  97        self._covobs = {}
  98
  99        self._value = 0
 100        self.N = 0
 101        self.is_merged = {}
 102        self.idl = {}
 103        if idl is not None:
 104            for name, idx in sorted(zip(names, idl)):
 105                if isinstance(idx, range):
 106                    self.idl[name] = idx
 107                elif isinstance(idx, (list, np.ndarray)):
 108                    dc = np.unique(np.diff(idx))
 109                    if np.any(dc < 0):
 110                        raise Exception("Unsorted idx for idl[%s]" % (name))
 111                    if len(dc) == 1:
 112                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
 113                    else:
 114                        self.idl[name] = list(idx)
 115                else:
 116                    raise Exception('incompatible type for idl[%s].' % (name))
 117        else:
 118            for name, sample in sorted(zip(names, samples)):
 119                self.idl[name] = range(1, len(sample) + 1)
 120
 121        if kwargs.get("means") is not None:
 122            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
 123                self.shape[name] = len(self.idl[name])
 124                self.N += self.shape[name]
 125                self.r_values[name] = mean
 126                self.deltas[name] = sample
 127        else:
 128            for name, sample in sorted(zip(names, samples)):
 129                self.shape[name] = len(self.idl[name])
 130                self.N += self.shape[name]
 131                if len(sample) != self.shape[name]:
 132                    raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
 133                self.r_values[name] = np.mean(sample)
 134                self.deltas[name] = sample - self.r_values[name]
 135                self._value += self.shape[name] * self.r_values[name]
 136            self._value /= self.N
 137
 138        self._dvalue = 0.0
 139        self.ddvalue = 0.0
 140        self.reweighted = False
 141
 142        self.tag = None
 143
 144    @property
 145    def value(self):
 146        return self._value
 147
 148    @property
 149    def dvalue(self):
 150        return self._dvalue
 151
 152    @property
 153    def e_names(self):
 154        return sorted(set([o.split('|')[0] for o in self.names]))
 155
 156    @property
 157    def cov_names(self):
 158        return sorted(set([o for o in self.covobs.keys()]))
 159
 160    @property
 161    def mc_names(self):
 162        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))
 163
 164    @property
 165    def e_content(self):
 166        res = {}
 167        for e, e_name in enumerate(self.e_names):
 168            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
 169            if e_name in self.names:
 170                res[e_name].append(e_name)
 171        return res
 172
 173    @property
 174    def covobs(self):
 175        return self._covobs
 176
 177    def gamma_method(self, **kwargs):
 178        """Estimate the error and related properties of the Obs.
 179
 180        Parameters
 181        ----------
 182        S : float
 183            specifies a custom value for the parameter S (default 2.0).
 184            If set to 0 it is assumed that the data exhibits no
 185            autocorrelation. In this case the error estimates coincides
 186            with the sample standard error.
 187        tau_exp : float
 188            positive value triggers the critical slowing down analysis
 189            (default 0.0).
 190        N_sigma : float
 191            number of standard deviations from zero until the tail is
 192            attached to the autocorrelation function (default 1).
 193        fft : bool
 194            determines whether the fft algorithm is used for the computation
 195            of the autocorrelation function (default True)
 196        """
 197
 198        e_content = self.e_content
 199        self.e_dvalue = {}
 200        self.e_ddvalue = {}
 201        self.e_tauint = {}
 202        self.e_dtauint = {}
 203        self.e_windowsize = {}
 204        self.e_n_tauint = {}
 205        self.e_n_dtauint = {}
 206        e_gamma = {}
 207        self.e_rho = {}
 208        self.e_drho = {}
 209        self._dvalue = 0
 210        self.ddvalue = 0
 211
 212        self.S = {}
 213        self.tau_exp = {}
 214        self.N_sigma = {}
 215
 216        if kwargs.get('fft') is False:
 217            fft = False
 218        else:
 219            fft = True
 220
 221        def _parse_kwarg(kwarg_name):
 222            if kwarg_name in kwargs:
 223                tmp = kwargs.get(kwarg_name)
 224                if isinstance(tmp, (int, float)):
 225                    if tmp < 0:
 226                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
 227                    for e, e_name in enumerate(self.e_names):
 228                        getattr(self, kwarg_name)[e_name] = tmp
 229                else:
 230                    raise TypeError(kwarg_name + ' is not in proper format.')
 231            else:
 232                for e, e_name in enumerate(self.e_names):
 233                    if e_name in getattr(Obs, kwarg_name + '_dict'):
 234                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
 235                    else:
 236                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')
 237
 238        _parse_kwarg('S')
 239        _parse_kwarg('tau_exp')
 240        _parse_kwarg('N_sigma')
 241
 242        for e, e_name in enumerate(self.mc_names):
 243            r_length = []
 244            for r_name in e_content[e_name]:
 245                if isinstance(self.idl[r_name], range):
 246                    r_length.append(len(self.idl[r_name]))
 247                else:
 248                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1))
 249
 250            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
 251            w_max = max(r_length) // 2
 252            e_gamma[e_name] = np.zeros(w_max)
 253            self.e_rho[e_name] = np.zeros(w_max)
 254            self.e_drho[e_name] = np.zeros(w_max)
 255
 256            for r_name in e_content[e_name]:
 257                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft)
 258
 259            gamma_div = np.zeros(w_max)
 260            for r_name in e_content[e_name]:
 261                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft)
 262            gamma_div[gamma_div < 1] = 1.0
 263            e_gamma[e_name] /= gamma_div[:w_max]
 264
 265            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
 266                self.e_tauint[e_name] = 0.5
 267                self.e_dtauint[e_name] = 0.0
 268                self.e_dvalue[e_name] = 0.0
 269                self.e_ddvalue[e_name] = 0.0
 270                self.e_windowsize[e_name] = 0
 271                continue
 272
 273            gaps = []
 274            for r_name in e_content[e_name]:
 275                if isinstance(self.idl[r_name], range):
 276                    gaps.append(1)
 277                else:
 278                    gaps.append(np.min(np.diff(self.idl[r_name])))
 279
 280            if not np.all([gi == gaps[0] for gi in gaps]):
 281                raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps)
 282            else:
 283                gapsize = gaps[0]
 284
 285            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
 286            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
 287            # Make sure no entry of tauint is smaller than 0.5
 288            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
 289            # hep-lat/0306017 eq. (42)
 290            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N)
 291            self.e_n_dtauint[e_name][0] = 0.0
 292
 293            def _compute_drho(i):
 294                tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]
 295                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
 296
 297            _compute_drho(gapsize)
 298            if self.tau_exp[e_name] > 0:
 299                texp = self.tau_exp[e_name]
 300                # Critical slowing down analysis
 301                if w_max // 2 <= 1:
 302                    raise Exception("Need at least 8 samples for tau_exp error analysis")
 303                for n in range(gapsize, w_max // 2, gapsize):
 304                    _compute_drho(n + gapsize)
 305                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
 306                        # Bias correction hep-lat/0306017 eq. (49) included
 307                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
 308                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
 309                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
 310                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 311                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
 312                        self.e_windowsize[e_name] = n
 313                        break
 314            else:
 315                if self.S[e_name] == 0.0:
 316                    self.e_tauint[e_name] = 0.5
 317                    self.e_dtauint[e_name] = 0.0
 318                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
 319                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
 320                    self.e_windowsize[e_name] = 0
 321                else:
 322                    # Standard automatic windowing procedure
 323                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1))
 324                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
 325                    for n in range(1, w_max):
 326                        if n < w_max // 2 - 2:
 327                            _compute_drho(gapsize * n + gapsize)
 328                        if g_w[n - 1] < 0 or n >= w_max - 1:
 329                            n *= gapsize
 330                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
 331                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
 332                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
 333                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N)
 334                            self.e_windowsize[e_name] = n
 335                            break
 336
 337            self._dvalue += self.e_dvalue[e_name] ** 2
 338            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
 339
 340        for e_name in self.cov_names:
 341            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
 342            self.e_ddvalue[e_name] = 0
 343            self._dvalue += self.e_dvalue[e_name]**2
 344
 345        self._dvalue = np.sqrt(self._dvalue)
 346        if self._dvalue == 0.0:
 347            self.ddvalue = 0.0
 348        else:
 349            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
 350        return
 351
 352    def _calc_gamma(self, deltas, idx, shape, w_max, fft):
 353        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
 354           idx is assumed to be a contiguous range (possibly with a stepsize != 1)
 355
 356        Parameters
 357        ----------
 358        deltas : list
 359            List of fluctuations
 360        idx : list
 361            List or range of configurations on which the deltas are defined.
 362        shape : int
 363            Number of configurations in idx.
 364        w_max : int
 365            Upper bound for the summation window.
 366        fft : bool
 367            determines whether the fft algorithm is used for the computation
 368            of the autocorrelation function.
 369        """
 370        gamma = np.zeros(w_max)
 371        deltas = _expand_deltas(deltas, idx, shape)
 372        new_shape = len(deltas)
 373        if fft:
 374            max_gamma = min(new_shape, w_max)
 375            # The padding for the fft has to be even
 376            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
 377            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
 378        else:
 379            for n in range(w_max):
 380                if new_shape - n >= 0:
 381                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])
 382
 383        return gamma
 384
 385    def details(self, ens_content=True):
 386        """Output detailed properties of the Obs.
 387
 388        Parameters
 389        ----------
 390        ens_content : bool
 391            print details about the ensembles and replica if true.
 392        """
 393        if self.tag is not None:
 394            print("Description:", self.tag)
 395        if not hasattr(self, 'e_dvalue'):
 396            print('Result\t %3.8e' % (self.value))
 397        else:
 398            if self.value == 0.0:
 399                percentage = np.nan
 400            else:
 401                percentage = np.abs(self._dvalue / self.value) * 100
 402            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
 403            if len(self.e_names) > 1:
 404                print(' Ensemble errors:')
 405            e_content = self.e_content
 406            for e_name in self.mc_names:
 407                if isinstance(self.idl[e_content[e_name][0]], range):
 408                    gap = self.idl[e_content[e_name][0]].step
 409                else:
 410                    gap = np.min(np.diff(self.idl[e_content[e_name][0]]))
 411
 412                if len(self.e_names) > 1:
 413                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
 414                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
 415                tau_string += f" in units of {gap} config"
 416                if gap > 1:
 417                    tau_string += "s"
 418                if self.tau_exp[e_name] > 0:
 419                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
 420                else:
 421                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
 422                print(tau_string)
 423            for e_name in self.cov_names:
 424                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
 425        if ens_content is True:
 426            if len(self.e_names) == 1:
 427                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
 428            else:
 429                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
 430            my_string_list = []
 431            for key, value in sorted(self.e_content.items()):
 432                if key not in self.covobs:
 433                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
 434                    if len(value) == 1:
 435                        my_string += f': {self.shape[value[0]]} configurations'
 436                        if isinstance(self.idl[value[0]], range):
 437                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
 438                        else:
 439                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
 440                    else:
 441                        sublist = []
 442                        for v in value:
 443                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
 444                            my_substring += f': {self.shape[v]} configurations'
 445                            if isinstance(self.idl[v], range):
 446                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
 447                            else:
 448                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
 449                            sublist.append(my_substring)
 450
 451                        my_string += '\n' + '\n'.join(sublist)
 452                else:
 453                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
 454                my_string_list.append(my_string)
 455            print('\n'.join(my_string_list))
 456
 457    def reweight(self, weight):
 458        """Reweight the obs with given rewighting factors.
 459
 460        Parameters
 461        ----------
 462        weight : Obs
 463            Reweighting factor. An Observable that has to be defined on a superset of the
 464            configurations in obs[i].idl for all i.
 465        all_configs : bool
 466            if True, the reweighted observables are normalized by the average of
 467            the reweighting factor on all configurations in weight.idl and not
 468            on the configurations in obs[i].idl. Default False.
 469        """
 470        return reweight(weight, [self])[0]
 471
 472    def is_zero_within_error(self, sigma=1):
 473        """Checks whether the observable is zero within 'sigma' standard errors.
 474
 475        Parameters
 476        ----------
 477        sigma : int
 478            Number of standard errors used for the check.
 479
 480        Works only properly when the gamma method was run.
 481        """
 482        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
 483
 484    def is_zero(self, atol=1e-10):
 485        """Checks whether the observable is zero within a given tolerance.
 486
 487        Parameters
 488        ----------
 489        atol : float
 490            Absolute tolerance (for details see numpy documentation).
 491        """
 492        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
 493
 494    def plot_tauint(self, save=None):
 495        """Plot integrated autocorrelation time for each ensemble.
 496
 497        Parameters
 498        ----------
 499        save : str
 500            saves the figure to a file named 'save' if.
 501        """
 502        if not hasattr(self, 'e_dvalue'):
 503            raise Exception('Run the gamma method first.')
 504
 505        for e, e_name in enumerate(self.mc_names):
 506            fig = plt.figure()
 507            plt.xlabel(r'$W$')
 508            plt.ylabel(r'$\tau_\mathrm{int}$')
 509            length = int(len(self.e_n_tauint[e_name]))
 510            if self.tau_exp[e_name] > 0:
 511                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
 512                x_help = np.arange(2 * self.tau_exp[e_name])
 513                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
 514                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
 515                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
 516                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
 517                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
 518                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 519                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
 520            else:
 521                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
 522                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 523
 524            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
 525            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
 526            plt.legend()
 527            plt.xlim(-0.5, xmax)
 528            ylim = plt.ylim()
 529            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
 530            plt.draw()
 531            if save:
 532                fig.savefig(save + "_" + str(e))
 533
 534    def plot_rho(self, save=None):
 535        """Plot normalized autocorrelation function time for each ensemble.
 536
 537        Parameters
 538        ----------
 539        save : str
 540            saves the figure to a file named 'save' if.
 541        """
 542        if not hasattr(self, 'e_dvalue'):
 543            raise Exception('Run the gamma method first.')
 544        for e, e_name in enumerate(self.mc_names):
 545            fig = plt.figure()
 546            plt.xlabel('W')
 547            plt.ylabel('rho')
 548            length = int(len(self.e_drho[e_name]))
 549            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
 550            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
 551            if self.tau_exp[e_name] > 0:
 552                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
 553                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
 554                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
 555                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
 556            else:
 557                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
 558                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
 559            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
 560            plt.xlim(-0.5, xmax)
 561            plt.draw()
 562            if save:
 563                fig.savefig(save + "_" + str(e))
 564
 565    def plot_rep_dist(self):
 566        """Plot replica distribution for each ensemble with more than one replicum."""
 567        if not hasattr(self, 'e_dvalue'):
 568            raise Exception('Run the gamma method first.')
 569        for e, e_name in enumerate(self.mc_names):
 570            if len(self.e_content[e_name]) == 1:
 571                print('No replica distribution for a single replicum (', e_name, ')')
 572                continue
 573            r_length = []
 574            sub_r_mean = 0
 575            for r, r_name in enumerate(self.e_content[e_name]):
 576                r_length.append(len(self.deltas[r_name]))
 577                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
 578            e_N = np.sum(r_length)
 579            sub_r_mean /= e_N
 580            arr = np.zeros(len(self.e_content[e_name]))
 581            for r, r_name in enumerate(self.e_content[e_name]):
 582                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
 583            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
 584            plt.title('Replica distribution' + e_name + ' (mean=0, var=1)')
 585            plt.draw()
 586
 587    def plot_history(self, expand=True):
 588        """Plot derived Monte Carlo history for each ensemble
 589
 590        Parameters
 591        ----------
 592        expand : bool
 593            show expanded history for irregular Monte Carlo chains (default: True).
 594        """
 595        for e, e_name in enumerate(self.mc_names):
 596            plt.figure()
 597            r_length = []
 598            tmp = []
 599            tmp_expanded = []
 600            for r, r_name in enumerate(self.e_content[e_name]):
 601                tmp.append(self.deltas[r_name] + self.r_values[r_name])
 602                if expand:
 603                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name])
 604                    r_length.append(len(tmp_expanded[-1]))
 605                else:
 606                    r_length.append(len(tmp[-1]))
 607            e_N = np.sum(r_length)
 608            x = np.arange(e_N)
 609            y_test = np.concatenate(tmp, axis=0)
 610            if expand:
 611                y = np.concatenate(tmp_expanded, axis=0)
 612            else:
 613                y = y_test
 614            plt.errorbar(x, y, fmt='.', markersize=3)
 615            plt.xlim(-0.5, e_N - 0.5)
 616            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
 617            plt.draw()
 618
 619    def plot_piechart(self, save=None):
 620        """Plot piechart which shows the fractional contribution of each
 621        ensemble to the error and returns a dictionary containing the fractions.
 622
 623        Parameters
 624        ----------
 625        save : str
 626            saves the figure to a file named 'save' if.
 627        """
 628        if not hasattr(self, 'e_dvalue'):
 629            raise Exception('Run the gamma method first.')
 630        if np.isclose(0.0, self._dvalue, atol=1e-15):
 631            raise Exception('Error is 0.0')
 632        labels = self.e_names
 633        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
 634        fig1, ax1 = plt.subplots()
 635        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
 636        ax1.axis('equal')
 637        plt.draw()
 638        if save:
 639            fig1.savefig(save)
 640
 641        return dict(zip(self.e_names, sizes))
 642
 643    def dump(self, filename, datatype="json.gz", description="", **kwargs):
 644        """Dump the Obs to a file 'name' of chosen format.
 645
 646        Parameters
 647        ----------
 648        filename : str
 649            name of the file to be saved.
 650        datatype : str
 651            Format of the exported file. Supported formats include
 652            "json.gz" and "pickle"
 653        description : str
 654            Description for output file, only relevant for json.gz format.
 655        path : str
 656            specifies a custom path for the file (default '.')
 657        """
 658        if 'path' in kwargs:
 659            file_name = kwargs.get('path') + '/' + filename
 660        else:
 661            file_name = filename
 662
 663        if datatype == "json.gz":
 664            from .input.json import dump_to_json
 665            dump_to_json([self], file_name, description=description)
 666        elif datatype == "pickle":
 667            with open(file_name + '.p', 'wb') as fb:
 668                pickle.dump(self, fb)
 669        else:
 670            raise Exception("Unknown datatype " + str(datatype))
 671
 672    def export_jackknife(self):
 673        """Export jackknife samples from the Obs
 674
 675        Returns
 676        -------
 677        numpy.ndarray
 678            Returns a numpy array of length N + 1 where N is the number of samples
 679            for the given ensemble and replicum. The zeroth entry of the array contains
 680            the mean value of the Obs, entries 1 to N contain the N jackknife samples
 681            derived from the Obs. The current implementation only works for observables
 682            defined on exactly one ensemble and replicum. The derived jackknife samples
 683            should agree with samples from a full jackknife analysis up to O(1/N).
 684        """
 685
 686        if len(self.names) != 1:
 687            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")
 688
 689        name = self.names[0]
 690        full_data = self.deltas[name] + self.r_values[name]
 691        n = full_data.size
 692        mean = self.value
 693        tmp_jacks = np.zeros(n + 1)
 694        tmp_jacks[0] = mean
 695        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
 696        return tmp_jacks
 697
 698    def __float__(self):
 699        return float(self.value)
 700
 701    def __repr__(self):
 702        return 'Obs[' + str(self) + ']'
 703
 704    def __str__(self):
 705        return _format_uncertainty(self.value, self._dvalue)
 706
 707    def __hash__(self):
 708        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
 709        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
 710        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
 711        hash_tuple += tuple([o.encode() for o in self.names])
 712        m = hashlib.md5()
 713        [m.update(o) for o in hash_tuple]
 714        return int(m.hexdigest(), 16) & 0xFFFFFFFF
 715
 716    # Overload comparisons
 717    def __lt__(self, other):
 718        return self.value < other
 719
 720    def __le__(self, other):
 721        return self.value <= other
 722
 723    def __gt__(self, other):
 724        return self.value > other
 725
 726    def __ge__(self, other):
 727        return self.value >= other
 728
 729    def __eq__(self, other):
 730        return (self - other).is_zero()
 731
 732    def __ne__(self, other):
 733        return not (self - other).is_zero()
 734
 735    # Overload math operations
 736    def __add__(self, y):
 737        if isinstance(y, Obs):
 738            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
 739        else:
 740            if isinstance(y, np.ndarray):
 741                return np.array([self + o for o in y])
 742            elif y.__class__.__name__ in ['Corr', 'CObs']:
 743                return NotImplemented
 744            else:
 745                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])
 746
 747    def __radd__(self, y):
 748        return self + y
 749
 750    def __mul__(self, y):
 751        if isinstance(y, Obs):
 752            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
 753        else:
 754            if isinstance(y, np.ndarray):
 755                return np.array([self * o for o in y])
 756            elif isinstance(y, complex):
 757                return CObs(self * y.real, self * y.imag)
 758            elif y.__class__.__name__ in ['Corr', 'CObs']:
 759                return NotImplemented
 760            else:
 761                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])
 762
 763    def __rmul__(self, y):
 764        return self * y
 765
 766    def __sub__(self, y):
 767        if isinstance(y, Obs):
 768            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
 769        else:
 770            if isinstance(y, np.ndarray):
 771                return np.array([self - o for o in y])
 772            elif y.__class__.__name__ in ['Corr', 'CObs']:
 773                return NotImplemented
 774            else:
 775                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])
 776
 777    def __rsub__(self, y):
 778        return -1 * (self - y)
 779
 780    def __pos__(self):
 781        return self
 782
 783    def __neg__(self):
 784        return -1 * self
 785
 786    def __truediv__(self, y):
 787        if isinstance(y, Obs):
 788            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
 789        else:
 790            if isinstance(y, np.ndarray):
 791                return np.array([self / o for o in y])
 792            elif y.__class__.__name__ in ['Corr', 'CObs']:
 793                return NotImplemented
 794            else:
 795                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])
 796
 797    def __rtruediv__(self, y):
 798        if isinstance(y, Obs):
 799            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
 800        else:
 801            if isinstance(y, np.ndarray):
 802                return np.array([o / self for o in y])
 803            elif y.__class__.__name__ in ['Corr', 'CObs']:
 804                return NotImplemented
 805            else:
 806                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])
 807
 808    def __pow__(self, y):
 809        if isinstance(y, Obs):
 810            return derived_observable(lambda x: x[0] ** x[1], [self, y])
 811        else:
 812            return derived_observable(lambda x: x[0] ** y, [self])
 813
 814    def __rpow__(self, y):
 815        if isinstance(y, Obs):
 816            return derived_observable(lambda x: x[0] ** x[1], [y, self])
 817        else:
 818            return derived_observable(lambda x: y ** x[0], [self])
 819
 820    def __abs__(self):
 821        return derived_observable(lambda x: anp.abs(x[0]), [self])
 822
 823    # Overload numpy functions
 824    def sqrt(self):
 825        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])
 826
 827    def log(self):
 828        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])
 829
 830    def exp(self):
 831        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])
 832
 833    def sin(self):
 834        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])
 835
 836    def cos(self):
 837        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])
 838
 839    def tan(self):
 840        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])
 841
 842    def arcsin(self):
 843        return derived_observable(lambda x: anp.arcsin(x[0]), [self])
 844
 845    def arccos(self):
 846        return derived_observable(lambda x: anp.arccos(x[0]), [self])
 847
 848    def arctan(self):
 849        return derived_observable(lambda x: anp.arctan(x[0]), [self])
 850
 851    def sinh(self):
 852        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])
 853
 854    def cosh(self):
 855        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])
 856
 857    def tanh(self):
 858        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])
 859
 860    def arcsinh(self):
 861        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])
 862
 863    def arccosh(self):
 864        return derived_observable(lambda x: anp.arccosh(x[0]), [self])
 865
 866    def arctanh(self):
 867        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
 868
 869
 870class CObs:
 871    """Class for a complex valued observable."""
 872    __slots__ = ['_real', '_imag', 'tag']
 873
 874    def __init__(self, real, imag=0.0):
 875        self._real = real
 876        self._imag = imag
 877        self.tag = None
 878
 879    @property
 880    def real(self):
 881        return self._real
 882
 883    @property
 884    def imag(self):
 885        return self._imag
 886
 887    def gamma_method(self, **kwargs):
 888        """Executes the gamma_method for the real and the imaginary part."""
 889        if isinstance(self.real, Obs):
 890            self.real.gamma_method(**kwargs)
 891        if isinstance(self.imag, Obs):
 892            self.imag.gamma_method(**kwargs)
 893
 894    def is_zero(self):
 895        """Checks whether both real and imaginary part are zero within machine precision."""
 896        return self.real == 0.0 and self.imag == 0.0
 897
 898    def conjugate(self):
 899        return CObs(self.real, -self.imag)
 900
 901    def __add__(self, other):
 902        if isinstance(other, np.ndarray):
 903            return other + self
 904        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 905            return CObs(self.real + other.real,
 906                        self.imag + other.imag)
 907        else:
 908            return CObs(self.real + other, self.imag)
 909
 910    def __radd__(self, y):
 911        return self + y
 912
 913    def __sub__(self, other):
 914        if isinstance(other, np.ndarray):
 915            return -1 * (other - self)
 916        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 917            return CObs(self.real - other.real, self.imag - other.imag)
 918        else:
 919            return CObs(self.real - other, self.imag)
 920
 921    def __rsub__(self, other):
 922        return -1 * (self - other)
 923
 924    def __mul__(self, other):
 925        if isinstance(other, np.ndarray):
 926            return other * self
 927        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 928            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
 929                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
 930                                               [self.real, other.real, self.imag, other.imag],
 931                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
 932                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
 933                                               [self.real, other.real, self.imag, other.imag],
 934                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
 935            elif getattr(other, 'imag', 0) != 0:
 936                return CObs(self.real * other.real - self.imag * other.imag,
 937                            self.imag * other.real + self.real * other.imag)
 938            else:
 939                return CObs(self.real * other.real, self.imag * other.real)
 940        else:
 941            return CObs(self.real * other, self.imag * other)
 942
 943    def __rmul__(self, other):
 944        return self * other
 945
 946    def __truediv__(self, other):
 947        if isinstance(other, np.ndarray):
 948            return 1 / (other / self)
 949        elif hasattr(other, 'real') and hasattr(other, 'imag'):
 950            r = other.real ** 2 + other.imag ** 2
 951            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
 952        else:
 953            return CObs(self.real / other, self.imag / other)
 954
 955    def __rtruediv__(self, other):
 956        r = self.real ** 2 + self.imag ** 2
 957        if hasattr(other, 'real') and hasattr(other, 'imag'):
 958            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
 959        else:
 960            return CObs(self.real * other / r, -self.imag * other / r)
 961
 962    def __abs__(self):
 963        return np.sqrt(self.real**2 + self.imag**2)
 964
 965    def __pos__(self):
 966        return self
 967
 968    def __neg__(self):
 969        return -1 * self
 970
 971    def __eq__(self, other):
 972        return self.real == other.real and self.imag == other.imag
 973
 974    def __str__(self):
 975        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
 976
 977    def __repr__(self):
 978        return 'CObs[' + str(self) + ']'
 979
 980
 981def _format_uncertainty(value, dvalue):
 982    """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)"""
 983    if dvalue == 0.0:
 984        return str(value)
 985    fexp = np.floor(np.log10(dvalue))
 986    if fexp < 0.0:
 987        return '{:{form}}({:2.0f})'.format(value, dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f')
 988    elif fexp == 0.0:
 989        return '{:.1f}({:1.1f})'.format(value, dvalue)
 990    else:
 991        return '{:.0f}({:2.0f})'.format(value, dvalue)
 992
 993
 994def _expand_deltas(deltas, idx, shape):
 995    """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0.
 996       If idx is of type range, the deltas are not changed
 997
 998    Parameters
 999    ----------
1000    deltas : list
1001        List of fluctuations
1002    idx : list
1003        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
1004    shape : int
1005        Number of configs in idx.
1006    """
1007    if isinstance(idx, range):
1008        return deltas
1009    else:
1010        ret = np.zeros(idx[-1] - idx[0] + 1)
1011        for i in range(shape):
1012            ret[idx[i] - idx[0]] = deltas[i]
1013        return ret
1014
1015
1016def _merge_idx(idl):
1017    """Returns the union of all lists in idl as sorted list
1018
1019    Parameters
1020    ----------
1021    idl : list
1022        List of lists or ranges.
1023    """
1024
1025    # Use groupby to efficiently check whether all elements of idl are identical
1026    try:
1027        g = groupby(idl)
1028        if next(g, True) and not next(g, False):
1029            return idl[0]
1030    except Exception:
1031        pass
1032
1033    if np.all([type(idx) is range for idx in idl]):
1034        if len(set([idx[0] for idx in idl])) == 1:
1035            idstart = min([idx.start for idx in idl])
1036            idstop = max([idx.stop for idx in idl])
1037            idstep = min([idx.step for idx in idl])
1038            return range(idstart, idstop, idstep)
1039
1040    return sorted(set().union(*idl))
1041
1042
1043def _intersection_idx(idl):
1044    """Returns the intersection of all lists in idl as sorted list
1045
1046    Parameters
1047    ----------
1048    idl : list
1049        List of lists or ranges.
1050    """
1051
1052    def _lcm(*args):
1053        """Returns the lowest common multiple of args.
1054
1055        From python 3.9 onwards the math library contains an lcm function."""
1056        return reduce(lambda a, b: a * b // gcd(a, b), args)
1057
1058    # Use groupby to efficiently check whether all elements of idl are identical
1059    try:
1060        g = groupby(idl)
1061        if next(g, True) and not next(g, False):
1062            return idl[0]
1063    except Exception:
1064        pass
1065
1066    if np.all([type(idx) is range for idx in idl]):
1067        if len(set([idx[0] for idx in idl])) == 1:
1068            idstart = max([idx.start for idx in idl])
1069            idstop = min([idx.stop for idx in idl])
1070            idstep = _lcm(*[idx.step for idx in idl])
1071            return range(idstart, idstop, idstep)
1072
1073    return sorted(set.intersection(*[set(o) for o in idl]))
1074
1075
1076def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
1077    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
1078       New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
1079       common divisor of the step sizes is used as new step size.
1080
1081    Parameters
1082    ----------
1083    deltas : list
1084        List of fluctuations
1085    idx : list
1086        List or range of configs on which the deltas are defined.
1087        Has to be a subset of new_idx and has to be sorted in ascending order.
1088    shape : list
1089        Number of configs in idx.
1090    new_idx : list
1091        List of configs that defines the new range, has to be sorted in ascending order.
1092    """
1093
1094    if type(idx) is range and type(new_idx) is range:
1095        if idx == new_idx:
1096            return deltas
1097    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
1098    for i in range(shape):
1099        ret[idx[i] - new_idx[0]] = deltas[i]
1100    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
1101
1102
1103def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
1104    """Filter out all configurations with vanishing fluctuation such that they do not
1105       contribute to the error estimate anymore. Returns the new deltas and
1106       idx according to the filtering.
1107       A fluctuation is considered to be vanishing, if it is smaller than eps times
1108       the mean of the absolute values of all deltas in one list.
1109
1110    Parameters
1111    ----------
1112    deltas : list
1113        List of fluctuations
1114    idx : list
1115        List or ranges of configs on which the deltas are defined.
1116    eps : float
1117        Prefactor that enters the filter criterion.
1118    """
1119    new_deltas = []
1120    new_idx = []
1121    maxd = np.mean(np.fabs(deltas))
1122    for i in range(len(deltas)):
1123        if abs(deltas[i]) > eps * maxd:
1124            new_deltas.append(deltas[i])
1125            new_idx.append(idx[i])
1126    if new_idx:
1127        return np.array(new_deltas), new_idx
1128    else:
1129        return deltas, idx
1130
1131
1132def derived_observable(func, data, array_mode=False, **kwargs):
1133    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1134
1135    Parameters
1136    ----------
1137    func : object
1138        arbitrary function of the form func(data, **kwargs). For the
1139        automatic differentiation to work, all numpy functions have to have
1140        the autograd wrapper (use 'import autograd.numpy as anp').
1141    data : list
1142        list of Obs, e.g. [obs1, obs2, obs3].
1143    num_grad : bool
1144        if True, numerical derivatives are used instead of autograd
1145        (default False). To control the numerical differentiation the
1146        kwargs of numdifftools.step_generators.MaxStepGenerator
1147        can be used.
1148    man_grad : list
1149        manually supply a list or an array which contains the jacobian
1150        of func. Use cautiously, supplying the wrong derivative will
1151        not be intercepted.
1152
1153    Notes
1154    -----
1155    For simple mathematical operations it can be practical to use anonymous
1156    functions. For the ratio of two observables one can e.g. use
1157
1158    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1159    """
1160
1161    data = np.asarray(data)
1162    raveled_data = data.ravel()
1163
1164    # Workaround for matrix operations containing non Obs data
1165    if not all(isinstance(x, Obs) for x in raveled_data):
1166        for i in range(len(raveled_data)):
1167            if isinstance(raveled_data[i], (int, float)):
1168                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1169
1170    allcov = {}
1171    for o in raveled_data:
1172        for name in o.cov_names:
1173            if name in allcov:
1174                if not np.allclose(allcov[name], o.covobs[name].cov):
1175                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1176            else:
1177                allcov[name] = o.covobs[name].cov
1178
1179    n_obs = len(raveled_data)
1180    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1181    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1182    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1183
1184    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
1185    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1186
1187    if data.ndim == 1:
1188        values = np.array([o.value for o in data])
1189    else:
1190        values = np.vectorize(lambda x: x.value)(data)
1191
1192    new_values = func(values, **kwargs)
1193
1194    multi = int(isinstance(new_values, np.ndarray))
1195
1196    new_r_values = {}
1197    new_idl_d = {}
1198    for name in new_sample_names:
1199        idl = []
1200        tmp_values = np.zeros(n_obs)
1201        for i, item in enumerate(raveled_data):
1202            tmp_values[i] = item.r_values.get(name, item.value)
1203            tmp_idl = item.idl.get(name)
1204            if tmp_idl is not None:
1205                idl.append(tmp_idl)
1206        if multi > 0:
1207            tmp_values = np.array(tmp_values).reshape(data.shape)
1208        new_r_values[name] = func(tmp_values, **kwargs)
1209        new_idl_d[name] = _merge_idx(idl)
1210        if not is_merged[name]:
1211            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))
1212
1213    if 'man_grad' in kwargs:
1214        deriv = np.asarray(kwargs.get('man_grad'))
1215        if new_values.shape + data.shape != deriv.shape:
1216            raise Exception('Manual derivative does not have correct shape.')
1217    elif kwargs.get('num_grad') is True:
1218        if multi > 0:
1219            raise Exception('Multi mode currently not supported for numerical derivative')
1220        options = {
1221            'base_step': 0.1,
1222            'step_ratio': 2.5}
1223        for key in options.keys():
1224            kwarg = kwargs.get(key)
1225            if kwarg is not None:
1226                options[key] = kwarg
1227        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1228        if tmp_df.size == 1:
1229            deriv = np.array([tmp_df.real])
1230        else:
1231            deriv = tmp_df.real
1232    else:
1233        deriv = jacobian(func)(values, **kwargs)
1234
1235    final_result = np.zeros(new_values.shape, dtype=object)
1236
1237    if array_mode is True:
1238
1239        class _Zero_grad():
1240            def __init__(self, N):
1241                self.grad = np.zeros((N, 1))
1242
1243        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1244        d_extracted = {}
1245        g_extracted = {}
1246        for name in new_sample_names:
1247            d_extracted[name] = []
1248            ens_length = len(new_idl_d[name])
1249            for i_dat, dat in enumerate(data):
1250                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1251        for name in new_cov_names:
1252            g_extracted[name] = []
1253            zero_grad = _Zero_grad(new_covobs_lengths[name])
1254            for i_dat, dat in enumerate(data):
1255                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1256
1257    for i_val, new_val in np.ndenumerate(new_values):
1258        new_deltas = {}
1259        new_grad = {}
1260        if array_mode is True:
1261            for name in new_sample_names:
1262                ens_length = d_extracted[name][0].shape[-1]
1263                new_deltas[name] = np.zeros(ens_length)
1264                for i_dat, dat in enumerate(d_extracted[name]):
1265                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1266            for name in new_cov_names:
1267                new_grad[name] = 0
1268                for i_dat, dat in enumerate(g_extracted[name]):
1269                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1270        else:
1271            for j_obs, obs in np.ndenumerate(data):
1272                for name in obs.names:
1273                    if name in obs.cov_names:
1274                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1275                    else:
1276                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1277
1278        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1279
1280        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1281            raise Exception('The same name has been used for deltas and covobs!')
1282        new_samples = []
1283        new_means = []
1284        new_idl = []
1285        new_names_obs = []
1286        for name in new_names:
1287            if name not in new_covobs:
1288                if is_merged[name]:
1289                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
1290                else:
1291                    filtered_deltas = new_deltas[name]
1292                    filtered_idl_d = new_idl_d[name]
1293
1294                new_samples.append(filtered_deltas)
1295                new_idl.append(filtered_idl_d)
1296                new_means.append(new_r_values[name][i_val])
1297                new_names_obs.append(name)
1298        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1299        for name in new_covobs:
1300            final_result[i_val].names.append(name)
1301        final_result[i_val]._covobs = new_covobs
1302        final_result[i_val]._value = new_val
1303        final_result[i_val].is_merged = is_merged
1304        final_result[i_val].reweighted = reweighted
1305
1306    if multi == 0:
1307        final_result = final_result.item()
1308
1309    return final_result
1310
1311
1312def _reduce_deltas(deltas, idx_old, idx_new):
1313    """Extract deltas defined on idx_old on all configs of idx_new.
1314
1315    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
1316    are ordered in an ascending order.
1317
1318    Parameters
1319    ----------
1320    deltas : list
1321        List of fluctuations
1322    idx_old : list
1323        List or range of configs on which the deltas are defined
1324    idx_new : list
1325        List of configs for which we want to extract the deltas.
1326        Has to be a subset of idx_old.
1327    """
1328    if not len(deltas) == len(idx_old):
1329        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
1330    if type(idx_old) is range and type(idx_new) is range:
1331        if idx_old == idx_new:
1332            return deltas
1333    # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical
1334    try:
1335        g = groupby([idx_old, idx_new])
1336        if next(g, True) and not next(g, False):
1337            return deltas
1338    except Exception:
1339        pass
1340    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
1341    if len(indices) < len(idx_new):
1342        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
1343    return np.array(deltas)[indices]
1344
1345
1346def reweight(weight, obs, **kwargs):
1347    """Reweight a list of observables.
1348
1349    Parameters
1350    ----------
1351    weight : Obs
1352        Reweighting factor. An Observable that has to be defined on a superset of the
1353        configurations in obs[i].idl for all i.
1354    obs : list
1355        list of Obs, e.g. [obs1, obs2, obs3].
1356    all_configs : bool
1357        if True, the reweighted observables are normalized by the average of
1358        the reweighting factor on all configurations in weight.idl and not
1359        on the configurations in obs[i].idl. Default False.
1360    """
1361    result = []
1362    for i in range(len(obs)):
1363        if len(obs[i].cov_names):
1364            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1365        if not set(obs[i].names).issubset(weight.names):
1366            raise Exception('Error: Ensembles do not fit')
1367        for name in obs[i].names:
1368            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1369                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1370        new_samples = []
1371        w_deltas = {}
1372        for name in sorted(obs[i].names):
1373            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1374            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1375        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1376
1377        if kwargs.get('all_configs'):
1378            new_weight = weight
1379        else:
1380            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1381
1382        result.append(tmp_obs / new_weight)
1383        result[-1].reweighted = True
1384        result[-1].is_merged = obs[i].is_merged
1385
1386    return result
1387
1388
1389def correlate(obs_a, obs_b):
1390    """Correlate two observables.
1391
1392    Parameters
1393    ----------
1394    obs_a : Obs
1395        First observable
1396    obs_b : Obs
1397        Second observable
1398
1399    Notes
1400    -----
1401    Keep in mind to only correlate primary observables which have not been reweighted
1402    yet. The reweighting has to be applied after correlating the observables.
1403    Currently only works if ensembles are identical (this is not strictly necessary).
1404    """
1405
1406    if sorted(obs_a.names) != sorted(obs_b.names):
1407        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1408    if len(obs_a.cov_names) or len(obs_b.cov_names):
1409        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1410    for name in obs_a.names:
1411        if obs_a.shape[name] != obs_b.shape[name]:
1412            raise Exception('Shapes of ensemble', name, 'do not fit')
1413        if obs_a.idl[name] != obs_b.idl[name]:
1414            raise Exception('idl of ensemble', name, 'do not fit')
1415
1416    if obs_a.reweighted is True:
1417        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1418    if obs_b.reweighted is True:
1419        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1420
1421    new_samples = []
1422    new_idl = []
1423    for name in sorted(obs_a.names):
1424        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1425        new_idl.append(obs_a.idl[name])
1426
1427    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1428    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
1429    o.reweighted = obs_a.reweighted or obs_b.reweighted
1430    return o
1431
1432
1433def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1434    r'''Calculates the error covariance matrix of a set of observables.
1435
1436    WARNING: This function should be used with care, especially for observables with support on multiple
1437             ensembles with differing autocorrelations. See the notes below for details.
1438
1439    The gamma method has to be applied first to all observables.
1440
1441    Parameters
1442    ----------
1443    obs : list or numpy.ndarray
1444        List or one dimensional array of Obs
1445    visualize : bool
1446        If True plots the corresponding normalized correlation matrix (default False).
1447    correlation : bool
1448        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1449    smooth : None or int
1450        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1451        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1452        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1453        small ones.
1454
1455    Notes
1456    -----
1457    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1458    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1459    in the absence of autocorrelation.
1460    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1461    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1462    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1463    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1464    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1465    '''
1466
1467    length = len(obs)
1468
1469    max_samples = np.max([o.N for o in obs])
1470    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1471        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1472
1473    cov = np.zeros((length, length))
1474    for i in range(length):
1475        for j in range(i, length):
1476            cov[i, j] = _covariance_element(obs[i], obs[j])
1477    cov = cov + cov.T - np.diag(np.diag(cov))
1478
1479    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1480
1481    if isinstance(smooth, int):
1482        corr = _smooth_eigenvalues(corr, smooth)
1483
1484    if visualize:
1485        plt.matshow(corr, vmin=-1, vmax=1)
1486        plt.set_cmap('RdBu')
1487        plt.colorbar()
1488        plt.draw()
1489
1490    if correlation is True:
1491        return corr
1492
1493    errors = [o.dvalue for o in obs]
1494    cov = np.diag(errors) @ corr @ np.diag(errors)
1495
1496    eigenvalues = np.linalg.eigh(cov)[0]
1497    if not np.all(eigenvalues >= 0):
1498        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1499
1500    return cov
1501
1502
1503def _smooth_eigenvalues(corr, E):
1504    """Eigenvalue smoothing as described in hep-lat/9412087
1505
1506    corr : np.ndarray
1507        correlation matrix
1508    E : integer
1509        Number of eigenvalues to be left substantially unchanged
1510    """
1511    if not (2 < E < corr.shape[0] - 1):
1512        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
1513    vals, vec = np.linalg.eigh(corr)
1514    lambda_min = np.mean(vals[:-E])
1515    vals[vals < lambda_min] = lambda_min
1516    vals /= np.mean(vals)
1517    return vec @ np.diag(vals) @ vec.T
1518
1519
1520def _covariance_element(obs1, obs2):
1521    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
1522
1523    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
1524        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
1525        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
1526        return np.sum(deltas1 * deltas2)
1527
1528    if set(obs1.names).isdisjoint(set(obs2.names)):
1529        return 0.0
1530
1531    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
1532        raise Exception('The gamma method has to be applied to both Obs first.')
1533
1534    dvalue = 0.0
1535
1536    for e_name in obs1.mc_names:
1537
1538        if e_name not in obs2.mc_names:
1539            continue
1540
1541        idl_d = {}
1542        for r_name in obs1.e_content[e_name]:
1543            if r_name not in obs2.e_content[e_name]:
1544                continue
1545            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
1546
1547        gamma = 0.0
1548
1549        for r_name in obs1.e_content[e_name]:
1550            if r_name not in obs2.e_content[e_name]:
1551                continue
1552            if len(idl_d[r_name]) == 0:
1553                continue
1554            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
1555
1556        if gamma == 0.0:
1557            continue
1558
1559        gamma_div = 0.0
1560        for r_name in obs1.e_content[e_name]:
1561            if r_name not in obs2.e_content[e_name]:
1562                continue
1563            if len(idl_d[r_name]) == 0:
1564                continue
1565            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
1566        gamma /= gamma_div
1567
1568        dvalue += gamma
1569
1570    for e_name in obs1.cov_names:
1571
1572        if e_name not in obs2.cov_names:
1573            continue
1574
1575        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))
1576
1577    return dvalue
1578
1579
1580def import_jackknife(jacks, name, idl=None):
1581    """Imports jackknife samples and returns an Obs
1582
1583    Parameters
1584    ----------
1585    jacks : numpy.ndarray
1586        numpy array containing the mean value as zeroth entry and
1587        the N jackknife samples as first to Nth entry.
1588    name : str
1589        name of the ensemble the samples are defined on.
1590    """
1591    length = len(jacks) - 1
1592    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1593    samples = jacks[1:] @ prj
1594    mean = np.mean(samples)
1595    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1596    new_obs._value = jacks[0]
1597    return new_obs
1598
1599
1600def merge_obs(list_of_obs):
1601    """Combine all observables in list_of_obs into one new observable
1602
1603    Parameters
1604    ----------
1605    list_of_obs : list
1606        list of the Obs object to be combined
1607
1608    Notes
1609    -----
1610    It is not possible to combine obs which are based on the same replicum
1611    """
1612    replist = [item for obs in list_of_obs for item in obs.names]
1613    if (len(replist) == len(set(replist))) is False:
1614        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1615    if any([len(o.cov_names) for o in list_of_obs]):
1616        raise Exception('Not possible to merge data that contains covobs!')
1617    new_dict = {}
1618    idl_dict = {}
1619    for o in list_of_obs:
1620        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1621                        for key in set(o.deltas) | set(o.r_values)})
1622        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1623
1624    names = sorted(new_dict.keys())
1625    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1626    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
1627    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1628    return o
1629
1630
1631def cov_Obs(means, cov, name, grad=None):
1632    """Create an Obs based on mean(s) and a covariance matrix
1633
1634    Parameters
1635    ----------
1636    mean : list of floats or float
1637        N mean value(s) of the new Obs
1638    cov : list or array
1639        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1640    name : str
1641        identifier for the covariance matrix
1642    grad : list or array
1643        Gradient of the Covobs wrt. the means belonging to cov.
1644    """
1645
1646    def covobs_to_obs(co):
1647        """Make an Obs out of a Covobs
1648
1649        Parameters
1650        ----------
1651        co : Covobs
1652            Covobs to be embedded into the Obs
1653        """
1654        o = Obs([], [], means=[])
1655        o._value = co.value
1656        o.names.append(co.name)
1657        o._covobs[co.name] = co
1658        o._dvalue = np.sqrt(co.errsq())
1659        return o
1660
1661    ol = []
1662    if isinstance(means, (float, int)):
1663        means = [means]
1664
1665    for i in range(len(means)):
1666        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1667    if ol[0].covobs[name].N != len(means):
1668        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1669    if len(ol) == 1:
1670        return ol[0]
1671    return ol

class CObs: View Source

871class CObs:
872    """Class for a complex valued observable."""
873    __slots__ = ['_real', '_imag', 'tag']
874
875    def __init__(self, real, imag=0.0):
876        self._real = real
877        self._imag = imag
878        self.tag = None
879
880    @property
881    def real(self):
882        return self._real
883
884    @property
885    def imag(self):
886        return self._imag
887
888    def gamma_method(self, **kwargs):
889        """Executes the gamma_method for the real and the imaginary part."""
890        if isinstance(self.real, Obs):
891            self.real.gamma_method(**kwargs)
892        if isinstance(self.imag, Obs):
893            self.imag.gamma_method(**kwargs)
894
895    def is_zero(self):
896        """Checks whether both real and imaginary part are zero within machine precision."""
897        return self.real == 0.0 and self.imag == 0.0
898
899    def conjugate(self):
900        return CObs(self.real, -self.imag)
901
902    def __add__(self, other):
903        if isinstance(other, np.ndarray):
904            return other + self
905        elif hasattr(other, 'real') and hasattr(other, 'imag'):
906            return CObs(self.real + other.real,
907                        self.imag + other.imag)
908        else:
909            return CObs(self.real + other, self.imag)
910
911    def __radd__(self, y):
912        return self + y
913
914    def __sub__(self, other):
915        if isinstance(other, np.ndarray):
916            return -1 * (other - self)
917        elif hasattr(other, 'real') and hasattr(other, 'imag'):
918            return CObs(self.real - other.real, self.imag - other.imag)
919        else:
920            return CObs(self.real - other, self.imag)
921
922    def __rsub__(self, other):
923        return -1 * (self - other)
924
925    def __mul__(self, other):
926        if isinstance(other, np.ndarray):
927            return other * self
928        elif hasattr(other, 'real') and hasattr(other, 'imag'):
929            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
930                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
931                                               [self.real, other.real, self.imag, other.imag],
932                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
933                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
934                                               [self.real, other.real, self.imag, other.imag],
935                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
936            elif getattr(other, 'imag', 0) != 0:
937                return CObs(self.real * other.real - self.imag * other.imag,
938                            self.imag * other.real + self.real * other.imag)
939            else:
940                return CObs(self.real * other.real, self.imag * other.real)
941        else:
942            return CObs(self.real * other, self.imag * other)
943
944    def __rmul__(self, other):
945        return self * other
946
947    def __truediv__(self, other):
948        if isinstance(other, np.ndarray):
949            return 1 / (other / self)
950        elif hasattr(other, 'real') and hasattr(other, 'imag'):
951            r = other.real ** 2 + other.imag ** 2
952            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
953        else:
954            return CObs(self.real / other, self.imag / other)
955
956    def __rtruediv__(self, other):
957        r = self.real ** 2 + self.imag ** 2
958        if hasattr(other, 'real') and hasattr(other, 'imag'):
959            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
960        else:
961            return CObs(self.real * other / r, -self.imag * other / r)
962
963    def __abs__(self):
964        return np.sqrt(self.real**2 + self.imag**2)
965
966    def __pos__(self):
967        return self
968
969    def __neg__(self):
970        return -1 * self
971
972    def __eq__(self, other):
973        return self.real == other.real and self.imag == other.imag
974
975    def __str__(self):
976        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'
977
978    def __repr__(self):
979        return 'CObs[' + str(self) + ']'

Class for a complex valued observable.

CObs(real, imag=0.0) View Source

875    def __init__(self, real, imag=0.0):
876        self._real = real
877        self._imag = imag
878        self.tag = None

def gamma_method(self, **kwargs): View Source

888    def gamma_method(self, **kwargs):
889        """Executes the gamma_method for the real and the imaginary part."""
890        if isinstance(self.real, Obs):
891            self.real.gamma_method(**kwargs)
892        if isinstance(self.imag, Obs):
893            self.imag.gamma_method(**kwargs)

Executes the gamma_method for the real and the imaginary part.

def is_zero(self): View Source

895    def is_zero(self):
896        """Checks whether both real and imaginary part are zero within machine precision."""
897        return self.real == 0.0 and self.imag == 0.0

Checks whether both real and imaginary part are zero within machine precision.

def conjugate(self): View Source

899    def conjugate(self):
900        return CObs(self.real, -self.imag)

def derived_observable(func, data, array_mode=False, **kwargs): View Source

1133def derived_observable(func, data, array_mode=False, **kwargs):
1134    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
1135
1136    Parameters
1137    ----------
1138    func : object
1139        arbitrary function of the form func(data, **kwargs). For the
1140        automatic differentiation to work, all numpy functions have to have
1141        the autograd wrapper (use 'import autograd.numpy as anp').
1142    data : list
1143        list of Obs, e.g. [obs1, obs2, obs3].
1144    num_grad : bool
1145        if True, numerical derivatives are used instead of autograd
1146        (default False). To control the numerical differentiation the
1147        kwargs of numdifftools.step_generators.MaxStepGenerator
1148        can be used.
1149    man_grad : list
1150        manually supply a list or an array which contains the jacobian
1151        of func. Use cautiously, supplying the wrong derivative will
1152        not be intercepted.
1153
1154    Notes
1155    -----
1156    For simple mathematical operations it can be practical to use anonymous
1157    functions. For the ratio of two observables one can e.g. use
1158
1159    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1160    """
1161
1162    data = np.asarray(data)
1163    raveled_data = data.ravel()
1164
1165    # Workaround for matrix operations containing non Obs data
1166    if not all(isinstance(x, Obs) for x in raveled_data):
1167        for i in range(len(raveled_data)):
1168            if isinstance(raveled_data[i], (int, float)):
1169                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")
1170
1171    allcov = {}
1172    for o in raveled_data:
1173        for name in o.cov_names:
1174            if name in allcov:
1175                if not np.allclose(allcov[name], o.covobs[name].cov):
1176                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
1177            else:
1178                allcov[name] = o.covobs[name].cov
1179
1180    n_obs = len(raveled_data)
1181    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
1182    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
1183    new_sample_names = sorted(set(new_names) - set(new_cov_names))
1184
1185    is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names}
1186    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0
1187
1188    if data.ndim == 1:
1189        values = np.array([o.value for o in data])
1190    else:
1191        values = np.vectorize(lambda x: x.value)(data)
1192
1193    new_values = func(values, **kwargs)
1194
1195    multi = int(isinstance(new_values, np.ndarray))
1196
1197    new_r_values = {}
1198    new_idl_d = {}
1199    for name in new_sample_names:
1200        idl = []
1201        tmp_values = np.zeros(n_obs)
1202        for i, item in enumerate(raveled_data):
1203            tmp_values[i] = item.r_values.get(name, item.value)
1204            tmp_idl = item.idl.get(name)
1205            if tmp_idl is not None:
1206                idl.append(tmp_idl)
1207        if multi > 0:
1208            tmp_values = np.array(tmp_values).reshape(data.shape)
1209        new_r_values[name] = func(tmp_values, **kwargs)
1210        new_idl_d[name] = _merge_idx(idl)
1211        if not is_merged[name]:
1212            is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]])))
1213
1214    if 'man_grad' in kwargs:
1215        deriv = np.asarray(kwargs.get('man_grad'))
1216        if new_values.shape + data.shape != deriv.shape:
1217            raise Exception('Manual derivative does not have correct shape.')
1218    elif kwargs.get('num_grad') is True:
1219        if multi > 0:
1220            raise Exception('Multi mode currently not supported for numerical derivative')
1221        options = {
1222            'base_step': 0.1,
1223            'step_ratio': 2.5}
1224        for key in options.keys():
1225            kwarg = kwargs.get(key)
1226            if kwarg is not None:
1227                options[key] = kwarg
1228        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
1229        if tmp_df.size == 1:
1230            deriv = np.array([tmp_df.real])
1231        else:
1232            deriv = tmp_df.real
1233    else:
1234        deriv = jacobian(func)(values, **kwargs)
1235
1236    final_result = np.zeros(new_values.shape, dtype=object)
1237
1238    if array_mode is True:
1239
1240        class _Zero_grad():
1241            def __init__(self, N):
1242                self.grad = np.zeros((N, 1))
1243
1244        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
1245        d_extracted = {}
1246        g_extracted = {}
1247        for name in new_sample_names:
1248            d_extracted[name] = []
1249            ens_length = len(new_idl_d[name])
1250            for i_dat, dat in enumerate(data):
1251                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
1252        for name in new_cov_names:
1253            g_extracted[name] = []
1254            zero_grad = _Zero_grad(new_covobs_lengths[name])
1255            for i_dat, dat in enumerate(data):
1256                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))
1257
1258    for i_val, new_val in np.ndenumerate(new_values):
1259        new_deltas = {}
1260        new_grad = {}
1261        if array_mode is True:
1262            for name in new_sample_names:
1263                ens_length = d_extracted[name][0].shape[-1]
1264                new_deltas[name] = np.zeros(ens_length)
1265                for i_dat, dat in enumerate(d_extracted[name]):
1266                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1267            for name in new_cov_names:
1268                new_grad[name] = 0
1269                for i_dat, dat in enumerate(g_extracted[name]):
1270                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
1271        else:
1272            for j_obs, obs in np.ndenumerate(data):
1273                for name in obs.names:
1274                    if name in obs.cov_names:
1275                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
1276                    else:
1277                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
1278
1279        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}
1280
1281        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
1282            raise Exception('The same name has been used for deltas and covobs!')
1283        new_samples = []
1284        new_means = []
1285        new_idl = []
1286        new_names_obs = []
1287        for name in new_names:
1288            if name not in new_covobs:
1289                if is_merged[name]:
1290                    filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
1291                else:
1292                    filtered_deltas = new_deltas[name]
1293                    filtered_idl_d = new_idl_d[name]
1294
1295                new_samples.append(filtered_deltas)
1296                new_idl.append(filtered_idl_d)
1297                new_means.append(new_r_values[name][i_val])
1298                new_names_obs.append(name)
1299        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
1300        for name in new_covobs:
1301            final_result[i_val].names.append(name)
1302        final_result[i_val]._covobs = new_covobs
1303        final_result[i_val]._value = new_val
1304        final_result[i_val].is_merged = is_merged
1305        final_result[i_val].reweighted = reweighted
1306
1307    if multi == 0:
1308        final_result = final_result.item()
1309
1310    return final_result

Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

Parameters

func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
data (list): list of Obs, e.g. [obs1, obs2, obs3].
num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.

Notes

For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use

new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])

def reweight(weight, obs, **kwargs): View Source

1347def reweight(weight, obs, **kwargs):
1348    """Reweight a list of observables.
1349
1350    Parameters
1351    ----------
1352    weight : Obs
1353        Reweighting factor. An Observable that has to be defined on a superset of the
1354        configurations in obs[i].idl for all i.
1355    obs : list
1356        list of Obs, e.g. [obs1, obs2, obs3].
1357    all_configs : bool
1358        if True, the reweighted observables are normalized by the average of
1359        the reweighting factor on all configurations in weight.idl and not
1360        on the configurations in obs[i].idl. Default False.
1361    """
1362    result = []
1363    for i in range(len(obs)):
1364        if len(obs[i].cov_names):
1365            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
1366        if not set(obs[i].names).issubset(weight.names):
1367            raise Exception('Error: Ensembles do not fit')
1368        for name in obs[i].names:
1369            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
1370                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
1371        new_samples = []
1372        w_deltas = {}
1373        for name in sorted(obs[i].names):
1374            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
1375            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
1376        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1377
1378        if kwargs.get('all_configs'):
1379            new_weight = weight
1380        else:
1381            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])
1382
1383        result.append(tmp_obs / new_weight)
1384        result[-1].reweighted = True
1385        result[-1].is_merged = obs[i].is_merged
1386
1387    return result

Reweight a list of observables.

Parameters

weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
obs (list): list of Obs, e.g. [obs1, obs2, obs3].
all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.

def correlate(obs_a, obs_b): View Source

1390def correlate(obs_a, obs_b):
1391    """Correlate two observables.
1392
1393    Parameters
1394    ----------
1395    obs_a : Obs
1396        First observable
1397    obs_b : Obs
1398        Second observable
1399
1400    Notes
1401    -----
1402    Keep in mind to only correlate primary observables which have not been reweighted
1403    yet. The reweighting has to be applied after correlating the observables.
1404    Currently only works if ensembles are identical (this is not strictly necessary).
1405    """
1406
1407    if sorted(obs_a.names) != sorted(obs_b.names):
1408        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
1409    if len(obs_a.cov_names) or len(obs_b.cov_names):
1410        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
1411    for name in obs_a.names:
1412        if obs_a.shape[name] != obs_b.shape[name]:
1413            raise Exception('Shapes of ensemble', name, 'do not fit')
1414        if obs_a.idl[name] != obs_b.idl[name]:
1415            raise Exception('idl of ensemble', name, 'do not fit')
1416
1417    if obs_a.reweighted is True:
1418        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
1419    if obs_b.reweighted is True:
1420        warnings.warn("The second observable is already reweighted.", RuntimeWarning)
1421
1422    new_samples = []
1423    new_idl = []
1424    for name in sorted(obs_a.names):
1425        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
1426        new_idl.append(obs_a.idl[name])
1427
1428    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
1429    o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names}
1430    o.reweighted = obs_a.reweighted or obs_b.reweighted
1431    return o

Correlate two observables.

Parameters

obs_a (Obs): First observable
obs_b (Obs): Second observable

Notes

Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).

def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): View Source

1434def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
1435    r'''Calculates the error covariance matrix of a set of observables.
1436
1437    WARNING: This function should be used with care, especially for observables with support on multiple
1438             ensembles with differing autocorrelations. See the notes below for details.
1439
1440    The gamma method has to be applied first to all observables.
1441
1442    Parameters
1443    ----------
1444    obs : list or numpy.ndarray
1445        List or one dimensional array of Obs
1446    visualize : bool
1447        If True plots the corresponding normalized correlation matrix (default False).
1448    correlation : bool
1449        If True the correlation matrix instead of the error covariance matrix is returned (default False).
1450    smooth : None or int
1451        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
1452        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
1453        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
1454        small ones.
1455
1456    Notes
1457    -----
1458    The error covariance is defined such that it agrees with the squared standard error for two identical observables
1459    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
1460    in the absence of autocorrelation.
1461    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
1462    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
1463    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
1464    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
1465    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1466    '''
1467
1468    length = len(obs)
1469
1470    max_samples = np.max([o.N for o in obs])
1471    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
1472        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
1473
1474    cov = np.zeros((length, length))
1475    for i in range(length):
1476        for j in range(i, length):
1477            cov[i, j] = _covariance_element(obs[i], obs[j])
1478    cov = cov + cov.T - np.diag(np.diag(cov))
1479
1480    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
1481
1482    if isinstance(smooth, int):
1483        corr = _smooth_eigenvalues(corr, smooth)
1484
1485    if visualize:
1486        plt.matshow(corr, vmin=-1, vmax=1)
1487        plt.set_cmap('RdBu')
1488        plt.colorbar()
1489        plt.draw()
1490
1491    if correlation is True:
1492        return corr
1493
1494    errors = [o.dvalue for o in obs]
1495    cov = np.diag(errors) @ corr @ np.diag(errors)
1496
1497    eigenvalues = np.linalg.eigh(cov)[0]
1498    if not np.all(eigenvalues >= 0):
1499        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)
1500
1501    return cov

Calculates the error covariance matrix of a set of observables.

WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.

The gamma method has to be applied first to all observables.

Parameters

obs (list or numpy.ndarray): List or one dimensional array of Obs
visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.

Notes

The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).

def import_jackknife(jacks, name, idl=None): View Source

1581def import_jackknife(jacks, name, idl=None):
1582    """Imports jackknife samples and returns an Obs
1583
1584    Parameters
1585    ----------
1586    jacks : numpy.ndarray
1587        numpy array containing the mean value as zeroth entry and
1588        the N jackknife samples as first to Nth entry.
1589    name : str
1590        name of the ensemble the samples are defined on.
1591    """
1592    length = len(jacks) - 1
1593    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
1594    samples = jacks[1:] @ prj
1595    mean = np.mean(samples)
1596    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
1597    new_obs._value = jacks[0]
1598    return new_obs

Imports jackknife samples and returns an Obs

Parameters

jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
name (str): name of the ensemble the samples are defined on.

def merge_obs(list_of_obs): View Source

1601def merge_obs(list_of_obs):
1602    """Combine all observables in list_of_obs into one new observable
1603
1604    Parameters
1605    ----------
1606    list_of_obs : list
1607        list of the Obs object to be combined
1608
1609    Notes
1610    -----
1611    It is not possible to combine obs which are based on the same replicum
1612    """
1613    replist = [item for obs in list_of_obs for item in obs.names]
1614    if (len(replist) == len(set(replist))) is False:
1615        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
1616    if any([len(o.cov_names) for o in list_of_obs]):
1617        raise Exception('Not possible to merge data that contains covobs!')
1618    new_dict = {}
1619    idl_dict = {}
1620    for o in list_of_obs:
1621        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
1622                        for key in set(o.deltas) | set(o.r_values)})
1623        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})
1624
1625    names = sorted(new_dict.keys())
1626    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
1627    o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
1628    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
1629    return o

Combine all observables in list_of_obs into one new observable

Parameters

list_of_obs (list): list of the Obs object to be combined

Notes

It is not possible to combine obs which are based on the same replicum

def cov_Obs(means, cov, name, grad=None): View Source

1632def cov_Obs(means, cov, name, grad=None):
1633    """Create an Obs based on mean(s) and a covariance matrix
1634
1635    Parameters
1636    ----------
1637    mean : list of floats or float
1638        N mean value(s) of the new Obs
1639    cov : list or array
1640        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
1641    name : str
1642        identifier for the covariance matrix
1643    grad : list or array
1644        Gradient of the Covobs wrt. the means belonging to cov.
1645    """
1646
1647    def covobs_to_obs(co):
1648        """Make an Obs out of a Covobs
1649
1650        Parameters
1651        ----------
1652        co : Covobs
1653            Covobs to be embedded into the Obs
1654        """
1655        o = Obs([], [], means=[])
1656        o._value = co.value
1657        o.names.append(co.name)
1658        o._covobs[co.name] = co
1659        o._dvalue = np.sqrt(co.errsq())
1660        return o
1661
1662    ol = []
1663    if isinstance(means, (float, int)):
1664        means = [means]
1665
1666    for i in range(len(means)):
1667        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
1668    if ol[0].covobs[name].N != len(means):
1669        raise Exception('You have to provide %d mean values!' % (ol[0].N))
1670    if len(ol) == 1:
1671        return ol[0]
1672    return ol

Create an Obs based on mean(s) and a covariance matrix

Parameters

mean (list of floats or float): N mean value(s) of the new Obs
cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
name (str): identifier for the covariance matrix
grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.