diff --git a/pyerrors/__init__.py b/pyerrors/__init__.py index 4ef64352..2f194514 100644 --- a/pyerrors/__init__.py +++ b/pyerrors/__init__.py @@ -5,8 +5,8 @@ It is based on the **gamma method** [arXiv:hep-lat/0306017](https://arxiv.org/ab - **automatic differentiation** as suggested in [arXiv:1809.01289](https://arxiv.org/abs/1809.01289) (partly based on the [autograd](https://github.com/HIPS/autograd) package) - **treatment of slow modes** in the simulation as suggested in [arXiv:1009.5228](https://arxiv.org/abs/1009.5228) - coherent **error propagation** for data from **different Markov chains** -- **non-linear fits with x- and y-errors** and exact linear error propagation based on automatic differentiation as introduced in [arXiv:1809.01289] -- **real and complex matrix operations** and their error propagation based on automatic differentiation (cholesky decomposition, calculation of eigenvalues and eigenvectors, singular value decomposition...) +- **non-linear fits with x- and y-errors** and exact linear error propagation based on automatic differentiation as introduced in [arXiv:1809.01289](https://arxiv.org/abs/1809.01289) +- **real and complex matrix operations** and their error propagation based on automatic differentiation (Cholesky decomposition, calculation of eigenvalues and eigenvectors, singular value decomposition...) ## Getting started @@ -15,15 +15,21 @@ import numpy as np import pyerrors as pe my_obs = pe.Obs([samples], ['ensemble_name']) -my_new_obs = 2 * np.log(my_obs) / my_obs +my_new_obs = 2 * np.log(my_obs) / my_obs ** 2 my_new_obs.gamma_method() -my_new_obs.details() print(my_new_obs) +> 0.31498(72) + +iamzero = my_new_obs - my_new_obs +iamzero.gamma_method() +print(iamzero == 0.0) +> True ``` + # The `Obs` class `pyerrors` introduces a new datatype, `Obs`, which simplifies error propagation and estimation for auto- and cross-correlated data. -An `Obs` object can be initialized with two arguments, the first is a list containining the samples for an Observable from a Monte Carlo chain. +An `Obs` object can be initialized with two arguments, the first is a list containing the samples for an Observable from a Monte Carlo chain. The samples can either be provided as python list or as numpy array. The second argument is a list containing the names of the respective Monte Carlo chains as strings. These strings uniquely identify a Monte Carlo chain/ensemble. @@ -59,15 +65,62 @@ my_m_eff = np.log(my_obs1 / my_obs2) ## Error estimation -The error propagation is based on the gamma method introduced in [arXiv:hep-lat/0306017](https://arxiv.org/abs/hep-lat/0306017). +The error estimation within `pyerrors` is based on the gamma method introduced in [arXiv:hep-lat/0306017](https://arxiv.org/abs/hep-lat/0306017). +After having arrived at the derived quantity of interest the `gamma_method` can be called as detailed in the following example. +Example: +```python +my_sum.gamma_method() +print(my_sum) +> 1.70(57) +my_sum.details() +> Result 1.70000000e+00 +/- 5.72046658e-01 +/- 7.56746598e-02 (33.650%) +> t_int 2.71422900e+00 +/- 6.40320983e-01 S = 2.00 +> 1000 samples in 1 ensemble: +> · Ensemble 'ensemble_name' : 1000 configurations (from 1 to 1000) + +``` + +The standard value for the automatic windowing procedure is $S=2$. Other values for $S$ can be passed to the `gamma_method` as parameter. + +Example: +```python +my_sum.gamma_method(S=3.0) +my_sum.details() +> Result 1.70000000e+00 +/- 6.30675201e-01 +/- 1.04585650e-01 (37.099%) +> t_int 3.29909703e+00 +/- 9.77310102e-01 S = 3.00 +> 1000 samples in 1 ensemble: +> · Ensemble 'ensemble_name' : 1000 configurations (from 1 to 1000) + +``` + +The integrated autocorrelation time $\tau_\mathrm{int}$ and the autocorrelation function $\rho(W)$ can be monitored via the methods ´pyerrors.obs.Obs.plot_tauint` and ´pyerrors.obs.Obs.plot_tauint`. + +Example: +```python +my_sum.plot_tauint() +my_sum.plot_rho() +``` + +### Exponential tails + +Slow modes in the Monte Carlo history can be accounted for by attaching an exponential tail to the autocorrelation function $\rho$ as suggested in [arXiv:1009.5228](https://arxiv.org/abs/1009.5228). The longest autocorrelation time in the history, $\tau_\mathrm{exp}$, can be passed to the `gamma_method` as parameter. In this case the automatic windowing procedure is vacated and the parameter $S$ does not affect the error estimate. + +Example: +```python +my_sum.gamma_method(tau_exp=7.2) +my_sum.details() +> Result 1.70000000e+00 +/- 6.28097762e-01 +/- 5.79077524e-02 (36.947%) +> t_int 3.27218667e+00 +/- 7.99583654e-01 tau_exp = 7.20, N_sigma = 1 +> 1000 samples in 1 ensemble: +> · Ensemble 'ensemble_name' : 1000 configurations (from 1 to 1000) +``` For the full API see `pyerrors.obs.Obs.gamma_method` -### Exponential tails ## Multiple ensembles/replica -Error propagation for multiple ensembles (Markov chains with different simulation parameters) is handeled automatically. Ensembles are uniquely identified by their `name`. +Error propagation for multiple ensembles (Markov chains with different simulation parameters) is handled automatically. Ensembles are uniquely identified by their `name`. Example: ```python @@ -76,29 +129,46 @@ obs2 = pe.Obs([samples2], ['ensemble2']) my_sum = obs1 + obs2 my_sum.details() -> Result 2.00596631e+00 +/- 0.00000000e+00 +/- 0.00000000e+00 (0.000%) +> Result 2.00697958e+00 +/- 0.00000000e+00 +/- 0.00000000e+00 (0.000%) > 1500 samples in 2 ensembles: -> ensemble1: ['ensemble1'] -> ensemble2: ['ensemble2'] - +> · Ensemble 'ensemble1' : 1000 configurations (from 1 to 1000) +> · Ensemble 'ensemble2' : 500 configurations (from 1 to 500) ``` -`pyerrors` identifies multiple replica (independent Markov chains with identical simulation parameters) by the vertical bar `|` in the name of the dataset. +`pyerrors` identifies multiple replica (independent Markov chains with identical simulation parameters) by the vertical bar `|` in the name of the data set. Example: ```python obs1 = pe.Obs([samples1], ['ensemble1|r01']) obs2 = pe.Obs([samples2], ['ensemble1|r02']) -my_sum = obs1 + obs2 -my_sum.details() -> Result 2.00596631e+00 +/- 0.00000000e+00 +/- 0.00000000e+00 (0.000%) +> my_sum = obs1 + obs2 +> my_sum.details() +> Result 2.00697958e+00 +/- 0.00000000e+00 +/- 0.00000000e+00 (0.000%) > 1500 samples in 1 ensemble: -> ensemble1: ['ensemble1|r01', 'ensemble1|r02'] +> · Ensemble 'ensemble1' +> · Replicum 'r01' : 1000 configurations (from 1 to 1000) +> · Replicum 'r02' : 500 configurations (from 1 to 500) ``` + +### Error estimation for multiple ensembles + +In order to keep track of different error analysis parameters for different ensembles one can make use of global dictionaries as detailed in the following example. + +Example: +```python +pe.Obs.S_dict['ensemble1'] = 2.5 +pe.Obs.tau_exp_dict['ensemble2'] = 8.0 +pe.Obs.tau_exp_dict['ensemble3'] = 2.0 +``` + +In case the `gamma_method` is called without any parameters it will use the values specified in the dictionaries for the respective ensembles. +Passing arguments to the `gamma_method` still dominates over the dictionaries. + + ## Irregular Monte Carlo chains -Irregular Monte Carlo chains can be initilized with the parameter `idl`. +Irregular Monte Carlo chains can be initialized with the parameter `idl`. Example: ```python @@ -113,7 +183,7 @@ obs3 = pe.Obs([samples3], ['ensemble1'], idl=[[2, 9, 28, 29, 501]]) ``` **Warning:** Irregular Monte Carlo chains can result in odd patterns in the autocorrelation functions. -Make sure to check the with e.g. `pyerrors.obs.Obs.plot_rho` or `pyerrors.obs.Obs.plot_tauint`. +Make sure to check the autocorrelation time with e.g. `pyerrors.obs.Obs.plot_rho` or `pyerrors.obs.Obs.plot_tauint`. For the full API see `pyerrors.obs.Obs` @@ -121,7 +191,28 @@ For the full API see `pyerrors.obs.Obs` For the full API see `pyerrors.correlators.Corr` # Complex observables -`pyerrors.obs.CObs` + +`pyerrors` can handle complex valued observables via the class `pyerrors.obs.CObs`. +`CObs` are initialized with a real and an imaginary part which both can be `Obs` valued. + +Example: +```python +my_real_part = pe.Obs([samples1], ['ensemble1']) +my_imag_part = pe.Obs([samples2], ['ensemble1']) + +my_cobs = pe.CObs(my_real_part, my_imag_part) +my_cobs.gamma_method() +print(my_cobs) +> (0.9959(91)+0.659(28)j) +``` + +Elementary mathematical operations are overloaded and samples are properly propagated as for the `Obs` class. +```python +my_derived_cobs = (my_cobs + my_cobs.conjugate()) / np.abs(my_cobs) +my_derived_cobs.gamma_method() +print(my_derived_cobs) +> (1.668(23)+0.0j) +``` # Optimization / fits / roots `pyerrors.fits` @@ -130,6 +221,13 @@ For the full API see `pyerrors.correlators.Corr` # Matrix operations `pyerrors.linalg` +# Export data +The preferred exported file format within `pyerrors` is + +## Jackknife samples +For comparison with other analysis workflows `pyerrors` can generate jackknife samples from an `Obs` object. +See `pyerrors.obs.Obs.export_jackknife` for details. + # Input `pyerrors.input` ''' @@ -137,6 +235,7 @@ from .obs import * from .correlators import * from .fits import * from . import dirac +from . import input from . import linalg from . import misc from . import mpm diff --git a/pyerrors/correlators.py b/pyerrors/correlators.py index c2ae3eda..7e333cdb 100644 --- a/pyerrors/correlators.py +++ b/pyerrors/correlators.py @@ -14,7 +14,7 @@ class Corr: Everything, this class does, can be achieved using lists or arrays of Obs. But it is simply more convenient to have a dedicated object for correlators. - One often wants to add or multiply correlators of the same length at every timeslice and it is inconvinient + One often wants to add or multiply correlators of the same length at every timeslice and it is inconvenient to iterate over all timeslices for every operation. This is especially true, when dealing with smearing matrices. The correlator can have two types of content: An Obs at every timeslice OR a GEVP @@ -79,16 +79,16 @@ class Corr: else: raise Exception("Reweighting status of correlator corrupted.") - def gamma_method(self): + def gamma_method(self, **kwargs): """Apply the gamma method to the content of the Corr.""" for item in self.content: if not(item is None): if self.N == 1: - item[0].gamma_method() + item[0].gamma_method(**kwargs) else: for i in range(self.N): for j in range(self.N): - item[i, j].gamma_method() + item[i, j].gamma_method(**kwargs) # We need to project the Correlator with a Vector to get a single value at each timeslice. # The method can use one or two vectors. @@ -238,7 +238,15 @@ class Corr: return Corr(self.content[::-1]) def correlate(self, partner): - """Correlate the correlator with another correlator or Obs""" + """Correlate the correlator with another correlator or Obs + + Parameters + ---------- + partner : Obs or Corr + partner to correlate the correlator with. + Can either be an Obs which is correlated with all entries of the + correlator or a Corr of same length. + """ new_content = [] for x0, t_slice in enumerate(self.content): if t_slice is None: @@ -309,7 +317,7 @@ class Corr: Parameters ---------- symmetric : bool - decides whether symmertic of simple finite differences are used. Default: True + decides whether symmetric of simple finite differences are used. Default: True """ if not symmetric: newcontent = [] @@ -350,10 +358,11 @@ class Corr: Parameters ---------- variant : str - log: uses the standard effective mass log(C(t) / C(t+1)) - cosh : Use periodicitiy of the correlator by solving C(t) / C(t+1) = cosh(m * (t - T/2)) / cosh(m * (t + 1 - T/2)) for m. + log : uses the standard effective mass log(C(t) / C(t+1)) + cosh, periodic : Use periodicitiy of the correlator by solving C(t) / C(t+1) = cosh(m * (t - T/2)) / cosh(m * (t + 1 - T/2)) for m. sinh : Use anti-periodicitiy of the correlator by solving C(t) / C(t+1) = sinh(m * (t - T/2)) / sinh(m * (t + 1 - T/2)) for m. See, e.g., arXiv:1205.5380 + arccosh : Uses the explicit form of the symmetrized correlator (not recommended) guess : float guess for the root finder, only relevant for the root variant """ @@ -406,7 +415,7 @@ class Corr: return np.arccosh(Corr(newcontent, padding_back=1, padding_front=1)) else: - raise Exception('Unkown variant.') + raise Exception('Unknown variant.') def fit(self, function, fitrange=None, silent=False, **kwargs): """Fits function to the data @@ -424,7 +433,7 @@ class Corr: if self.N != 1: raise Exception("Correlator must be projected before fitting") - # The default behaviour is: + # The default behavior is: # 1 use explicit fitrange # if none is provided, use the range of the corr # if this is also not set, use the whole length of the corr (This could come with a warning!) @@ -442,7 +451,7 @@ class Corr: return result def plateau(self, plateau_range=None, method="fit"): - """ Extract a plateu value from a Corr object + """ Extract a plateau value from a Corr object Parameters ---------- @@ -573,7 +582,7 @@ class Corr: return def dump(self, filename): - """Dumps the Corr into a pickel file + """Dumps the Corr into a pickle file Parameters ---------- diff --git a/pyerrors/fits.py b/pyerrors/fits.py index d3dfab4a..1b595883 100644 --- a/pyerrors/fits.py +++ b/pyerrors/fits.py @@ -21,7 +21,7 @@ class Fit_result(Sequence): ---------- fit_parameters : list results for the individual fit parameters, - also accesible via indices. + also accessible via indices. """ def __init__(self): @@ -456,7 +456,7 @@ def _standard_fit(x, y, func, silent=False, **kwargs): raise Exception('x and y input have to have the same length') if len(x.shape) > 2: - raise Exception('Unkown format for x values') + raise Exception('Unknown format for x values') if not callable(func): raise TypeError('func has to be a function.') diff --git a/pyerrors/input/hadrons.py b/pyerrors/input/hadrons.py index c184ad8f..5f3d2646 100644 --- a/pyerrors/input/hadrons.py +++ b/pyerrors/input/hadrons.py @@ -38,7 +38,7 @@ def _get_files(path, filestem): if not all(np.diff(cnfg_numbers) == np.diff(cnfg_numbers)[0]): raise Exception('Configurations are not evenly spaced.') - return files + return files, cnfg_numbers def read_meson_hd5(path, filestem, ens_id, meson='meson_0', tree='meson'): @@ -61,7 +61,7 @@ def read_meson_hd5(path, filestem, ens_id, meson='meson_0', tree='meson'): from other modules with similar structures. """ - files = _get_files(path, filestem) + files, cnfg_numbers = _get_files(path, filestem) corr_data = [] infos = [] @@ -78,7 +78,7 @@ def read_meson_hd5(path, filestem, ens_id, meson='meson_0', tree='meson'): l_obs = [] for c in corr_data.T: - l_obs.append(Obs([c], [ens_id])) + l_obs.append(Obs([c], [ens_id], idl=[cnfg_numbers])) corr = Corr(l_obs) corr.tag = r", ".join(infos) @@ -98,7 +98,7 @@ def read_ExternalLeg_hd5(path, filestem, ens_id, order='F'): 'C' for the last index changing fastest (16 3x3 matrices), """ - files = _get_files(path, filestem) + files, cnfg_numbers = _get_files(path, filestem) mom = None @@ -118,8 +118,8 @@ def read_ExternalLeg_hd5(path, filestem, ens_id, order='F'): matrix = np.empty((rolled_array.shape[:-1]), dtype=object) for si, sj, ci, cj in np.ndindex(rolled_array.shape[:-1]): - real = Obs([rolled_array[si, sj, ci, cj].real], [ens_id]) - imag = Obs([rolled_array[si, sj, ci, cj].imag], [ens_id]) + real = Obs([rolled_array[si, sj, ci, cj].real], [ens_id], idl=[cnfg_numbers]) + imag = Obs([rolled_array[si, sj, ci, cj].imag], [ens_id], idl=[cnfg_numbers]) matrix[si, sj, ci, cj] = CObs(real, imag) matrix[si, sj, ci, cj].gamma_method() @@ -139,7 +139,7 @@ def read_Bilinear_hd5(path, filestem, ens_id, order='F'): 'C' for the last index changing fastest (16 3x3 matrices), """ - files = _get_files(path, filestem) + files, cnfg_numbers = _get_files(path, filestem) mom_in = None mom_out = None @@ -173,8 +173,8 @@ def read_Bilinear_hd5(path, filestem, ens_id, order='F'): matrix = np.empty((rolled_array.shape[:-1]), dtype=object) for si, sj, ci, cj in np.ndindex(rolled_array.shape[:-1]): - real = Obs([rolled_array[si, sj, ci, cj].real], [ens_id]) - imag = Obs([rolled_array[si, sj, ci, cj].imag], [ens_id]) + real = Obs([rolled_array[si, sj, ci, cj].real], [ens_id], idl=[cnfg_numbers]) + imag = Obs([rolled_array[si, sj, ci, cj].imag], [ens_id], idl=[cnfg_numbers]) matrix[si, sj, ci, cj] = CObs(real, imag) matrix[si, sj, ci, cj].gamma_method() diff --git a/pyerrors/linalg.py b/pyerrors/linalg.py index 210f79fa..d993f291 100644 --- a/pyerrors/linalg.py +++ b/pyerrors/linalg.py @@ -1,25 +1,27 @@ import numpy as np from autograd import jacobian import autograd.numpy as anp # Thinly-wrapped numpy -from .obs import derived_observable, CObs, Obs +from .obs import derived_observable, CObs, Obs, _merge_idx, _expand_deltas_for_merge, _filter_zeroes from functools import partial from autograd.extend import defvjp def derived_array(func, data, **kwargs): - """Construct a derived Obs according to func(data, **kwargs) of matrix value data - using automatic differentiation. + """Construct a derived Obs for a matrix valued function according to func(data, **kwargs) using automatic differentiation. Parameters ---------- - func -- arbitrary function of the form func(data, **kwargs). For the - automatic differentiation to work, all numpy functions have to have - the autograd wrapper (use 'import autograd.numpy as anp'). - data -- list of Obs, e.g. [obs1, obs2, obs3]. - man_grad -- manually supply a list or an array which contains the jacobian - of func. Use cautiously, supplying the wrong derivative will - not be intercepted. + func : object + arbitrary function of the form func(data, **kwargs). For the + automatic differentiation to work, all numpy functions have to have + the autograd wrapper (use 'import autograd.numpy as anp'). + data : list + list of Obs, e.g. [obs1, obs2, obs3]. + man_grad : list + manually supply a list or an array which contains the jacobian + of func. Use cautiously, supplying the wrong derivative will + not be intercepted. """ data = np.asarray(data) @@ -30,25 +32,29 @@ def derived_array(func, data, **kwargs): if isinstance(i_data, Obs): first_name = i_data.names[0] first_shape = i_data.shape[first_name] + first_idl = i_data.idl[first_name] break for i in range(len(raveled_data)): if isinstance(raveled_data[i], (int, float)): - raveled_data[i] = Obs([raveled_data[i] + np.zeros(first_shape)], [first_name]) + raveled_data[i] = Obs([raveled_data[i] + np.zeros(first_shape)], [first_name], idl=[first_idl]) n_obs = len(raveled_data) new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) - new_shape = {} - for i_data in raveled_data: - for name in new_names: - tmp = i_data.shape.get(name) + is_merged = len(list(filter(lambda o: o.is_merged is True, raveled_data))) > 0 + reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 + new_idl_d = {} + for name in new_names: + idl = [] + for i_data in raveled_data: + tmp = i_data.idl.get(name) if tmp is not None: - if new_shape.get(name) is None: - new_shape[name] = tmp - else: - if new_shape[name] != tmp: - raise Exception('Shapes of ensemble', name, 'do not match.') + idl.append(tmp) + new_idl_d[name] = _merge_idx(idl) + if not is_merged: + is_merged = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) + if data.ndim == 1: values = np.array([o.value for o in data]) else: @@ -71,8 +77,6 @@ def derived_array(func, data, **kwargs): deriv = np.asarray(kwargs.get('man_grad')) if new_values.shape + data.shape != deriv.shape: raise Exception('Manual derivative does not have correct shape.') - elif kwargs.get('num_grad') is True: - raise Exception('Multi mode currently not supported for numerical derivative') else: deriv = jacobian(func)(values, **kwargs) @@ -82,8 +86,8 @@ def derived_array(func, data, **kwargs): for name in new_names: d_extracted[name] = [] for i_dat, dat in enumerate(data): - ens_length = dat.ravel()[0].shape[name] - d_extracted[name].append(np.array([o.deltas[name] for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) + ens_length = len(new_idl_d[name]) + d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas[name], o.idl[name], o.shape[name], new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) for i_val, new_val in np.ndenumerate(new_values): new_deltas = {} @@ -95,12 +99,21 @@ def derived_array(func, data, **kwargs): new_samples = [] new_means = [] - for name in new_names: - new_samples.append(new_deltas[name]) + new_idl = [] + if is_merged: + filtered_names, filtered_deltas, filtered_idl_d = _filter_zeroes(new_names, new_deltas, new_idl_d) + else: + filtered_names = new_names + filtered_deltas = new_deltas + filtered_idl_d = new_idl_d + for name in filtered_names: + new_samples.append(filtered_deltas[name]) new_means.append(new_r_values[name][i_val]) - - final_result[i_val] = Obs(new_samples, new_names, means=new_means) + new_idl.append(filtered_idl_d[name]) + final_result[i_val] = Obs(new_samples, filtered_names, means=new_means, idl=new_idl) final_result[i_val]._value = new_val + final_result[i_val].is_merged = is_merged + final_result[i_val].reweighted = reweighted return final_result @@ -162,7 +175,7 @@ def inv(x): def cholesky(x): - """Cholesky decompostion of Obs or CObs valued matrices.""" + """Cholesky decomposition of Obs or CObs valued matrices.""" return _mat_mat_op(anp.linalg.cholesky, x) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 4362b91b..24fa91ca 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -28,11 +28,14 @@ class Obs: exists this overwrites the standard value for that ensemble. tau_exp_global : float Standard value for tau_exp (default 0.0) - tau_exp_dict :dict + tau_exp_dict : dict Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble. N_sigma_global : float Standard value for N_sigma (default 1.0) + N_sigma_dict : dict + Dictionary for N_sigma values. If an entry for a given ensemble exists + this overwrites the standard value for that ensemble. """ __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', @@ -45,6 +48,7 @@ class Obs: tau_exp_global = 0.0 tau_exp_dict = {} N_sigma_global = 1.0 + N_sigma_dict = {} filter_eps = 1e-10 def __init__(self, samples, names, idl=None, means=None, **kwargs): @@ -55,7 +59,7 @@ class Obs: samples : list list of numpy arrays containing the Monte Carlo samples names : list - list of strings labeling the indivdual samples + list of strings labeling the individual samples idl : list, optional list of ranges or lists on which the samples are defined means : list, optional @@ -66,12 +70,15 @@ class Obs: if means is None: if len(samples) != len(names): raise Exception('Length of samples and names incompatible.') + if idl is not None: + if len(idl) != len(names): + raise Exception('Length of idl incompatible with samples and names.') if len(names) != len(set(names)): - raise Exception('Names are not unique.') + raise Exception('names are not unique.') if not all(isinstance(x, str) for x in names): raise TypeError('All names have to be strings.') if min(len(x) for x in samples) <= 4: - raise Exception('Samples have to have at least 4 entries.') + raise Exception('Samples have to have at least 5 entries.') self.names = sorted(names) self.shape = {} @@ -183,72 +190,39 @@ class Obs: self.S = {} self.tau_exp = {} + self.N_sigma = {} if kwargs.get('fft') is False: fft = False else: fft = True - if 'S' in kwargs: - tmp = kwargs.get('S') - if isinstance(tmp, list): - if len(tmp) != len(self.e_names): - raise Exception('Length of S array does not match ensembles.') - for e, e_name in enumerate(self.e_names): - if tmp[e] <= 0: - raise Exception('S has to be larger than 0.') - self.S[e_name] = tmp[e] - else: - if isinstance(tmp, (int, float)): - if tmp <= 0: - raise Exception('S has to be larger than 0.') - for e, e_name in enumerate(self.e_names): - self.S[e_name] = tmp - else: - raise TypeError('S is not in proper format.') - else: - for e, e_name in enumerate(self.e_names): - if e_name in Obs.S_dict: - self.S[e_name] = Obs.S_dict[e_name] - else: - self.S[e_name] = Obs.S_global - - if 'tau_exp' in kwargs: - tmp = kwargs.get('tau_exp') - if isinstance(tmp, list): - if len(tmp) != len(self.e_names): - raise Exception('Length of tau_exp array does not match ensembles.') - for e, e_name in enumerate(self.e_names): - if tmp[e] < 0: - raise Exception('tau_exp smaller than 0.') - self.tau_exp[e_name] = tmp[e] - else: + def _parse_kwarg(kwarg_name): + if kwarg_name in kwargs: + tmp = kwargs.get(kwarg_name) if isinstance(tmp, (int, float)): if tmp < 0: - raise Exception('tau_exp smaller than 0.') + raise Exception(kwarg_name + ' has to be larger or equal to 0.') for e, e_name in enumerate(self.e_names): - self.tau_exp[e_name] = tmp + getattr(self, kwarg_name)[e_name] = tmp else: - raise TypeError('tau_exp is not in proper format.') - else: - for e, e_name in enumerate(self.e_names): - if e_name in Obs.tau_exp_dict: - self.tau_exp[e_name] = Obs.tau_exp_dict[e_name] - else: - self.tau_exp[e_name] = Obs.tau_exp_global + raise TypeError(kwarg_name + ' is not in proper format.') + else: + for e, e_name in enumerate(self.e_names): + if e_name in getattr(Obs, kwarg_name + '_dict'): + getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] + else: + getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') - if 'N_sigma' in kwargs: - self.N_sigma = kwargs.get('N_sigma') - if not isinstance(self.N_sigma, (int, float)): - raise TypeError('N_sigma is not a number.') - else: - self.N_sigma = Obs.N_sigma_global + _parse_kwarg('S') + _parse_kwarg('tau_exp') + _parse_kwarg('N_sigma') for e, e_name in enumerate(self.e_names): r_length = [] for r_name in e_content[e_name]: - if self.idl[r_name] is range: + if isinstance(self.idl[r_name], range): r_length.append(len(self.idl[r_name])) else: r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) @@ -260,11 +234,11 @@ class Obs: self.e_drho[e_name] = np.zeros(w_max) for r_name in e_content[e_name]: - e_gamma[e_name] += self.calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) + e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) gamma_div = np.zeros(w_max) for r_name in e_content[e_name]: - gamma_div += self.calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) + gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) e_gamma[e_name] /= gamma_div[:w_max] if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero @@ -293,9 +267,11 @@ class Obs: # if type(self.idl[e_name]) is range: # scale tau_exp according to step size # texp /= self.idl[e_name].step # Critical slowing down analysis + if w_max // 2 <= 1: + raise Exception("Need at least 8 samples for tau_exp error analysis") for n in range(1, w_max // 2): _compute_drho(n + 1) - if (self.e_rho[e_name][n] - self.N_sigma * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: + if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: # Bias correction hep-lat/0306017 eq. (49) included self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) @@ -329,39 +305,26 @@ class Obs: self.ddvalue = np.sqrt(self.ddvalue) / self.dvalue return - def expand_deltas(self, deltas, idx, shape): - """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0. - If idx is of type range, the deltas are not changed - - Parameters - ---------- - deltas -- List of fluctuations - idx -- List or range of configs on which the deltas are defined. - shape -- Number of configs in idx. - """ - if type(idx) is range: - return deltas - else: - ret = np.zeros(idx[-1] - idx[0] + 1) - for i in range(shape): - ret[idx[i] - idx[0]] = deltas[i] - return ret - - def calc_gamma(self, deltas, idx, shape, w_max, fft): + def _calc_gamma(self, deltas, idx, shape, w_max, fft): """Calculate Gamma_{AA} from the deltas, which are defined on idx. idx is assumed to be a contiguous range (possibly with a stepsize != 1) Parameters ---------- - deltas -- List of fluctuations - idx -- List or range of configs on which the deltas are defined. - shape -- Number of configs in idx. - w_max -- Upper bound for the summation window - fft -- boolean, which determines whether the fft algorithm is used for - the computation of the autocorrelation function + deltas : list + List of fluctuations + idx : list + List or range of configurations on which the deltas are defined. + shape : int + Number of configurations in idx. + w_max : int + Upper bound for the summation window. + fft : bool + determines whether the fft algorithm is used for the computation + of the autocorrelation function. """ gamma = np.zeros(w_max) - deltas = self.expand_deltas(deltas, idx, shape) + deltas = _expand_deltas(deltas, idx, shape) new_shape = len(deltas) if fft: max_gamma = min(new_shape, w_max) @@ -375,12 +338,16 @@ class Obs: return gamma - def print(self, level=1): - warnings.warn("Method 'print' renamed to 'details'", DeprecationWarning) - self.details(level > 1) - def details(self, ens_content=True): - """Output detailed properties of the Obs.""" + """Output detailed properties of the Obs. + + Parameters + ---------- + ens_content : bool + print details about the ensembles and replica if true. + """ + if self.tag is not None: + print("Description:", self.tag) if self.value == 0.0: percentage = np.nan else: @@ -393,22 +360,50 @@ class Obs: if len(self.e_names) > 1: print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) if self.tau_exp[e_name] > 0: - print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma)) + print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) else: print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) - if self.tag is not None: - print("Description:", self.tag) if ens_content is True: if len(self.e_names) == 1: print(self.N, 'samples in', len(self.e_names), 'ensemble:') else: print(self.N, 'samples in', len(self.e_names), 'ensembles:') - m = max(map(len, list(self.e_content.keys()))) + 1 - print('\n'.join([' ' + key.rjust(m) + ': ' + str(value) for key, value in sorted(self.e_content.items())])) + my_string_list = [] + for key, value in sorted(self.e_content.items()): + my_string = ' ' + "\u00B7 Ensemble '" + key + "' " + if len(value) == 1: + my_string += f': {self.shape[value[0]]} configurations' + if isinstance(self.idl[value[0]], range): + my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' + else: + my_string += ' (irregular range)' + else: + sublist = [] + for v in value: + my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " + my_substring += f': {self.shape[v]} configurations' + if isinstance(self.idl[v], range): + my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' + else: + my_substring += ' (irregular range)' + sublist.append(my_substring) + + my_string += '\n' + '\n'.join(sublist) + my_string_list.append(my_string) + print('\n'.join(my_string_list)) + + def print(self, level=1): + warnings.warn("Method 'print' renamed to 'details'", DeprecationWarning) + self.details(level > 1) def is_zero_within_error(self, sigma=1): """Checks whether the observable is zero within 'sigma' standard errors. + Parameters + ---------- + sigma : int + Number of standard errors used for the check. + Works only properly when the gamma method was run. """ return self.is_zero() or np.abs(self.value) <= sigma * self.dvalue @@ -418,7 +413,13 @@ class Obs: return np.isclose(0.0, self.value) and all(np.allclose(0.0, delta) for delta in self.deltas.values()) def plot_tauint(self, save=None): - """Plot integrated autocorrelation time for each ensemble.""" + """Plot integrated autocorrelation time for each ensemble. + + Parameters + ---------- + save : str + saves the figure to a file named 'save' if. + """ if not hasattr(self, 'e_names'): raise Exception('Run the gamma method first.') @@ -495,7 +496,13 @@ class Obs: plt.draw() def plot_history(self, expand=True): - """Plot derived Monte Carlo history for each ensemble.""" + """Plot derived Monte Carlo history for each ensemble + + Parameters + ---------- + expand : bool + show expanded history for irregular Monte Carlo chains (default: True). + """ if not hasattr(self, 'e_names'): raise Exception('Run the gamma method first.') @@ -505,7 +512,7 @@ class Obs: tmp = [] for r, r_name in enumerate(self.e_content[e_name]): if expand: - tmp.append(self.expand_deltas(self.deltas[r_name], self.idl[r_name], self.shape[r_name]) + self.r_values[r_name]) + tmp.append(_expand_deltas(self.deltas[r_name], self.idl[r_name], self.shape[r_name]) + self.r_values[r_name]) else: tmp.append(self.deltas[r_name] + self.r_values[r_name]) r_length.append(len(tmp[-1])) @@ -538,6 +545,8 @@ class Obs: Parameters ---------- + name : str + name of the file to be saved. path : str specifies a custom path for the file (default '.') """ @@ -548,6 +557,34 @@ class Obs: with open(file_name, 'wb') as fb: pickle.dump(self, fb) + def export_jackknife(self): + """Export jackknife samples from the Obs + + Returns + ------- + numpy.ndarray + Returns a numpy array of length N + 1 where N is the number of samples + for the given ensemble and replicum. The zeroth entry of the array contains + the mean value of the Obs, entries 1 to N contain the N jackknife samples + derived from the Obs. The current implementation only works for observables + defined on exactly one ensemble and replicum. The derived jackknife samples + should agree with samples from a full jackknife analysis up to O(1/N). + """ + + if len(self.names) != 1: + raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") + + name = self.names[0] + full_data = self.deltas[name] + self.r_values[name] + n = full_data.size + mean = np.mean(full_data) + tmp_jacks = np.zeros(n + 1) + tmp_jacks[0] = self.value + for i in range(n): + tmp_jacks[i + 1] = (n * mean - full_data[i]) / (n - 1) + + return tmp_jacks + def __float__(self): return float(self.value) @@ -829,7 +866,29 @@ class CObs: return 'CObs[' + str(self) + ']' -def merge_idx(idl): +def _expand_deltas(deltas, idx, shape): + """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0. + If idx is of type range, the deltas are not changed + + Parameters + ---------- + deltas : list + List of fluctuations + idx : list + List or range of configs on which the deltas are defined. + shape : int + Number of configs in idx. + """ + if isinstance(idx, range): + return deltas + else: + ret = np.zeros(idx[-1] - idx[0] + 1) + for i in range(shape): + ret[idx[i] - idx[0]] = deltas[i] + return ret + + +def _merge_idx(idl): """Returns the union of all lists in idl Parameters @@ -856,7 +915,7 @@ def merge_idx(idl): return list(set().union(*idl)) -def expand_deltas_for_merge(deltas, idx, shape, new_idx): +def _expand_deltas_for_merge(deltas, idx, shape, new_idx): """Expand deltas defined on idx to the list of configs that is defined by new_idx. New, empy entries are filled by 0. If idx and new_idx are of type range, the smallest common divisor of the step sizes is used as new step size. @@ -883,7 +942,7 @@ def expand_deltas_for_merge(deltas, idx, shape, new_idx): return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) -def filter_zeroes(names, deltas, idl, eps=Obs.filter_eps): +def _filter_zeroes(names, deltas, idl, eps=Obs.filter_eps): """Filter out all configurations with vanishing fluctuation such that they do not contribute to the error estimate anymore. Returns the new names, deltas and idl according to the filtering. @@ -976,7 +1035,7 @@ def derived_observable(func, data, **kwargs): tmp = i_data.idl.get(name) if tmp is not None: idl.append(tmp) - new_idl_d[name] = merge_idx(idl) + new_idl_d[name] = _merge_idx(idl) if not is_merged: is_merged = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) @@ -1038,13 +1097,13 @@ def derived_observable(func, data, **kwargs): new_deltas = {} for j_obs, obs in np.ndenumerate(data): for name in obs.names: - new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) + new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) new_samples = [] new_means = [] new_idl = [] if is_merged: - filtered_names, filtered_deltas, filtered_idl_d = filter_zeroes(new_names, new_deltas, new_idl_d) + filtered_names, filtered_deltas, filtered_idl_d = _filter_zeroes(new_names, new_deltas, new_idl_d) else: filtered_names = new_names filtered_deltas = new_deltas @@ -1064,7 +1123,7 @@ def derived_observable(func, data, **kwargs): return final_result -def reduce_deltas(deltas, idx_old, idx_new): +def _reduce_deltas(deltas, idx_old, idx_new): """Extract deltas defined on idx_old on all configs of idx_new. Parameters @@ -1078,7 +1137,7 @@ def reduce_deltas(deltas, idx_old, idx_new): Has to be a subset of idx_old. """ if not len(deltas) == len(idx_old): - raise Exception('Lenght of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) + raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) if type(idx_old) is range and type(idx_new) is range: if idx_old == idx_new: return deltas @@ -1094,7 +1153,7 @@ def reduce_deltas(deltas, idx_old, idx_new): pos = j break if pos < 0: - raise Exception('Error in reduce_deltas: Config %d not in idx_old' % (idx_new[i])) + raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i])) ret[i] = deltas[j] return np.array(ret) @@ -1124,7 +1183,7 @@ def reweight(weight, obs, **kwargs): new_samples = [] w_deltas = {} for name in sorted(weight.names): - w_deltas[name] = reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) + w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) tmp_obs = Obs(new_samples, sorted(weight.names), idl=[obs[i].idl[name] for name in sorted(weight.names)]) @@ -1192,6 +1251,10 @@ def covariance(obs1, obs2, correlation=False, **kwargs): Parameters ---------- + obs1 : Obs + First Obs + obs2 : Obs + Second Obs correlation : bool if true the correlation instead of the covariance is returned (default False) @@ -1202,7 +1265,7 @@ def covariance(obs1, obs2, correlation=False, **kwargs): for name in sorted(set(obs1.names + obs2.names)): if (obs1.shape.get(name) != obs2.shape.get(name)) and (obs1.shape.get(name) is not None) and (obs2.shape.get(name) is not None): raise Exception('Shapes of ensemble', name, 'do not fit') - if (1 != len(set([len(idx) for idx in [obs1.idl[name], obs2.idl[name], merge_idx([obs1.idl[name], obs2.idl[name]])]]))): + if (1 != len(set([len(idx) for idx in [obs1.idl[name], obs2.idl[name], _merge_idx([obs1.idl[name], obs2.idl[name]])]]))): raise Exception('Shapes of ensemble', name, 'do not fit') if not hasattr(obs1, 'e_names') or not hasattr(obs2, 'e_names'): @@ -1311,8 +1374,8 @@ def covariance2(obs1, obs2, correlation=False, **kwargs): for r_name in obs1.e_content[e_name]: if r_name not in obs2.e_content[e_name]: continue - idl_d[r_name] = merge_idx([obs1.idl[r_name], obs2.idl[r_name]]) - if idl_d[r_name] is range: + idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]]) + if isinstance(idl_d[r_name], range): r_length.append(len(idl_d[r_name])) else: r_length.append((idl_d[r_name][-1] - idl_d[r_name][0] + 1)) @@ -1385,7 +1448,7 @@ def covariance3(obs1, obs2, correlation=False, **kwargs): for name in sorted(set(obs1.names + obs2.names)): if (obs1.shape.get(name) != obs2.shape.get(name)) and (obs1.shape.get(name) is not None) and (obs2.shape.get(name) is not None): raise Exception('Shapes of ensemble', name, 'do not fit') - if (1 != len(set([len(idx) for idx in [obs1.idl[name], obs2.idl[name], merge_idx([obs1.idl[name], obs2.idl[name]])]]))): + if (1 != len(set([len(idx) for idx in [obs1.idl[name], obs2.idl[name], _merge_idx([obs1.idl[name], obs2.idl[name]])]]))): raise Exception('Shapes of ensemble', name, 'do not fit') if not hasattr(obs1, 'e_names') or not hasattr(obs2, 'e_names'): @@ -1430,7 +1493,16 @@ def covariance3(obs1, obs2, correlation=False, **kwargs): def pseudo_Obs(value, dvalue, name, samples=1000): """Generate a pseudo Obs with given value, dvalue and name - The standard number of samples is a 1000. This can be adjusted. + Parameters + ---------- + value : float + central value of the Obs to be generated. + dvalue : float + error of the Obs to be generated. + name : str + name of the ensemble for which the Obs is to be generated. + samples: int + number of samples for the Obs (default 1000). """ if dvalue <= 0.0: return Obs([np.zeros(samples) + value], [name]) @@ -1471,7 +1543,13 @@ def dump_object(obj, name, **kwargs): def load_object(path): - """Load object from pickle file. """ + """Load object from pickle file. + + Parameters + ---------- + path : str + path to the file + """ with open(path, 'rb') as file: return pickle.load(file) diff --git a/tests/linalg_test.py b/tests/linalg_test.py index 41a391a3..2b6f200c 100644 --- a/tests/linalg_test.py +++ b/tests/linalg_test.py @@ -51,6 +51,51 @@ def test_multi_dot(): assert e.is_zero(), t +def test_matmul_irregular_histories(): + dim = 2 + length = 500 + + standard_array = [] + for i in range(dim ** 2): + standard_array.append(pe.Obs([np.random.normal(1.1, 0.2, length)], ['ens1'])) + standard_matrix = np.array(standard_array).reshape((dim, dim)) + + for idl in [range(1, 501, 2), range(250, 273), [2, 8, 19, 20, 78]]: + irregular_array = [] + for i in range(dim ** 2): + irregular_array.append(pe.Obs([np.random.normal(1.1, 0.2, len(idl))], ['ens1'], idl=[idl])) + irregular_matrix = np.array(irregular_array).reshape((dim, dim)) + + t1 = standard_matrix @ irregular_matrix + t2 = pe.linalg.matmul(standard_matrix, irregular_matrix) + + assert np.all([o.is_zero() for o in (t1 - t2).ravel()]) + assert np.all([o.is_merged for o in t1.ravel()]) + assert np.all([o.is_merged for o in t2.ravel()]) + + +def test_irregular_matrix_inverse(): + dim = 3 + length = 500 + + for idl in [range(8, 508, 10), range(250, 273), [2, 8, 19, 20, 78, 99, 828, 10548979]]: + irregular_array = [] + for i in range(dim ** 2): + irregular_array.append(pe.Obs([np.random.normal(1.1, 0.2, len(idl)), np.random.normal(0.25, 0.1, 10)], ['ens1', 'ens2'], idl=[idl, range(1, 11)])) + irregular_matrix = np.array(irregular_array).reshape((dim, dim)) + + invertible_irregular_matrix = np.identity(dim) + irregular_matrix @ irregular_matrix.T + + inverse = pe.linalg.inv(invertible_irregular_matrix) + + assert np.allclose(np.linalg.inv(np.vectorize(lambda x: x.value)(invertible_irregular_matrix)) - np.vectorize(lambda x: x.value)(inverse), 0.0) + + check1 = pe.linalg.matmul(invertible_irregular_matrix, inverse) + assert np.all([o.is_zero() for o in (check1 - np.identity(dim)).ravel()]) + check2 = invertible_irregular_matrix @ inverse + assert np.all([o.is_zero() for o in (check2 - np.identity(dim)).ravel()]) + + def test_matrix_inverse(): content = [] for t in range(9): diff --git a/tests/pyerrors_test.py b/tests/obs_test.py similarity index 87% rename from tests/pyerrors_test.py rename to tests/obs_test.py index 8f2ca2bd..f0adcc49 100644 --- a/tests/pyerrors_test.py +++ b/tests/obs_test.py @@ -117,6 +117,65 @@ def test_gamma_method_persistance(): assert ddvalue == my_obs.ddvalue +def test_gamma_method_kwargs(): + + my_obs = pe.Obs([np.random.normal(1, 0.8, 5)], ['ens'], idl=[[1, 2, 3, 6, 17]]) + + pe.Obs.S_dict['ens13.7'] = 3 + + my_obs.gamma_method() + assert my_obs.S['ens'] == pe.Obs.S_global + assert my_obs.tau_exp['ens'] == pe.Obs.tau_exp_global + assert my_obs.N_sigma['ens'] == pe.Obs.N_sigma_global + + my_obs.gamma_method(S=3.71) + assert my_obs.S['ens'] == 3.71 + assert my_obs.tau_exp['ens'] == pe.Obs.tau_exp_global + assert my_obs.N_sigma['ens'] == pe.Obs.N_sigma_global + + my_obs.gamma_method(tau_exp=17) + assert my_obs.S['ens'] == pe.Obs.S_global + assert my_obs.tau_exp['ens'] == 17 + assert my_obs.N_sigma['ens'] == pe.Obs.N_sigma_global + + my_obs.gamma_method(tau_exp=1.7, N_sigma=2.123) + assert my_obs.S['ens'] == pe.Obs.S_global + assert my_obs.tau_exp['ens'] == 1.7 + assert my_obs.N_sigma['ens'] == 2.123 + + pe.Obs.S_dict['ens'] = 3 + pe.Obs.S_dict['ens|23'] = 7 + + my_obs.gamma_method() + assert my_obs.S['ens'] == pe.Obs.S_dict['ens'] == 3 + assert my_obs.tau_exp['ens'] == pe.Obs.tau_exp_global + assert my_obs.N_sigma['ens'] == pe.Obs.N_sigma_global + + pe.Obs.tau_exp_dict['ens'] = 4 + pe.Obs.N_sigma_dict['ens'] = 4 + + my_obs.gamma_method() + assert my_obs.S['ens'] == pe.Obs.S_dict['ens'] == 3 + assert my_obs.tau_exp['ens'] == pe.Obs.tau_exp_dict['ens'] == 4 + assert my_obs.N_sigma['ens'] == pe.Obs.N_sigma_dict['ens'] == 4 + + my_obs.gamma_method(S=1.1, tau_exp=1.2, N_sigma=1.3) + assert my_obs.S['ens'] == 1.1 + assert my_obs.tau_exp['ens'] == 1.2 + assert my_obs.N_sigma['ens'] == 1.3 + + pe.Obs.S_dict = {} + pe.Obs.tau_exp_dict = {} + pe.Obs.N_sigma_dict = {} + + my_obs = pe.Obs([np.random.normal(1, 0.8, 5)], ['ens']) + + my_obs.gamma_method() + assert my_obs.S['ens'] == pe.Obs.S_global + assert my_obs.tau_exp['ens'] == pe.Obs.tau_exp_global + assert my_obs.N_sigma['ens'] == pe.Obs.N_sigma_global + + def test_covariance_is_variance(): value = np.random.normal(5, 10) dvalue = np.abs(np.random.normal(0, 1)) @@ -445,3 +504,21 @@ def test_covariance2_symmetry(): cov_ba = pe.covariance2(a, test_obs1) assert np.abs(cov_ab - cov_ba) <= 10 * np.finfo(np.float64).eps assert np.abs(cov_ab) < test_obs1.dvalue * test_obs2.dvalue * (1 + 10 * np.finfo(np.float64).eps) + + +def test_jackknife(): + full_data = np.random.normal(1.1, 0.87, 5487) + + my_obs = pe.Obs([full_data], ['test']) + + n = full_data.size + mean = np.mean(full_data) + tmp_jacks = np.zeros(n + 1) + tmp_jacks[0] = mean + for i in range(n): + tmp_jacks[i + 1] = (n * mean - full_data[i]) / (n - 1) + + assert np.allclose(tmp_jacks, my_obs.export_jackknife()) + my_new_obs = my_obs + pe.Obs([full_data], ['test2']) + with pytest.raises(Exception): + my_new_obs.export_jackknife()