diff --git a/docs/pyerrors/obs.html b/docs/pyerrors/obs.html index 0b47e142..fed265cc 100644 --- a/docs/pyerrors/obs.html +++ b/docs/pyerrors/obs.html @@ -1468,7 +1468,7 @@ 1138 return idinter 1139 1140 -1141def _expand_deltas_for_merge(deltas, idx, shape, new_idx): +1141def _expand_deltas_for_merge(deltas, idx, shape, new_idx, scalefactor): 1142 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 1143 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 1144 common divisor of the step sizes is used as new step size. @@ -1484,599 +1484,624 @@ 1154 Number of configs in idx. 1155 new_idx : list 1156 List of configs that defines the new range, has to be sorted in ascending order. -1157 """ -1158 -1159 if type(idx) is range and type(new_idx) is range: -1160 if idx == new_idx: -1161 return deltas -1162 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) -1163 for i in range(shape): -1164 ret[idx[i] - new_idx[0]] = deltas[i] -1165 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx) -1166 -1167 -1168def derived_observable(func, data, array_mode=False, **kwargs): -1169 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. -1170 -1171 Parameters -1172 ---------- -1173 func : object -1174 arbitrary function of the form func(data, **kwargs). For the -1175 automatic differentiation to work, all numpy functions have to have -1176 the autograd wrapper (use 'import autograd.numpy as anp'). -1177 data : list -1178 list of Obs, e.g. [obs1, obs2, obs3]. -1179 num_grad : bool -1180 if True, numerical derivatives are used instead of autograd -1181 (default False). To control the numerical differentiation the -1182 kwargs of numdifftools.step_generators.MaxStepGenerator -1183 can be used. -1184 man_grad : list -1185 manually supply a list or an array which contains the jacobian -1186 of func. Use cautiously, supplying the wrong derivative will -1187 not be intercepted. -1188 -1189 Notes -1190 ----- -1191 For simple mathematical operations it can be practical to use anonymous -1192 functions. For the ratio of two observables one can e.g. use +1157 scalefactor : float +1158 An additional scaling factor that can be applied to scale the fluctuations, +1159 e.g., when Obs with differing numbers of replica are merged. +1160 """ +1161 if type(idx) is range and type(new_idx) is range: +1162 if idx == new_idx: +1163 if scalefactor == 1: +1164 return deltas +1165 else: +1166 return deltas * scalefactor +1167 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) +1168 for i in range(shape): +1169 ret[idx[i] - new_idx[0]] = deltas[i] +1170 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx) * scalefactor +1171 +1172 +1173def derived_observable(func, data, array_mode=False, **kwargs): +1174 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. +1175 +1176 Parameters +1177 ---------- +1178 func : object +1179 arbitrary function of the form func(data, **kwargs). For the +1180 automatic differentiation to work, all numpy functions have to have +1181 the autograd wrapper (use 'import autograd.numpy as anp'). +1182 data : list +1183 list of Obs, e.g. [obs1, obs2, obs3]. +1184 num_grad : bool +1185 if True, numerical derivatives are used instead of autograd +1186 (default False). To control the numerical differentiation the +1187 kwargs of numdifftools.step_generators.MaxStepGenerator +1188 can be used. +1189 man_grad : list +1190 manually supply a list or an array which contains the jacobian +1191 of func. Use cautiously, supplying the wrong derivative will +1192 not be intercepted. 1193 -1194 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) -1195 """ -1196 -1197 data = np.asarray(data) -1198 raveled_data = data.ravel() -1199 -1200 # Workaround for matrix operations containing non Obs data -1201 if not all(isinstance(x, Obs) for x in raveled_data): -1202 for i in range(len(raveled_data)): -1203 if isinstance(raveled_data[i], (int, float)): -1204 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") -1205 -1206 allcov = {} -1207 for o in raveled_data: -1208 for name in o.cov_names: -1209 if name in allcov: -1210 if not np.allclose(allcov[name], o.covobs[name].cov): -1211 raise Exception('Inconsistent covariance matrices for %s!' % (name)) -1212 else: -1213 allcov[name] = o.covobs[name].cov -1214 -1215 n_obs = len(raveled_data) -1216 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) -1217 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) -1218 new_sample_names = sorted(set(new_names) - set(new_cov_names)) +1194 Notes +1195 ----- +1196 For simple mathematical operations it can be practical to use anonymous +1197 functions. For the ratio of two observables one can e.g. use +1198 +1199 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) +1200 """ +1201 +1202 data = np.asarray(data) +1203 raveled_data = data.ravel() +1204 +1205 # Workaround for matrix operations containing non Obs data +1206 if not all(isinstance(x, Obs) for x in raveled_data): +1207 for i in range(len(raveled_data)): +1208 if isinstance(raveled_data[i], (int, float)): +1209 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") +1210 +1211 allcov = {} +1212 for o in raveled_data: +1213 for name in o.cov_names: +1214 if name in allcov: +1215 if not np.allclose(allcov[name], o.covobs[name].cov): +1216 raise Exception('Inconsistent covariance matrices for %s!' % (name)) +1217 else: +1218 allcov[name] = o.covobs[name].cov 1219 -1220 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 -1221 -1222 if data.ndim == 1: -1223 values = np.array([o.value for o in data]) -1224 else: -1225 values = np.vectorize(lambda x: x.value)(data) +1220 n_obs = len(raveled_data) +1221 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) +1222 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) +1223 new_sample_names = sorted(set(new_names) - set(new_cov_names)) +1224 +1225 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1226 -1227 new_values = func(values, **kwargs) -1228 -1229 multi = int(isinstance(new_values, np.ndarray)) -1230 -1231 new_r_values = {} -1232 new_idl_d = {} -1233 for name in new_sample_names: -1234 idl = [] -1235 tmp_values = np.zeros(n_obs) -1236 for i, item in enumerate(raveled_data): -1237 tmp_values[i] = item.r_values.get(name, item.value) -1238 tmp_idl = item.idl.get(name) -1239 if tmp_idl is not None: -1240 idl.append(tmp_idl) -1241 if multi > 0: -1242 tmp_values = np.array(tmp_values).reshape(data.shape) -1243 new_r_values[name] = func(tmp_values, **kwargs) -1244 new_idl_d[name] = _merge_idx(idl) -1245 -1246 if 'man_grad' in kwargs: -1247 deriv = np.asarray(kwargs.get('man_grad')) -1248 if new_values.shape + data.shape != deriv.shape: -1249 raise Exception('Manual derivative does not have correct shape.') -1250 elif kwargs.get('num_grad') is True: -1251 if multi > 0: -1252 raise Exception('Multi mode currently not supported for numerical derivative') -1253 options = { -1254 'base_step': 0.1, -1255 'step_ratio': 2.5} -1256 for key in options.keys(): -1257 kwarg = kwargs.get(key) -1258 if kwarg is not None: -1259 options[key] = kwarg -1260 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) -1261 if tmp_df.size == 1: -1262 deriv = np.array([tmp_df.real]) -1263 else: -1264 deriv = tmp_df.real -1265 else: -1266 deriv = jacobian(func)(values, **kwargs) -1267 -1268 final_result = np.zeros(new_values.shape, dtype=object) +1227 if data.ndim == 1: +1228 values = np.array([o.value for o in data]) +1229 else: +1230 values = np.vectorize(lambda x: x.value)(data) +1231 +1232 new_values = func(values, **kwargs) +1233 +1234 multi = int(isinstance(new_values, np.ndarray)) +1235 +1236 new_r_values = {} +1237 new_idl_d = {} +1238 for name in new_sample_names: +1239 idl = [] +1240 tmp_values = np.zeros(n_obs) +1241 for i, item in enumerate(raveled_data): +1242 tmp_values[i] = item.r_values.get(name, item.value) +1243 tmp_idl = item.idl.get(name) +1244 if tmp_idl is not None: +1245 idl.append(tmp_idl) +1246 if multi > 0: +1247 tmp_values = np.array(tmp_values).reshape(data.shape) +1248 new_r_values[name] = func(tmp_values, **kwargs) +1249 new_idl_d[name] = _merge_idx(idl) +1250 +1251 def _compute_scalefactor_missing_rep(obs): +1252 """ +1253 Computes the scale factor that is to be multiplied with the deltas +1254 in the case where Obs with different subsets of replica are merged. +1255 Returns a dictionary with the scale factor for each Monte Carlo name. +1256 +1257 Parameters +1258 ---------- +1259 obs : Obs +1260 The observable corresponding to the deltas that are to be scaled +1261 """ +1262 scalef_d = {} +1263 for mc_name in obs.mc_names: +1264 mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')] +1265 new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')] +1266 if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d): +1267 scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d]) +1268 return scalef_d 1269 -1270 if array_mode is True: -1271 -1272 class _Zero_grad(): -1273 def __init__(self, N): -1274 self.grad = np.zeros((N, 1)) -1275 -1276 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) -1277 d_extracted = {} -1278 g_extracted = {} -1279 for name in new_sample_names: -1280 d_extracted[name] = [] -1281 ens_length = len(new_idl_d[name]) -1282 for i_dat, dat in enumerate(data): -1283 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) -1284 for name in new_cov_names: -1285 g_extracted[name] = [] -1286 zero_grad = _Zero_grad(new_covobs_lengths[name]) -1287 for i_dat, dat in enumerate(data): -1288 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) -1289 -1290 for i_val, new_val in np.ndenumerate(new_values): -1291 new_deltas = {} -1292 new_grad = {} -1293 if array_mode is True: -1294 for name in new_sample_names: -1295 ens_length = d_extracted[name][0].shape[-1] -1296 new_deltas[name] = np.zeros(ens_length) -1297 for i_dat, dat in enumerate(d_extracted[name]): -1298 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) -1299 for name in new_cov_names: -1300 new_grad[name] = 0 -1301 for i_dat, dat in enumerate(g_extracted[name]): -1302 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) -1303 else: -1304 for j_obs, obs in np.ndenumerate(data): -1305 for name in obs.names: -1306 if name in obs.cov_names: -1307 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad -1308 else: -1309 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) -1310 -1311 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} -1312 -1313 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): -1314 raise Exception('The same name has been used for deltas and covobs!') -1315 new_samples = [] -1316 new_means = [] -1317 new_idl = [] -1318 new_names_obs = [] -1319 for name in new_names: -1320 if name not in new_covobs: -1321 new_samples.append(new_deltas[name]) -1322 new_idl.append(new_idl_d[name]) -1323 new_means.append(new_r_values[name][i_val]) -1324 new_names_obs.append(name) -1325 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) -1326 for name in new_covobs: -1327 final_result[i_val].names.append(name) -1328 final_result[i_val]._covobs = new_covobs -1329 final_result[i_val]._value = new_val -1330 final_result[i_val].reweighted = reweighted -1331 -1332 if multi == 0: -1333 final_result = final_result.item() -1334 -1335 return final_result -1336 +1270 if 'man_grad' in kwargs: +1271 deriv = np.asarray(kwargs.get('man_grad')) +1272 if new_values.shape + data.shape != deriv.shape: +1273 raise Exception('Manual derivative does not have correct shape.') +1274 elif kwargs.get('num_grad') is True: +1275 if multi > 0: +1276 raise Exception('Multi mode currently not supported for numerical derivative') +1277 options = { +1278 'base_step': 0.1, +1279 'step_ratio': 2.5} +1280 for key in options.keys(): +1281 kwarg = kwargs.get(key) +1282 if kwarg is not None: +1283 options[key] = kwarg +1284 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) +1285 if tmp_df.size == 1: +1286 deriv = np.array([tmp_df.real]) +1287 else: +1288 deriv = tmp_df.real +1289 else: +1290 deriv = jacobian(func)(values, **kwargs) +1291 +1292 final_result = np.zeros(new_values.shape, dtype=object) +1293 +1294 if array_mode is True: +1295 +1296 class _Zero_grad(): +1297 def __init__(self, N): +1298 self.grad = np.zeros((N, 1)) +1299 +1300 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) +1301 d_extracted = {} +1302 g_extracted = {} +1303 for name in new_sample_names: +1304 d_extracted[name] = [] +1305 ens_length = len(new_idl_d[name]) +1306 for i_dat, dat in enumerate(data): +1307 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) +1308 for name in new_cov_names: +1309 g_extracted[name] = [] +1310 zero_grad = _Zero_grad(new_covobs_lengths[name]) +1311 for i_dat, dat in enumerate(data): +1312 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) +1313 +1314 for i_val, new_val in np.ndenumerate(new_values): +1315 new_deltas = {} +1316 new_grad = {} +1317 if array_mode is True: +1318 for name in new_sample_names: +1319 ens_length = d_extracted[name][0].shape[-1] +1320 new_deltas[name] = np.zeros(ens_length) +1321 for i_dat, dat in enumerate(d_extracted[name]): +1322 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) +1323 for name in new_cov_names: +1324 new_grad[name] = 0 +1325 for i_dat, dat in enumerate(g_extracted[name]): +1326 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) +1327 else: +1328 for j_obs, obs in np.ndenumerate(data): +1329 scalef_d = _compute_scalefactor_missing_rep(obs) +1330 for name in obs.names: +1331 if name in obs.cov_names: +1332 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad +1333 else: +1334 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1)) +1335 +1336 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1337 -1338def _reduce_deltas(deltas, idx_old, idx_new): -1339 """Extract deltas defined on idx_old on all configs of idx_new. -1340 -1341 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they -1342 are ordered in an ascending order. -1343 -1344 Parameters -1345 ---------- -1346 deltas : list -1347 List of fluctuations -1348 idx_old : list -1349 List or range of configs on which the deltas are defined -1350 idx_new : list -1351 List of configs for which we want to extract the deltas. -1352 Has to be a subset of idx_old. -1353 """ -1354 if not len(deltas) == len(idx_old): -1355 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) -1356 if type(idx_old) is range and type(idx_new) is range: -1357 if idx_old == idx_new: -1358 return deltas -1359 if _check_lists_equal([idx_old, idx_new]): -1360 return deltas -1361 indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1] -1362 if len(indices) < len(idx_new): -1363 raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old') -1364 return np.array(deltas)[indices] +1338 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): +1339 raise Exception('The same name has been used for deltas and covobs!') +1340 new_samples = [] +1341 new_means = [] +1342 new_idl = [] +1343 new_names_obs = [] +1344 for name in new_names: +1345 if name not in new_covobs: +1346 new_samples.append(new_deltas[name]) +1347 new_idl.append(new_idl_d[name]) +1348 new_means.append(new_r_values[name][i_val]) +1349 new_names_obs.append(name) +1350 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) +1351 for name in new_covobs: +1352 final_result[i_val].names.append(name) +1353 final_result[i_val]._covobs = new_covobs +1354 final_result[i_val]._value = new_val +1355 final_result[i_val].reweighted = reweighted +1356 +1357 if multi == 0: +1358 final_result = final_result.item() +1359 +1360 return final_result +1361 +1362 +1363def _reduce_deltas(deltas, idx_old, idx_new): +1364 """Extract deltas defined on idx_old on all configs of idx_new. 1365 -1366 -1367def reweight(weight, obs, **kwargs): -1368 """Reweight a list of observables. -1369 -1370 Parameters -1371 ---------- -1372 weight : Obs -1373 Reweighting factor. An Observable that has to be defined on a superset of the -1374 configurations in obs[i].idl for all i. -1375 obs : list -1376 list of Obs, e.g. [obs1, obs2, obs3]. -1377 all_configs : bool -1378 if True, the reweighted observables are normalized by the average of -1379 the reweighting factor on all configurations in weight.idl and not -1380 on the configurations in obs[i].idl. Default False. -1381 """ -1382 result = [] -1383 for i in range(len(obs)): -1384 if len(obs[i].cov_names): -1385 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') -1386 if not set(obs[i].names).issubset(weight.names): -1387 raise Exception('Error: Ensembles do not fit') -1388 for name in obs[i].names: -1389 if not set(obs[i].idl[name]).issubset(weight.idl[name]): -1390 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) -1391 new_samples = [] -1392 w_deltas = {} -1393 for name in sorted(obs[i].names): -1394 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) -1395 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) -1396 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) -1397 -1398 if kwargs.get('all_configs'): -1399 new_weight = weight -1400 else: -1401 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) -1402 -1403 result.append(tmp_obs / new_weight) -1404 result[-1].reweighted = True -1405 -1406 return result -1407 -1408 -1409def correlate(obs_a, obs_b): -1410 """Correlate two observables. -1411 -1412 Parameters -1413 ---------- -1414 obs_a : Obs -1415 First observable -1416 obs_b : Obs -1417 Second observable -1418 -1419 Notes -1420 ----- -1421 Keep in mind to only correlate primary observables which have not been reweighted -1422 yet. The reweighting has to be applied after correlating the observables. -1423 Currently only works if ensembles are identical (this is not strictly necessary). -1424 """ -1425 -1426 if sorted(obs_a.names) != sorted(obs_b.names): -1427 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") -1428 if len(obs_a.cov_names) or len(obs_b.cov_names): -1429 raise Exception('Error: Not possible to correlate Obs that contain covobs!') -1430 for name in obs_a.names: -1431 if obs_a.shape[name] != obs_b.shape[name]: -1432 raise Exception('Shapes of ensemble', name, 'do not fit') -1433 if obs_a.idl[name] != obs_b.idl[name]: -1434 raise Exception('idl of ensemble', name, 'do not fit') -1435 -1436 if obs_a.reweighted is True: -1437 warnings.warn("The first observable is already reweighted.", RuntimeWarning) -1438 if obs_b.reweighted is True: -1439 warnings.warn("The second observable is already reweighted.", RuntimeWarning) -1440 -1441 new_samples = [] -1442 new_idl = [] -1443 for name in sorted(obs_a.names): -1444 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) -1445 new_idl.append(obs_a.idl[name]) -1446 -1447 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) -1448 o.reweighted = obs_a.reweighted or obs_b.reweighted -1449 return o +1366 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they +1367 are ordered in an ascending order. +1368 +1369 Parameters +1370 ---------- +1371 deltas : list +1372 List of fluctuations +1373 idx_old : list +1374 List or range of configs on which the deltas are defined +1375 idx_new : list +1376 List of configs for which we want to extract the deltas. +1377 Has to be a subset of idx_old. +1378 """ +1379 if not len(deltas) == len(idx_old): +1380 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) +1381 if type(idx_old) is range and type(idx_new) is range: +1382 if idx_old == idx_new: +1383 return deltas +1384 if _check_lists_equal([idx_old, idx_new]): +1385 return deltas +1386 indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1] +1387 if len(indices) < len(idx_new): +1388 raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old') +1389 return np.array(deltas)[indices] +1390 +1391 +1392def reweight(weight, obs, **kwargs): +1393 """Reweight a list of observables. +1394 +1395 Parameters +1396 ---------- +1397 weight : Obs +1398 Reweighting factor. An Observable that has to be defined on a superset of the +1399 configurations in obs[i].idl for all i. +1400 obs : list +1401 list of Obs, e.g. [obs1, obs2, obs3]. +1402 all_configs : bool +1403 if True, the reweighted observables are normalized by the average of +1404 the reweighting factor on all configurations in weight.idl and not +1405 on the configurations in obs[i].idl. Default False. +1406 """ +1407 result = [] +1408 for i in range(len(obs)): +1409 if len(obs[i].cov_names): +1410 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') +1411 if not set(obs[i].names).issubset(weight.names): +1412 raise Exception('Error: Ensembles do not fit') +1413 for name in obs[i].names: +1414 if not set(obs[i].idl[name]).issubset(weight.idl[name]): +1415 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) +1416 new_samples = [] +1417 w_deltas = {} +1418 for name in sorted(obs[i].names): +1419 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) +1420 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) +1421 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) +1422 +1423 if kwargs.get('all_configs'): +1424 new_weight = weight +1425 else: +1426 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) +1427 +1428 result.append(tmp_obs / new_weight) +1429 result[-1].reweighted = True +1430 +1431 return result +1432 +1433 +1434def correlate(obs_a, obs_b): +1435 """Correlate two observables. +1436 +1437 Parameters +1438 ---------- +1439 obs_a : Obs +1440 First observable +1441 obs_b : Obs +1442 Second observable +1443 +1444 Notes +1445 ----- +1446 Keep in mind to only correlate primary observables which have not been reweighted +1447 yet. The reweighting has to be applied after correlating the observables. +1448 Currently only works if ensembles are identical (this is not strictly necessary). +1449 """ 1450 -1451 -1452def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): -1453 r'''Calculates the error covariance matrix of a set of observables. -1454 -1455 WARNING: This function should be used with care, especially for observables with support on multiple -1456 ensembles with differing autocorrelations. See the notes below for details. -1457 -1458 The gamma method has to be applied first to all observables. -1459 -1460 Parameters -1461 ---------- -1462 obs : list or numpy.ndarray -1463 List or one dimensional array of Obs -1464 visualize : bool -1465 If True plots the corresponding normalized correlation matrix (default False). -1466 correlation : bool -1467 If True the correlation matrix instead of the error covariance matrix is returned (default False). -1468 smooth : None or int -1469 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue -1470 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the -1471 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely -1472 small ones. -1473 -1474 Notes -1475 ----- -1476 The error covariance is defined such that it agrees with the squared standard error for two identical observables -1477 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ -1478 in the absence of autocorrelation. -1479 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite -1480 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. -1481 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. -1482 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ -1483 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). -1484 ''' -1485 -1486 length = len(obs) -1487 -1488 max_samples = np.max([o.N for o in obs]) -1489 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: -1490 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) -1491 -1492 cov = np.zeros((length, length)) -1493 for i in range(length): -1494 for j in range(i, length): -1495 cov[i, j] = _covariance_element(obs[i], obs[j]) -1496 cov = cov + cov.T - np.diag(np.diag(cov)) -1497 -1498 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) -1499 -1500 if isinstance(smooth, int): -1501 corr = _smooth_eigenvalues(corr, smooth) -1502 -1503 if visualize: -1504 plt.matshow(corr, vmin=-1, vmax=1) -1505 plt.set_cmap('RdBu') -1506 plt.colorbar() -1507 plt.draw() -1508 -1509 if correlation is True: -1510 return corr -1511 -1512 errors = [o.dvalue for o in obs] -1513 cov = np.diag(errors) @ corr @ np.diag(errors) -1514 -1515 eigenvalues = np.linalg.eigh(cov)[0] -1516 if not np.all(eigenvalues >= 0): -1517 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) -1518 -1519 return cov -1520 -1521 -1522def _smooth_eigenvalues(corr, E): -1523 """Eigenvalue smoothing as described in hep-lat/9412087 +1451 if sorted(obs_a.names) != sorted(obs_b.names): +1452 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") +1453 if len(obs_a.cov_names) or len(obs_b.cov_names): +1454 raise Exception('Error: Not possible to correlate Obs that contain covobs!') +1455 for name in obs_a.names: +1456 if obs_a.shape[name] != obs_b.shape[name]: +1457 raise Exception('Shapes of ensemble', name, 'do not fit') +1458 if obs_a.idl[name] != obs_b.idl[name]: +1459 raise Exception('idl of ensemble', name, 'do not fit') +1460 +1461 if obs_a.reweighted is True: +1462 warnings.warn("The first observable is already reweighted.", RuntimeWarning) +1463 if obs_b.reweighted is True: +1464 warnings.warn("The second observable is already reweighted.", RuntimeWarning) +1465 +1466 new_samples = [] +1467 new_idl = [] +1468 for name in sorted(obs_a.names): +1469 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) +1470 new_idl.append(obs_a.idl[name]) +1471 +1472 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) +1473 o.reweighted = obs_a.reweighted or obs_b.reweighted +1474 return o +1475 +1476 +1477def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): +1478 r'''Calculates the error covariance matrix of a set of observables. +1479 +1480 WARNING: This function should be used with care, especially for observables with support on multiple +1481 ensembles with differing autocorrelations. See the notes below for details. +1482 +1483 The gamma method has to be applied first to all observables. +1484 +1485 Parameters +1486 ---------- +1487 obs : list or numpy.ndarray +1488 List or one dimensional array of Obs +1489 visualize : bool +1490 If True plots the corresponding normalized correlation matrix (default False). +1491 correlation : bool +1492 If True the correlation matrix instead of the error covariance matrix is returned (default False). +1493 smooth : None or int +1494 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue +1495 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the +1496 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely +1497 small ones. +1498 +1499 Notes +1500 ----- +1501 The error covariance is defined such that it agrees with the squared standard error for two identical observables +1502 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ +1503 in the absence of autocorrelation. +1504 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite +1505 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. +1506 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. +1507 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ +1508 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). +1509 ''' +1510 +1511 length = len(obs) +1512 +1513 max_samples = np.max([o.N for o in obs]) +1514 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: +1515 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) +1516 +1517 cov = np.zeros((length, length)) +1518 for i in range(length): +1519 for j in range(i, length): +1520 cov[i, j] = _covariance_element(obs[i], obs[j]) +1521 cov = cov + cov.T - np.diag(np.diag(cov)) +1522 +1523 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1524 -1525 corr : np.ndarray -1526 correlation matrix -1527 E : integer -1528 Number of eigenvalues to be left substantially unchanged -1529 """ -1530 if not (2 < E < corr.shape[0] - 1): -1531 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") -1532 vals, vec = np.linalg.eigh(corr) -1533 lambda_min = np.mean(vals[:-E]) -1534 vals[vals < lambda_min] = lambda_min -1535 vals /= np.mean(vals) -1536 return vec @ np.diag(vals) @ vec.T -1537 -1538 -1539def _covariance_element(obs1, obs2): -1540 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" -1541 -1542 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): -1543 deltas1 = _reduce_deltas(deltas1, idx1, new_idx) -1544 deltas2 = _reduce_deltas(deltas2, idx2, new_idx) -1545 return np.sum(deltas1 * deltas2) +1525 if isinstance(smooth, int): +1526 corr = _smooth_eigenvalues(corr, smooth) +1527 +1528 if visualize: +1529 plt.matshow(corr, vmin=-1, vmax=1) +1530 plt.set_cmap('RdBu') +1531 plt.colorbar() +1532 plt.draw() +1533 +1534 if correlation is True: +1535 return corr +1536 +1537 errors = [o.dvalue for o in obs] +1538 cov = np.diag(errors) @ corr @ np.diag(errors) +1539 +1540 eigenvalues = np.linalg.eigh(cov)[0] +1541 if not np.all(eigenvalues >= 0): +1542 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) +1543 +1544 return cov +1545 1546 -1547 if set(obs1.names).isdisjoint(set(obs2.names)): -1548 return 0.0 +1547def _smooth_eigenvalues(corr, E): +1548 """Eigenvalue smoothing as described in hep-lat/9412087 1549 -1550 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): -1551 raise Exception('The gamma method has to be applied to both Obs first.') -1552 -1553 dvalue = 0.0 -1554 -1555 for e_name in obs1.mc_names: -1556 -1557 if e_name not in obs2.mc_names: -1558 continue -1559 -1560 idl_d = {} -1561 for r_name in obs1.e_content[e_name]: -1562 if r_name not in obs2.e_content[e_name]: -1563 continue -1564 idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) -1565 -1566 gamma = 0.0 -1567 -1568 for r_name in obs1.e_content[e_name]: -1569 if r_name not in obs2.e_content[e_name]: -1570 continue -1571 if len(idl_d[r_name]) == 0: -1572 continue -1573 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) +1550 corr : np.ndarray +1551 correlation matrix +1552 E : integer +1553 Number of eigenvalues to be left substantially unchanged +1554 """ +1555 if not (2 < E < corr.shape[0] - 1): +1556 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") +1557 vals, vec = np.linalg.eigh(corr) +1558 lambda_min = np.mean(vals[:-E]) +1559 vals[vals < lambda_min] = lambda_min +1560 vals /= np.mean(vals) +1561 return vec @ np.diag(vals) @ vec.T +1562 +1563 +1564def _covariance_element(obs1, obs2): +1565 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" +1566 +1567 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): +1568 deltas1 = _reduce_deltas(deltas1, idx1, new_idx) +1569 deltas2 = _reduce_deltas(deltas2, idx2, new_idx) +1570 return np.sum(deltas1 * deltas2) +1571 +1572 if set(obs1.names).isdisjoint(set(obs2.names)): +1573 return 0.0 1574 -1575 if gamma == 0.0: -1576 continue +1575 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): +1576 raise Exception('The gamma method has to be applied to both Obs first.') 1577 -1578 gamma_div = 0.0 -1579 for r_name in obs1.e_content[e_name]: -1580 if r_name not in obs2.e_content[e_name]: -1581 continue -1582 if len(idl_d[r_name]) == 0: -1583 continue -1584 gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) -1585 gamma /= gamma_div -1586 -1587 dvalue += gamma -1588 -1589 for e_name in obs1.cov_names: +1578 dvalue = 0.0 +1579 +1580 for e_name in obs1.mc_names: +1581 +1582 if e_name not in obs2.mc_names: +1583 continue +1584 +1585 idl_d = {} +1586 for r_name in obs1.e_content[e_name]: +1587 if r_name not in obs2.e_content[e_name]: +1588 continue +1589 idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) 1590 -1591 if e_name not in obs2.cov_names: -1592 continue -1593 -1594 dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item() -1595 -1596 return dvalue -1597 -1598 -1599def import_jackknife(jacks, name, idl=None): -1600 """Imports jackknife samples and returns an Obs -1601 -1602 Parameters -1603 ---------- -1604 jacks : numpy.ndarray -1605 numpy array containing the mean value as zeroth entry and -1606 the N jackknife samples as first to Nth entry. -1607 name : str -1608 name of the ensemble the samples are defined on. -1609 """ -1610 length = len(jacks) - 1 -1611 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) -1612 samples = jacks[1:] @ prj -1613 mean = np.mean(samples) -1614 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) -1615 new_obs._value = jacks[0] -1616 return new_obs -1617 +1591 gamma = 0.0 +1592 +1593 for r_name in obs1.e_content[e_name]: +1594 if r_name not in obs2.e_content[e_name]: +1595 continue +1596 if len(idl_d[r_name]) == 0: +1597 continue +1598 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) +1599 +1600 if gamma == 0.0: +1601 continue +1602 +1603 gamma_div = 0.0 +1604 for r_name in obs1.e_content[e_name]: +1605 if r_name not in obs2.e_content[e_name]: +1606 continue +1607 if len(idl_d[r_name]) == 0: +1608 continue +1609 gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) +1610 gamma /= gamma_div +1611 +1612 dvalue += gamma +1613 +1614 for e_name in obs1.cov_names: +1615 +1616 if e_name not in obs2.cov_names: +1617 continue 1618 -1619def import_bootstrap(boots, name, random_numbers): -1620 """Imports bootstrap samples and returns an Obs -1621 -1622 Parameters -1623 ---------- -1624 boots : numpy.ndarray -1625 numpy array containing the mean value as zeroth entry and -1626 the N bootstrap samples as first to Nth entry. -1627 name : str -1628 name of the ensemble the samples are defined on. -1629 random_numbers : np.ndarray -1630 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, -1631 where samples is the number of bootstrap samples and length is the length of the original Monte Carlo -1632 chain to be reconstructed. -1633 """ -1634 samples, length = random_numbers.shape -1635 if samples != len(boots) - 1: -1636 raise ValueError("Random numbers do not have the correct shape.") -1637 -1638 if samples < length: -1639 raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.") -1640 -1641 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length +1619 dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item() +1620 +1621 return dvalue +1622 +1623 +1624def import_jackknife(jacks, name, idl=None): +1625 """Imports jackknife samples and returns an Obs +1626 +1627 Parameters +1628 ---------- +1629 jacks : numpy.ndarray +1630 numpy array containing the mean value as zeroth entry and +1631 the N jackknife samples as first to Nth entry. +1632 name : str +1633 name of the ensemble the samples are defined on. +1634 """ +1635 length = len(jacks) - 1 +1636 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) +1637 samples = jacks[1:] @ prj +1638 mean = np.mean(samples) +1639 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) +1640 new_obs._value = jacks[0] +1641 return new_obs 1642 -1643 samples = scipy.linalg.lstsq(proj, boots[1:])[0] -1644 ret = Obs([samples], [name]) -1645 ret._value = boots[0] -1646 return ret -1647 -1648 -1649def merge_obs(list_of_obs): -1650 """Combine all observables in list_of_obs into one new observable -1651 -1652 Parameters -1653 ---------- -1654 list_of_obs : list -1655 list of the Obs object to be combined -1656 -1657 Notes -1658 ----- -1659 It is not possible to combine obs which are based on the same replicum -1660 """ -1661 replist = [item for obs in list_of_obs for item in obs.names] -1662 if (len(replist) == len(set(replist))) is False: -1663 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) -1664 if any([len(o.cov_names) for o in list_of_obs]): -1665 raise Exception('Not possible to merge data that contains covobs!') -1666 new_dict = {} -1667 idl_dict = {} -1668 for o in list_of_obs: -1669 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) -1670 for key in set(o.deltas) | set(o.r_values)}) -1671 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) +1643 +1644def import_bootstrap(boots, name, random_numbers): +1645 """Imports bootstrap samples and returns an Obs +1646 +1647 Parameters +1648 ---------- +1649 boots : numpy.ndarray +1650 numpy array containing the mean value as zeroth entry and +1651 the N bootstrap samples as first to Nth entry. +1652 name : str +1653 name of the ensemble the samples are defined on. +1654 random_numbers : np.ndarray +1655 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, +1656 where samples is the number of bootstrap samples and length is the length of the original Monte Carlo +1657 chain to be reconstructed. +1658 """ +1659 samples, length = random_numbers.shape +1660 if samples != len(boots) - 1: +1661 raise ValueError("Random numbers do not have the correct shape.") +1662 +1663 if samples < length: +1664 raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.") +1665 +1666 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length +1667 +1668 samples = scipy.linalg.lstsq(proj, boots[1:])[0] +1669 ret = Obs([samples], [name]) +1670 ret._value = boots[0] +1671 return ret 1672 -1673 names = sorted(new_dict.keys()) -1674 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) -1675 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) -1676 return o -1677 -1678 -1679def cov_Obs(means, cov, name, grad=None): -1680 """Create an Obs based on mean(s) and a covariance matrix +1673 +1674def merge_obs(list_of_obs): +1675 """Combine all observables in list_of_obs into one new observable +1676 +1677 Parameters +1678 ---------- +1679 list_of_obs : list +1680 list of the Obs object to be combined 1681 -1682 Parameters -1683 ---------- -1684 mean : list of floats or float -1685 N mean value(s) of the new Obs -1686 cov : list or array -1687 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance -1688 name : str -1689 identifier for the covariance matrix -1690 grad : list or array -1691 Gradient of the Covobs wrt. the means belonging to cov. -1692 """ -1693 -1694 def covobs_to_obs(co): -1695 """Make an Obs out of a Covobs -1696 -1697 Parameters -1698 ---------- -1699 co : Covobs -1700 Covobs to be embedded into the Obs -1701 """ -1702 o = Obs([], [], means=[]) -1703 o._value = co.value -1704 o.names.append(co.name) -1705 o._covobs[co.name] = co -1706 o._dvalue = np.sqrt(co.errsq()) -1707 return o -1708 -1709 ol = [] -1710 if isinstance(means, (float, int)): -1711 means = [means] -1712 -1713 for i in range(len(means)): -1714 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) -1715 if ol[0].covobs[name].N != len(means): -1716 raise Exception('You have to provide %d mean values!' % (ol[0].N)) -1717 if len(ol) == 1: -1718 return ol[0] -1719 return ol -1720 +1682 Notes +1683 ----- +1684 It is not possible to combine obs which are based on the same replicum +1685 """ +1686 replist = [item for obs in list_of_obs for item in obs.names] +1687 if (len(replist) == len(set(replist))) is False: +1688 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) +1689 if any([len(o.cov_names) for o in list_of_obs]): +1690 raise Exception('Not possible to merge data that contains covobs!') +1691 new_dict = {} +1692 idl_dict = {} +1693 for o in list_of_obs: +1694 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) +1695 for key in set(o.deltas) | set(o.r_values)}) +1696 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) +1697 +1698 names = sorted(new_dict.keys()) +1699 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) +1700 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) +1701 return o +1702 +1703 +1704def cov_Obs(means, cov, name, grad=None): +1705 """Create an Obs based on mean(s) and a covariance matrix +1706 +1707 Parameters +1708 ---------- +1709 mean : list of floats or float +1710 N mean value(s) of the new Obs +1711 cov : list or array +1712 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance +1713 name : str +1714 identifier for the covariance matrix +1715 grad : list or array +1716 Gradient of the Covobs wrt. the means belonging to cov. +1717 """ +1718 +1719 def covobs_to_obs(co): +1720 """Make an Obs out of a Covobs 1721 -1722def _determine_gap(o, e_content, e_name): -1723 gaps = [] -1724 for r_name in e_content[e_name]: -1725 if isinstance(o.idl[r_name], range): -1726 gaps.append(o.idl[r_name].step) -1727 else: -1728 gaps.append(np.min(np.diff(o.idl[r_name]))) -1729 -1730 gap = min(gaps) -1731 if not np.all([gi % gap == 0 for gi in gaps]): -1732 raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps) +1722 Parameters +1723 ---------- +1724 co : Covobs +1725 Covobs to be embedded into the Obs +1726 """ +1727 o = Obs([], [], means=[]) +1728 o._value = co.value +1729 o.names.append(co.name) +1730 o._covobs[co.name] = co +1731 o._dvalue = np.sqrt(co.errsq()) +1732 return o 1733 -1734 return gap -1735 -1736 -1737def _check_lists_equal(idl): -1738 ''' -1739 Use groupby to efficiently check whether all elements of idl are identical. -1740 Returns True if all elements are equal, otherwise False. -1741 -1742 Parameters -1743 ---------- -1744 idl : list of lists, ranges or np.ndarrays -1745 ''' -1746 g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl]) -1747 if next(g, True) and not next(g, False): -1748 return True -1749 return False +1734 ol = [] +1735 if isinstance(means, (float, int)): +1736 means = [means] +1737 +1738 for i in range(len(means)): +1739 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) +1740 if ol[0].covobs[name].N != len(means): +1741 raise Exception('You have to provide %d mean values!' % (ol[0].N)) +1742 if len(ol) == 1: +1743 return ol[0] +1744 return ol +1745 +1746 +1747def _determine_gap(o, e_content, e_name): +1748 gaps = [] +1749 for r_name in e_content[e_name]: +1750 if isinstance(o.idl[r_name], range): +1751 gaps.append(o.idl[r_name].step) +1752 else: +1753 gaps.append(np.min(np.diff(o.idl[r_name]))) +1754 +1755 gap = min(gaps) +1756 if not np.all([gi % gap == 0 for gi in gaps]): +1757 raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps) +1758 +1759 return gap +1760 +1761 +1762def _check_lists_equal(idl): +1763 ''' +1764 Use groupby to efficiently check whether all elements of idl are identical. +1765 Returns True if all elements are equal, otherwise False. +1766 +1767 Parameters +1768 ---------- +1769 idl : list of lists, ranges or np.ndarrays +1770 ''' +1771 g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl]) +1772 if next(g, True) and not next(g, False): +1773 return True +1774 return False @@ -5277,174 +5302,194 @@ should agree with samples from a full bootstrap analysis up to O(1/N). -
1169def derived_observable(func, data, array_mode=False, **kwargs): -1170 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. -1171 -1172 Parameters -1173 ---------- -1174 func : object -1175 arbitrary function of the form func(data, **kwargs). For the -1176 automatic differentiation to work, all numpy functions have to have -1177 the autograd wrapper (use 'import autograd.numpy as anp'). -1178 data : list -1179 list of Obs, e.g. [obs1, obs2, obs3]. -1180 num_grad : bool -1181 if True, numerical derivatives are used instead of autograd -1182 (default False). To control the numerical differentiation the -1183 kwargs of numdifftools.step_generators.MaxStepGenerator -1184 can be used. -1185 man_grad : list -1186 manually supply a list or an array which contains the jacobian -1187 of func. Use cautiously, supplying the wrong derivative will -1188 not be intercepted. -1189 -1190 Notes -1191 ----- -1192 For simple mathematical operations it can be practical to use anonymous -1193 functions. For the ratio of two observables one can e.g. use +@@ -5491,46 +5536,46 @@ functions. For the ratio of two observables one can e.g. use1174def derived_observable(func, data, array_mode=False, **kwargs): +1175 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. +1176 +1177 Parameters +1178 ---------- +1179 func : object +1180 arbitrary function of the form func(data, **kwargs). For the +1181 automatic differentiation to work, all numpy functions have to have +1182 the autograd wrapper (use 'import autograd.numpy as anp'). +1183 data : list +1184 list of Obs, e.g. [obs1, obs2, obs3]. +1185 num_grad : bool +1186 if True, numerical derivatives are used instead of autograd +1187 (default False). To control the numerical differentiation the +1188 kwargs of numdifftools.step_generators.MaxStepGenerator +1189 can be used. +1190 man_grad : list +1191 manually supply a list or an array which contains the jacobian +1192 of func. Use cautiously, supplying the wrong derivative will +1193 not be intercepted. 1194 -1195 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) -1196 """ -1197 -1198 data = np.asarray(data) -1199 raveled_data = data.ravel() -1200 -1201 # Workaround for matrix operations containing non Obs data -1202 if not all(isinstance(x, Obs) for x in raveled_data): -1203 for i in range(len(raveled_data)): -1204 if isinstance(raveled_data[i], (int, float)): -1205 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") -1206 -1207 allcov = {} -1208 for o in raveled_data: -1209 for name in o.cov_names: -1210 if name in allcov: -1211 if not np.allclose(allcov[name], o.covobs[name].cov): -1212 raise Exception('Inconsistent covariance matrices for %s!' % (name)) -1213 else: -1214 allcov[name] = o.covobs[name].cov -1215 -1216 n_obs = len(raveled_data) -1217 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) -1218 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) -1219 new_sample_names = sorted(set(new_names) - set(new_cov_names)) +1195 Notes +1196 ----- +1197 For simple mathematical operations it can be practical to use anonymous +1198 functions. For the ratio of two observables one can e.g. use +1199 +1200 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) +1201 """ +1202 +1203 data = np.asarray(data) +1204 raveled_data = data.ravel() +1205 +1206 # Workaround for matrix operations containing non Obs data +1207 if not all(isinstance(x, Obs) for x in raveled_data): +1208 for i in range(len(raveled_data)): +1209 if isinstance(raveled_data[i], (int, float)): +1210 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") +1211 +1212 allcov = {} +1213 for o in raveled_data: +1214 for name in o.cov_names: +1215 if name in allcov: +1216 if not np.allclose(allcov[name], o.covobs[name].cov): +1217 raise Exception('Inconsistent covariance matrices for %s!' % (name)) +1218 else: +1219 allcov[name] = o.covobs[name].cov 1220 -1221 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 -1222 -1223 if data.ndim == 1: -1224 values = np.array([o.value for o in data]) -1225 else: -1226 values = np.vectorize(lambda x: x.value)(data) +1221 n_obs = len(raveled_data) +1222 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) +1223 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) +1224 new_sample_names = sorted(set(new_names) - set(new_cov_names)) +1225 +1226 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1227 -1228 new_values = func(values, **kwargs) -1229 -1230 multi = int(isinstance(new_values, np.ndarray)) -1231 -1232 new_r_values = {} -1233 new_idl_d = {} -1234 for name in new_sample_names: -1235 idl = [] -1236 tmp_values = np.zeros(n_obs) -1237 for i, item in enumerate(raveled_data): -1238 tmp_values[i] = item.r_values.get(name, item.value) -1239 tmp_idl = item.idl.get(name) -1240 if tmp_idl is not None: -1241 idl.append(tmp_idl) -1242 if multi > 0: -1243 tmp_values = np.array(tmp_values).reshape(data.shape) -1244 new_r_values[name] = func(tmp_values, **kwargs) -1245 new_idl_d[name] = _merge_idx(idl) -1246 -1247 if 'man_grad' in kwargs: -1248 deriv = np.asarray(kwargs.get('man_grad')) -1249 if new_values.shape + data.shape != deriv.shape: -1250 raise Exception('Manual derivative does not have correct shape.') -1251 elif kwargs.get('num_grad') is True: -1252 if multi > 0: -1253 raise Exception('Multi mode currently not supported for numerical derivative') -1254 options = { -1255 'base_step': 0.1, -1256 'step_ratio': 2.5} -1257 for key in options.keys(): -1258 kwarg = kwargs.get(key) -1259 if kwarg is not None: -1260 options[key] = kwarg -1261 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) -1262 if tmp_df.size == 1: -1263 deriv = np.array([tmp_df.real]) -1264 else: -1265 deriv = tmp_df.real -1266 else: -1267 deriv = jacobian(func)(values, **kwargs) -1268 -1269 final_result = np.zeros(new_values.shape, dtype=object) +1228 if data.ndim == 1: +1229 values = np.array([o.value for o in data]) +1230 else: +1231 values = np.vectorize(lambda x: x.value)(data) +1232 +1233 new_values = func(values, **kwargs) +1234 +1235 multi = int(isinstance(new_values, np.ndarray)) +1236 +1237 new_r_values = {} +1238 new_idl_d = {} +1239 for name in new_sample_names: +1240 idl = [] +1241 tmp_values = np.zeros(n_obs) +1242 for i, item in enumerate(raveled_data): +1243 tmp_values[i] = item.r_values.get(name, item.value) +1244 tmp_idl = item.idl.get(name) +1245 if tmp_idl is not None: +1246 idl.append(tmp_idl) +1247 if multi > 0: +1248 tmp_values = np.array(tmp_values).reshape(data.shape) +1249 new_r_values[name] = func(tmp_values, **kwargs) +1250 new_idl_d[name] = _merge_idx(idl) +1251 +1252 def _compute_scalefactor_missing_rep(obs): +1253 """ +1254 Computes the scale factor that is to be multiplied with the deltas +1255 in the case where Obs with different subsets of replica are merged. +1256 Returns a dictionary with the scale factor for each Monte Carlo name. +1257 +1258 Parameters +1259 ---------- +1260 obs : Obs +1261 The observable corresponding to the deltas that are to be scaled +1262 """ +1263 scalef_d = {} +1264 for mc_name in obs.mc_names: +1265 mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')] +1266 new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')] +1267 if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d): +1268 scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d]) +1269 return scalef_d 1270 -1271 if array_mode is True: -1272 -1273 class _Zero_grad(): -1274 def __init__(self, N): -1275 self.grad = np.zeros((N, 1)) -1276 -1277 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) -1278 d_extracted = {} -1279 g_extracted = {} -1280 for name in new_sample_names: -1281 d_extracted[name] = [] -1282 ens_length = len(new_idl_d[name]) -1283 for i_dat, dat in enumerate(data): -1284 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) -1285 for name in new_cov_names: -1286 g_extracted[name] = [] -1287 zero_grad = _Zero_grad(new_covobs_lengths[name]) -1288 for i_dat, dat in enumerate(data): -1289 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) -1290 -1291 for i_val, new_val in np.ndenumerate(new_values): -1292 new_deltas = {} -1293 new_grad = {} -1294 if array_mode is True: -1295 for name in new_sample_names: -1296 ens_length = d_extracted[name][0].shape[-1] -1297 new_deltas[name] = np.zeros(ens_length) -1298 for i_dat, dat in enumerate(d_extracted[name]): -1299 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) -1300 for name in new_cov_names: -1301 new_grad[name] = 0 -1302 for i_dat, dat in enumerate(g_extracted[name]): -1303 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) -1304 else: -1305 for j_obs, obs in np.ndenumerate(data): -1306 for name in obs.names: -1307 if name in obs.cov_names: -1308 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad -1309 else: -1310 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) -1311 -1312 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} -1313 -1314 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): -1315 raise Exception('The same name has been used for deltas and covobs!') -1316 new_samples = [] -1317 new_means = [] -1318 new_idl = [] -1319 new_names_obs = [] -1320 for name in new_names: -1321 if name not in new_covobs: -1322 new_samples.append(new_deltas[name]) -1323 new_idl.append(new_idl_d[name]) -1324 new_means.append(new_r_values[name][i_val]) -1325 new_names_obs.append(name) -1326 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) -1327 for name in new_covobs: -1328 final_result[i_val].names.append(name) -1329 final_result[i_val]._covobs = new_covobs -1330 final_result[i_val]._value = new_val -1331 final_result[i_val].reweighted = reweighted -1332 -1333 if multi == 0: -1334 final_result = final_result.item() -1335 -1336 return final_result +1271 if 'man_grad' in kwargs: +1272 deriv = np.asarray(kwargs.get('man_grad')) +1273 if new_values.shape + data.shape != deriv.shape: +1274 raise Exception('Manual derivative does not have correct shape.') +1275 elif kwargs.get('num_grad') is True: +1276 if multi > 0: +1277 raise Exception('Multi mode currently not supported for numerical derivative') +1278 options = { +1279 'base_step': 0.1, +1280 'step_ratio': 2.5} +1281 for key in options.keys(): +1282 kwarg = kwargs.get(key) +1283 if kwarg is not None: +1284 options[key] = kwarg +1285 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) +1286 if tmp_df.size == 1: +1287 deriv = np.array([tmp_df.real]) +1288 else: +1289 deriv = tmp_df.real +1290 else: +1291 deriv = jacobian(func)(values, **kwargs) +1292 +1293 final_result = np.zeros(new_values.shape, dtype=object) +1294 +1295 if array_mode is True: +1296 +1297 class _Zero_grad(): +1298 def __init__(self, N): +1299 self.grad = np.zeros((N, 1)) +1300 +1301 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) +1302 d_extracted = {} +1303 g_extracted = {} +1304 for name in new_sample_names: +1305 d_extracted[name] = [] +1306 ens_length = len(new_idl_d[name]) +1307 for i_dat, dat in enumerate(data): +1308 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) +1309 for name in new_cov_names: +1310 g_extracted[name] = [] +1311 zero_grad = _Zero_grad(new_covobs_lengths[name]) +1312 for i_dat, dat in enumerate(data): +1313 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) +1314 +1315 for i_val, new_val in np.ndenumerate(new_values): +1316 new_deltas = {} +1317 new_grad = {} +1318 if array_mode is True: +1319 for name in new_sample_names: +1320 ens_length = d_extracted[name][0].shape[-1] +1321 new_deltas[name] = np.zeros(ens_length) +1322 for i_dat, dat in enumerate(d_extracted[name]): +1323 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) +1324 for name in new_cov_names: +1325 new_grad[name] = 0 +1326 for i_dat, dat in enumerate(g_extracted[name]): +1327 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) +1328 else: +1329 for j_obs, obs in np.ndenumerate(data): +1330 scalef_d = _compute_scalefactor_missing_rep(obs) +1331 for name in obs.names: +1332 if name in obs.cov_names: +1333 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad +1334 else: +1335 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1)) +1336 +1337 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} +1338 +1339 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): +1340 raise Exception('The same name has been used for deltas and covobs!') +1341 new_samples = [] +1342 new_means = [] +1343 new_idl = [] +1344 new_names_obs = [] +1345 for name in new_names: +1346 if name not in new_covobs: +1347 new_samples.append(new_deltas[name]) +1348 new_idl.append(new_idl_d[name]) +1349 new_means.append(new_r_values[name][i_val]) +1350 new_names_obs.append(name) +1351 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) +1352 for name in new_covobs: +1353 final_result[i_val].names.append(name) +1354 final_result[i_val]._covobs = new_covobs +1355 final_result[i_val]._value = new_val +1356 final_result[i_val].reweighted = reweighted +1357 +1358 if multi == 0: +1359 final_result = final_result.item() +1360 +1361 return final_result
1368def reweight(weight, obs, **kwargs): -1369 """Reweight a list of observables. -1370 -1371 Parameters -1372 ---------- -1373 weight : Obs -1374 Reweighting factor. An Observable that has to be defined on a superset of the -1375 configurations in obs[i].idl for all i. -1376 obs : list -1377 list of Obs, e.g. [obs1, obs2, obs3]. -1378 all_configs : bool -1379 if True, the reweighted observables are normalized by the average of -1380 the reweighting factor on all configurations in weight.idl and not -1381 on the configurations in obs[i].idl. Default False. -1382 """ -1383 result = [] -1384 for i in range(len(obs)): -1385 if len(obs[i].cov_names): -1386 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') -1387 if not set(obs[i].names).issubset(weight.names): -1388 raise Exception('Error: Ensembles do not fit') -1389 for name in obs[i].names: -1390 if not set(obs[i].idl[name]).issubset(weight.idl[name]): -1391 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) -1392 new_samples = [] -1393 w_deltas = {} -1394 for name in sorted(obs[i].names): -1395 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) -1396 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) -1397 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) -1398 -1399 if kwargs.get('all_configs'): -1400 new_weight = weight -1401 else: -1402 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) -1403 -1404 result.append(tmp_obs / new_weight) -1405 result[-1].reweighted = True -1406 -1407 return result +@@ -5564,47 +5609,47 @@ on the configurations in obs[i].idl. Default False.1393def reweight(weight, obs, **kwargs): +1394 """Reweight a list of observables. +1395 +1396 Parameters +1397 ---------- +1398 weight : Obs +1399 Reweighting factor. An Observable that has to be defined on a superset of the +1400 configurations in obs[i].idl for all i. +1401 obs : list +1402 list of Obs, e.g. [obs1, obs2, obs3]. +1403 all_configs : bool +1404 if True, the reweighted observables are normalized by the average of +1405 the reweighting factor on all configurations in weight.idl and not +1406 on the configurations in obs[i].idl. Default False. +1407 """ +1408 result = [] +1409 for i in range(len(obs)): +1410 if len(obs[i].cov_names): +1411 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') +1412 if not set(obs[i].names).issubset(weight.names): +1413 raise Exception('Error: Ensembles do not fit') +1414 for name in obs[i].names: +1415 if not set(obs[i].idl[name]).issubset(weight.idl[name]): +1416 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) +1417 new_samples = [] +1418 w_deltas = {} +1419 for name in sorted(obs[i].names): +1420 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) +1421 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) +1422 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) +1423 +1424 if kwargs.get('all_configs'): +1425 new_weight = weight +1426 else: +1427 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) +1428 +1429 result.append(tmp_obs / new_weight) +1430 result[-1].reweighted = True +1431 +1432 return result
1410def correlate(obs_a, obs_b): -1411 """Correlate two observables. -1412 -1413 Parameters -1414 ---------- -1415 obs_a : Obs -1416 First observable -1417 obs_b : Obs -1418 Second observable -1419 -1420 Notes -1421 ----- -1422 Keep in mind to only correlate primary observables which have not been reweighted -1423 yet. The reweighting has to be applied after correlating the observables. -1424 Currently only works if ensembles are identical (this is not strictly necessary). -1425 """ -1426 -1427 if sorted(obs_a.names) != sorted(obs_b.names): -1428 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") -1429 if len(obs_a.cov_names) or len(obs_b.cov_names): -1430 raise Exception('Error: Not possible to correlate Obs that contain covobs!') -1431 for name in obs_a.names: -1432 if obs_a.shape[name] != obs_b.shape[name]: -1433 raise Exception('Shapes of ensemble', name, 'do not fit') -1434 if obs_a.idl[name] != obs_b.idl[name]: -1435 raise Exception('idl of ensemble', name, 'do not fit') -1436 -1437 if obs_a.reweighted is True: -1438 warnings.warn("The first observable is already reweighted.", RuntimeWarning) -1439 if obs_b.reweighted is True: -1440 warnings.warn("The second observable is already reweighted.", RuntimeWarning) -1441 -1442 new_samples = [] -1443 new_idl = [] -1444 for name in sorted(obs_a.names): -1445 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) -1446 new_idl.append(obs_a.idl[name]) -1447 -1448 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) -1449 o.reweighted = obs_a.reweighted or obs_b.reweighted -1450 return o +@@ -5639,74 +5684,74 @@ Currently only works if ensembles are identical (this is not strictly necessary)1435def correlate(obs_a, obs_b): +1436 """Correlate two observables. +1437 +1438 Parameters +1439 ---------- +1440 obs_a : Obs +1441 First observable +1442 obs_b : Obs +1443 Second observable +1444 +1445 Notes +1446 ----- +1447 Keep in mind to only correlate primary observables which have not been reweighted +1448 yet. The reweighting has to be applied after correlating the observables. +1449 Currently only works if ensembles are identical (this is not strictly necessary). +1450 """ +1451 +1452 if sorted(obs_a.names) != sorted(obs_b.names): +1453 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") +1454 if len(obs_a.cov_names) or len(obs_b.cov_names): +1455 raise Exception('Error: Not possible to correlate Obs that contain covobs!') +1456 for name in obs_a.names: +1457 if obs_a.shape[name] != obs_b.shape[name]: +1458 raise Exception('Shapes of ensemble', name, 'do not fit') +1459 if obs_a.idl[name] != obs_b.idl[name]: +1460 raise Exception('idl of ensemble', name, 'do not fit') +1461 +1462 if obs_a.reweighted is True: +1463 warnings.warn("The first observable is already reweighted.", RuntimeWarning) +1464 if obs_b.reweighted is True: +1465 warnings.warn("The second observable is already reweighted.", RuntimeWarning) +1466 +1467 new_samples = [] +1468 new_idl = [] +1469 for name in sorted(obs_a.names): +1470 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) +1471 new_idl.append(obs_a.idl[name]) +1472 +1473 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) +1474 o.reweighted = obs_a.reweighted or obs_b.reweighted +1475 return o
1453def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): -1454 r'''Calculates the error covariance matrix of a set of observables. -1455 -1456 WARNING: This function should be used with care, especially for observables with support on multiple -1457 ensembles with differing autocorrelations. See the notes below for details. -1458 -1459 The gamma method has to be applied first to all observables. -1460 -1461 Parameters -1462 ---------- -1463 obs : list or numpy.ndarray -1464 List or one dimensional array of Obs -1465 visualize : bool -1466 If True plots the corresponding normalized correlation matrix (default False). -1467 correlation : bool -1468 If True the correlation matrix instead of the error covariance matrix is returned (default False). -1469 smooth : None or int -1470 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue -1471 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the -1472 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely -1473 small ones. -1474 -1475 Notes -1476 ----- -1477 The error covariance is defined such that it agrees with the squared standard error for two identical observables -1478 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ -1479 in the absence of autocorrelation. -1480 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite -1481 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. -1482 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. -1483 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ -1484 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). -1485 ''' -1486 -1487 length = len(obs) -1488 -1489 max_samples = np.max([o.N for o in obs]) -1490 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: -1491 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) -1492 -1493 cov = np.zeros((length, length)) -1494 for i in range(length): -1495 for j in range(i, length): -1496 cov[i, j] = _covariance_element(obs[i], obs[j]) -1497 cov = cov + cov.T - np.diag(np.diag(cov)) -1498 -1499 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) -1500 -1501 if isinstance(smooth, int): -1502 corr = _smooth_eigenvalues(corr, smooth) -1503 -1504 if visualize: -1505 plt.matshow(corr, vmin=-1, vmax=1) -1506 plt.set_cmap('RdBu') -1507 plt.colorbar() -1508 plt.draw() -1509 -1510 if correlation is True: -1511 return corr -1512 -1513 errors = [o.dvalue for o in obs] -1514 cov = np.diag(errors) @ corr @ np.diag(errors) -1515 -1516 eigenvalues = np.linalg.eigh(cov)[0] -1517 if not np.all(eigenvalues >= 0): -1518 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) -1519 -1520 return cov +@@ -5758,24 +5803,24 @@ This construction ensures that the estimated covariance matrix is positive semi-1478def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): +1479 r'''Calculates the error covariance matrix of a set of observables. +1480 +1481 WARNING: This function should be used with care, especially for observables with support on multiple +1482 ensembles with differing autocorrelations. See the notes below for details. +1483 +1484 The gamma method has to be applied first to all observables. +1485 +1486 Parameters +1487 ---------- +1488 obs : list or numpy.ndarray +1489 List or one dimensional array of Obs +1490 visualize : bool +1491 If True plots the corresponding normalized correlation matrix (default False). +1492 correlation : bool +1493 If True the correlation matrix instead of the error covariance matrix is returned (default False). +1494 smooth : None or int +1495 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue +1496 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the +1497 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely +1498 small ones. +1499 +1500 Notes +1501 ----- +1502 The error covariance is defined such that it agrees with the squared standard error for two identical observables +1503 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ +1504 in the absence of autocorrelation. +1505 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite +1506 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. +1507 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. +1508 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ +1509 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). +1510 ''' +1511 +1512 length = len(obs) +1513 +1514 max_samples = np.max([o.N for o in obs]) +1515 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: +1516 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) +1517 +1518 cov = np.zeros((length, length)) +1519 for i in range(length): +1520 for j in range(i, length): +1521 cov[i, j] = _covariance_element(obs[i], obs[j]) +1522 cov = cov + cov.T - np.diag(np.diag(cov)) +1523 +1524 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) +1525 +1526 if isinstance(smooth, int): +1527 corr = _smooth_eigenvalues(corr, smooth) +1528 +1529 if visualize: +1530 plt.matshow(corr, vmin=-1, vmax=1) +1531 plt.set_cmap('RdBu') +1532 plt.colorbar() +1533 plt.draw() +1534 +1535 if correlation is True: +1536 return corr +1537 +1538 errors = [o.dvalue for o in obs] +1539 cov = np.diag(errors) @ corr @ np.diag(errors) +1540 +1541 eigenvalues = np.linalg.eigh(cov)[0] +1542 if not np.all(eigenvalues >= 0): +1543 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) +1544 +1545 return cov
1600def import_jackknife(jacks, name, idl=None): -1601 """Imports jackknife samples and returns an Obs -1602 -1603 Parameters -1604 ---------- -1605 jacks : numpy.ndarray -1606 numpy array containing the mean value as zeroth entry and -1607 the N jackknife samples as first to Nth entry. -1608 name : str -1609 name of the ensemble the samples are defined on. -1610 """ -1611 length = len(jacks) - 1 -1612 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) -1613 samples = jacks[1:] @ prj -1614 mean = np.mean(samples) -1615 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) -1616 new_obs._value = jacks[0] -1617 return new_obs +@@ -5805,34 +5850,34 @@ name of the ensemble the samples are defined on.1625def import_jackknife(jacks, name, idl=None): +1626 """Imports jackknife samples and returns an Obs +1627 +1628 Parameters +1629 ---------- +1630 jacks : numpy.ndarray +1631 numpy array containing the mean value as zeroth entry and +1632 the N jackknife samples as first to Nth entry. +1633 name : str +1634 name of the ensemble the samples are defined on. +1635 """ +1636 length = len(jacks) - 1 +1637 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) +1638 samples = jacks[1:] @ prj +1639 mean = np.mean(samples) +1640 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) +1641 new_obs._value = jacks[0] +1642 return new_obs
1620def import_bootstrap(boots, name, random_numbers): -1621 """Imports bootstrap samples and returns an Obs -1622 -1623 Parameters -1624 ---------- -1625 boots : numpy.ndarray -1626 numpy array containing the mean value as zeroth entry and -1627 the N bootstrap samples as first to Nth entry. -1628 name : str -1629 name of the ensemble the samples are defined on. -1630 random_numbers : np.ndarray -1631 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, -1632 where samples is the number of bootstrap samples and length is the length of the original Monte Carlo -1633 chain to be reconstructed. -1634 """ -1635 samples, length = random_numbers.shape -1636 if samples != len(boots) - 1: -1637 raise ValueError("Random numbers do not have the correct shape.") -1638 -1639 if samples < length: -1640 raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.") -1641 -1642 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length -1643 -1644 samples = scipy.linalg.lstsq(proj, boots[1:])[0] -1645 ret = Obs([samples], [name]) -1646 ret._value = boots[0] -1647 return ret +@@ -5866,34 +5911,34 @@ chain to be reconstructed.1645def import_bootstrap(boots, name, random_numbers): +1646 """Imports bootstrap samples and returns an Obs +1647 +1648 Parameters +1649 ---------- +1650 boots : numpy.ndarray +1651 numpy array containing the mean value as zeroth entry and +1652 the N bootstrap samples as first to Nth entry. +1653 name : str +1654 name of the ensemble the samples are defined on. +1655 random_numbers : np.ndarray +1656 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, +1657 where samples is the number of bootstrap samples and length is the length of the original Monte Carlo +1658 chain to be reconstructed. +1659 """ +1660 samples, length = random_numbers.shape +1661 if samples != len(boots) - 1: +1662 raise ValueError("Random numbers do not have the correct shape.") +1663 +1664 if samples < length: +1665 raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.") +1666 +1667 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length +1668 +1669 samples = scipy.linalg.lstsq(proj, boots[1:])[0] +1670 ret = Obs([samples], [name]) +1671 ret._value = boots[0] +1672 return ret
1650def merge_obs(list_of_obs): -1651 """Combine all observables in list_of_obs into one new observable -1652 -1653 Parameters -1654 ---------- -1655 list_of_obs : list -1656 list of the Obs object to be combined -1657 -1658 Notes -1659 ----- -1660 It is not possible to combine obs which are based on the same replicum -1661 """ -1662 replist = [item for obs in list_of_obs for item in obs.names] -1663 if (len(replist) == len(set(replist))) is False: -1664 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) -1665 if any([len(o.cov_names) for o in list_of_obs]): -1666 raise Exception('Not possible to merge data that contains covobs!') -1667 new_dict = {} -1668 idl_dict = {} -1669 for o in list_of_obs: -1670 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) -1671 for key in set(o.deltas) | set(o.r_values)}) -1672 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) -1673 -1674 names = sorted(new_dict.keys()) -1675 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) -1676 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) -1677 return o +@@ -5924,47 +5969,47 @@ list of the Obs object to be combined1675def merge_obs(list_of_obs): +1676 """Combine all observables in list_of_obs into one new observable +1677 +1678 Parameters +1679 ---------- +1680 list_of_obs : list +1681 list of the Obs object to be combined +1682 +1683 Notes +1684 ----- +1685 It is not possible to combine obs which are based on the same replicum +1686 """ +1687 replist = [item for obs in list_of_obs for item in obs.names] +1688 if (len(replist) == len(set(replist))) is False: +1689 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) +1690 if any([len(o.cov_names) for o in list_of_obs]): +1691 raise Exception('Not possible to merge data that contains covobs!') +1692 new_dict = {} +1693 idl_dict = {} +1694 for o in list_of_obs: +1695 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) +1696 for key in set(o.deltas) | set(o.r_values)}) +1697 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) +1698 +1699 names = sorted(new_dict.keys()) +1700 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) +1701 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) +1702 return o
1680def cov_Obs(means, cov, name, grad=None): -1681 """Create an Obs based on mean(s) and a covariance matrix -1682 -1683 Parameters -1684 ---------- -1685 mean : list of floats or float -1686 N mean value(s) of the new Obs -1687 cov : list or array -1688 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance -1689 name : str -1690 identifier for the covariance matrix -1691 grad : list or array -1692 Gradient of the Covobs wrt. the means belonging to cov. -1693 """ -1694 -1695 def covobs_to_obs(co): -1696 """Make an Obs out of a Covobs -1697 -1698 Parameters -1699 ---------- -1700 co : Covobs -1701 Covobs to be embedded into the Obs -1702 """ -1703 o = Obs([], [], means=[]) -1704 o._value = co.value -1705 o.names.append(co.name) -1706 o._covobs[co.name] = co -1707 o._dvalue = np.sqrt(co.errsq()) -1708 return o -1709 -1710 ol = [] -1711 if isinstance(means, (float, int)): -1712 means = [means] -1713 -1714 for i in range(len(means)): -1715 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) -1716 if ol[0].covobs[name].N != len(means): -1717 raise Exception('You have to provide %d mean values!' % (ol[0].N)) -1718 if len(ol) == 1: -1719 return ol[0] -1720 return ol +1705def cov_Obs(means, cov, name, grad=None): +1706 """Create an Obs based on mean(s) and a covariance matrix +1707 +1708 Parameters +1709 ---------- +1710 mean : list of floats or float +1711 N mean value(s) of the new Obs +1712 cov : list or array +1713 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance +1714 name : str +1715 identifier for the covariance matrix +1716 grad : list or array +1717 Gradient of the Covobs wrt. the means belonging to cov. +1718 """ +1719 +1720 def covobs_to_obs(co): +1721 """Make an Obs out of a Covobs +1722 +1723 Parameters +1724 ---------- +1725 co : Covobs +1726 Covobs to be embedded into the Obs +1727 """ +1728 o = Obs([], [], means=[]) +1729 o._value = co.value +1730 o.names.append(co.name) +1731 o._covobs[co.name] = co +1732 o._dvalue = np.sqrt(co.errsq()) +1733 return o +1734 +1735 ol = [] +1736 if isinstance(means, (float, int)): +1737 means = [means] +1738 +1739 for i in range(len(means)): +1740 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) +1741 if ol[0].covobs[name].N != len(means): +1742 raise Exception('You have to provide %d mean values!' % (ol[0].N)) +1743 if len(ol) == 1: +1744 return ol[0] +1745 return ol