pyerrors.input.json
View Source
import json
import gzip
import getpass
import socket
import datetime
import platform
import warnings
import re
import gc
import numpy as np
from ..obs import Obs
from ..covobs import Covobs
from ..correlators import Corr
from ..misc import _assert_equal_properties
from .. import version as pyerrorsversion


def create_json_string(ol, description='', indent=1):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .json(.gz) file

    Parameters
    ----------
    ol : list
        List of objects that will be exported. At the moment, these objects can be
        either of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
        A single object that is not a list is wrapped into a list.
    description : str
        Optional string that describes the contents of the json file.
    indent : int
        Specify the indentation level of the json file. None or 0 is permissible and
        saves disk space.

    Returns
    -------
    str
        The json representation of `ol` together with a header (program, version,
        who, date, host and, if given, description).

    Raises
    ------
    Exception
        If an element of `ol` is none of the supported types.
    """

    class _StrFallbackEncoder(json.JSONEncoder):
        """Encoder that serializes objects unknown to json via their str().

        A local subclass is used on purpose: assigning to json.JSONEncoder.default
        would change the behavior of every json user in the running process.
        """

        def default(self, obj):
            return str(obj)

    def _format_float(x):
        # '%e' renders NaN as 'nan', which json.loads does not accept as a number.
        # Emit the token 'NaN' here instead of patching the finished document,
        # where a textual replace would also hit descriptions, tags and names.
        if x != x:
            return 'NaN'
        return '%1.15e' % (x)

    class Deltalist:
        """One row of deltas: a configuration number followed by one delta per Obs."""
        __slots__ = ['cnfg', 'deltas']

        def __init__(self, li):
            self.cnfg = li[0]
            self.deltas = li[1:]

        def __repr__(self):
            entries = ['%d' % (self.cnfg)]
            entries.extend(_format_float(d) for d in self.deltas)
            return '[' + ', '.join(entries) + ']'

        def __str__(self):
            return self.__repr__()

    class Floatlist:
        """A flat list of floats that is written with 15 digits after the decimal point."""
        __slots__ = ['li']

        def __init__(self, li):
            self.li = list(li)

        def __repr__(self):
            return '[' + ', '.join(_format_float(x) for x in self.li) + ']'

        def __str__(self):
            return self.__repr__()

    def _gen_data_d_from_list(ol):
        """Build the 'data' entry (per ensemble and replica) for a list of Obs.

        The structure (names, replica, idl) is taken from the first element of ol.
        """
        dl = []
        first = ol[0]
        for name in first.mc_names:
            ed = {}
            ed['id'] = name
            ed['replica'] = []
            for r_name in first.e_content[name]:
                rd = {}
                rd['name'] = r_name
                if first.is_merged.get(r_name, False):
                    rd['is_merged'] = True
                rd['deltas'] = [Deltalist([cnfg] + [o.deltas[r_name][i] for o in ol])
                                for i, cnfg in enumerate(first.idl[r_name])]
                ed['replica'].append(rd)
            dl.append(ed)
        return dl

    def _gen_cdata_d_from_list(ol):
        """Build the 'cdata' entry (covariance and gradients) for a list of Obs.

        The covariance is taken from the first element of ol, the gradients from all.
        """
        dl = []
        for name in ol[0].cov_names:
            cov = ol[0].covobs[name].cov
            ed = {}
            ed['id'] = name
            ed['layout'] = str(cov.shape).lstrip('(').rstrip(')').rstrip(',')
            ed['cov'] = Floatlist(np.ravel(cov))
            ed['grad'] = [Floatlist([o.covobs[name].grad[i][0] for o in ol])
                          for i in range(cov.shape[0])]
            dl.append(ed)
        return dl

    def _add_data_and_cdata(d, ol):
        """Attach the non-empty 'data' and 'cdata' entries of ol to d."""
        data = _gen_data_d_from_list(ol)
        if len(data) > 0:
            d['data'] = data
        cdata = _gen_cdata_d_from_list(ol)
        if len(cdata) > 0:
            d['cdata'] = cdata
        return d

    def _write_collection_to_dict(ol, otype, layout):
        """Common writer for 'List' and 'Array': ol is a flat sequence of Obs."""
        _assert_equal_properties(ol)
        d = {}
        d['type'] = otype
        d['layout'] = layout
        taglist = [o.tag for o in ol]
        if np.any([tag is not None for tag in taglist]):
            d['tag'] = taglist
        if ol[0].reweighted:
            d['reweighted'] = ol[0].reweighted
        d['value'] = [o.value for o in ol]
        return _add_data_and_cdata(d, ol)

    def write_Obs_to_dict(o):
        d = {}
        d['type'] = 'Obs'
        d['layout'] = '1'
        if o.tag:
            d['tag'] = [o.tag]
        if o.reweighted:
            d['reweighted'] = o.reweighted
        d['value'] = [o.value]
        return _add_data_and_cdata(d, [o])

    def write_List_to_dict(ol):
        return _write_collection_to_dict(ol, 'List', '%d' % len(ol))

    def write_Array_to_dict(oa):
        layout = str(oa.shape).lstrip('(').rstrip(')').rstrip(',')
        return _write_collection_to_dict(np.ravel(oa), 'Array', layout)

    def _nan_Obs_like(obs):
        """Create an Obs with the same idl and covobs as obs whose samples are all NaN."""
        samples = []
        names = []
        idl = []
        for key, value in obs.idl.items():
            samples.append([np.nan] * len(value))
            names.append(key)
            idl.append(value)
        my_obs = Obs(samples, names, idl)
        my_obs._covobs = obs._covobs
        for name in obs._covobs:
            my_obs.names.append(name)
        my_obs.reweighted = obs.reweighted
        my_obs.is_merged = obs.is_merged
        return my_obs

    def write_Corr_to_dict(my_corr):
        # Entries of the correlator that are None are written as arrays of NaN Obs,
        # modelled on the first usable entry, so that the export is a regular array.
        first_not_none = next(i for i, j in enumerate(my_corr.content) if np.all(j))
        dummy_array = np.empty((my_corr.N, my_corr.N), dtype=object)
        dummy_array[:] = _nan_Obs_like(my_corr.content[first_not_none].ravel()[0])
        content = [o if o is not None else dummy_array for o in my_corr.content]
        dat = write_Array_to_dict(np.array(content, dtype=object))
        dat['type'] = 'Corr'
        corr_meta_data = str(my_corr.tag)
        if 'tag' in dat.keys():
            dat['tag'].append(corr_meta_data)
        else:
            dat['tag'] = [corr_meta_data]
        taglist = dat['tag']
        dat['tag'] = {}  # tag is now a dictionary, that contains the previous taglist in the key "tag"
        dat['tag']['tag'] = taglist
        if my_corr.prange is not None:
            dat['tag']['prange'] = my_corr.prange
        return dat

    def remove_quotationmarks_split(split):
        """Workaround for un-quoting of delta lists, adds 5% of work
        but is safe, compared to a simple replace that could destroy the structure
        """
        deltas = False
        for i in range(len(split)):
            if '"deltas":' in split[i] or '"cov":' in split[i] or '"grad":' in split[i]:
                deltas = True
            if deltas:
                split[i] = split[i].replace('"[', '[').replace(']"', ']')
                # endswith instead of [-1] so that an empty line cannot raise
                if split[i].endswith(']'):
                    deltas = False
        return '\n'.join(split)

    if not isinstance(ol, list):
        ol = [ol]

    d = {}
    d['program'] = 'pyerrors %s' % (pyerrorsversion.__version__)
    d['version'] = '1.0'
    d['who'] = getpass.getuser()
    d['date'] = datetime.datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %z')
    d['host'] = socket.gethostname() + ', ' + platform.platform()

    if description:
        d['description'] = description
    d['obsdata'] = []
    for io in ol:
        if isinstance(io, Obs):
            d['obsdata'].append(write_Obs_to_dict(io))
        elif isinstance(io, list):
            d['obsdata'].append(write_List_to_dict(io))
        elif isinstance(io, np.ndarray):
            d['obsdata'].append(write_Array_to_dict(io))
        elif isinstance(io, Corr):
            d['obsdata'].append(write_Corr_to_dict(io))
        else:
            raise Exception("Unkown datatype.")

    jsonstring = ''.join(_StrFallbackEncoder(indent=indent, ensure_ascii=False).iterencode(d))

    # The intermediate dict can be large, release it before post-processing the string.
    del d
    gc.collect()

    return remove_quotationmarks_split(jsonstring.split('\n'))


def dump_to_json(ol, fname, description='', indent=1, gz=True):
    """Export a list of Obs or structures containing Obs to a .json(.gz) file

    Parameters
    ----------
    ol : list
        List of objects that will be exported. At the moment, these objects can be
        either of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
    fname : str
        Filename of the output file. The endings '.json' and '.gz' are appended
        if they are missing.
    description : str
        Optional string that describes the contents of the json file.
    indent : int
        Specify the indentation level of the json file. None or 0 is permissible and
        saves disk space.
    gz : bool
        If True, the output is a gzipped json. If False, the output is a json file.
    """

    jsonstring = create_json_string(ol, description, indent)

    if not fname.endswith('.json') and not fname.endswith('.gz'):
        fname += '.json'

    # Context managers make sure the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(jsonstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(jsonstring)


def _parse_json_dict(json_dict, verbose=True, full_output=False):
    """Reconstruct a list of Obs or structures containing Obs from a dict that
    was built out of a json string.

    The following structures are supported: Obs, list, numpy.ndarray, Corr
    If the list contains only one element, it is unpacked from the list.

    Parameters
    ----------
    json_dict : dict
        Dictionary that was obtained by parsing the json string. It is not modified.
    verbose : bool
        Print additional information that was written to the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.

    Raises
    ------
    Exception
        If an entry has an unknown type, if an Obs entry has a layout other than 1
        or if the tag of a Corr entry is neither a list nor a dict.
    """

    def _gen_obsd_from_datad(d):
        """Collect names, idl, deltas and is_merged from a 'data' entry."""
        retd = {}
        if d:
            retd['names'] = []
            retd['idl'] = []
            retd['deltas'] = []
            retd['is_merged'] = {}
            for ens in d:
                id_len = len(ens["id"])
                for rep in ens['replica']:
                    rep_name = rep['name']
                    # Insert the separator '|' behind the ensemble id if the replica
                    # name is longer than the id and does not contain it there.
                    if len(rep_name) > id_len and rep_name[id_len] != "|":
                        rep_name = rep_name[:id_len] + "|" + rep_name[id_len:]
                    retd['names'].append(rep_name)
                    retd['idl'].append([di[0] for di in rep['deltas']])
                    retd['deltas'].append(np.array([di[1:] for di in rep['deltas']]))
                    retd['is_merged'][rep_name] = rep.get('is_merged', False)
        return retd

    def _gen_covobsd_from_cdatad(d):
        """Collect, per covobs name, one dict (name, cov, grad) for every Obs."""
        retd = {}
        for ens in d:
            name = ens['id']
            layouts = ens.get('layout', '1').strip()
            layout = [int(ls.strip()) for ls in layouts.split(',') if len(ls) > 0]
            cov = np.reshape(ens['cov'], layout)
            grad = ens['grad']
            nobs = len(grad[0])
            retd[name] = [{'name': name, 'cov': cov, 'grad': [g[i] for g in grad]}
                          for i in range(nobs)]
        return retd

    def _attach_covobs(obs, cd, i):
        """Attach the i-th covobs of every entry of cd to obs."""
        for name in cd:
            co = cd[name][i]
            obs._covobs[name] = Covobs(None, co['cov'], co['name'], grad=co['grad'])
            obs.names.append(co['name'])

    def get_Obs_from_dict(o):
        layouts = o.get('layout', '1').strip()
        if layouts != '1':
            raise Exception("layout is %s has to be 1 for type Obs." % (layouts))

        values = o['value']
        od = _gen_obsd_from_datad(o.get('data', {}))
        cd = _gen_covobsd_from_cdatad(o.get('cdata', {}))

        if od:
            ret = Obs([[ddi[0] + values[0] for ddi in di] for di in od['deltas']], od['names'], idl=od['idl'])
            ret.is_merged = od['is_merged']
        else:
            ret = Obs([], [], means=[])
            ret._value = values[0]
        _attach_covobs(ret, cd, 0)

        ret.reweighted = o.get('reweighted', False)
        ret.tag = o.get('tag', [None])[0]
        return ret

    def get_List_from_dict(o):
        layouts = o.get('layout', '1').strip()
        layout = int(layouts)
        values = o['value']
        od = _gen_obsd_from_datad(o.get('data', {}))
        cd = _gen_covobsd_from_cdatad(o.get('cdata', {}))

        ret = []
        taglist = o.get('tag', layout * [None])
        for i in range(layout):
            if od:
                ret.append(Obs([list(di[:, i] + values[i]) for di in od['deltas']], od['names'], idl=od['idl']))
                ret[-1].is_merged = od['is_merged']
            else:
                ret.append(Obs([], [], means=[]))
                ret[-1]._value = values[i]
                # Informational output only, therefore it honors the verbose flag.
                if verbose:
                    print('Created Obs with means= ', values[i])
            _attach_covobs(ret[-1], cd, i)

            ret[-1].reweighted = o.get('reweighted', False)
            ret[-1].tag = taglist[i]
        return ret

    def get_Array_from_dict(o):
        layouts = o.get('layout', '1').strip()
        layout = [int(ls.strip()) for ls in layouts.split(',') if len(ls) > 0]
        N = np.prod(layout)
        values = o['value']
        od = _gen_obsd_from_datad(o.get('data', {}))
        cd = _gen_covobsd_from_cdatad(o.get('cdata', {}))

        ret = []
        taglist = o.get('tag', N * [None])
        for i in range(N):
            if od:
                ret.append(Obs([di[:, i] + values[i] for di in od['deltas']], od['names'], idl=od['idl']))
                ret[-1].is_merged = od['is_merged']
            else:
                ret.append(Obs([], [], means=[]))
                ret[-1]._value = values[i]
            _attach_covobs(ret[-1], cd, i)
            ret[-1].reweighted = o.get('reweighted', False)
            ret[-1].tag = taglist[i]
        return np.reshape(ret, layout)

    def get_Corr_from_dict(o):
        tag = o.get('tag')
        if isinstance(tag, list):  # supports the old way
            taglist = tag
            temp_prange = None
        elif isinstance(tag, dict):
            taglist = tag['tag']
            temp_prange = tag.get('prange', None)
        else:
            raise Exception("The tag is not a list or dict")

        # The last tag belongs to the Corr itself, the others to the Obs.
        corr_tag = taglist[-1]
        tmp_o = dict(o)  # shallow copy, the input dict is left untouched
        tmp_o['tag'] = taglist[:-1]
        if len(tmp_o['tag']) == 0:
            del tmp_o['tag']
        dat = get_Array_from_dict(tmp_o)
        # Entries that were exported as NaN placeholders become None again.
        my_corr = Corr([None if np.isnan(el.ravel()[0].value) else el for el in list(dat)])
        if corr_tag != 'None':
            my_corr.tag = corr_tag

        my_corr.prange = temp_prange
        return my_corr

    prog = json_dict.get('program', '')
    version = json_dict.get('version', '')
    who = json_dict.get('who', '')
    date = json_dict.get('date', '')
    host = json_dict.get('host', '')
    if prog and verbose:
        print('Data has been written using %s.' % (prog))
    if version and verbose:
        print('Format version %s' % (version))
    # Only print the line if at least one of the three fields is present.
    if verbose and any([who, date, host]):
        print('Written by %s on %s on host %s' % (who, date, host))
    description = json_dict.get('description', '')
    if description and verbose:
        print()
        print('Description: ', description)
    obsdata = json_dict['obsdata']
    ol = []
    for io in obsdata:
        if io['type'] == 'Obs':
            ol.append(get_Obs_from_dict(io))
        elif io['type'] == 'List':
            ol.append(get_List_from_dict(io))
        elif io['type'] == 'Array':
            ol.append(get_Array_from_dict(io))
        elif io['type'] == 'Corr':
            ol.append(get_Corr_from_dict(io))
        else:
            raise Exception("Unkown datatype.")

    if full_output:
        retd = {}
        retd['program'] = prog
        retd['version'] = version
        retd['who'] = who
        retd['date'] = date
        retd['host'] = host
        retd['description'] = description
        retd['obsdata'] = ol

        return retd
    else:
        if len(obsdata) == 1:
            ol = ol[0]

        return ol


def import_json_string(json_string, verbose=True, full_output=False):
    """Reconstruct a list of Obs or structures containing Obs from a json string.

    The following structures are supported: Obs, list, numpy.ndarray, Corr
    If the list contains only one element, it is unpacked from the list.

    Parameters
    ----------
    json_string : str
        json string containing the data.
    verbose : bool
        Print additional information that was written to the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    """

    return _parse_json_dict(json.loads(json_string), verbose, full_output)


def load_json(fname, verbose=True, gz=True, full_output=False):
    """Import a list of Obs or structures containing Obs from a .json(.gz) file.

    The following structures are supported: Obs, list, numpy.ndarray, Corr
    If the list contains only one element, it is unpacked from the list.

    Parameters
    ----------
    fname : str
        Filename of the input file. The endings '.json' and '.gz' are appended
        if they are missing.
    verbose : bool
        Print additional information that was written to the file.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes JSON file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    """
    if not fname.endswith('.json') and not fname.endswith('.gz'):
        fname += '.json'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            d = json.load(fin)
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'r', encoding='utf-8') as fin:
            d = json.loads(fin.read())

    return _parse_json_dict(d, verbose, full_output)


def _ol_from_dict(ind, reps='DICTOBS'):
    """Convert a dictionary of Obs objects to a list and a dictionary that contains
    placeholders instead of the Obs objects.

    Parameters
    ----------
    ind : dict
        Dict of JSON valid structures and objects that will be exported.
        At the moment, these object can be either of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
    reps : str
        Specify the structure of the placeholder in exported dict to be reps[0-9]+.

    Returns
    -------
    ol : list
        The objects that have been replaced, in the order of their placeholders.
    nd : dict
        Copy of the structure of ind where the objects are replaced by placeholders.

    Raises
    ------
    Exception
        If reps is not alphanumeric or if ind contains a string that matches
        the placeholder pattern.
    """

    obstypes = (Obs, Corr, np.ndarray)

    if not reps.isalnum():
        raise Exception('Placeholder string has to be alphanumeric!')
    placeholder = re.compile(r'%s[0-9]+' % (reps))
    ol = []
    counter = 0

    def store(obj):
        """Append obj to ol and return the placeholder that refers to it."""
        nonlocal counter
        ol.append(obj)
        x = reps + '%d' % (counter)
        counter += 1
        return x

    def check_string(s):
        if bool(placeholder.match(s)):
            raise Exception('Dict contains string %s that matches the placeholder! %s Cannot be savely exported.' % (s, reps))

    def dict_replace_obs(d):
        x = {}
        for k, v in d.items():
            if isinstance(v, dict):
                v = dict_replace_obs(v)
            # An empty list is not a list of Obs: it could not be exported as such
            # and is kept as an empty list instead.
            elif isinstance(v, list) and v and all([isinstance(o, Obs) for o in v]):
                v = obslist_replace_obs(v)
            elif isinstance(v, list):
                v = list_replace_obs(v)
            elif isinstance(v, obstypes):
                v = store(v)
            elif isinstance(v, str):
                check_string(v)
            x[k] = v
        return x

    def list_replace_obs(li):
        x = []
        for e in li:
            # Nested lists are always processed element by element.
            if isinstance(e, list):
                e = list_replace_obs(e)
            elif isinstance(e, dict):
                e = dict_replace_obs(e)
            elif isinstance(e, obstypes):
                e = store(e)
            elif isinstance(e, str):
                check_string(e)
            x.append(e)
        return x

    def obslist_replace_obs(li):
        # A copy of the list is stored as a single object.
        return store(list(li))

    nd = dict_replace_obs(ind)

    return ol, nd


def dump_dict_to_json(od, fname, description='', indent=1, reps='DICTOBS', gz=True):
    """Export a dict of Obs or structures containing Obs to a .json(.gz) file

    Parameters
    ----------
    od : dict
        Dict of JSON valid structures and objects that will be exported.
        At the moment, these objects can be either of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
    fname : str
        Filename of the output file.
    description : str
        Optional string that describes the contents of the json file.
    indent : int
        Specify the indentation level of the json file. None or 0 is permissible and
        saves disk space.
    reps : str
        Specify the structure of the placeholder in exported dict to be reps[0-9]+.
    gz : bool
        If True, the output is a gzipped json. If False, the output is a json file.

    Raises
    ------
    Exception
        If od is not a dictionary.
    """

    if not isinstance(od, dict):
        raise Exception('od has to be a dictionary. Did you want to use dump_to_json?')

    infostring = ('This JSON file contains a python dictionary that has been parsed to a list of structures. '
                  'OBSDICT contains the dictionary, where Obs or other structures have been replaced by '
                  '' + reps + '[0-9]+. The field description contains the additional description of this JSON file. '
                  'This file may be parsed to a dict with the pyerrors routine load_json_dict.')

    # The dict with placeholders travels in the description field of the json file.
    desc_dict = {'INFO': infostring, 'OBSDICT': {}, 'description': description}
    ol, desc_dict['OBSDICT'] = _ol_from_dict(od, reps=reps)

    dump_to_json(ol, fname, description=desc_dict, indent=indent, gz=gz)


def _od_from_list_and_dict(ol, ind, reps='DICTOBS'):
    """Parse a list of Obs or structures containing Obs and an accompanying
    dict, where the structures have been replaced by placeholders to a
    dict that contains the structures.

    The following structures are supported: Obs, list, numpy.ndarray, Corr

    Parameters
    ----------
    ol : list
        List of objects -
        At the moment, these objects can be either of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
    ind : dict
        Dict that defines the structure of the resulting dict and contains placeholders
    reps : str
        Specify the structure of the placeholder in imported dict to be reps[0-9]+.

    Raises
    ------
    Exception
        If reps is not alphanumeric or if no placeholder was found in ind.
    """
    if not reps.isalnum():
        raise Exception('Placeholder string has to be alphanumeric!')

    placeholder = re.compile(r'%s[0-9]+' % (reps))
    counter = 0

    def resolve(s):
        """Return the element of ol that the placeholder s refers to."""
        nonlocal counter
        index = int(s[len(reps):])
        counter += 1
        return ol[index]

    def dict_replace_string(d):
        x = {}
        for k, v in d.items():
            if isinstance(v, dict):
                v = dict_replace_string(v)
            elif isinstance(v, list):
                v = list_replace_string(v)
            elif isinstance(v, str) and bool(placeholder.match(v)):
                v = resolve(v)
            x[k] = v
        return x

    def list_replace_string(li):
        x = []
        for e in li:
            if isinstance(e, list):
                e = list_replace_string(e)
            elif isinstance(e, dict):
                e = dict_replace_string(e)
            elif isinstance(e, str) and bool(placeholder.match(e)):
                e = resolve(e)
            x.append(e)
        return x

    nd = dict_replace_string(ind)

    if counter == 0:
        raise Exception('No placeholder has been replaced! Check if reps is set correctly.')

    return nd


def load_json_dict(fname, verbose=True, gz=True, full_output=False, reps='DICTOBS'):
    """Import a dict of Obs or structures containing Obs from a .json(.gz) file.

    The following structures are supported: Obs, list, numpy.ndarray, Corr

    Parameters
    ----------
    fname : str
        Filename of the input file.
    verbose : bool
        Print additional information that was written to the file.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes JSON file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    reps : str
        Specify the structure of the placeholder in imported dict to be reps[0-9]+.
    """
    indata = load_json(fname, verbose=verbose, gz=gz, full_output=True)
    # The description field of such a file holds the user description together
    # with the dict that contains the placeholders.
    description = indata['description']['description']
    indict = indata['description']['OBSDICT']
    ol = indata['obsdata']
    od = _od_from_list_and_dict(ol, indict, reps=reps)

    if full_output:
        indata['description'] = description
        indata['obsdata'] = od
        return indata
    else:
        return od
View Source
def create_json_string(ol, description='', indent=1):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .json(.gz) file

    Parameters
    ----------
    ol : list
        List of objects that will be exported. At the moment, these objects can be
        either of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
        A single object that is not a list is wrapped into a list.
    description : str
        Optional string that describes the contents of the json file.
    indent : int
        Specify the indentation level of the json file. None or 0 is permissible and
        saves disk space.

    Returns
    -------
    str
        The json representation of `ol` together with a header (program, version,
        who, date, host and, if given, description).

    Raises
    ------
    Exception
        If an element of `ol` is none of the supported types.
    """

    class _StrFallbackEncoder(json.JSONEncoder):
        """Encoder that serializes objects unknown to json via their str().

        A local subclass is used on purpose: assigning to json.JSONEncoder.default
        would change the behavior of every json user in the running process.
        """

        def default(self, obj):
            return str(obj)

    def _format_float(x):
        # '%e' renders NaN as 'nan', which json.loads does not accept as a number.
        # Emit the token 'NaN' here instead of patching the finished document,
        # where a textual replace would also hit descriptions, tags and names.
        if x != x:
            return 'NaN'
        return '%1.15e' % (x)

    class Deltalist:
        """One row of deltas: a configuration number followed by one delta per Obs."""
        __slots__ = ['cnfg', 'deltas']

        def __init__(self, li):
            self.cnfg = li[0]
            self.deltas = li[1:]

        def __repr__(self):
            entries = ['%d' % (self.cnfg)]
            entries.extend(_format_float(d) for d in self.deltas)
            return '[' + ', '.join(entries) + ']'

        def __str__(self):
            return self.__repr__()

    class Floatlist:
        """A flat list of floats that is written with 15 digits after the decimal point."""
        __slots__ = ['li']

        def __init__(self, li):
            self.li = list(li)

        def __repr__(self):
            return '[' + ', '.join(_format_float(x) for x in self.li) + ']'

        def __str__(self):
            return self.__repr__()

    def _gen_data_d_from_list(ol):
        """Build the 'data' entry (per ensemble and replica) for a list of Obs.

        The structure (names, replica, idl) is taken from the first element of ol.
        """
        dl = []
        first = ol[0]
        for name in first.mc_names:
            ed = {}
            ed['id'] = name
            ed['replica'] = []
            for r_name in first.e_content[name]:
                rd = {}
                rd['name'] = r_name
                if first.is_merged.get(r_name, False):
                    rd['is_merged'] = True
                rd['deltas'] = [Deltalist([cnfg] + [o.deltas[r_name][i] for o in ol])
                                for i, cnfg in enumerate(first.idl[r_name])]
                ed['replica'].append(rd)
            dl.append(ed)
        return dl

    def _gen_cdata_d_from_list(ol):
        """Build the 'cdata' entry (covariance and gradients) for a list of Obs.

        The covariance is taken from the first element of ol, the gradients from all.
        """
        dl = []
        for name in ol[0].cov_names:
            cov = ol[0].covobs[name].cov
            ed = {}
            ed['id'] = name
            ed['layout'] = str(cov.shape).lstrip('(').rstrip(')').rstrip(',')
            ed['cov'] = Floatlist(np.ravel(cov))
            ed['grad'] = [Floatlist([o.covobs[name].grad[i][0] for o in ol])
                          for i in range(cov.shape[0])]
            dl.append(ed)
        return dl

    def _add_data_and_cdata(d, ol):
        """Attach the non-empty 'data' and 'cdata' entries of ol to d."""
        data = _gen_data_d_from_list(ol)
        if len(data) > 0:
            d['data'] = data
        cdata = _gen_cdata_d_from_list(ol)
        if len(cdata) > 0:
            d['cdata'] = cdata
        return d

    def _write_collection_to_dict(ol, otype, layout):
        """Common writer for 'List' and 'Array': ol is a flat sequence of Obs."""
        _assert_equal_properties(ol)
        d = {}
        d['type'] = otype
        d['layout'] = layout
        taglist = [o.tag for o in ol]
        if np.any([tag is not None for tag in taglist]):
            d['tag'] = taglist
        if ol[0].reweighted:
            d['reweighted'] = ol[0].reweighted
        d['value'] = [o.value for o in ol]
        return _add_data_and_cdata(d, ol)

    def write_Obs_to_dict(o):
        d = {}
        d['type'] = 'Obs'
        d['layout'] = '1'
        if o.tag:
            d['tag'] = [o.tag]
        if o.reweighted:
            d['reweighted'] = o.reweighted
        d['value'] = [o.value]
        return _add_data_and_cdata(d, [o])

    def write_List_to_dict(ol):
        return _write_collection_to_dict(ol, 'List', '%d' % len(ol))

    def write_Array_to_dict(oa):
        layout = str(oa.shape).lstrip('(').rstrip(')').rstrip(',')
        return _write_collection_to_dict(np.ravel(oa), 'Array', layout)

    def _nan_Obs_like(obs):
        """Create an Obs with the same idl and covobs as obs whose samples are all NaN."""
        samples = []
        names = []
        idl = []
        for key, value in obs.idl.items():
            samples.append([np.nan] * len(value))
            names.append(key)
            idl.append(value)
        my_obs = Obs(samples, names, idl)
        my_obs._covobs = obs._covobs
        for name in obs._covobs:
            my_obs.names.append(name)
        my_obs.reweighted = obs.reweighted
        my_obs.is_merged = obs.is_merged
        return my_obs

    def write_Corr_to_dict(my_corr):
        # Entries of the correlator that are None are written as arrays of NaN Obs,
        # modelled on the first usable entry, so that the export is a regular array.
        first_not_none = next(i for i, j in enumerate(my_corr.content) if np.all(j))
        dummy_array = np.empty((my_corr.N, my_corr.N), dtype=object)
        dummy_array[:] = _nan_Obs_like(my_corr.content[first_not_none].ravel()[0])
        content = [o if o is not None else dummy_array for o in my_corr.content]
        dat = write_Array_to_dict(np.array(content, dtype=object))
        dat['type'] = 'Corr'
        corr_meta_data = str(my_corr.tag)
        if 'tag' in dat.keys():
            dat['tag'].append(corr_meta_data)
        else:
            dat['tag'] = [corr_meta_data]
        taglist = dat['tag']
        dat['tag'] = {}  # tag is now a dictionary, that contains the previous taglist in the key "tag"
        dat['tag']['tag'] = taglist
        if my_corr.prange is not None:
            dat['tag']['prange'] = my_corr.prange
        return dat

    def remove_quotationmarks_split(split):
        """Workaround for un-quoting of delta lists, adds 5% of work
        but is safe, compared to a simple replace that could destroy the structure
        """
        deltas = False
        for i in range(len(split)):
            if '"deltas":' in split[i] or '"cov":' in split[i] or '"grad":' in split[i]:
                deltas = True
            if deltas:
                split[i] = split[i].replace('"[', '[').replace(']"', ']')
                # endswith instead of [-1] so that an empty line cannot raise
                if split[i].endswith(']'):
                    deltas = False
        return '\n'.join(split)

    if not isinstance(ol, list):
        ol = [ol]

    d = {}
    d['program'] = 'pyerrors %s' % (pyerrorsversion.__version__)
    d['version'] = '1.0'
    d['who'] = getpass.getuser()
    d['date'] = datetime.datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %z')
    d['host'] = socket.gethostname() + ', ' + platform.platform()

    if description:
        d['description'] = description
    d['obsdata'] = []
    for io in ol:
        if isinstance(io, Obs):
            d['obsdata'].append(write_Obs_to_dict(io))
        elif isinstance(io, list):
            d['obsdata'].append(write_List_to_dict(io))
        elif isinstance(io, np.ndarray):
            d['obsdata'].append(write_Array_to_dict(io))
        elif isinstance(io, Corr):
            d['obsdata'].append(write_Corr_to_dict(io))
        else:
            raise Exception("Unkown datatype.")

    jsonstring = ''.join(_StrFallbackEncoder(indent=indent, ensure_ascii=False).iterencode(d))

    # The intermediate dict can be large, release it before post-processing the string.
    del d
    gc.collect()

    return remove_quotationmarks_split(jsonstring.split('\n'))
Generate the string for the export of a list of Obs or structures containing Obs to a .json(.gz) file
Parameters
- ol (list): List of objects that will be exported. At the moment, these objects can be any of: Obs, list, numpy.ndarray, Corr. All Obs inside a structure have to be defined on the same set of configurations.
- description (str): Optional string that describes the contents of the json file.
- indent (int): Specify the indentation level of the json file. None or 0 is permissible and saves disk space.
View Source
def dump_to_json(ol, fname, description='', indent=1, gz=True):
    """Export a list of Obs or structures containing Obs to a .json(.gz) file

    Parameters
    ----------
    ol : list
        List of objects that will be exported. At the moment, these objects can be
        any of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
    fname : str
        Filename of the output file. '.json' is appended unless the name already
        ends with '.json' or '.gz'; if gz is True, '.gz' is appended as well
        unless it is already present.
    description : str
        Optional string that describes the contents of the json file.
    indent : int
        Specify the indentation level of the json file. None or 0 is permissible and
        saves disk space.
    gz : bool
        If True, the output is a gzipped json. If False, the output is a json file.
    """

    jsonstring = create_json_string(ol, description, indent)

    if not fname.endswith('.json') and not fname.endswith('.gz'):
        fname += '.json'

    # Context managers make sure the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(jsonstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(jsonstring)
Export a list of Obs or structures containing Obs to a .json(.gz) file
Parameters
- ol (list): List of objects that will be exported. At the moment, these objects can be either of: Obs, list, numpy.ndarray, Corr. All Obs inside a structure have to be defined on the same set of configurations.
- fname (str): Filename of the output file.
- description (str): Optional string that describes the contents of the json file.
- indent (int): Specify the indentation level of the json file. None or 0 is permissible and saves disk space.
- gz (bool): If True, the output is a gzipped json. If False, the output is a json file.
View Source
def import_json_string(json_string, verbose=True, full_output=False):
    """Rebuild Obs or structures containing Obs from the text of a JSON document.

    Supported structures are Obs, list, numpy.ndarray and Corr.
    If the list contains only one element, it is unpacked from the list.

    Parameters
    ----------
    json_string : str
        json string containing the data.
    verbose : bool
        Print additional information that was written to the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    """
    parsed = json.loads(json_string)
    return _parse_json_dict(parsed, verbose, full_output)
Reconstruct a list of Obs or structures containing Obs from a json string.
The following structures are supported: Obs, list, numpy.ndarray, Corr. If the list contains only one element, it is unpacked from the list.
Parameters
- json_string (str): json string containing the data.
- verbose (bool): Print additional information that was written to the file.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned.
View Source
def load_json(fname, verbose=True, gz=True, full_output=False):
    """Read Obs or structures containing Obs from a .json(.gz) file.

    Supported structures are Obs, list, numpy.ndarray and Corr.
    If the list contains only one element, it is unpacked from the list.

    Parameters
    ----------
    fname : str
        Filename of the input file. Missing '.json' / '.gz' suffixes are appended.
    verbose : bool
        Print additional information that was written to the file.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes JSON file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    """
    if not fname.endswith(('.json', '.gz')):
        fname += '.json'

    if not gz:
        # A '.gz' name together with gz=False is most likely a mistake; warn but try anyway.
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'r', encoding='utf-8') as fin:
            d = json.load(fin)
    else:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            d = json.load(fin)

    return _parse_json_dict(d, verbose, full_output)
Import a list of Obs or structures containing Obs from a .json(.gz) file.
The following structures are supported: Obs, list, numpy.ndarray, Corr. If the list contains only one element, it is unpacked from the list.
Parameters
- fname (str): Filename of the input file.
- verbose (bool): Print additional information that was written to the file.
- gz (bool): If True, assumes that data is gzipped. If False, assumes JSON file.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned.
View Source
def dump_dict_to_json(od, fname, description='', indent=1, reps='DICTOBS', gz=True):
    """Write a dict of Obs or structures containing Obs to a .json(.gz) file

    The Obs-like entries are collected into a list that is written as the regular
    data of the file, while the dict with placeholders in their place is stored
    inside the description field.

    Parameters
    ----------
    od : dict
        Dict of JSON valid structures and objects that will be exported.
        At the moment, these objects can be any of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
    fname : str
        Filename of the output file.
    description : str
        Optional string that describes the contents of the json file.
    indent : int
        Specify the indentation level of the json file. None or 0 is permissible and
        saves disk space.
    reps : str
        Specify the structure of the placeholder in exported dict to be reps[0-9]+.
    gz : bool
        If True, the output is a gzipped json. If False, the output is a json file.

    Raises
    ------
    Exception
        If od is not a dict.
    """

    if not isinstance(od, dict):
        raise Exception('od has to be a dictionary. Did you want to use dump_to_json?')

    info_head = ('This JSON file contains a python dictionary that has been parsed to a list of structures. '
                 'OBSDICT contains the dictionary, where Obs or other structures have been replaced by ')
    info_tail = ('[0-9]+. The field description contains the additional description of this JSON file. '
                 'This file may be parsed to a dict with the pyerrors routine load_json_dict.')
    infostring = info_head + reps + info_tail

    ol, placeholder_dict = _ol_from_dict(od, reps=reps)
    desc_dict = {'INFO': infostring, 'OBSDICT': placeholder_dict, 'description': description}

    dump_to_json(ol, fname, description=desc_dict, indent=indent, gz=gz)
Export a dict of Obs or structures containing Obs to a .json(.gz) file
Parameters
- od (dict): Dict of JSON valid structures and objects that will be exported. At the moment, these objects can be either of: Obs, list, numpy.ndarray, Corr. All Obs inside a structure have to be defined on the same set of configurations.
- fname (str): Filename of the output file.
- description (str): Optional string that describes the contents of the json file.
- indent (int): Specify the indentation level of the json file. None or 0 is permissible and saves disk space.
- reps (str): Specify the structure of the placeholder in exported dict to be reps[0-9]+.
- gz (bool): If True, the output is a gzipped json. If False, the output is a json file.
View Source
def load_json_dict(fname, verbose=True, gz=True, full_output=False, reps='DICTOBS'):
    """Read a dict of Obs or structures containing Obs from a .json(.gz) file.

    Supported structures are Obs, list, numpy.ndarray and Corr.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    verbose : bool
        Print additional information that was written to the file.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes JSON file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    reps : str
        Specify the structure of the placeholder in imported dict to be reps[0-9]+.
    """
    # The full output is always requested because the placeholder dict is stored
    # inside the description field of the file.
    indata = load_json(fname, verbose=verbose, gz=gz, full_output=True)
    stored = indata['description']
    description = stored['description']
    indict = stored['OBSDICT']
    ol = indata['obsdata']
    od = _od_from_list_and_dict(ol, indict, reps=reps)

    if not full_output:
        return od

    indata['description'] = description
    indata['obsdata'] = od
    return indata
Import a dict of Obs or structures containing Obs from a .json(.gz) file.
The following structures are supported: Obs, list, numpy.ndarray, Corr
Parameters
- fname (str): Filename of the input file.
- verbose (bool): Print additional information that was written to the file.
- gz (bool): If True, assumes that data is gzipped. If False, assumes JSON file.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned.
- reps (str): Specify the structure of the placeholder in imported dict to be reps[0-9]+.