pyerrors.input.dobs
from collections import defaultdict
import gzip
import lxml.etree as et
import getpass
import socket
import datetime
import json
import warnings
import numpy as np
from ..obs import Obs
from ..obs import _merge_idx
from ..covobs import Covobs
from .. import version as pyerrorsversion


# Based on https://stackoverflow.com/a/10076823
def _etree_to_dict(t):
    """Convert an XML element (and, recursively, its children) to a python dict.

    The result has the form ``{tag: content}``. Children with the same tag are
    collected in a list, attributes are stored under ``'@' + name`` and the text
    of an element that also has children or attributes is stored as a
    one-element list under ``'#data'``.
    """
    d = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        dd = defaultdict(list)
        for dc in map(_etree_to_dict, children):
            for k, v in dc.items():
                dd[k].append(v)
        # A tag that occurs only once is stored directly, not as a list.
        d = {t.tag: {k: v[0] if len(v) == 1 else v
                     for k, v in dd.items()}}
    if t.attrib:
        d[t.tag].update(('@' + k, v)
                        for k, v in t.attrib.items())
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            if text:
                d[t.tag]['#data'] = [text]
        else:
            d[t.tag] = text
    return d


def _dict_to_xmlstring(d):
    """Serialize the nested dict ``d`` of a pobs file to an unindented XML string.

    A key starting with ``'#'`` holds raw array data: everything collected for
    the current dict so far is wrapped in ``<array>`` and returned immediately.
    Lists are serialized element by element without an enclosing tag.

    Raises
    ------
    Exception
        If ``d`` or one of its values has an unsupported type.
    """
    if isinstance(d, dict):
        iters = ''
        for k in d:
            if k.startswith('#'):
                for la in d[k]:
                    iters += la
                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
                return iters
            if isinstance(d[k], dict):
                iters += '<%s>\n' % (k) + _dict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
            elif isinstance(d[k], str):
                # Long strings are put on their own line.
                if len(d[k]) > 100:
                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
                else:
                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
            elif isinstance(d[k], list):
                for i in range(len(d[k])):
                    iters += _dict_to_xmlstring(d[k][i])
            elif not d[k]:
                return '\n'
            else:
                raise Exception('Type', type(d[k]), 'not supported in export!')
    else:
        raise Exception('Type', type(d), 'not supported in export!')
    return iters


def _indent_xml_lines(s, space):
    """Indent the lines of the XML string ``s`` according to their nesting depth.

    A line that starts with a closing tag decreases the depth before it is
    written, a line that starts with an opening tag and contains no closing tag
    increases the depth for the following lines.
    """
    out = []
    depth = 0
    for li in s.split('\n'):
        closing = li.startswith('</')
        if closing:
            depth -= 1
        # A non-positive depth yields no indentation, like an empty range would.
        out.append(space * depth + li + '\n')
        if li.startswith('<') and not closing and '</' not in li:
            depth += 1
    return ''.join(out)


def _dict_to_xmlstring_spaces(d, space=' '):
    """Serialize the pobs dict ``d`` to XML, indenting nested tags with ``space``."""
    return _indent_xml_lines(_dict_to_xmlstring(d), space)


def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
    """Export a list of Obs or structures containing Obs to an xml string
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.

    Returns
    -------
    str
        The XML document, including the XML declaration.

    Raises
    ------
    Exception
        If the Obs live on more than one ensemble or on different ensembles,
        if their number of replica differs, or if ``enstag`` / ``symbol`` have
        the wrong type or length.
    """
    first = obsl[0]
    ename = first.e_names[0]
    names = list(first.deltas.keys())
    nr = len(names)

    # Every Obs has to live on exactly the ensemble of the first one.
    for o in obsl:
        if len(o.e_names) != 1:
            raise Exception('You try to export dobs to obs!')
        if o.e_names[0] != ename:
            raise Exception('You try to export dobs to obs!')
        if len(o.deltas.keys()) != nr:
            raise Exception('Incompatible obses in list')

    # The insertion order of the keys fixes the order of the tags in the file.
    pd = {}
    od = {'observables': {
        'schema': {'name': 'lattobs', 'version': '1.0'},
        'origin': {
            'who': getpass.getuser(),
            'date': str(datetime.datetime.now())[:-7],
            'host': socket.gethostname(),
            'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}},
        'pobs': pd}}
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    if enstag:
        if not isinstance(enstag, str):
            raise Exception('enstag has to be a string!')
        pd['enstag'] = enstag
    else:
        pd['enstag'] = ename
    pd['nr'] = '%d' % (nr)
    pd['array'] = []

    if not isinstance(symbol, list):
        raise Exception('Symbol has to be a list!')
    if len(symbol) not in (0, len(obsl)):
        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
    # The first column of every data row is the configuration number.
    osymbol = 'cfg' + ''.join(' %s' % s for s in symbol)

    for rname in names:
        Nconf = len(first.deltas[rname])
        rows = []
        for c in range(Nconf):
            cells = ['%d ' % first.idl[rname][c]]
            for o in obsl:
                # Store the full value, i.e. fluctuation plus replica mean.
                num = o.deltas[rname][c] + o.r_values[rname]
                cells.append('0 ' if num == 0 else '%1.16e ' % (num))
            cells.append('\n')
            rows.append(''.join(cells))
        pd['array'].append({
            'id': rname.replace('|', ''),
            'layout': '%d i f%d' % (Nconf, len(obsl)),
            'symbol': osymbol,
            '#data': ''.join(rows)})

    return '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)


def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. The endings '.xml' and, for gz=True, '.gz'
        are appended if they are missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers close the file even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)


def _import_data(string):
    """Parse a whitespace separated block of numbers into a flat list."""
    return json.loads("[" + ",".join(string.replace(' +', ' ').split()) + "]")


def _check(condition):
    """Raise an Exception if ``condition`` is false, i.e. the file layout is unexpected."""
    if not condition:
        raise Exception("XML file format not supported")


class _NoTagInDataError(Exception):
    """Raised when tag is not in data"""
    def __init__(self, tag):
        self.tag = tag
        super().__init__('Tag %s not in data!' % (self.tag))


def _find_tag(dat, tag):
    """Return the index of the first child of ``dat`` whose tag is ``tag``.

    Raises
    ------
    _NoTagInDataError
        If no child carries the tag.
    """
    for i in range(len(dat)):
        if dat[i].tag == tag:
            return i
    raise _NoTagInDataError(tag)


def _import_array(arr):
    """Import the content of an ``<array>`` element.

    The numbers are read from the tail of the ``layout`` element, or from the
    tail of the ``symbol`` element if that one comes later. The layout decides
    on the return value:

    - ``'<nc> i f<na>'``: rows of a configuration number followed by ``na``
      values. Returns ``[name, idx, mask, data]`` where ``idx`` is the list of
      configuration numbers and ``data`` a list of ``na`` arrays.
    - ``'<n> f<m>'``: returns the numbers as an array of shape ``(n, m)``.
    - ``'<n1> <n2> ... f'``: returns the numbers as an array of shape ``(n1, n2, ...)``.

    Any other layout is rejected via ``_check``.
    """
    name = arr[_find_tag(arr, 'id')].text.strip()
    index = _find_tag(arr, 'layout')
    try:
        sindex = _find_tag(arr, 'symbol')
    except _NoTagInDataError:
        sindex = 0
    # The data follows whichever of the two elements comes last.
    if sindex > index:
        tmp = _import_data(arr[sindex].tail)
    else:
        tmp = _import_data(arr[index].tail)

    li = arr[index].text.strip()
    m = li.split()
    if m[1] == "i" and m[2][0] == "f":
        nc = int(m[0])
        na = int(m[2].lstrip('f'))
        _dat = []
        mask = []
        for a in range(na):
            mask += [a]
            # Every row holds na + 1 entries, column 0 is the configuration number.
            _dat += [np.array(tmp[1 + a:: na + 1])]
        _check(len(tmp[0:: na + 1]) == nc)
        return [name, tmp[0:: na + 1], mask, _dat]
    elif m[1][0] == 'f' and len(m) < 3:
        sh = (int(m[0]), int(m[1].lstrip('f')))
        return np.reshape(tmp, sh)
    elif any(['f' in s for s in m]):
        for si in range(len(m)):
            if m[si] == 'f':
                break
        sh = [int(m[i]) for i in range(si)]
        return np.reshape(tmp, sh)
    else:
        print(name, m)
        _check(False)


def _import_rdata(rd):
    """Import the data of one replica, returns ``(deltas, name, idx)``."""
    name, idx, mask, deltas = _import_array(rd)
    return deltas, name, idx


def _import_cdata(cd):
    """Import a ``<cdata>`` element, returns ``(name, cov, grad)``."""
    _check(cd[0].tag == "id")
    _check(cd[1][0].text.strip() == "cov")
    cov = _import_array(cd[1])
    grad = _import_array(cd[2])
    return cd[0].text.strip(), cov, grad


def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. The endings '.xml' and, for gz=True, '.gz'
        are appended if they are missing.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Raises
    ------
    Exception
        If the file does not have the expected layout or if
        separator_insertion has an unsupported type.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read bytes: lxml refuses str input that carries an XML encoding
        # declaration, and decoding would depend on the locale otherwise.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    deltas = []
    names = []
    idl = []
    # The first five children are spec, origin, name, enstag and nr.
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res


# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    content : str or bytes
        XML string containing the data
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Raises
    ------
    Exception
        If the content does not have the expected layout or if
        separator_insertion has an unsupported type.
    """

    # lxml refuses str input that carries an XML encoding declaration (which is
    # what create_dobs_string produces), therefore str is passed on as bytes.
    if isinstance(content, str):
        content = content.encode('utf-8')
    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # False has to be tested explicitly: bool is a subclass of int
                # and would otherwise insert the separator at position 0.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # The file stores fluctuations around the mean, Obs expects the full values.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # A replica on which the observable is constant and equal to the
            # mean does not contribute.
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
        res[-1]._value = mean[i]
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # The symbols are stored as one whitespace separated string. Indexing
        # that string directly would yield single characters, so it is split
        # into one entry per observable first.
        symbols = symbol.split()
        if len(symbols) == len(res):
            for obs, sym in zip(res, symbols):
                obs.tag = None if sym == 'None' else sym
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res


def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. The endings '.xml' and, for gz=True, '.gz'
        are appended if they are missing.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read bytes: lxml refuses str input that carries an XML encoding
        # declaration, and decoding would depend on the locale otherwise.
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)


def _dobsdict_to_xmlstring(d):
    """Serialize the nested dict ``d`` of a dobs file to an unindented XML string.

    A key starting with ``'#value'`` holds raw data that is written without an
    enclosing tag, any other key starting with ``'#'`` holds raw array data and
    wraps everything collected for the current dict so far in ``<array>``. Both
    end the serialization of the current dict. The elements of the lists
    ``'edata'`` and ``'cdata'`` get their own enclosing tag each, the elements
    of all other lists are written without one.

    Raises
    ------
    Exception
        If ``d`` or one of its values has an unsupported type.
    """
    if isinstance(d, dict):
        iters = ''
        for k in d:
            if k.startswith('#value'):
                for li in d[k]:
                    iters += li
                return iters + '\n'
            elif k.startswith('#'):
                for li in d[k]:
                    iters += li
                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
                return iters
            if isinstance(d[k], dict):
                iters += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
            elif isinstance(d[k], str):
                # Long strings are put on their own line.
                if len(d[k]) > 100:
                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
                else:
                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
            elif isinstance(d[k], list):
                tmps = ''
                if k in ['edata', 'cdata']:
                    for i in range(len(d[k])):
                        tmps += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k][i]) + '</%s>\n' % (k)
                else:
                    for i in range(len(d[k])):
                        tmps += _dobsdict_to_xmlstring(d[k][i])
                iters += tmps
            elif isinstance(d[k], (int, float)):
                iters += '<%s> ' % (k) + str(d[k]) + ' <%s%s>\n' % ('/', k)
            elif not d[k]:
                return '\n'
            else:
                raise Exception('Type', type(d[k]), 'not supported in export!')
    else:
        raise Exception('Type', type(d), 'not supported in export!')
    return iters


def _dobsdict_to_xmlstring_spaces(d, space=' '):
    """Serialize the dobs dict ``d`` to XML, indenting nested tags with ``space``."""
    return _indent_xml_lines(_dobsdict_to_xmlstring(d), space)


def _format_number(num, fmt='%1.16e '):
    """Format ``num`` with ``fmt``, an exact zero is abbreviated as ``'0 '``."""
    if num == 0:
        return '0 '
    return fmt % (num)


def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The XML document, including the XML declaration.

    Raises
    ------
    Exception
        If symbol has the wrong type or length or if the Obs carry
        inconsistent covariance matrices under the same name.
    """
    # Work on a copy, the defaults added below must not leak to the caller.
    enstags = {} if enstags is None else dict(enstags)
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(mc_names)
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(cov_names)
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        # Format every entry, so that e.g. a leading None ends up as 'None'.
        osymbol = ' '.join('%s' % s for s in symbol)
    pd['array']['symbol'] = osymbol

    pd['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for mc_name in mc_names:
        ed = {}
        ed['enstag'] = enstags[mc_name]
        onames = sorted([n for n in r_names if (n.startswith(mc_name + '|') or n == mc_name)])
        nr = len(onames)
        ed['nr'] = nr
        # The replica arrays are written without an enclosing tag.
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
            rows = []
            # Position of the next unwritten configuration of every Obs,
            # -1 once all configurations of that Obs have been written.
            counters = [0 for o in obsl]
            offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl]
            for ci in idx:
                cells = ['%d ' % ci]
                for oi, o in enumerate(obsl):
                    if repname not in o.idl:
                        cells.append('0 ')
                        continue
                    if counters[oi] >= 0 and o.idl[repname][counters[oi]] == ci:
                        cells.append(_format_number(o.deltas[repname][counters[oi]] + offsets[oi]))
                        counters[oi] += 1
                        if counters[oi] >= len(o.idl[repname]):
                            counters[oi] = -1
                    else:
                        # The Obs is not defined on this configuration.
                        cells.append(_format_number(offsets[oi]))
                cells.append('\n')
                rows.append(''.join(cells))
            ad['#data'] = ''.join(rows)
            ed[''].append(ad)
        pd['edata'].append(ed)

    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            covrows = []
            for i in range(ncov):
                for j in range(ncov):
                    covrows.append(_format_number(allcov[cname][i][j], '%1.14e '))
                covrows.append('\n')
            covd['#data'] = ''.join(covrows)

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        gradcells = []
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    gradcells.append(_format_number(o.covobs[cname].grad[i], '%1.14e '))
                else:
                    gradcells.append('0 ')
        gradd['#data'] = ''.join(gradcells)
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs


def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. The endings '.xml' and, for gz=True, '.gz'
        are appended if they are missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is a XML file.
    """
    if enstags is None:
        enstags = {}

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers close the file even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
    """Export a list of Obs or structures containing Obs to an xml string
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.

    Returns
    -------
    str
        The XML document, including the XML declaration.

    Raises
    ------
    Exception
        If the Obs live on more than one ensemble or on different ensembles,
        if their number of replica differs, or if ``enstag`` / ``symbol`` have
        the wrong type or length.
    """
    first = obsl[0]
    ename = first.e_names[0]
    names = list(first.deltas.keys())
    nr = len(names)

    # Every Obs has to live on exactly the ensemble of the first one.
    for o in obsl:
        if len(o.e_names) != 1:
            raise Exception('You try to export dobs to obs!')
        if o.e_names[0] != ename:
            raise Exception('You try to export dobs to obs!')
        if len(o.deltas.keys()) != nr:
            raise Exception('Incompatible obses in list')

    # The insertion order of the keys fixes the order of the tags in the file.
    pd = {}
    od = {'observables': {
        'schema': {'name': 'lattobs', 'version': '1.0'},
        'origin': {
            'who': getpass.getuser(),
            'date': str(datetime.datetime.now())[:-7],
            'host': socket.gethostname(),
            'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}},
        'pobs': pd}}
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    if enstag:
        if not isinstance(enstag, str):
            raise Exception('enstag has to be a string!')
        pd['enstag'] = enstag
    else:
        pd['enstag'] = ename
    pd['nr'] = '%d' % (nr)
    pd['array'] = []

    if not isinstance(symbol, list):
        raise Exception('Symbol has to be a list!')
    if len(symbol) not in (0, len(obsl)):
        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
    # The first column of every data row is the configuration number.
    osymbol = 'cfg' + ''.join(' %s' % s for s in symbol)

    for rname in names:
        Nconf = len(first.deltas[rname])
        rows = []
        for c in range(Nconf):
            cells = ['%d ' % first.idl[rname][c]]
            for o in obsl:
                # Store the full value, i.e. fluctuation plus replica mean.
                num = o.deltas[rname][c] + o.r_values[rname]
                cells.append('0 ' if num == 0 else '%1.16e ' % (num))
            cells.append('\n')
            rows.append(''.join(cells))
        pd['array'].append({
            'id': rname.replace('|', ''),
            'layout': '%d i f%d' % (Nconf, len(obsl)),
            'symbol': osymbol,
            '#data': ''.join(rows)})

    return '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
Export a list of Obs or structures containing Obs to an xml string according to the Zeuthen pobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. The endings '.xml' and, for gz=True, '.gz'
        are appended if they are missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers close the file even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)
Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen pobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
- fname (str): Filename of the output file.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
- gz (bool): If True, the output is a gzipped xml. If False, the output is an xml file.
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends in
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, the file is opened with gzip. If False, it is read as a plain
        file; a UserWarning is issued if the name nevertheless ends in '.gz'.
    separator_insertion : str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    list or dict
        The list of Obs, or, if full_output is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description' and 'obsdata'.

    Raises
    ------
    Exception
        If separator_insertion is neither None, int nor str.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes, exactly as the gzip branch does, so that the XML parser
        # decodes the content itself instead of relying on the platform's default
        # text encoding (the writer always emits UTF-8).
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    deltas = []
    names = []
    idl = []
    # Children 0-4 of the pobs element are header entries; the replica data
    # is read from child 5 onwards.
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # One Obs per observable: collect entry i from every replica.
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res
Import a list of Obs from an xml.gz file in the Zeuthen pobs format.
Tags are not written or recovered automatically.
Parameters
- fname (str): Filename of the input file.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
- separator_insertion (str or int): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. None (default): Replica names remain unchanged.
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    content : str
        XML string containing the data
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The list of Obs, or, if full_output is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description', 'enstags'
        and 'obsdata'.

    Raises
    ------
    Exception
        If separator_insertion is not of a supported type.
    """

    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # False has to be tested explicitly and before the int branch:
                # bool is a subclass of int, so False would otherwise be treated
                # as position 0 and "|" would be prepended to every replica name.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                # The ensemble name used as key is everything in front of the
                # separator, if one is present in the replica name.
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                # A single gradient column is reused for every observable.
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # The file stores fluctuations; add the mean back to obtain the samples.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # A replica whose samples all equal the mean does not contribute.
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
        res[-1]._value = mean[i]
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # NOTE(review): symbol is the raw stripped string read above, so symbol[i]
        # selects a single character - confirm whether a whitespace split was intended.
        for i in range(len(res)):
            res[i].tag = symbol[i]
            if res[i].tag == 'None':
                res[i].tag = None
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res
Import a list of Obs from a string in the Zeuthen dobs format.
Tags are not written or recovered automatically.
Parameters
- content (str): XML string containing the data
- noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file, possibly with vanishing entries.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
- separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends in
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        Whatever import_dobs_string returns for the content of the file.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes, exactly as the gzip branch does, so that the XML parser
        # decodes the content itself instead of relying on the platform's default
        # text encoding (the writer always emits UTF-8).
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)
Import a list of Obs from an xml.gz file in the Zeuthen dobs format.
Tags are not written or recovered automatically.
Parameters
- fname (str): Filename of the input file.
- noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
- gz (bool): If True, assumes that data is gzipped. If False, assumes XML file.
- separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The XML document, starting with the XML declaration.

    Raises
    ------
    Exception
        If symbol is non-empty and not a list of len(obsl) entries, or if two Obs
        carry different covariance matrices under the same name.
    """
    # Work on a copy: the defaults filled in below must not leak into the
    # dict owned by the caller.
    enstags = {} if enstags is None else dict(enstags)
    od = {}
    r_names = []
    for o in obsl:
        r_names += [name for name in o.names if name.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        # Drop the last seven characters of the timestamp string.
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
    pd['array']['symbol'] = osymbol

    pd['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for name in mc_names:
        ed = {}
        ed['enstag'] = enstags[name]
        onames = sorted([n for n in r_names if (n.startswith(name + '|') or n == name)])
        nr = len(onames)
        ed['nr'] = nr
        ed[''] = []

        for r in range(nr):
            ad = {}
            repname = onames[r]
            ad['id'] = repname.replace('|', '')
            # Union of the configuration indices of all Obs on this replica.
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            layout = '%d i f%d' % (Nconf, len(obsl))
            ad['layout'] = layout
            data = ''
            # counters[oi] is the position of the next unwritten configuration of
            # observable oi; -1 marks that all of its configurations are written.
            counters = [0 for o in obsl]
            offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl]
            for ci in idx:
                data += '%d ' % ci
                for oi in range(len(obsl)):
                    o = obsl[oi]
                    if repname in o.idl:
                        if counters[oi] < 0:
                            num = offsets[oi]
                            if num == 0:
                                data += '0 '
                            else:
                                data += '%1.16e ' % (num)
                            continue
                        if o.idl[repname][counters[oi]] == ci:
                            num = o.deltas[repname][counters[oi]] + offsets[oi]
                            if num == 0:
                                data += '0 '
                            else:
                                data += '%1.16e ' % (num)
                            counters[oi] += 1
                            if counters[oi] >= len(o.idl[repname]):
                                counters[oi] = -1
                        else:
                            # Configuration ci is missing for this observable:
                            # only the replica offset is written.
                            num = offsets[oi]
                            if num == 0:
                                data += '0 '
                            else:
                                data += '%1.16e ' % (num)
                    else:
                        data += '0 '
                data += '\n'
            ad['#data'] = data
            ed[''].append(ad)
        pd['edata'].append(ed)

    # Collect one covariance matrix per name and insist that all Obs agree on it.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs
Generate the string for the export of a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- who (str): Provide the name of the person that exports the data.
- enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends in
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is an XML file.
    """
    if enstags is None:
        enstags = {}

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        # The context manager closes the handle even if the write raises.
        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
- fname (str): Filename of the output file.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- who (str): Provide the name of the person that exports the data.
- enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
- gz (bool): If True, the output is a gzipped XML. If False, the output is an XML file.