pyerrors.input.dobs
View Source
from collections import defaultdict
import gzip
import lxml.etree as et
import getpass
import socket
import datetime
import json
import warnings
import numpy as np
from ..obs import Obs
from ..obs import _merge_idx
from ..covobs import Covobs
from .. import version as pyerrorsversion


# Based on https://stackoverflow.com/a/10076823
def _etree_to_dict(t):
    """Convert an XML element (and, recursively, its children) to a python dict.

    The result has the element tag as its only key. Children with the same tag
    are collected in a list, attributes are stored under '@<name>' and text of
    an element that also has children or attributes is stored under '#data'.
    """
    d = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        dd = defaultdict(list)
        for dc in map(_etree_to_dict, children):
            for k, v in dc.items():
                dd[k].append(v)
        # A tag that occurs only once is stored directly, not as a one-element list.
        d = {t.tag: {k: v[0] if len(v) == 1 else v
                     for k, v in dd.items()}}
    if t.attrib:
        d[t.tag].update(('@' + k, v)
                        for k, v in t.attrib.items())
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            if text:
                d[t.tag]['#data'] = [text]
        else:
            d[t.tag] = text
    return d


def _indent_xmlstring(s, space):
    """Indent the newline separated XML string s by nesting depth.

    A line starting with a closing tag decreases the depth before it is
    written, a line that starts with a tag and contains no closing tag
    increases the depth for the following lines.
    """
    lines = []
    depth = 0
    for li in s.split('\n'):
        closing = li.startswith('</')
        if closing:
            depth -= 1
        # space * depth is the empty string for depth <= 0
        lines.append(space * depth + li + '\n')
        if li.startswith('<') and not closing and '</' not in li:
            depth += 1
    return ''.join(lines)


def _dict_to_xmlstring(d):
    """Serialize a (nested) dict to an XML string without indentation (pobs).

    A key starting with '#' marks the raw data of an array: everything
    collected so far in this dict is wrapped in <array> tags together with
    the data and returned immediately.
    """
    if isinstance(d, dict):
        iters = ''
        for k in d:
            if k.startswith('#'):
                iters += ''.join(d[k])
                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
                return iters
            if isinstance(d[k], dict):
                iters += '<%s>\n' % (k) + _dict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
            elif isinstance(d[k], str):
                # Long strings are put on their own line.
                if len(d[k]) > 100:
                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
                else:
                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
            elif isinstance(d[k], list):
                for item in d[k]:
                    iters += _dict_to_xmlstring(item)
            elif not d[k]:
                return '\n'
            else:
                raise Exception('Type', type(d[k]), 'not supported in export!')
    else:
        raise Exception('Type', type(d), 'not supported in export!')
    return iters


def _dict_to_xmlstring_spaces(d, space=' '):
    """Serialize d with _dict_to_xmlstring and indent the result using space."""
    return _indent_xmlstring(_dict_to_xmlstring(d), space)


def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
    """Export a list of Obs or structures containing Obs to an xml string
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
        The default list is only read, never modified.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    """

    od = {}
    ename = obsl[0].e_names[0]
    names = list(obsl[0].deltas.keys())
    nr = len(names)
    onames = [rep.replace('|', '') for rep in names]
    for o in obsl:
        if len(o.e_names) != 1:
            raise Exception('You try to export dobs to obs!')
        if o.e_names[0] != ename:
            raise Exception('You try to export dobs to obs!')
        if len(o.deltas.keys()) != nr:
            raise Exception('Incompatible obses in list')
    od['observables'] = {}
    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
    od['observables']['origin'] = {
        'who': getpass.getuser(),
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['observables']['pobs'] = {}
    pd = od['observables']['pobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    if enstag:
        if not isinstance(enstag, str):
            raise Exception('enstag has to be a string!')
        pd['enstag'] = enstag
    else:
        pd['enstag'] = ename
    pd['nr'] = '%d' % (nr)
    pd['array'] = []
    osymbol = 'cfg'
    if not isinstance(symbol, list):
        raise Exception('Symbol has to be a list!')
    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
    for s in symbol:
        osymbol += ' %s' % s
    for r in range(nr):
        ad = {}
        ad['id'] = onames[r]
        Nconf = len(obsl[0].deltas[names[r]])
        layout = '%d i f%d' % (Nconf, len(obsl))
        ad['layout'] = layout
        ad['symbol'] = osymbol
        # One row per configuration: the configuration number followed by the
        # value (delta + replica mean) of every observable.
        rows = []
        for c in range(Nconf):
            row = '%d ' % obsl[0].idl[names[r]][c]
            for o in obsl:
                num = o.deltas[names[r]][c] + o.r_values[names[r]]
                if num == 0:
                    row += '0 '
                else:
                    row += '%1.16e ' % (num)
            rows.append(row + '\n')
        ad['#data'] = ''.join(rows)
        pd['array'].append(ad)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
    return rs


def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers close the file even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)


def _import_data(string):
    """Parse whitespace separated numbers into a flat python list via json."""
    return json.loads("[" + ",".join(string.replace(' +', ' ').split()) + "]")


def _check(condition):
    """Raise an Exception if condition is not fulfilled."""
    if not condition:
        raise Exception("XML file format not supported")


class _NoTagInDataError(Exception):
    """Raised when tag is not in data"""
    def __init__(self, tag):
        self.tag = tag
        super().__init__('Tag %s not in data!' % (self.tag))


def _find_tag(dat, tag):
    """Return the index of the first child of dat with the given tag.

    Raises _NoTagInDataError if there is no such child.
    """
    for i in range(len(dat)):
        if dat[i].tag == tag:
            return i
    raise _NoTagInDataError(tag)


def _import_array(arr):
    """Import the content of an <array> element.

    The numbers are read from the text that follows the last of the
    'layout' and 'symbol' children. The return value depends on the layout:

    - 'N i fM': list [name, list of the N configuration numbers,
      list(range(M)), list of M numpy arrays with the data columns]
    - 'N fM': numpy array of shape (N, M)
    - 'd1 ... dn f': numpy array of shape (d1, ..., dn)

    Any other layout raises an Exception.
    """
    name = arr[_find_tag(arr, 'id')].text.strip()
    index = _find_tag(arr, 'layout')
    try:
        sindex = _find_tag(arr, 'symbol')
    except _NoTagInDataError:
        # No symbol tag: the data follows the layout tag.
        sindex = 0
    if sindex > index:
        tmp = _import_data(arr[sindex].tail)
    else:
        tmp = _import_data(arr[index].tail)

    li = arr[index].text.strip()
    m = li.split()
    if m[1] == "i" and m[2][0] == "f":
        nc = int(m[0])
        na = int(m[2].lstrip('f'))
        _dat = []
        mask = []
        for a in range(na):
            mask += [a]
            # Every row holds the configuration number followed by na values.
            _dat += [np.array(tmp[1 + a:: na + 1])]
        _check(len(tmp[0:: na + 1]) == nc)
        return [name, tmp[0:: na + 1], mask, _dat]
    elif m[1][0] == 'f' and len(m) < 3:
        sh = (int(m[0]), int(m[1].lstrip('f')))
        return np.reshape(tmp, sh)
    elif any(['f' in s for s in m]):
        for si in range(len(m)):
            if m[si] == 'f':
                break
        sh = [int(m[i]) for i in range(si)]
        return np.reshape(tmp, sh)
    else:
        print(name, m)
        _check(False)


def _import_rdata(rd):
    """Import the array of one replica and return (deltas, name, idx)."""
    name, idx, mask, deltas = _import_array(rd)
    return deltas, name, idx


def _import_cdata(cd):
    """Import a <cdata> element and return (name, covariance, gradient)."""
    _check(cd[0].tag == "id")
    _check(cd[1][0].text.strip() == "cov")
    cov = _import_array(cd[1])
    grad = _import_array(cd[2])
    return cd[0].text.strip(), cov, grad


def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read bytes: lxml refuses str input that carries an XML encoding
        # declaration, which is what write_pobs produces.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    deltas = []
    names = []
    idl = []
    # The first five children are spec, origin, name, enstag and nr.
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res


# Reading (and writing) dobs is not yet working properly:
# we have to loop over root[2:] because each entry is a dobs
# But maybe this is just a problem with Ben's implementation

# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    If the symbol entry of the file contains exactly one whitespace separated
    token per observable, the tokens are assigned as tags of the Obs
    (the token 'None' is mapped to None).

    Parameters
    ----------
    content : str or bytes
        XML string containing the data
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion: str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.
    """

    # lxml refuses str input that carries an XML encoding declaration (as
    # written by create_dobs_string), so the document is parsed as bytes.
    if isinstance(content, str):
        content = content.encode('utf-8')
    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = ''
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # The bools have to be handled before the int branch because
                # bool is a subclass of int.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    # Remove duplicates; sorting keeps the result deterministic.
    names = sorted(set(names))

    # The file stores fluctuations, the mean values are added back here.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # A replica whose entries all equal the mean does not contribute.
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    # The symbols are written as one space separated string. They are only
    # used as tags if there is exactly one token per observable.
    tags = symbol.split()
    if len(tags) == len(res):
        for o, tag in zip(res, tags):
            o.tag = None if tag == 'None' else tag
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res


def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    The file content is passed on to import_dobs_string.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    # The content is read as bytes in both cases and decoded by the XML parser.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)


def _dobsdict_to_xmlstring(d):
    """Serialize a (nested) dict to an XML string without indentation (dobs).

    A key starting with '#value' marks raw values that are appended to the
    entries collected so far and returned without additional tags. Any other
    key starting with '#' marks the raw data of an array: everything
    collected so far is wrapped in <array> tags together with the data.
    Lists stored under 'edata' or 'cdata' get one enclosing tag per item.
    """
    if isinstance(d, dict):
        iters = ''
        for k in d:
            if k.startswith('#value'):
                iters += ''.join(d[k])
                return iters + '\n'
            elif k.startswith('#'):
                iters += ''.join(d[k])
                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
                return iters
            if isinstance(d[k], dict):
                iters += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
            elif isinstance(d[k], str):
                # Long strings are put on their own line.
                if len(d[k]) > 100:
                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
                else:
                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
            elif isinstance(d[k], list):
                tmps = ''
                if k in ['edata', 'cdata']:
                    for item in d[k]:
                        tmps += '<%s>\n' % (k) + _dobsdict_to_xmlstring(item) + '</%s>\n' % (k)
                else:
                    for item in d[k]:
                        tmps += _dobsdict_to_xmlstring(item)
                iters += tmps
            elif isinstance(d[k], (int, float)):
                iters += '<%s> ' % (k) + str(d[k]) + ' <%s%s>\n' % ('/', k)
            elif not d[k]:
                return '\n'
            else:
                raise Exception('Type', type(d[k]), 'not supported in export!')
    else:
        raise Exception('Type', type(d), 'not supported in export!')
    return iters


def _dobsdict_to_xmlstring_spaces(d, space=' '):
    """Serialize d with _dobsdict_to_xmlstring and indent the result using space."""
    return _indent_xmlstring(_dobsdict_to_xmlstring(d), space)


def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.
    """
    # Work on a copy: missing entries are filled in below and neither the
    # shared default dict nor the dict of the caller may be modified.
    enstags = dict(enstags) if enstags else {}
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
    pd['array']['symbol'] = osymbol

    pd['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ensname in mc_names:
        ed = {}
        ed['enstag'] = enstags[ensname]
        onames = sorted([n for n in r_names if (n.startswith(ensname + '|') or n == ensname)])
        nr = len(onames)
        ed['nr'] = nr
        # The replica arrays are written without an enclosing tag of their own.
        ed[''] = []

        for r in range(nr):
            ad = {}
            repname = onames[r]
            ad['id'] = repname.replace('|', '')
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            layout = '%d i f%d' % (Nconf, len(obsl))
            ad['layout'] = layout
            rows = []
            # Position of the next unwritten configuration of every Obs,
            # -1 once all configurations of an Obs have been written.
            counters = [0 for o in obsl]
            for ci in idx:
                row = '%d ' % ci
                for oi in range(len(obsl)):
                    o = obsl[oi]
                    if repname in o.idl:
                        if counters[oi] < 0:
                            row += '0 '
                            continue
                        if o.idl[repname][counters[oi]] == ci:
                            num = o.deltas[repname][counters[oi]]
                            if num == 0:
                                row += '0 '
                            else:
                                row += '%1.16e ' % (num)
                            counters[oi] += 1
                            if counters[oi] >= len(o.idl[repname]):
                                counters[oi] = -1
                        else:
                            # This Obs is not defined on configuration ci.
                            row += '0 '
                    else:
                        row += '0 '
                rows.append(row + '\n')
            ad['#data'] = ''.join(rows)
            ed[''].append(ad)
        pd['edata'].append(ed)

    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs


def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is a XML file.
    """

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers close the file even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
View Source
def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
    """Build the XML string for a list of Obs in the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        All Obs have to be defined on the same, single ensemble.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.

    Returns
    -------
    str
        The XML document, including the XML declaration.
    """

    reference = obsl[0]
    ename = reference.e_names[0]
    names = list(reference.deltas.keys())
    nr = len(names)
    onames = [rep.replace('|', '') for rep in names]

    # Every Obs has to live on the same single ensemble with the same number of replica.
    for obs in obsl:
        if len(obs.e_names) != 1:
            raise Exception('You try to export dobs to obs!')
        if obs.e_names[0] != ename:
            raise Exception('You try to export dobs to obs!')
        if len(obs.deltas.keys()) != nr:
            raise Exception('Incompatible obses in list')

    file_origin = {
        'who': getpass.getuser(),
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}

    pd = {'spec': spec, 'origin': origin, 'name': name}
    if enstag:
        if not isinstance(enstag, str):
            raise Exception('enstag has to be a string!')
        pd['enstag'] = enstag
    else:
        pd['enstag'] = ename
    pd['nr'] = '%d' % (nr)
    pd['array'] = []

    if not isinstance(symbol, list):
        raise Exception('Symbol has to be a list!')
    if len(symbol) != 0 and len(symbol) != len(obsl):
        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
    osymbol = 'cfg' + ''.join([' %s' % s for s in symbol])

    for rep, oname in zip(names, onames):
        Nconf = len(reference.deltas[rep])
        # One row per configuration: the configuration number followed by the
        # value (delta + replica mean) of every observable.
        rows = []
        for c in range(Nconf):
            entries = ['%d ' % reference.idl[rep][c]]
            for obs in obsl:
                num = obs.deltas[rep][c] + obs.r_values[rep]
                entries.append('0 ' if num == 0 else '%1.16e ' % (num))
            entries.append('\n')
            rows.append(''.join(entries))
        pd['array'].append({
            'id': oname,
            'layout': '%d i f%d' % (Nconf, len(obsl)),
            'symbol': osymbol,
            '#data': ''.join(rows)})

    od = {'observables': {
        'schema': {'name': 'lattobs', 'version': '1.0'},
        'origin': file_origin,
        'pobs': pd}}

    return '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
Export a list of Obs or structures containing Obs to an xml string according to the Zeuthen pobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
View Source
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. The extension '.xml' (and '.gz' if gz is True)
        is appended if it is missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers close the file even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)
Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen pobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
- fname (str): Filename of the output file.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
- gz (bool): If True, the output is a gzipped xml. If False, the output is an xml file.
View Source
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    list or dict
        The list of imported Obs, or, if full_output is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description' and 'obsdata'.
    """

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes, exactly like the gzip branch: the XML parser then
        # decodes according to the document itself instead of the platform's
        # default text encoding.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    deltas = []
    names = []
    idl = []
    # Children 0-4 are header entries; everything from index 5 on is
    # handed to _import_rdata, one entry per replica.
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # One Obs per observable, each collecting its samples from all replica.
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if not full_output:
        return res

    tool = file_origin.get('tool', None)
    if tool:
        program = tool['name'] + ' ' + tool['version']
    else:
        program = ''
    return {
        'program': program,
        'version': version,
        'who': file_origin['who'],
        'date': file_origin['date'],
        'host': file_origin['host'],
        'description': descriptiond,
        'obsdata': res,
    }
Import a list of Obs from an xml.gz file in the Zeuthen pobs format.
Tags are not written or recovered automatically.
Parameters
- fname (str): Filename of the input file.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
- separator_insertion (str or int): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. None (default): Replica names remain unchanged.
View Source
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    Tags are not written or recovered automatically. If the file provides one
    whitespace-separated symbol per observable, these symbols are assigned as tags.

    Parameters
    ----------
    content : str
        XML string containing the data
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion: str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The list of imported Obs, or, if full_output is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description', 'enstags' and 'obsdata'.
    """

    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    tags = []
    if dobs[3][1].tag == 'symbol':
        # .text is None for an empty element.
        symbol = (dobs[3][1].text or '').strip()
        descriptiond['symbol'] = symbol
        # The symbols are stored as one space-separated string; split it so
        # that each observable gets a whole symbol rather than one character.
        tags = symbol.split()
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(dobs)):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # Booleans must be tested by identity *before* the int check:
                # bool is a subclass of int, so False would otherwise be
                # treated as "insert the separator at position 0".
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                # Remember which enstag from the file belongs to the ensemble name in use.
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    # Drop duplicates; sorting keeps the result independent of set ordering.
    names = sorted(set(names))

    # The file stores fluctuations around the mean; shift them back to samples.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            # A replica whose samples all equal the mean does not contribute
            # to this observable and is skipped if noempty is set.
            h = np.unique(deltad[name][i])
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    # Only assign tags when there is exactly one symbol per observable;
    # otherwise the mapping is ambiguous and the tags are left untouched.
    if tags and len(tags) == len(res):
        for o, tag in zip(res, tags):
            o.tag = None if tag == 'None' else tag
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)

    if not full_output:
        return res

    tool = file_origin.get('tool', None)
    if tool:
        program = tool['name'] + ' ' + tool['version']
    else:
        program = ''
    return {
        'program': program,
        'version': version,
        'who': file_origin['who'],
        'date': file_origin['date'],
        'host': file_origin['host'],
        'description': descriptiond,
        'enstags': enstags,
        'obsdata': res,
    }
Import a list of Obs from a string in the Zeuthen dobs format.
Tags are not written or recovered automatically.
Parameters
- content (str): XML string containing the data
- noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file, possibly with vanishing entries.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
- separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
View Source
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        Whatever import_dobs_string returns for the file content.
    """

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'
    # The content is read exactly once, as raw bytes, inside a context
    # manager. Previously the file was unconditionally re-opened with
    # gzip.open afterwards, which broke gz=False and leaked the handle.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)
Import a list of Obs from an xml.gz file in the Zeuthen dobs format.
Tags are not written or recovered automatically.
Parameters
- fname (str): Filename of the input file.
- noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
- gz (bool): If True, assumes that data is gzipped. If False, assumes XML file.
- separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
View Source
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The XML document, including the XML declaration.
    """
    # Work on a private copy: missing ensembles are filled in below, and that
    # must neither leak into the caller's dict nor into the shared default.
    enstags = dict(enstags) if enstags else {}

    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for ens_name in mc_names:
        enstags.setdefault(ens_name, ens_name)
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        # Format explicitly: str(datetime) omits the fractional part when the
        # microsecond is 0, so slicing off seven characters would cut into
        # the time itself.
        'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        # Symbols are stored as a single space-separated string.
        osymbol = ' '.join('%s' % s for s in symbol)
    pd['array']['symbol'] = osymbol

    pd['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ens_name in mc_names:
        ed = {}
        ed['enstag'] = enstags[ens_name]
        onames = sorted([n for n in r_names if (n.startswith(ens_name + '|') or n == ens_name)])
        nr = len(onames)
        ed['nr'] = nr
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            # Merged configuration indices of all Obs on this replica.
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
            rows = []
            # Per-Obs read position in its own idl; -1 marks "exhausted".
            counters = [0 for _ in obsl]
            for ci in idx:
                fields = ['%d ' % ci]
                for oi, o in enumerate(obsl):
                    if repname not in o.idl or counters[oi] < 0:
                        # Obs not defined on this replica, or no samples left.
                        fields.append('0 ')
                        continue
                    if o.idl[repname][counters[oi]] == ci:
                        num = o.deltas[repname][counters[oi]]
                        fields.append('0 ' if num == 0 else '%1.16e ' % (num))
                        counters[oi] += 1
                        if counters[oi] >= len(o.idl[repname]):
                            counters[oi] = -1
                    else:
                        # Configuration missing for this Obs: pad with zero.
                        fields.append('0 ')
                fields.append('\n')
                rows.append(''.join(fields))
            ad['#data'] = ''.join(rows)
            ed[''].append(ad)
        pd['edata'].append(ed)

    # Collect one covariance matrix per covobs name and make sure that all
    # Obs sharing a name agree on it.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            cov_rows = []
            for i in range(ncov):
                entries = []
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    entries.append('0 ' if val == 0 else '%1.14e ' % (val))
                entries.append('\n')
                cov_rows.append(''.join(entries))
            covd['#data'] = ''.join(cov_rows)

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        grad_entries = []
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        grad_entries.append('%1.14e ' % (val))
                    else:
                        grad_entries.append('0 ')
                else:
                    grad_entries.append('0 ')
        gradd['#data'] = ''.join(grad_entries)
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs
Generate the string for the export of a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- who (str): Provide the name of the person that exports the data.
- enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
View Source
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends with
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is a XML file.
    """

    # Hand over a copy so that neither the caller's dict nor the shared
    # default argument can be modified while the string is generated.
    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=dict(enstags))

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # The context managers guarantee that the file is closed even if the
    # write fails part-way.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
- fname (str): Filename of the output file.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- who (str): Provide the name of the person that exports the data.
- enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
- gz (bool): If True, the output is a gzipped XML. If False, the output is a XML file.