pyerrors.input.dobs

  1from collections import defaultdict
  2import gzip
  3import lxml.etree as et
  4import getpass
  5import socket
  6import datetime
  7import json
  8import warnings
  9import numpy as np
 10from ..obs import Obs
 11from ..obs import _merge_idx
 12from ..covobs import Covobs
 13from .. import version as pyerrorsversion
 14
 15
 16# Based on https://stackoverflow.com/a/10076823
 17def _etree_to_dict(t):
 18    """ Convert the content of an XML file to a python dict"""
 19    d = {t.tag: {} if t.attrib else None}
 20    children = list(t)
 21    if children:
 22        dd = defaultdict(list)
 23        for dc in map(_etree_to_dict, children):
 24            for k, v in dc.items():
 25                dd[k].append(v)
 26        d = {t.tag: {k: v[0] if len(v) == 1 else v
 27                     for k, v in dd.items()}}
 28    if t.attrib:
 29        d[t.tag].update(('@' + k, v)
 30                        for k, v in t.attrib.items())
 31    if t.text:
 32        text = t.text.strip()
 33        if children or t.attrib:
 34            if text:
 35                d[t.tag]['#data'] = [text]
 36        else:
 37            d[t.tag] = text
 38    return d
 39
 40
 41def _dict_to_xmlstring(d):
 42    if isinstance(d, dict):
 43        iters = ''
 44        for k in d:
 45            if k.startswith('#'):
 46                for la in d[k]:
 47                    iters += la
 48                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
 49                return iters
 50            if isinstance(d[k], dict):
 51                iters += '<%s>\n' % (k) + _dict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
 52            elif isinstance(d[k], str):
 53                if len(d[k]) > 100:
 54                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
 55                else:
 56                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
 57            elif isinstance(d[k], list):
 58                for i in range(len(d[k])):
 59                    iters += _dict_to_xmlstring(d[k][i])
 60            elif not d[k]:
 61                return '\n'
 62            else:
 63                raise Exception('Type', type(d[k]), 'not supported in export!')
 64    else:
 65        raise Exception('Type', type(d), 'not supported in export!')
 66    return iters
 67
 68
 69def _dict_to_xmlstring_spaces(d, space='  '):
 70    s = _dict_to_xmlstring(d)
 71    o = ''
 72    c = 0
 73    cm = False
 74    for li in s.split('\n'):
 75        if li.startswith('<%s' % ('/')):
 76            c -= 1
 77            cm = True
 78        for i in range(c):
 79            o += space
 80        o += li + '\n'
 81        if li.startswith('<') and not cm:
 82            if not '<%s' % ('/') in li:
 83                c += 1
 84        cm = False
 85    return o
 86
 87
 88def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 89    """Export a list of Obs or structures containing Obs to an xml string
 90    according to the Zeuthen pobs format.
 91
 92    Tags are not written or recovered automatically. The separator | is removed from the replica names.
 93
 94    Parameters
 95    ----------
 96    obsl : list
 97        List of Obs that will be exported.
 98        The Obs inside a structure have to be defined on the same ensemble.
 99    name : str
100        The name of the observable.
101    spec : str
102        Optional string that describes the contents of the file.
103    origin : str
104        Specify where the data has its origin.
105    symbol : list
106        A list of symbols that describe the observables to be written. May be empty.
107    enstag : str
108        Enstag that is written to pobs. If None, the ensemble name is used.
109    """
110
111    od = {}
112    ename = obsl[0].e_names[0]
113    names = list(obsl[0].deltas.keys())
114    nr = len(names)
115    onames = [name.replace('|', '') for name in names]
116    for o in obsl:
117        if len(o.e_names) != 1:
118            raise Exception('You try to export dobs to obs!')
119        if o.e_names[0] != ename:
120            raise Exception('You try to export dobs to obs!')
121        if len(o.deltas.keys()) != nr:
122            raise Exception('Incompatible obses in list')
123    od['observables'] = {}
124    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
125    od['observables']['origin'] = {
126        'who': getpass.getuser(),
127        'date': str(datetime.datetime.now())[:-7],
128        'host': socket.gethostname(),
129        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
130    od['observables']['pobs'] = {}
131    pd = od['observables']['pobs']
132    pd['spec'] = spec
133    pd['origin'] = origin
134    pd['name'] = name
135    if enstag:
136        if not isinstance(enstag, str):
137            raise Exception('enstag has to be a string!')
138        pd['enstag'] = enstag
139    else:
140        pd['enstag'] = ename
141    pd['nr'] = '%d' % (nr)
142    pd['array'] = []
143    osymbol = 'cfg'
144    if not isinstance(symbol, list):
145        raise Exception('Symbol has to be a list!')
146    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
147        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
148    for s in symbol:
149        osymbol += ' %s' % s
150    for r in range(nr):
151        ad = {}
152        ad['id'] = onames[r]
153        Nconf = len(obsl[0].deltas[names[r]])
154        layout = '%d i f%d' % (Nconf, len(obsl))
155        ad['layout'] = layout
156        ad['symbol'] = osymbol
157        data = ''
158        for c in range(Nconf):
159            data += '%d ' % obsl[0].idl[names[r]][c]
160            for o in obsl:
161                num = o.deltas[names[r]][c] + o.r_values[names[r]]
162                if num == 0:
163                    data += '0 '
164                else:
165                    data += '%1.16e ' % (num)
166            data += '\n'
167        ad['#data'] = data
168        pd['array'].append(ad)
169
170    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
171    return rs
172
173
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends with
        neither '.xml' nor '.gz'; '.gz' is appended if gz is True and missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # Context managers guarantee that the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)
215
216
217def _import_data(string):
218    return json.loads("[" + ",".join(string.replace(' +', ' ').split()) + "]")
219
220
221def _check(condition):
222    if not condition:
223        raise Exception("XML file format not supported")
224
225
226class _NoTagInDataError(Exception):
227    """Raised when tag is not in data"""
228    def __init__(self, tag):
229        self.tag = tag
230        super().__init__('Tag %s not in data!' % (self.tag))
231
232
233def _find_tag(dat, tag):
234    for i in range(len(dat)):
235        if dat[i].tag == tag:
236            return i
237    raise _NoTagInDataError(tag)
238
239
def _import_array(arr):
    """Read the content of an <array> element.

    The numerical payload is taken from the tail text of the <layout> child
    or, if a <symbol> child follows it, from the tail of <symbol>.
    The layout string decides what is returned:

    * ``'<nc> i f<na>'``: rows of one index followed by ``na`` values.
      Returns ``[name, indices, mask, columns]`` where ``columns`` is a list
      of ``na`` numpy arrays and ``mask`` is ``[0, ..., na - 1]``.
    * ``'<n> f<m>'``: returns the data reshaped to ``(n, m)``.
    * ``'<n1> <n2> ... f'``: returns the data reshaped to the dimensions
      listed in front of the ``f``.

    Any other layout is printed and rejected via _check.
    """
    name = arr[_find_tag(arr, 'id')].text.strip()
    layout_pos = _find_tag(arr, 'layout')
    try:
        symbol_pos = _find_tag(arr, 'symbol')
    except _NoTagInDataError:
        symbol_pos = 0
    # The data follows whichever of the two header elements comes last.
    values = _import_data(arr[max(symbol_pos, layout_pos)].tail)

    layout = arr[layout_pos].text.strip().split()
    if layout[1] == "i" and layout[2][0] == "f":
        n_rows = int(layout[0])
        n_cols = int(layout[2].lstrip('f'))
        stride = n_cols + 1
        mask = list(range(n_cols))
        columns = [np.array(values[1 + col::stride]) for col in range(n_cols)]
        _check(len(values[0::stride]) == n_rows)
        return [name, values[0::stride], mask, columns]

    if layout[1][0] == 'f' and len(layout) < 3:
        shape = (int(layout[0]), int(layout[1].lstrip('f')))
        return np.reshape(values, shape)

    if any(['f' in token for token in layout]):
        # Dimensions are the entries in front of the first bare 'f'; without a
        # bare 'f' everything except the last entry is used.
        stop = next((pos for pos, token in enumerate(layout) if token == 'f'), len(layout) - 1)
        shape = [int(token) for token in layout[:stop]]
        return np.reshape(values, shape)

    print(name, layout)
    _check(False)
276
277
def _import_rdata(rd):
    """Read the data of one replica and return ``(deltas, name, idx)``.

    The mask that _import_array returns alongside is discarded.
    """
    name, idx, _mask, deltas = _import_array(rd)
    return deltas, name, idx
281
282
def _import_cdata(cd):
    """Read one <cdata> element and return ``(name, cov, grad)``.

    The first child has to be the <id>, the second child the array whose
    first entry has the text 'cov', the third child the gradient array.
    """
    id_element = cd[0]
    _check(id_element.tag == "id")
    cov_element = cd[1]
    _check(cov_element[0].text.strip() == "cov")
    covariance = _import_array(cov_element)
    gradient = _import_array(cd[2])
    return id_element.text.strip(), covariance, gradient
289
290
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends with
        neither '.xml' nor '.gz'; '.gz' is appended if gz is True and missing.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    list or dict
        The list of Obs, or (full_output=True) a dict with the keys 'program',
        'version', 'who', 'date', 'host', 'description' and 'obsdata'.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read bytes: lxml refuses to parse a str that carries an XML encoding
        # declaration, which is exactly what write_pobs puts in the first line.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    deltas = []
    names = []
    idl = []
    # Children 0-4 are the header (four description entries and nr), the
    # replica arrays start at position 5.
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # deltas[replica][observable] -> one Obs per observable over all replica
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res
380
381
382# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    content : str or bytes
        XML string containing the data
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The list of Obs, or (full_output=True) a dict with the keys 'program',
        'version', 'who', 'date', 'host', 'description', 'enstags' and 'obsdata'.
    """

    # lxml refuses to parse a str that carries an XML encoding declaration
    # (as written by create_dobs_string), so hand it bytes instead.
    if isinstance(content, str):
        content = content.encode('utf-8')
    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    # First row of the value array holds the central values of all observables.
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # False has to be tested explicitly: bool is a subclass of int, so
                # it would otherwise be treated as "insert the separator at position 0".
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                # Remember which enstag from the file belongs to the ensemble name in use.
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # The file stores fluctuations; Obs is constructed from samples, so add the mean back.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # A replica whose samples all equal the mean does not contribute.
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
        res[-1]._value = mean[i]
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # NOTE(review): symbol is the stripped text of the <symbol> element, i.e. a
        # str, so symbol[i] is its i-th character, not the i-th symbol - confirm intent.
        for i in range(len(res)):
            res[i].tag = symbol[i]
            if res[i].tag == 'None':
                res[i].tag = None
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res
544
545
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends with
        neither '.xml' nor '.gz'; '.gz' is appended if gz is True and missing.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        Whatever import_dobs_string returns for the content of the file.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read bytes: lxml refuses to parse a str that carries an XML encoding
        # declaration, which is exactly what write_dobs puts in the first line.
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)
586
587
588def _dobsdict_to_xmlstring(d):
589    if isinstance(d, dict):
590        iters = ''
591        for k in d:
592            if k.startswith('#value'):
593                for li in d[k]:
594                    iters += li
595                return iters + '\n'
596            elif k.startswith('#'):
597                for li in d[k]:
598                    iters += li
599                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
600                return iters
601            if isinstance(d[k], dict):
602                iters += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
603            elif isinstance(d[k], str):
604                if len(d[k]) > 100:
605                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
606                else:
607                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
608            elif isinstance(d[k], list):
609                tmps = ''
610                if k in ['edata', 'cdata']:
611                    for i in range(len(d[k])):
612                        tmps += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k][i]) + '</%s>\n' % (k)
613                else:
614                    for i in range(len(d[k])):
615                        tmps += _dobsdict_to_xmlstring(d[k][i])
616                iters += tmps
617            elif isinstance(d[k], (int, float)):
618                iters += '<%s> ' % (k) + str(d[k]) + ' <%s%s>\n' % ('/', k)
619            elif not d[k]:
620                return '\n'
621            else:
622                raise Exception('Type', type(d[k]), 'not supported in export!')
623    else:
624        raise Exception('Type', type(d), 'not supported in export!')
625    return iters
626
627
def _dobsdict_to_xmlstring_spaces(d, space='  '):
    """Serialize ``d`` with _dobsdict_to_xmlstring and indent the result.

    Every line is prefixed with ``space`` repeated once per currently open
    element. A line that starts with an opening tag and contains no closing
    tag increases the depth for the following lines; a line that starts with
    a closing tag decreases it before the line is written.
    """
    flat = _dobsdict_to_xmlstring(d)
    depth = 0
    pieces = []
    for line in flat.split('\n'):
        is_closing = line.startswith('</')
        if is_closing:
            depth -= 1
        pieces.append(space * depth + line + '\n')
        opens_element = line.startswith('<') and not is_closing and '</' not in line
        if opens_element:
            depth += 1
    return ''.join(pieces)
645
646
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The XML document, including the XML declaration.

    Raises
    ------
    Exception
        If symbol is neither empty nor a list with one entry per Obs, or if two Obs
        carry different covariance matrices under the same name.
    """
    # Work on a copy: the defaults filled in below must not leak into the caller's dict.
    enstags = {} if enstags is None else dict(enstags)

    def _fmt(num):
        # Exact zeros are abbreviated to keep the files small.
        if num == 0:
            return '0 '
        return '%1.16e ' % (num)

    od = {}
    r_names = []
    for o in obsl:
        r_names += [rname for rname in o.names if rname.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
        pd['array']['symbol'] = osymbol

    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ename in mc_names:
        ed = {}
        ed['enstag'] = enstags[ename]
        onames = sorted([n for n in r_names if (n.startswith(ename + '|') or n == ename)])
        nr = len(onames)
        ed['nr'] = nr
        # The replica arrays are written directly into <edata>, hence the empty key.
        ed[''] = []

        for r in range(nr):
            ad = {}
            repname = onames[r]
            ad['id'] = repname.replace('|', '')
            # Union of the configurations of all Obs on this replica.
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            layout = '%d i f%d' % (Nconf, len(obsl))
            ad['layout'] = layout
            # counters[oi]: position of the next unwritten configuration of Obs oi,
            # -1 once all of its configurations have been written.
            counters = [0 for o in obsl]
            offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl]
            pieces = []
            for ci in idx:
                pieces.append('%d ' % ci)
                for oi, o in enumerate(obsl):
                    if repname not in o.idl:
                        pieces.append('0 ')
                        continue
                    pos = counters[oi]
                    if pos >= 0 and o.idl[repname][pos] == ci:
                        num = o.deltas[repname][pos] + offsets[oi]
                        pos += 1
                        counters[oi] = pos if pos < len(o.idl[repname]) else -1
                    else:
                        # Obs has no measurement on this configuration: only the offset is written.
                        num = offsets[oi]
                    pieces.append(_fmt(num))
                pieces.append('\n')
            ad['#data'] = ''.join(pieces)
            ed[''].append(ad)
        pd['edata'].append(ed)

    # The covariance data does not depend on the Monte Carlo ensembles. It is
    # assembled once, after the ensemble loop, so that it is also written for
    # observables that only depend on Covobs.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs
829
830
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends with
        neither '.xml' nor '.gz'; '.gz' is appended if gz is True and missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is a XML file.
    """
    if enstags is None:
        enstags = {}

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # Context managers guarantee that the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 89def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 90    """Export a list of Obs or structures containing Obs to an xml string
 91    according to the Zeuthen pobs format.
 92
 93    Tags are not written or recovered automatically. The separator | is removed from the replica names.
 94
 95    Parameters
 96    ----------
 97    obsl : list
 98        List of Obs that will be exported.
 99        The Obs inside a structure have to be defined on the same ensemble.
100    name : str
101        The name of the observable.
102    spec : str
103        Optional string that describes the contents of the file.
104    origin : str
105        Specify where the data has its origin.
106    symbol : list
107        A list of symbols that describe the observables to be written. May be empty.
108    enstag : str
109        Enstag that is written to pobs. If None, the ensemble name is used.
110    """
111
112    od = {}
113    ename = obsl[0].e_names[0]
114    names = list(obsl[0].deltas.keys())
115    nr = len(names)
116    onames = [name.replace('|', '') for name in names]
117    for o in obsl:
118        if len(o.e_names) != 1:
119            raise Exception('You try to export dobs to obs!')
120        if o.e_names[0] != ename:
121            raise Exception('You try to export dobs to obs!')
122        if len(o.deltas.keys()) != nr:
123            raise Exception('Incompatible obses in list')
124    od['observables'] = {}
125    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
126    od['observables']['origin'] = {
127        'who': getpass.getuser(),
128        'date': str(datetime.datetime.now())[:-7],
129        'host': socket.gethostname(),
130        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
131    od['observables']['pobs'] = {}
132    pd = od['observables']['pobs']
133    pd['spec'] = spec
134    pd['origin'] = origin
135    pd['name'] = name
136    if enstag:
137        if not isinstance(enstag, str):
138            raise Exception('enstag has to be a string!')
139        pd['enstag'] = enstag
140    else:
141        pd['enstag'] = ename
142    pd['nr'] = '%d' % (nr)
143    pd['array'] = []
144    osymbol = 'cfg'
145    if not isinstance(symbol, list):
146        raise Exception('Symbol has to be a list!')
147    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
148        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
149    for s in symbol:
150        osymbol += ' %s' % s
151    for r in range(nr):
152        ad = {}
153        ad['id'] = onames[r]
154        Nconf = len(obsl[0].deltas[names[r]])
155        layout = '%d i f%d' % (Nconf, len(obsl))
156        ad['layout'] = layout
157        ad['symbol'] = osymbol
158        data = ''
159        for c in range(Nconf):
160            data += '%d ' % obsl[0].idl[names[r]][c]
161            for o in obsl:
162                num = o.deltas[names[r]][c] + o.r_values[names[r]]
163                if num == 0:
164                    data += '0 '
165                else:
166                    data += '%1.16e ' % (num)
167            data += '\n'
168        ad['#data'] = data
169        pd['array'].append(ad)
170
171    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
172    return rs

Export a list of Obs or structures containing Obs to an xml string according to the Zeuthen pobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
def write_pobs( obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends in
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # Context managers guarantee that the handle is closed (and the gzip
    # trailer flushed) even if the write raises.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)

Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen pobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
  • fname (str): Filename of the output file.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
  • gz (bool): If True, the output is a gzipped xml. If False, the output is an xml file.
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends in
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    list or dict
        The list of Obs, or, if full_output is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description' and
        'obsdata' (the list of Obs).
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes, exactly like the gzip branch: the parser then
        # honours the encoding declared in the XML header instead of the
        # platform's locale encoding, and is never handed a str that carries
        # an encoding declaration.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    deltas = []
    names = []
    idl = []
    # Children 0-4 of <pobs> are header fields; the replica arrays follow.
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # deltas is indexed [replica][observable]; build one Obs per observable.
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res

Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

Tags are not written or recovered automatically.

Parameters
  • fname (str): Filename of the input file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • separator_insertion (str or int): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. None (default): Replica names remain unchanged.
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    content : str
        XML string containing the data. Bytes are accepted as well.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion: str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The list of Obs, or, if full_output is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description',
        'enstags' and 'obsdata' (the list of Obs).
    """

    # lxml refuses str input that carries an XML encoding declaration (as
    # the strings produced by create_dobs_string do), so hand it bytes.
    if isinstance(content, str):
        content = content.encode('utf-8')
    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    # Children 0-5 of <dobs> are header fields; edata/cdata blocks follow.
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # False has to be tested explicitly and before the int
                # branch: bool is a subclass of int, so False would otherwise
                # be treated as position 0 and prepend the separator.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # The file stores fluctuations around the mean; add the mean back.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # A replica whose samples all equal the mean does not contribute.
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
        res[-1]._value = mean[i]
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # create_dobs_string writes the symbols separated by single spaces,
        # so split the string instead of indexing its characters.
        tags = symbol.split()
        if len(tags) == len(res):
            for o, tag in zip(res, tags):
                o.tag = None if tag == 'None' else tag
        else:
            warnings.warn("Found %d symbols for %d observables, tags are not set." % (len(tags), len(res)), UserWarning)
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res

Import a list of Obs from a string in the Zeuthen dobs format.

Tags are not written or recovered automatically.

Parameters
  • content (str): XML string containing the data
  • noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file, possibly with vanishing entries.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
def read_dobs( fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends in
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        Whatever import_dobs_string returns for the file content.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes, exactly like the gzip branch: the parser then
        # honours the encoding declared in the XML header instead of the
        # platform's locale encoding, and is never handed a str that carries
        # an encoding declaration.
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)

Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

Tags are not written or recovered automatically.

Parameters
  • fname (str): Filename of the input file.
  • noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • gz (bool): If True, assumes that data is gzipped. If False, assumes XML file.
  • separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
def create_dobs_string( obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The XML document, including the XML declaration.

    Raises
    ------
    Exception
        If symbol is non-empty and not a list of length len(obsl), or if two
        Obs carry different covariance matrices under the same name.
    """
    def _fmt(num):
        # Exact zeros are written in short form to keep the file small.
        return '0 ' if num == 0 else '%1.16e ' % (num)

    # Work on a copy so that the caller's dict is not extended with the
    # default tags added below.
    enstags = {} if enstags is None else dict(enstags)
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        # str(datetime) ends in '.ffffff'; drop the microseconds.
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
        pd['array']['symbol'] = osymbol

    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ename in mc_names:
        ed = {}
        ed['enstag'] = enstags[ename]
        onames = sorted([n for n in r_names if (n.startswith(ename + '|') or n == ename)])
        nr = len(onames)
        ed['nr'] = nr
        ed[''] = []

        for r in range(nr):
            ad = {}
            repname = onames[r]
            ad['id'] = repname.replace('|', '')
            # Union of the configurations of all Obs on this replica.
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
            pieces = []
            # counters[oi] is the position of the next unwritten sample of
            # Obs oi on this replica; -1 marks an exhausted Obs.
            counters = [0 for o in obsl]
            offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl]
            for ci in idx:
                pieces.append('%d ' % ci)
                for oi in range(len(obsl)):
                    o = obsl[oi]
                    if repname not in o.idl:
                        pieces.append('0 ')
                        continue
                    if counters[oi] < 0:
                        pieces.append(_fmt(offsets[oi]))
                        continue
                    if o.idl[repname][counters[oi]] == ci:
                        pieces.append(_fmt(o.deltas[repname][counters[oi]] + offsets[oi]))
                        counters[oi] += 1
                        if counters[oi] >= len(o.idl[repname]):
                            counters[oi] = -1
                    else:
                        # Configuration not present in this Obs: only the
                        # replica offset is written.
                        pieces.append(_fmt(offsets[oi]))
                pieces.append('\n')
            ad['#data'] = ''.join(pieces)
            ed[''].append(ad)
        pd['edata'].append(ed)

    # The covariance section does not depend on the ensembles, so it is
    # built once after the ensemble loop. This way it is also written when
    # the Obs carry covariance data only and mc_names is empty.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs

Generate the string for the export of a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • who (str): Provide the name of the person that exports the data.
  • enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag}. Otherwise, the ensemble name is used.
def write_dobs( obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends in neither
        '.xml' nor '.gz'; '.gz' is appended in addition if `gz` is True and the name
        does not already end in '.gz'.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML file. If False, the output is an XML file.

    Returns
    -------
    None
        The serialized string is written to disk; nothing is returned.
    """
    if enstags is None:
        enstags = {}

    # `symbol` is only forwarded and never mutated in this function, so the
    # shared default list is kept to leave the public signature untouched.
    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        # Context manager guarantees the handle is closed even if the write fails.
        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)

Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
  • fname (str): Filename of the output file.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • who (str): Provide the name of the person that exports the data.
  • enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
  • gz (bool): If True, the output is a gzipped XML file. If False, the output is an XML file.