pyerrors.input.json

View Source
  0import json
  1import gzip
  2import getpass
  3import socket
  4import datetime
  5import platform
  6import warnings
  7import re
  8import gc
  9import numpy as np
 10from ..obs import Obs
 11from ..covobs import Covobs
 12from ..correlators import Corr
 13from ..misc import _assert_equal_properties
 14from .. import version as pyerrorsversion
 15
 16
 17def create_json_string(ol, description='', indent=1):
 18    """Generate the string for the export of a list of Obs or structures containing Obs
 19    to a .json(.gz) file
 20
 21    Parameters
 22    ----------
 23    ol : list
 24        List of objects that will be exported. At the moment, these objects can be
 25        either of: Obs, list, numpy.ndarray, Corr.
 26        All Obs inside a structure have to be defined on the same set of configurations.
 27    description : str
 28        Optional string that describes the contents of the json file.
 29    indent : int
 30        Specify the indentation level of the json file. None or 0 is permissible and
 31        saves disk space.
 32    """
 33
 34    def _default(self, obj):
 35        return str(obj)
 36    my_encoder = json.JSONEncoder
 37    _default.default = json.JSONEncoder().default
 38    my_encoder.default = _default
 39
 40    class Deltalist:
 41        __slots__ = ['cnfg', 'deltas']
 42
 43        def __init__(self, li):
 44            self.cnfg = li[0]
 45            self.deltas = li[1:]
 46
 47        def __repr__(self):
 48            s = '[%d' % (self.cnfg)
 49            for d in self.deltas:
 50                s += ', %1.15e' % (d)
 51            s += ']'
 52            return s
 53
 54        def __str__(self):
 55            return self.__repr__()
 56
 57    class Floatlist:
 58        __slots__ = ['li']
 59
 60        def __init__(self, li):
 61            self.li = list(li)
 62
 63        def __repr__(self):
 64            s = '['
 65            for i in range(len(self.li)):
 66                if i > 0:
 67                    s += ', '
 68                s += '%1.15e' % (self.li[i])
 69            s += ']'
 70            return s
 71
 72        def __str__(self):
 73            return self.__repr__()
 74
 75    def _gen_data_d_from_list(ol):
 76        dl = []
 77        for name in ol[0].mc_names:
 78            ed = {}
 79            ed['id'] = name
 80            ed['replica'] = []
 81            for r_name in ol[0].e_content[name]:
 82                rd = {}
 83                rd['name'] = r_name
 84                if ol[0].is_merged.get(r_name, False):
 85                    rd['is_merged'] = True
 86                rd['deltas'] = []
 87                for i in range(len(ol[0].idl[r_name])):
 88                    rd['deltas'].append([ol[0].idl[r_name][i]])
 89                    for o in ol:
 90                        rd['deltas'][-1].append(o.deltas[r_name][i])
 91                    rd['deltas'][-1] = Deltalist(rd['deltas'][-1])
 92                ed['replica'].append(rd)
 93            dl.append(ed)
 94        return dl
 95
 96    def _gen_cdata_d_from_list(ol):
 97        dl = []
 98        for name in ol[0].cov_names:
 99            ed = {}
100            ed['id'] = name
101            ed['layout'] = str(ol[0].covobs[name].cov.shape).lstrip('(').rstrip(')').rstrip(',')
102            ed['cov'] = Floatlist(np.ravel(ol[0].covobs[name].cov))
103            ncov = ol[0].covobs[name].cov.shape[0]
104            ed['grad'] = []
105            for i in range(ncov):
106                ed['grad'].append([])
107                for o in ol:
108                    ed['grad'][-1].append(o.covobs[name].grad[i][0])
109                ed['grad'][-1] = Floatlist(ed['grad'][-1])
110            dl.append(ed)
111        return dl
112
113    def write_Obs_to_dict(o):
114        d = {}
115        d['type'] = 'Obs'
116        d['layout'] = '1'
117        if o.tag:
118            d['tag'] = [o.tag]
119        if o.reweighted:
120            d['reweighted'] = o.reweighted
121        d['value'] = [o.value]
122        data = _gen_data_d_from_list([o])
123        if len(data) > 0:
124            d['data'] = data
125        cdata = _gen_cdata_d_from_list([o])
126        if len(cdata) > 0:
127            d['cdata'] = cdata
128        return d
129
130    def write_List_to_dict(ol):
131        _assert_equal_properties(ol)
132        d = {}
133        d['type'] = 'List'
134        d['layout'] = '%d' % len(ol)
135        taglist = [o.tag for o in ol]
136        if np.any([tag is not None for tag in taglist]):
137            d['tag'] = taglist
138        if ol[0].reweighted:
139            d['reweighted'] = ol[0].reweighted
140        d['value'] = [o.value for o in ol]
141        data = _gen_data_d_from_list(ol)
142        if len(data) > 0:
143            d['data'] = data
144        cdata = _gen_cdata_d_from_list(ol)
145        if len(cdata) > 0:
146            d['cdata'] = cdata
147        return d
148
149    def write_Array_to_dict(oa):
150        ol = np.ravel(oa)
151        _assert_equal_properties(ol)
152        d = {}
153        d['type'] = 'Array'
154        d['layout'] = str(oa.shape).lstrip('(').rstrip(')').rstrip(',')
155        taglist = [o.tag for o in ol]
156        if np.any([tag is not None for tag in taglist]):
157            d['tag'] = taglist
158        if ol[0].reweighted:
159            d['reweighted'] = ol[0].reweighted
160        d['value'] = [o.value for o in ol]
161        data = _gen_data_d_from_list(ol)
162        if len(data) > 0:
163            d['data'] = data
164        cdata = _gen_cdata_d_from_list(ol)
165        if len(cdata) > 0:
166            d['cdata'] = cdata
167        return d
168
169    def _nan_Obs_like(obs):
170        samples = []
171        names = []
172        idl = []
173        for key, value in obs.idl.items():
174            samples.append([np.nan] * len(value))
175            names.append(key)
176            idl.append(value)
177        my_obs = Obs(samples, names, idl)
178        my_obs._covobs = obs._covobs
179        for name in obs._covobs:
180            my_obs.names.append(name)
181        my_obs.reweighted = obs.reweighted
182        my_obs.is_merged = obs.is_merged
183        return my_obs
184
185    def write_Corr_to_dict(my_corr):
186        first_not_none = next(i for i, j in enumerate(my_corr.content) if np.all(j))
187        dummy_array = np.empty((my_corr.N, my_corr.N), dtype=object)
188        dummy_array[:] = _nan_Obs_like(my_corr.content[first_not_none].ravel()[0])
189        content = [o if o is not None else dummy_array for o in my_corr.content]
190        dat = write_Array_to_dict(np.array(content, dtype=object))
191        dat['type'] = 'Corr'
192        corr_meta_data = str(my_corr.tag)
193        if 'tag' in dat.keys():
194            dat['tag'].append(corr_meta_data)
195        else:
196            dat['tag'] = [corr_meta_data]
197        taglist = dat['tag']
198        dat['tag'] = {}  # tag is now a dictionary, that contains the previous taglist in the key "tag"
199        dat['tag']['tag'] = taglist
200        if my_corr.prange is not None:
201            dat['tag']['prange'] = my_corr.prange
202        return dat
203
204    if not isinstance(ol, list):
205        ol = [ol]
206
207    d = {}
208    d['program'] = 'pyerrors %s' % (pyerrorsversion.__version__)
209    d['version'] = '1.0'
210    d['who'] = getpass.getuser()
211    d['date'] = datetime.datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %z')
212    d['host'] = socket.gethostname() + ', ' + platform.platform()
213
214    if description:
215        d['description'] = description
216    d['obsdata'] = []
217    for io in ol:
218        if isinstance(io, Obs):
219            d['obsdata'].append(write_Obs_to_dict(io))
220        elif isinstance(io, list):
221            d['obsdata'].append(write_List_to_dict(io))
222        elif isinstance(io, np.ndarray):
223            d['obsdata'].append(write_Array_to_dict(io))
224        elif isinstance(io, Corr):
225            d['obsdata'].append(write_Corr_to_dict(io))
226        else:
227            raise Exception("Unkown datatype.")
228
229    jsonstring = ''
230    for chunk in my_encoder(indent=indent, ensure_ascii=False).iterencode(d):
231        jsonstring += chunk
232
233    del d
234    gc.collect()
235
236    def remove_quotationmarks_split(split):
237        """Workaround for un-quoting of delta lists, adds 5% of work
238           but is save, compared to a simple replace that could destroy the structure
239        """
240        deltas = False
241        for i in range(len(split)):
242            if '"deltas":' in split[i] or '"cov":' in split[i] or '"grad":' in split[i]:
243                deltas = True
244            if deltas:
245                split[i] = split[i].replace('"[', '[').replace(']"', ']')
246                if split[i][-1] == ']':
247                    deltas = False
248        return '\n'.join(split)
249
250    jsonstring = jsonstring.split('\n')
251    jsonstring = remove_quotationmarks_split(jsonstring)
252    jsonstring = jsonstring.replace('nan', 'NaN')
253    return jsonstring
254
255
def dump_to_json(ol, fname, description='', indent=1, gz=True):
    """Export a list of Obs or structures containing Obs to a .json(.gz) file

    Parameters
    ----------
    ol : list
        List of objects that will be exported. At the moment, these objects can be
        either of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
    fname : str
        Filename of the output file. '.json' is appended if the name ends with
        neither '.json' nor '.gz'; '.gz' is appended additionally if gz is True
        and the name does not end with '.gz'.
    description : str
        Optional string that describes the contents of the json file.
    indent : int
        Specify the indentation level of the json file. None or 0 is permissible and
        saves disk space.
    gz : bool
        If True, the output is a gzipped json. If False, the output is a json file.
    """

    jsonstring = create_json_string(ol, description, indent)

    if not fname.endswith(('.json', '.gz')):
        fname += '.json'

    # Context managers make sure the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(jsonstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(jsonstring)
291
292
def _parse_json_dict(json_dict, verbose=True, full_output=False):
    """Reconstruct a list of Obs or structures containing Obs from a dict that
    was built out of a json string.

    The following structures are supported: Obs, list, numpy.ndarray, Corr
    If the list contains only one element, it is unpacked from the list.

    Parameters
    ----------
    json_dict : dict
        Dictionary that was obtained by parsing the json string containing the data.
        It is not modified.
    verbose : bool
        Print additional information that was written to the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.

    Raises
    ------
    Exception
        If an entry of 'obsdata' has an unknown type, if an entry of type Obs
        has a layout different from '1' or if the tag of a Corr is neither a
        list nor a dict.
    """

    def _gen_obsd_from_datad(d):
        # Collect names, idl and deltas of all replica of all ensembles.
        retd = {}
        if d:
            retd['names'] = []
            retd['idl'] = []
            retd['deltas'] = []
            retd['is_merged'] = {}
            for ens in d:
                ens_len = len(ens["id"])
                for rep in ens['replica']:
                    rep_name = rep['name']
                    # Insert the separator '|' between ensemble id and replica
                    # suffix if the stored name does not contain it.
                    if len(rep_name) > ens_len and rep_name[ens_len] != "|":
                        rep_name = rep_name[:ens_len] + "|" + rep_name[ens_len:]
                    retd['names'].append(rep_name)
                    retd['idl'].append([di[0] for di in rep['deltas']])
                    retd['deltas'].append(np.array([di[1:] for di in rep['deltas']]))
                    retd['is_merged'][rep_name] = rep.get('is_merged', False)
        return retd

    def _gen_covobsd_from_cdatad(d):
        # Maps the name of each covobs to a list with one entry per Obs of the structure.
        retd = {}
        for ens in d:
            name = ens['id']
            layouts = ens.get('layout', '1').strip()
            layout = [int(ls.strip()) for ls in layouts.split(',') if len(ls) > 0]
            cov = np.reshape(ens['cov'], layout)
            grad = ens['grad']
            nobs = len(grad[0])
            retd[name] = [{'name': name, 'cov': cov, 'grad': [g[i] for g in grad]} for i in range(nobs)]
        return retd

    def _attach_covobs(obs, cd, i):
        # Attach the i-th entry of every covobs to obs.
        for name in cd:
            co = cd[name][i]
            obs._covobs[name] = Covobs(None, co['cov'], co['name'], grad=co['grad'])
            obs.names.append(co['name'])

    def _obs_from_value(value):
        # Obs without Monte Carlo data: only the central value is set.
        obs = Obs([], [], means=[])
        obs._value = value
        return obs

    def get_Obs_from_dict(o):
        layouts = o.get('layout', '1').strip()
        if layouts != '1':
            raise Exception("layout is %s has to be 1 for type Obs." % (layouts))

        values = o['value']
        od = _gen_obsd_from_datad(o.get('data', {}))
        cd = _gen_covobsd_from_cdatad(o.get('cdata', {}))

        if od:
            ret = Obs([[ddi[0] + values[0] for ddi in di] for di in od['deltas']], od['names'], idl=od['idl'])
            ret.is_merged = od['is_merged']
        else:
            ret = _obs_from_value(values[0])
        _attach_covobs(ret, cd, 0)

        ret.reweighted = o.get('reweighted', False)
        ret.tag = o.get('tag', [None])[0]
        return ret

    def get_List_from_dict(o):
        layouts = o.get('layout', '1').strip()
        layout = int(layouts)
        values = o['value']
        od = _gen_obsd_from_datad(o.get('data', {}))
        cd = _gen_covobsd_from_cdatad(o.get('cdata', {}))

        ret = []
        taglist = o.get('tag', layout * [None])
        for i in range(layout):
            if od:
                obs = Obs([list(di[:, i] + values[i]) for di in od['deltas']], od['names'], idl=od['idl'])
                obs.is_merged = od['is_merged']
            else:
                obs = _obs_from_value(values[i])
                if verbose:
                    print('Created Obs with means= ', values[i])
            _attach_covobs(obs, cd, i)
            obs.reweighted = o.get('reweighted', False)
            obs.tag = taglist[i]
            ret.append(obs)
        return ret

    def get_Array_from_dict(o):
        layouts = o.get('layout', '1').strip()
        layout = [int(ls.strip()) for ls in layouts.split(',') if len(ls) > 0]
        N = int(np.prod(layout))
        values = o['value']
        od = _gen_obsd_from_datad(o.get('data', {}))
        cd = _gen_covobsd_from_cdatad(o.get('cdata', {}))

        ret = []
        taglist = o.get('tag', N * [None])
        for i in range(N):
            if od:
                obs = Obs([di[:, i] + values[i] for di in od['deltas']], od['names'], idl=od['idl'])
                obs.is_merged = od['is_merged']
            else:
                obs = _obs_from_value(values[i])
            _attach_covobs(obs, cd, i)
            obs.reweighted = o.get('reweighted', False)
            obs.tag = taglist[i]
            ret.append(obs)
        return np.reshape(ret, layout)

    def get_Corr_from_dict(o):
        tag = o.get('tag')
        if isinstance(tag, list):  # supports the old way
            taglist = tag
            temp_prange = None
        elif isinstance(tag, dict):
            taglist = tag['tag']
            temp_prange = tag.get('prange')
        else:
            raise Exception("The tag is not a list or dict")

        # The last entry of the taglist is the tag of the Corr itself, the
        # remaining entries belong to the Obs. Work on a shallow copy so that
        # the dictionary of the caller is left untouched.
        corr_tag = taglist[-1]
        tmp_o = dict(o)
        tmp_o['tag'] = taglist[:-1]
        if len(tmp_o['tag']) == 0:
            del tmp_o['tag']
        dat = get_Array_from_dict(tmp_o)
        # Entries whose first Obs is NaN are treated as missing and become None.
        my_corr = Corr([None if np.isnan(entry.ravel()[0].value) else entry for entry in list(dat)])
        if corr_tag != 'None':
            my_corr.tag = corr_tag

        my_corr.prange = temp_prange
        return my_corr

    prog = json_dict.get('program', '')
    version = json_dict.get('version', '')
    who = json_dict.get('who', '')
    date = json_dict.get('date', '')
    host = json_dict.get('host', '')
    if prog and verbose:
        print('Data has been written using %s.' % (prog))
    if version and verbose:
        print('Format version %s' % (version))
    if verbose and any([who, date, host]):
        print('Written by %s on %s on host %s' % (who, date, host))
    description = json_dict.get('description', '')
    if description and verbose:
        print()
        print('Description: ', description)
    obsdata = json_dict['obsdata']
    ol = []
    for io in obsdata:
        if io['type'] == 'Obs':
            ol.append(get_Obs_from_dict(io))
        elif io['type'] == 'List':
            ol.append(get_List_from_dict(io))
        elif io['type'] == 'Array':
            ol.append(get_Array_from_dict(io))
        elif io['type'] == 'Corr':
            ol.append(get_Corr_from_dict(io))
        else:
            raise Exception("Unkown datatype.")

    if full_output:
        retd = {}
        retd['program'] = prog
        retd['version'] = version
        retd['who'] = who
        retd['date'] = date
        retd['host'] = host
        retd['description'] = description
        retd['obsdata'] = ol

        return retd
    else:
        if len(obsdata) == 1:
            ol = ol[0]

        return ol
494
495
def import_json_string(json_string, verbose=True, full_output=False):
    """Reconstruct a list of Obs or structures containing Obs from a json string.

    The string is parsed with the json module and the resulting dict is
    converted by the same routine that is used for files.

    The following structures are supported: Obs, list, numpy.ndarray, Corr
    If the list contains only one element, it is unpacked from the list.

    Parameters
    ----------
    json_string : str
        json string containing the data.
    verbose : bool
        Print additional information that was written to the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    """
    parsed = json.loads(json_string)
    return _parse_json_dict(parsed, verbose, full_output)
514
515
def load_json(fname, verbose=True, gz=True, full_output=False):
    """Import a list of Obs or structures containing Obs from a .json(.gz) file.

    The following structures are supported: Obs, list, numpy.ndarray, Corr
    If the list contains only one element, it is unpacked from the list.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.json' is appended if the name ends with
        neither '.json' nor '.gz'; '.gz' is appended additionally if gz is True
        and the name does not end with '.gz'.
    verbose : bool
        Print additional information that was written to the file.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes JSON file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    """
    has_known_suffix = fname.endswith('.json') or fname.endswith('.gz')
    if not has_known_suffix:
        fname += '.json'

    if not gz:
        # Reading a '.gz' file as plain text is most likely a mistake of the caller.
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'r', encoding='utf-8') as fin:
            d = json.loads(fin.read())
    else:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            d = json.load(fin)

    return _parse_json_dict(d, verbose, full_output)
548
549
def _ol_from_dict(ind, reps='DICTOBS'):
    """Convert a dictionary of Obs objects to a list and a dictionary that contains
    placeholders instead of the Obs objects.

    A non-empty list of Obs that is a direct value of a (nested) dict is
    exported as one structure. Lists that are elements of other lists are
    traversed element by element. Empty lists are kept as empty lists.

    Parameters
    ----------
    ind : dict
        Dict of JSON valid structures and objects that will be exported.
        At the moment, these object can be either of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
    reps : str
        Specify the structure of the placeholder in exported dict to be reps[0-9]+.

    Returns
    -------
    ol : list
        The extracted objects; the placeholder reps + str(i) refers to ol[i].
    nd : dict
        Copy of ind in which the extracted objects are replaced by placeholders.

    Raises
    ------
    Exception
        If reps is not alphanumeric or if ind contains a string that starts
        with a valid placeholder.
    """

    obstypes = (Obs, Corr, np.ndarray)

    if not reps.isalnum():
        raise Exception('Placeholder string has to be alphanumeric!')
    ol = []
    placeholder = re.compile(r'%s[0-9]+' % (reps))

    def register(obj):
        # Store obj and return the placeholder that refers to its position in ol.
        ol.append(obj)
        return reps + '%d' % (len(ol) - 1)

    def replace(item, dict_value):
        if isinstance(item, dict):
            return {k: replace(v, True) for k, v in item.items()}
        if isinstance(item, list):
            # An empty list must not be registered as a list of Obs: it
            # cannot be written as a structure and is kept as a plain list.
            if dict_value and item and all(isinstance(o, Obs) for o in item):
                return register(list(item))
            return [replace(e, False) for e in item]
        if isinstance(item, obstypes):
            return register(item)
        if isinstance(item, str) and placeholder.match(item):
            raise Exception('Dict contains string %s that matches the placeholder! %s Cannot be savely exported.' % (item, reps))
        return item

    nd = {k: replace(v, True) for k, v in ind.items()}

    return ol, nd
628
629
def dump_dict_to_json(od, fname, description='', indent=1, reps='DICTOBS', gz=True):
    """Export a dict of Obs or structures containing Obs to a .json(.gz) file

    The structures inside the dict are replaced by placeholders and written
    as a list. The dict with the placeholders is stored, together with an
    explanatory text and the user description, in the description field of
    the file.

    Parameters
    ----------
    od : dict
        Dict of JSON valid structures and objects that will be exported.
        At the moment, these objects can be either of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
    fname : str
        Filename of the output file.
    description : str
        Optional string that describes the contents of the json file.
    indent : int
        Specify the indentation level of the json file. None or 0 is permissible and
        saves disk space.
    reps : str
        Specify the structure of the placeholder in exported dict to be reps[0-9]+.
    gz : bool
        If True, the output is a gzipped json. If False, the output is a json file.
    """

    if not isinstance(od, dict):
        raise Exception('od has to be a dictionary. Did you want to use dump_to_json?')

    info_parts = [
        'This JSON file contains a python dictionary that has been parsed to a list of structures. ',
        'OBSDICT contains the dictionary, where Obs or other structures have been replaced by ',
        reps,
        '[0-9]+. The field description contains the additional description of this JSON file. ',
        'This file may be parsed to a dict with the pyerrors routine load_json_dict.',
    ]
    infostring = ''.join(info_parts)

    ol, obsdict = _ol_from_dict(od, reps=reps)
    desc_dict = {'INFO': infostring, 'OBSDICT': obsdict, 'description': description}

    dump_to_json(ol, fname, description=desc_dict, indent=indent, gz=gz)
664
665
def _od_from_list_and_dict(ol, ind, reps='DICTOBS'):
    """Parse a list of Obs or structures containing Obs and an accompanying
    dict, where the structures have been replaced by placeholders to a
    dict that contains the structures.

    The following structures are supported: Obs, list, numpy.ndarray, Corr

    Parameters
    ----------
    ol : list
        List of objects -
        At the moment, these objects can be either of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
    ind : dict
        Dict that defines the structure of the resulting dict and contains placeholders
    reps : str
        Specify the structure of the placeholder in imported dict to be reps[0-9]+.

    Returns
    -------
    dict
        Copy of ind in which every placeholder reps + str(i) is replaced by ol[i].

    Raises
    ------
    Exception
        If reps is not alphanumeric or if no placeholder has been replaced.
    """
    if not reps.isalnum():
        raise Exception('Placeholder string has to be alphanumeric!')

    # Only strings that consist of the placeholder alone are replaced. A
    # prefix match is not sufficient, because the remainder of the string
    # would not be a valid index.
    placeholder = re.compile(r'%s([0-9]+)' % (reps))
    counter = 0

    def resolve(item):
        nonlocal counter
        if isinstance(item, dict):
            return {k: resolve(v) for k, v in item.items()}
        if isinstance(item, list):
            return [resolve(e) for e in item]
        if isinstance(item, str):
            found = placeholder.fullmatch(item)
            if found:
                obj = ol[int(found.group(1))]
                counter += 1
                return obj
        return item

    nd = {k: resolve(v) for k, v in ind.items()}

    if counter == 0:
        raise Exception('No placeholder has been replaced! Check if reps is set correctly.')

    return nd
727
728
def load_json_dict(fname, verbose=True, gz=True, full_output=False, reps='DICTOBS'):
    """Import a dict of Obs or structures containing Obs from a .json(.gz) file.

    The description field of the file is expected to be a dict with the
    keys 'description' and 'OBSDICT'.

    The following structures are supported: Obs, list, numpy.ndarray, Corr

    Parameters
    ----------
    fname : str
        Filename of the input file.
    verbose : bool
        Print additional information that was written to the file.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes JSON file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    reps : str
        Specify the structure of the placeholder in imported dict to be reps[0-9]+.
    """
    indata = load_json(fname, verbose=verbose, gz=gz, full_output=True)

    meta = indata['description']
    description = meta['description']
    indict = meta['OBSDICT']
    od = _od_from_list_and_dict(indata['obsdata'], indict, reps=reps)

    if not full_output:
        return od

    # Replace the internal bookkeeping by the user description and the
    # reconstructed dict.
    indata['description'] = description
    indata['obsdata'] = od
    return indata
#   def create_json_string(ol, description='', indent=1):
View Source
 18def create_json_string(ol, description='', indent=1):
 19    """Generate the string for the export of a list of Obs or structures containing Obs
 20    to a .json(.gz) file
 21
 22    Parameters
 23    ----------
 24    ol : list
 25        List of objects that will be exported. At the moment, these objects can be
 26        either of: Obs, list, numpy.ndarray, Corr.
 27        All Obs inside a structure have to be defined on the same set of configurations.
 28    description : str
 29        Optional string that describes the contents of the json file.
 30    indent : int
 31        Specify the indentation level of the json file. None or 0 is permissible and
 32        saves disk space.
 33    """
 34
 35    def _default(self, obj):
 36        return str(obj)
 37    my_encoder = json.JSONEncoder
 38    _default.default = json.JSONEncoder().default
 39    my_encoder.default = _default
 40
 41    class Deltalist:
 42        __slots__ = ['cnfg', 'deltas']
 43
 44        def __init__(self, li):
 45            self.cnfg = li[0]
 46            self.deltas = li[1:]
 47
 48        def __repr__(self):
 49            s = '[%d' % (self.cnfg)
 50            for d in self.deltas:
 51                s += ', %1.15e' % (d)
 52            s += ']'
 53            return s
 54
 55        def __str__(self):
 56            return self.__repr__()
 57
 58    class Floatlist:
 59        __slots__ = ['li']
 60
 61        def __init__(self, li):
 62            self.li = list(li)
 63
 64        def __repr__(self):
 65            s = '['
 66            for i in range(len(self.li)):
 67                if i > 0:
 68                    s += ', '
 69                s += '%1.15e' % (self.li[i])
 70            s += ']'
 71            return s
 72
 73        def __str__(self):
 74            return self.__repr__()
 75
 76    def _gen_data_d_from_list(ol):
 77        dl = []
 78        for name in ol[0].mc_names:
 79            ed = {}
 80            ed['id'] = name
 81            ed['replica'] = []
 82            for r_name in ol[0].e_content[name]:
 83                rd = {}
 84                rd['name'] = r_name
 85                if ol[0].is_merged.get(r_name, False):
 86                    rd['is_merged'] = True
 87                rd['deltas'] = []
 88                for i in range(len(ol[0].idl[r_name])):
 89                    rd['deltas'].append([ol[0].idl[r_name][i]])
 90                    for o in ol:
 91                        rd['deltas'][-1].append(o.deltas[r_name][i])
 92                    rd['deltas'][-1] = Deltalist(rd['deltas'][-1])
 93                ed['replica'].append(rd)
 94            dl.append(ed)
 95        return dl
 96
 97    def _gen_cdata_d_from_list(ol):
 98        dl = []
 99        for name in ol[0].cov_names:
100            ed = {}
101            ed['id'] = name
102            ed['layout'] = str(ol[0].covobs[name].cov.shape).lstrip('(').rstrip(')').rstrip(',')
103            ed['cov'] = Floatlist(np.ravel(ol[0].covobs[name].cov))
104            ncov = ol[0].covobs[name].cov.shape[0]
105            ed['grad'] = []
106            for i in range(ncov):
107                ed['grad'].append([])
108                for o in ol:
109                    ed['grad'][-1].append(o.covobs[name].grad[i][0])
110                ed['grad'][-1] = Floatlist(ed['grad'][-1])
111            dl.append(ed)
112        return dl
113
114    def write_Obs_to_dict(o):
115        d = {}
116        d['type'] = 'Obs'
117        d['layout'] = '1'
118        if o.tag:
119            d['tag'] = [o.tag]
120        if o.reweighted:
121            d['reweighted'] = o.reweighted
122        d['value'] = [o.value]
123        data = _gen_data_d_from_list([o])
124        if len(data) > 0:
125            d['data'] = data
126        cdata = _gen_cdata_d_from_list([o])
127        if len(cdata) > 0:
128            d['cdata'] = cdata
129        return d
130
131    def write_List_to_dict(ol):
132        _assert_equal_properties(ol)
133        d = {}
134        d['type'] = 'List'
135        d['layout'] = '%d' % len(ol)
136        taglist = [o.tag for o in ol]
137        if np.any([tag is not None for tag in taglist]):
138            d['tag'] = taglist
139        if ol[0].reweighted:
140            d['reweighted'] = ol[0].reweighted
141        d['value'] = [o.value for o in ol]
142        data = _gen_data_d_from_list(ol)
143        if len(data) > 0:
144            d['data'] = data
145        cdata = _gen_cdata_d_from_list(ol)
146        if len(cdata) > 0:
147            d['cdata'] = cdata
148        return d
149
150    def write_Array_to_dict(oa):
151        ol = np.ravel(oa)
152        _assert_equal_properties(ol)
153        d = {}
154        d['type'] = 'Array'
155        d['layout'] = str(oa.shape).lstrip('(').rstrip(')').rstrip(',')
156        taglist = [o.tag for o in ol]
157        if np.any([tag is not None for tag in taglist]):
158            d['tag'] = taglist
159        if ol[0].reweighted:
160            d['reweighted'] = ol[0].reweighted
161        d['value'] = [o.value for o in ol]
162        data = _gen_data_d_from_list(ol)
163        if len(data) > 0:
164            d['data'] = data
165        cdata = _gen_cdata_d_from_list(ol)
166        if len(cdata) > 0:
167            d['cdata'] = cdata
168        return d
169
170    def _nan_Obs_like(obs):
171        samples = []
172        names = []
173        idl = []
174        for key, value in obs.idl.items():
175            samples.append([np.nan] * len(value))
176            names.append(key)
177            idl.append(value)
178        my_obs = Obs(samples, names, idl)
179        my_obs._covobs = obs._covobs
180        for name in obs._covobs:
181            my_obs.names.append(name)
182        my_obs.reweighted = obs.reweighted
183        my_obs.is_merged = obs.is_merged
184        return my_obs
185
186    def write_Corr_to_dict(my_corr):
187        first_not_none = next(i for i, j in enumerate(my_corr.content) if np.all(j))
188        dummy_array = np.empty((my_corr.N, my_corr.N), dtype=object)
189        dummy_array[:] = _nan_Obs_like(my_corr.content[first_not_none].ravel()[0])
190        content = [o if o is not None else dummy_array for o in my_corr.content]
191        dat = write_Array_to_dict(np.array(content, dtype=object))
192        dat['type'] = 'Corr'
193        corr_meta_data = str(my_corr.tag)
194        if 'tag' in dat.keys():
195            dat['tag'].append(corr_meta_data)
196        else:
197            dat['tag'] = [corr_meta_data]
198        taglist = dat['tag']
199        dat['tag'] = {}  # tag is now a dictionary, that contains the previous taglist in the key "tag"
200        dat['tag']['tag'] = taglist
201        if my_corr.prange is not None:
202            dat['tag']['prange'] = my_corr.prange
203        return dat
204
205    if not isinstance(ol, list):
206        ol = [ol]
207
208    d = {}
209    d['program'] = 'pyerrors %s' % (pyerrorsversion.__version__)
210    d['version'] = '1.0'
211    d['who'] = getpass.getuser()
212    d['date'] = datetime.datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %z')
213    d['host'] = socket.gethostname() + ', ' + platform.platform()
214
215    if description:
216        d['description'] = description
217    d['obsdata'] = []
218    for io in ol:
219        if isinstance(io, Obs):
220            d['obsdata'].append(write_Obs_to_dict(io))
221        elif isinstance(io, list):
222            d['obsdata'].append(write_List_to_dict(io))
223        elif isinstance(io, np.ndarray):
224            d['obsdata'].append(write_Array_to_dict(io))
225        elif isinstance(io, Corr):
226            d['obsdata'].append(write_Corr_to_dict(io))
227        else:
228            raise Exception("Unkown datatype.")
229
230    jsonstring = ''
231    for chunk in my_encoder(indent=indent, ensure_ascii=False).iterencode(d):
232        jsonstring += chunk
233
234    del d
235    gc.collect()
236
237    def remove_quotationmarks_split(split):
238        """Workaround for un-quoting of delta lists, adds 5% of work
239           but is save, compared to a simple replace that could destroy the structure
240        """
241        deltas = False
242        for i in range(len(split)):
243            if '"deltas":' in split[i] or '"cov":' in split[i] or '"grad":' in split[i]:
244                deltas = True
245            if deltas:
246                split[i] = split[i].replace('"[', '[').replace(']"', ']')
247                if split[i][-1] == ']':
248                    deltas = False
249        return '\n'.join(split)
250
251    jsonstring = jsonstring.split('\n')
252    jsonstring = remove_quotationmarks_split(jsonstring)
253    jsonstring = jsonstring.replace('nan', 'NaN')
254    return jsonstring

Generate the string for the export of a list of Obs or structures containing Obs to a .json(.gz) file

Parameters
  • ol (list): List of objects that will be exported. At the moment, these objects can be either of: Obs, list, numpy.ndarray, Corr. All Obs inside a structure have to be defined on the same set of configurations.
  • description (str): Optional string that describes the contents of the json file.
  • indent (int): Specify the indentation level of the json file. None or 0 is permissible and saves disk space.
#   def dump_to_json(ol, fname, description='', indent=1, gz=True):
View Source
def dump_to_json(ol, fname, description='', indent=1, gz=True):
    """Export a list of Obs or structures containing Obs to a .json(.gz) file.

    Parameters
    ----------
    ol : list
        List of objects that will be exported. At the moment, these objects can be
        any of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
    fname : str
        Filename of the output file. '.json' is appended when the name ends in
        neither '.json' nor '.gz'; if gz is True, '.gz' is appended as well
        unless the name already ends in '.gz'.
    description : str
        Optional string that describes the contents of the json file.
    indent : int
        Specify the indentation level of the json file. None or 0 is permissible and
        saves disk space.
    gz : bool
        If True, the output is a gzipped json. If False, the output is a json file.
    """

    jsonstring = create_json_string(ol, description, indent)

    if not fname.endswith(('.json', '.gz')):
        fname += '.json'

    # The context managers guarantee that the file handle is closed even if
    # encoding or writing raises, instead of leaking the open handle.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(jsonstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(jsonstring)

Export a list of Obs or structures containing Obs to a .json(.gz) file

Parameters
  • ol (list): List of objects that will be exported. At the moment, these objects can be either of: Obs, list, numpy.ndarray, Corr. All Obs inside a structure have to be defined on the same set of configurations.
  • fname (str): Filename of the output file.
  • description (str): Optional string that describes the contents of the json file.
  • indent (int): Specify the indentation level of the json file. None or 0 is permissible and saves disk space.
  • gz (bool): If True, the output is a gzipped json. If False, the output is a json file.
#   def import_json_string(json_string, verbose=True, full_output=False):
View Source
def import_json_string(json_string, verbose=True, full_output=False):
    """Rebuild a list of Obs (or structures containing Obs) from a json string.

    Supported structures are Obs, list, numpy.ndarray and Corr.
    A list that contains only one element is unpacked before it is returned.

    Parameters
    ----------
    json_string : str
        json string containing the data.
    verbose : bool
        Print additional information that was written to the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    """

    # Decode the text first, then hand the plain dict to the shared parser.
    decoded = json.loads(json_string)
    return _parse_json_dict(decoded, verbose, full_output)

Reconstruct a list of Obs or structures containing Obs from a json string.

The following structures are supported: Obs, list, numpy.ndarray, Corr. If the list contains only one element, it is unpacked from the list.

Parameters
  • json_string (str): json string containing the data.
  • verbose (bool): Print additional information that was written to the file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned.
#   def load_json(fname, verbose=True, gz=True, full_output=False):
View Source
def load_json(fname, verbose=True, gz=True, full_output=False):
    """Read a list of Obs (or structures containing Obs) from a .json(.gz) file.

    Supported structures are Obs, list, numpy.ndarray and Corr.
    A list that contains only one element is unpacked before it is returned.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.json' is appended when the name ends in
        neither '.json' nor '.gz'; with gz=True, '.gz' is appended as well
        unless it is already present.
    verbose : bool
        Print additional information that was written to the file.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes JSON file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    """
    has_known_suffix = fname.endswith('.json') or fname.endswith('.gz')
    if not has_known_suffix:
        fname = fname + '.json'

    if not gz:
        # Reading a '.gz' name as plain text is allowed but most likely a mistake.
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'r', encoding='utf-8') as handle:
            parsed = json.loads(handle.read())
    else:
        if not fname.endswith('.gz'):
            fname = fname + '.gz'
        with gzip.open(fname, 'r') as handle:
            parsed = json.load(handle)

    return _parse_json_dict(parsed, verbose, full_output)

Import a list of Obs or structures containing Obs from a .json(.gz) file.

The following structures are supported: Obs, list, numpy.ndarray, Corr. If the list contains only one element, it is unpacked from the list.

Parameters
  • fname (str): Filename of the input file.
  • verbose (bool): Print additional information that was written to the file.
  • gz (bool): If True, assumes that data is gzipped. If False, assumes JSON file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned.
#   def dump_dict_to_json(od, fname, description='', indent=1, reps='DICTOBS', gz=True):
View Source
def dump_dict_to_json(od, fname, description='', indent=1, reps='DICTOBS', gz=True):
    """Write a dict of Obs (or structures containing Obs) to a .json(.gz) file.

    The dict is converted to a list of structures plus a copy of the dict in
    which those structures are replaced by placeholders; both are then written
    with dump_to_json, the placeholder dict travelling in the description field.

    Parameters
    ----------
    od : dict
        Dict of JSON valid structures and objects that will be exported.
        At the moment, these objects can be any of: Obs, list, numpy.ndarray, Corr.
        All Obs inside a structure have to be defined on the same set of configurations.
    fname : str
        Filename of the output file.
    description : str
        Optional string that describes the contents of the json file.
    indent : int
        Specify the indentation level of the json file. None or 0 is permissible and
        saves disk space.
    reps : str
        Prefix of the placeholders in the exported dict; placeholders have the
        form reps[0-9]+.
    gz : bool
        If True, the output is a gzipped json. If False, the output is a json file.

    Raises
    ------
    Exception
        If od is not a dictionary.
    """

    if not isinstance(od, dict):
        raise Exception('od has to be a dictionary. Did you want to use dump_to_json?')

    # Human-readable note stored in the file; the placeholder prefix is spliced
    # in between the two fixed text fragments.
    info_head = ('This JSON file contains a python dictionary that has been parsed to a list of structures. '
                 'OBSDICT contains the dictionary, where Obs or other structures have been replaced by ')
    info_tail = ('[0-9]+. The field description contains the additional description of this JSON file. '
                 'This file may be parsed to a dict with the pyerrors routine load_json_dict.')
    infostring = info_head + reps + info_tail

    ol, placeholder_dict = _ol_from_dict(od, reps=reps)
    desc_dict = {'INFO': infostring, 'OBSDICT': placeholder_dict, 'description': description}

    dump_to_json(ol, fname, description=desc_dict, indent=indent, gz=gz)

Export a dict of Obs or structures containing Obs to a .json(.gz) file

Parameters
  • od (dict): Dict of JSON valid structures and objects that will be exported. At the moment, these objects can be either of: Obs, list, numpy.ndarray, Corr. All Obs inside a structure have to be defined on the same set of configurations.
  • fname (str): Filename of the output file.
  • description (str): Optional string that describes the contents of the json file.
  • indent (int): Specify the indentation level of the json file. None or 0 is permissible and saves disk space.
  • reps (str): Prefix of the placeholders in the exported dict; each placeholder has the form reps[0-9]+.
  • gz (bool): If True, the output is a gzipped json. If False, the output is a json file.
#   def load_json_dict(fname, verbose=True, gz=True, full_output=False, reps='DICTOBS'):
View Source
def load_json_dict(fname, verbose=True, gz=True, full_output=False, reps='DICTOBS'):
    """Read a dict of Obs (or structures containing Obs) from a .json(.gz) file.

    Supported structures are Obs, list, numpy.ndarray and Corr.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    verbose : bool
        Print additional information that was written to the file.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes JSON file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    reps : str
        Prefix of the placeholders in the imported dict; placeholders have the
        form reps[0-9]+.
    """
    # Always request the full output: the placeholder dict is stored inside
    # the description field of the file.
    loaded = load_json(fname, verbose=verbose, gz=gz, full_output=True)
    meta = loaded['description']
    user_description = meta['description']
    placeholder_dict = meta['OBSDICT']
    restored = _od_from_list_and_dict(loaded['obsdata'], placeholder_dict, reps=reps)

    if not full_output:
        return restored

    # Put the user's own description and the rebuilt dict back in place.
    loaded['description'] = user_description
    loaded['obsdata'] = restored
    return loaded

Import a dict of Obs or structures containing Obs from a .json(.gz) file.

The following structures are supported: Obs, list, numpy.ndarray, Corr

Parameters
  • fname (str): Filename of the input file.
  • verbose (bool): Print additional information that was written to the file.
  • gz (bool): If True, assumes that data is gzipped. If False, assumes JSON file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned.
  • reps (str): Prefix of the placeholders in the imported dict; each placeholder has the form reps[0-9]+.