diff --git a/docs/pyerrors/input/json.html b/docs/pyerrors/input/json.html index 6e576802..72e84839 100644 --- a/docs/pyerrors/input/json.html +++ b/docs/pyerrors/input/json.html @@ -92,6 +92,7 @@ import platform import warnings import re +import gc import numpy as np from ..obs import Obs from ..covobs import Covobs @@ -124,6 +125,8 @@ my_encoder.default = _default class Deltalist: + __slots__ = ['cnfg', 'deltas'] + def __init__(self, li): self.cnfg = li[0] self.deltas = li[1:] @@ -139,6 +142,8 @@ return self.__repr__() class Floatlist: + __slots__ = ['li'] + def __init__(self, li): self.li = list(li) @@ -308,14 +313,18 @@ else: raise Exception("Unkown datatype.") - jsonstring = json.dumps(d, indent=indent, cls=my_encoder, ensure_ascii=False) + jsonstring = '' + for chunk in my_encoder(indent=indent, ensure_ascii=False).iterencode(d): + jsonstring += chunk - def remove_quotationmarks(s): + del d + gc.collect() + + def remove_quotationmarks_split(split): """Workaround for un-quoting of delta lists, adds 5% of work but is save, compared to a simple replace that could destroy the structure """ deltas = False - split = s.split('\n') for i in range(len(split)): if '"deltas":' in split[i] or '"cov":' in split[i] or '"grad":' in split[i]: deltas = True @@ -325,7 +334,8 @@ deltas = False return '\n'.join(split) - jsonstring = remove_quotationmarks(jsonstring) + jsonstring = jsonstring.split('\n') + jsonstring = remove_quotationmarks_split(jsonstring) jsonstring = jsonstring.replace('nan', 'NaN') return jsonstring @@ -367,8 +377,9 @@ fp.close() -def import_json_string(json_string, verbose=True, full_output=False): - """Reconstruct a list of Obs or structures containing Obs from a json string. +def _parse_json_dict(json_dict, verbose=True, full_output=False): + """Reconstruct a list of Obs or structures containing Obs from a dict that + was built out of a json string. The following structures are supported: Obs, list, numpy.ndarray, Corr If the list contains only one element, it is unpacked from the list. @@ -522,8 +533,6 @@ my_corr.prange = temp_prange return my_corr - json_dict = json.loads(json_string) - prog = json_dict.get('program', '') version = json_dict.get('version', '') who = json_dict.get('who', '') @@ -571,6 +580,26 @@ return ol +def import_json_string(json_string, verbose=True, full_output=False): + """Reconstruct a list of Obs or structures containing Obs from a json string. + + The following structures are supported: Obs, list, numpy.ndarray, Corr + If the list contains only one element, it is unpacked from the list. + + Parameters + ---------- + json_string : str + json string containing the data. + verbose : bool + Print additional information that was written to the file. + full_output : bool + If True, a dict containing auxiliary information and the data is returned. + If False, only the data is returned. + """ + + return _parse_json_dict(json.loads(json_string), verbose, full_output) + + def load_json(fname, verbose=True, gz=True, full_output=False): """Import a list of Obs or structures containing Obs from a .json(.gz) file. @@ -595,14 +624,14 @@ if not fname.endswith('.gz'): fname += '.gz' with gzip.open(fname, 'r') as fin: - d = fin.read().decode('utf-8') + d = json.load(fin) else: if fname.endswith('.gz'): warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning) with open(fname, 'r', encoding='utf-8') as fin: - d = fin.read() + d = json.loads(fin.read()) - return import_json_string(d, verbose, full_output) + return _parse_json_dict(d, verbose, full_output) def _ol_from_dict(ind, reps='DICTOBS'): @@ -854,6 +883,8 @@ my_encoder.default = _default class Deltalist: + __slots__ = ['cnfg', 'deltas'] + def __init__(self, li): self.cnfg = li[0] self.deltas = li[1:] @@ -869,6 +900,8 @@ return self.__repr__() class Floatlist: + __slots__ = ['li'] + def __init__(self, li): self.li = list(li) @@ -1038,14 +1071,18 @@ else: raise Exception("Unkown datatype.") - jsonstring = json.dumps(d, indent=indent, cls=my_encoder, ensure_ascii=False) + jsonstring = '' + for chunk in my_encoder(indent=indent, ensure_ascii=False).iterencode(d): + jsonstring += chunk - def remove_quotationmarks(s): + del d + gc.collect() + + def remove_quotationmarks_split(split): """Workaround for un-quoting of delta lists, adds 5% of work but is save, compared to a simple replace that could destroy the structure """ deltas = False - split = s.split('\n') for i in range(len(split)): if '"deltas":' in split[i] or '"cov":' in split[i] or '"grad":' in split[i]: deltas = True @@ -1055,7 +1092,8 @@ deltas = False return '\n'.join(split) - jsonstring = remove_quotationmarks(jsonstring) + jsonstring = jsonstring.split('\n') + jsonstring = remove_quotationmarks_split(jsonstring) jsonstring = jsonstring.replace('nan', 'NaN') return jsonstring @@ -1181,191 +1219,7 @@ If True, the output is a gzipped json. If False, the output is a json file. If False, only the data is returned. """ - def _gen_obsd_from_datad(d): - retd = {} - if d: - retd['names'] = [] - retd['idl'] = [] - retd['deltas'] = [] - retd['is_merged'] = {} - for ens in d: - for rep in ens['replica']: - rep_name = rep['name'] - if len(rep_name) > len(ens["id"]): - if rep_name[len(ens["id"])] != "|": - tmp_list = list(rep_name) - tmp_list = tmp_list[:len(ens["id"])] + ["|"] + tmp_list[len(ens["id"]):] - rep_name = ''.join(tmp_list) - retd['names'].append(rep_name) - retd['idl'].append([di[0] for di in rep['deltas']]) - retd['deltas'].append(np.array([di[1:] for di in rep['deltas']])) - retd['is_merged'][rep_name] = rep.get('is_merged', False) - return retd - - def _gen_covobsd_from_cdatad(d): - retd = {} - for ens in d: - retl = [] - name = ens['id'] - layouts = ens.get('layout', '1').strip() - layout = [int(ls.strip()) for ls in layouts.split(',') if len(ls) > 0] - cov = np.reshape(ens['cov'], layout) - grad = ens['grad'] - nobs = len(grad[0]) - for i in range(nobs): - retl.append({'name': name, 'cov': cov, 'grad': [g[i] for g in grad]}) - retd[name] = retl - return retd - - def get_Obs_from_dict(o): - layouts = o.get('layout', '1').strip() - if layouts != '1': - raise Exception("layout is %s has to be 1 for type Obs." % (layouts), RuntimeWarning) - - values = o['value'] - od = _gen_obsd_from_datad(o.get('data', {})) - cd = _gen_covobsd_from_cdatad(o.get('cdata', {})) - - if od: - ret = Obs([[ddi[0] + values[0] for ddi in di] for di in od['deltas']], od['names'], idl=od['idl']) - ret.is_merged = od['is_merged'] - else: - ret = Obs([], []) - ret._value = values[0] - for name in cd: - co = cd[name][0] - ret._covobs[name] = Covobs(None, co['cov'], co['name'], grad=co['grad']) - ret.names.append(co['name']) - - ret.reweighted = o.get('reweighted', False) - ret.tag = o.get('tag', [None])[0] - return ret - - def get_List_from_dict(o): - layouts = o.get('layout', '1').strip() - layout = int(layouts) - values = o['value'] - od = _gen_obsd_from_datad(o.get('data', {})) - cd = _gen_covobsd_from_cdatad(o.get('cdata', {})) - - ret = [] - taglist = o.get('tag', layout * [None]) - for i in range(layout): - if od: - ret.append(Obs([list(di[:, i] + values[i]) for di in od['deltas']], od['names'], idl=od['idl'])) - ret[-1].is_merged = od['is_merged'] - else: - ret.append(Obs([], [])) - ret[-1]._value = values[i] - print('Created Obs with means= ', values[i]) - for name in cd: - co = cd[name][i] - ret[-1]._covobs[name] = Covobs(None, co['cov'], co['name'], grad=co['grad']) - ret[-1].names.append(co['name']) - - ret[-1].reweighted = o.get('reweighted', False) - ret[-1].tag = taglist[i] - return ret - - def get_Array_from_dict(o): - layouts = o.get('layout', '1').strip() - layout = [int(ls.strip()) for ls in layouts.split(',') if len(ls) > 0] - N = np.prod(layout) - values = o['value'] - od = _gen_obsd_from_datad(o.get('data', {})) - cd = _gen_covobsd_from_cdatad(o.get('cdata', {})) - - ret = [] - taglist = o.get('tag', N * [None]) - for i in range(N): - if od: - ret.append(Obs([di[:, i] + values[i] for di in od['deltas']], od['names'], idl=od['idl'])) - ret[-1].is_merged = od['is_merged'] - else: - ret.append(Obs([], [])) - ret[-1]._value = values[i] - for name in cd: - co = cd[name][i] - ret[-1]._covobs[name] = Covobs(None, co['cov'], co['name'], grad=co['grad']) - ret[-1].names.append(co['name']) - ret[-1].reweighted = o.get('reweighted', False) - ret[-1].tag = taglist[i] - return np.reshape(ret, layout) - - def get_Corr_from_dict(o): - if isinstance(o.get('tag'), list): # supports the old way - taglist = o.get('tag') # This had to be modified to get the taglist from the dictionary - temp_prange = None - elif isinstance(o.get('tag'), dict): - tagdic = o.get('tag') - taglist = tagdic['tag'] - if 'prange' in tagdic: - temp_prange = tagdic['prange'] - else: - temp_prange = None - else: - raise Exception("The tag is not a list or dict") - - corr_tag = taglist[-1] - tmp_o = o - tmp_o['tag'] = taglist[:-1] - if len(tmp_o['tag']) == 0: - del tmp_o['tag'] - dat = get_Array_from_dict(tmp_o) - my_corr = Corr([None if np.isnan(o.ravel()[0].value) else o for o in list(dat)]) - if corr_tag != 'None': - my_corr.tag = corr_tag - - my_corr.prange = temp_prange - return my_corr - - json_dict = json.loads(json_string) - - prog = json_dict.get('program', '') - version = json_dict.get('version', '') - who = json_dict.get('who', '') - date = json_dict.get('date', '') - host = json_dict.get('host', '') - if prog and verbose: - print('Data has been written using %s.' % (prog)) - if version and verbose: - print('Format version %s' % (version)) - if np.any([who, date, host] and verbose): - print('Written by %s on %s on host %s' % (who, date, host)) - description = json_dict.get('description', '') - if description and verbose: - print() - print('Description: ', description) - obsdata = json_dict['obsdata'] - ol = [] - for io in obsdata: - if io['type'] == 'Obs': - ol.append(get_Obs_from_dict(io)) - elif io['type'] == 'List': - ol.append(get_List_from_dict(io)) - elif io['type'] == 'Array': - ol.append(get_Array_from_dict(io)) - elif io['type'] == 'Corr': - ol.append(get_Corr_from_dict(io)) - else: - raise Exception("Unkown datatype.") - - if full_output: - retd = {} - retd['program'] = prog - retd['version'] = version - retd['who'] = who - retd['date'] = date - retd['host'] = host - retd['description'] = description - retd['obsdata'] = ol - - return retd - else: - if len(obsdata) == 1: - ol = ol[0] - - return ol + return _parse_json_dict(json.loads(json_string), verbose, full_output) @@ -1424,14 +1278,14 @@ If False, only the data is returned. if not fname.endswith('.gz'): fname += '.gz' with gzip.open(fname, 'r') as fin: - d = fin.read().decode('utf-8') + d = json.load(fin) else: if fname.endswith('.gz'): warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning) with open(fname, 'r', encoding='utf-8') as fin: - d = fin.read() + d = json.loads(fin.read()) - return import_json_string(d, verbose, full_output) + return _parse_json_dict(d, verbose, full_output)