pyerrors.input.pandas

  1import warnings
  2import gzip
  3import sqlite3
  4import pandas as pd
  5from ..obs import Obs
  6from ..correlators import Corr
  7from .json import create_json_string, import_json_string
  8
  9
 10def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
 11    """Write DataFrame including Obs or Corr valued columns to sqlite database.
 12
 13    Parameters
 14    ----------
 15    df : pandas.DataFrame
 16        Dataframe to be written to the database.
 17    table_name : str
 18        Name of the table in the database.
 19    db : str
 20        Path to the sqlite database.
 21    if exists : str
 22        How to behave if table already exists. Options 'fail', 'replace', 'append'.
 23    gz : bool
 24        If True the json strings are gzipped.
 25    """
 26    se_df = _serialize_df(df, gz=gz)
 27    con = sqlite3.connect(db)
 28    se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs)
 29    con.close()
 30
 31
 32def read_sql(sql, db, auto_gamma=False, **kwargs):
 33    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.
 34
 35    Parameters
 36    ----------
 37    sql : str
 38        SQL query to be executed.
 39    db : str
 40        Path to the sqlite database.
 41    auto_gamma : bool
 42        If True applies the gamma_method to all imported Obs objects with the default parameters for
 43        the error analysis. Default False.
 44    """
 45    con = sqlite3.connect(db)
 46    extract_df = pd.read_sql(sql, con, **kwargs)
 47    con.close()
 48    return _deserialize_df(extract_df, auto_gamma=auto_gamma)
 49
 50
 51def dump_df(df, fname, gz=True):
 52    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.
 53
 54    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
 55    json format of pyerrors.
 56
 57    Parameters
 58    ----------
 59    df : pandas.DataFrame
 60        Dataframe to be dumped to a file.
 61    fname : str
 62        Filename of the output file.
 63    gz : bool
 64        If True, the output is a gzipped csv file. If False, the output is a csv file.
 65    """
 66    out = _serialize_df(df, gz=False)
 67
 68    if not fname.endswith('.csv'):
 69        fname += '.csv'
 70
 71    if gz is True:
 72        if not fname.endswith('.gz'):
 73            fname += '.gz'
 74        out.to_csv(fname, index=False, compression='gzip')
 75    else:
 76        out.to_csv(fname, index=False)
 77
 78
 79def load_df(fname, auto_gamma=False, gz=True):
 80    """Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.
 81
 82    Parameters
 83    ----------
 84    fname : str
 85        Filename of the input file.
 86    auto_gamma : bool
 87        If True applies the gamma_method to all imported Obs objects with the default parameters for
 88        the error analysis. Default False.
 89    gz : bool
 90        If True, assumes that data is gzipped. If False, assumes JSON file.
 91    """
 92    if not fname.endswith('.csv') and not fname.endswith('.gz'):
 93        fname += '.csv'
 94
 95    if gz is True:
 96        if not fname.endswith('.gz'):
 97            fname += '.gz'
 98        with gzip.open(fname) as f:
 99            re_import = pd.read_csv(f)
100    else:
101        if fname.endswith('.gz'):
102            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
103        re_import = pd.read_csv(fname)
104
105    return _deserialize_df(re_import, auto_gamma=auto_gamma)
106
107
def _serialize_df(df, gz=False):
    """Serializes all Obs or Corr valued columns into json strings according to the pyerrors json specification.

    The input DataFrame is not modified; a copy is returned. Whether a column is
    serialized is decided from the type of its first entry.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame to be serialized.
    gz: bool
        gzip the json string representation. Default False.

    Returns
    -------
    pandas.DataFrame
        Copy of df in which Obs/Corr valued columns are replaced by json strings
        (or gzipped bytes if gz is True).
    """
    out = df.copy()
    if out.empty:
        # No first entry to inspect, hence nothing to serialize.
        return out
    for column in out:
        # Positional access: the index is not guaranteed to contain the label 0.
        if isinstance(out[column].iloc[0], (Obs, Corr)):
            out[column] = out[column].transform(lambda x: create_json_string(x, indent=0))
            if gz is True:
                out[column] = out[column].transform(lambda x: gzip.compress(x.encode('utf-8')))
    return out
125
126
def _deserialize_df(df, auto_gamma=False):
    """Deserializes all pyerrors json strings into Obs or Corr objects according to the pyerrors json specification.

    The DataFrame is modified in place and also returned. Whether a column is
    converted is decided from its first entry.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame to be deserialized.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.

    Returns
    -------
    pandas.DataFrame
        The same DataFrame object with the recognized columns converted.

    Notes:
    ------
    In case any column of the DataFrame is gzipped it is gunzipped in the process.
    """
    if df.empty:
        # No first entry to inspect, hence nothing to deserialize.
        return df
    for column in df.select_dtypes(include="object"):
        # Positional access: the index is not guaranteed to contain the label 0.
        first = df[column].iloc[0]
        if isinstance(first, bytes):
            # gzip magic number followed by the deflate method byte and empty flags.
            if first.startswith(b"\x1f\x8b\x08\x00"):
                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
                first = df[column].iloc[0]
        if isinstance(first, str):
            # The "program" key near the start marks a pyerrors json string.
            if '"program":' in first[:20]:
                df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False))
                if auto_gamma is True:
                    df[column].apply(lambda x: x.gamma_method())
    return df
def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
    """Write DataFrame including Obs or Corr valued columns to sqlite database.

    Obs and Corr valued columns are serialized to (optionally gzipped) json
    strings before the table is written.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be written to the database.
    table_name : str
        Name of the table in the database.
    db : str
        Path to the sqlite database.
    if_exists : str
        How to behave if table already exists. Options 'fail', 'replace', 'append'.
    gz : bool
        If True the json strings are gzipped.
    **kwargs
        Additional keyword arguments forwarded to pandas.DataFrame.to_sql.
    """
    se_df = _serialize_df(df, gz=gz)
    con = sqlite3.connect(db)
    try:
        se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs)
    finally:
        # Release the database handle even if the write raises
        # (for example when the table exists and if_exists='fail').
        con.close()

Write DataFrame including Obs or Corr valued columns to sqlite database.

Parameters
  • df (pandas.DataFrame): Dataframe to be written to the database.
  • table_name (str): Name of the table in the database.
  • db (str): Path to the sqlite database.
  • if_exists (str): How to behave if the table already exists. Options 'fail', 'replace', 'append'.
  • gz (bool): If True the json strings are gzipped.
def read_sql(sql, db, auto_gamma=False, **kwargs):
def read_sql(sql, db, auto_gamma=False, **kwargs):
    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    db : str
        Path to the sqlite database.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    **kwargs
        Additional keyword arguments forwarded to pandas.read_sql.

    Returns
    -------
    pandas.DataFrame
        Query result with serialized columns converted back by _deserialize_df.
    """
    con = sqlite3.connect(db)
    try:
        extract_df = pd.read_sql(sql, con, **kwargs)
    finally:
        # Close the connection even if the query fails.
        con.close()
    return _deserialize_df(extract_df, auto_gamma=auto_gamma)

Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

Parameters
  • sql (str): SQL query to be executed.
  • db (str): Path to the sqlite database.
  • auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
def dump_df(df, fname, gz=True):
def dump_df(df, fname, gz=True):
    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
    json format of pyerrors.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be dumped to a file.
    fname : str
        Filename of the output file. Missing '.csv' / '.gz' suffixes are appended;
        with gz=True a name that already ends in '.gz' is used unchanged.
    gz : bool
        If True, the output is a gzipped csv file. If False, the output is a csv file.
    """
    # The json strings are left uncompressed here; compression (if any) is applied
    # to the csv file as a whole.
    out = _serialize_df(df, gz=False)

    if gz is True:
        # Only extend names that do not already carry the compressed suffix, so that
        # dump_df('x.csv.gz') writes the same file that load_df('x.csv.gz') reads.
        if not fname.endswith('.gz'):
            if not fname.endswith('.csv'):
                fname += '.csv'
            fname += '.gz'
        out.to_csv(fname, index=False, compression='gzip')
    else:
        if not fname.endswith('.csv'):
            fname += '.csv'
        out.to_csv(fname, index=False)

Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

Before making use of pandas to_csv functionality Obs objects are serialized via the standardized json format of pyerrors.

Parameters
  • df (pandas.DataFrame): Dataframe to be dumped to a file.
  • fname (str): Filename of the output file.
  • gz (bool): If True, the output is a gzipped csv file. If False, the output is a csv file.
def load_df(fname, auto_gamma=False, gz=True):
 80def load_df(fname, auto_gamma=False, gz=True):
 81    """Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.
 82
 83    Parameters
 84    ----------
 85    fname : str
 86        Filename of the input file.
 87    auto_gamma : bool
 88        If True applies the gamma_method to all imported Obs objects with the default parameters for
 89        the error analysis. Default False.
 90    gz : bool
 91        If True, assumes that data is gzipped. If False, assumes JSON file.
 92    """
 93    if not fname.endswith('.csv') and not fname.endswith('.gz'):
 94        fname += '.csv'
 95
 96    if gz is True:
 97        if not fname.endswith('.gz'):
 98            fname += '.gz'
 99        with gzip.open(fname) as f:
100            re_import = pd.read_csv(f)
101    else:
102        if fname.endswith('.gz'):
103            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
104        re_import = pd.read_csv(fname)
105
106    return _deserialize_df(re_import, auto_gamma=auto_gamma)

Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.

Parameters
  • fname (str): Filename of the input file.
  • auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
  • gz (bool): If True, assumes that the data is gzipped. If False, assumes a plain (uncompressed) csv file.