pyerrors.input.pandas

  1import warnings
  2import gzip
  3import sqlite3
  4import pandas as pd
  5from ..obs import Obs
  6from ..correlators import Corr
  7from .json import create_json_string, import_json_string
  8
  9
 10def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
 11    """Write DataFrame including Obs or Corr valued columns to sqlite database.
 12
 13    Parameters
 14    ----------
 15    df : pandas.DataFrame
 16        Dataframe to be written to the database.
 17    table_name : str
 18        Name of the table in the database.
 19    db : str
 20        Path to the sqlite database.
 21    if exists : str
 22        How to behave if table already exists. Options 'fail', 'replace', 'append'.
 23    gz : bool
 24        If True the json strings are gzipped.
 25
 26    Returns
 27    -------
 28    None
 29    """
 30    se_df = _serialize_df(df, gz=gz)
 31    con = sqlite3.connect(db)
 32    se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs)
 33    con.close()
 34
 35
 36def read_sql(sql, db, auto_gamma=False, **kwargs):
 37    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.
 38
 39    Parameters
 40    ----------
 41    sql : str
 42        SQL query to be executed.
 43    db : str
 44        Path to the sqlite database.
 45    auto_gamma : bool
 46        If True applies the gamma_method to all imported Obs objects with the default parameters for
 47        the error analysis. Default False.
 48
 49    Returns
 50    -------
 51    data : pandas.DataFrame
 52        Dataframe with the content of the sqlite database.
 53    """
 54    con = sqlite3.connect(db)
 55    extract_df = pd.read_sql(sql, con, **kwargs)
 56    con.close()
 57    return _deserialize_df(extract_df, auto_gamma=auto_gamma)
 58
 59
 60def dump_df(df, fname, gz=True):
 61    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.
 62
 63    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
 64    json format of pyerrors.
 65
 66    Parameters
 67    ----------
 68    df : pandas.DataFrame
 69        Dataframe to be dumped to a file.
 70    fname : str
 71        Filename of the output file.
 72    gz : bool
 73        If True, the output is a gzipped csv file. If False, the output is a csv file.
 74
 75    Returns
 76    -------
 77    None
 78    """
 79    out = _serialize_df(df, gz=False)
 80
 81    if not fname.endswith('.csv'):
 82        fname += '.csv'
 83
 84    if gz is True:
 85        if not fname.endswith('.gz'):
 86            fname += '.gz'
 87        out.to_csv(fname, index=False, compression='gzip')
 88    else:
 89        out.to_csv(fname, index=False)
 90
 91
 92def load_df(fname, auto_gamma=False, gz=True):
 93    """Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.
 94
 95    Parameters
 96    ----------
 97    fname : str
 98        Filename of the input file.
 99    auto_gamma : bool
100        If True applies the gamma_method to all imported Obs objects with the default parameters for
101        the error analysis. Default False.
102    gz : bool
103        If True, assumes that data is gzipped. If False, assumes JSON file.
104
105    Returns
106    -------
107    data : pandas.DataFrame
108        Dataframe with the content of the sqlite database.
109    """
110    if not fname.endswith('.csv') and not fname.endswith('.gz'):
111        fname += '.csv'
112
113    if gz is True:
114        if not fname.endswith('.gz'):
115            fname += '.gz'
116        with gzip.open(fname) as f:
117            re_import = pd.read_csv(f)
118    else:
119        if fname.endswith('.gz'):
120            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
121        re_import = pd.read_csv(fname)
122
123    return _deserialize_df(re_import, auto_gamma=auto_gamma)
124
125
126def _serialize_df(df, gz=False):
127    """Serializes all Obs or Corr valued columns into json strings according to the pyerrors json specification.
128
129    Parameters
130    ----------
131    df : pandas.DataFrame
132        DataFrame to be serilized.
133    gz: bool
134        gzip the json string representation. Default False.
135    """
136    out = df.copy()
137    for column in out:
138        if isinstance(out[column][0], (Obs, Corr)):
139            out[column] = out[column].transform(lambda x: create_json_string(x, indent=0))
140            if gz is True:
141                out[column] = out[column].transform(lambda x: gzip.compress(x.encode('utf-8')))
142    return out
143
144
145def _deserialize_df(df, auto_gamma=False):
146    """Deserializes all pyerrors json strings into Obs or Corr objects according to the pyerrors json specification.
147
148    Parameters
149    ----------
150    df : pandas.DataFrame
151        DataFrame to be deserilized.
152    auto_gamma : bool
153        If True applies the gamma_method to all imported Obs objects with the default parameters for
154        the error analysis. Default False.
155
156    Notes:
157    ------
158    In case any column of the DataFrame is gzipped it is gunzipped in the process.
159    """
160    for column in df.select_dtypes(include="object"):
161        if isinstance(df[column][0], bytes):
162            if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
163                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
164        if isinstance(df[column][0], str):
165            if '"program":' in df[column][0][:20]:
166                df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False))
167                if auto_gamma is True:
168                    df[column].apply(lambda x: x.gamma_method())
169    return df
def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
    """Write DataFrame including Obs or Corr valued columns to sqlite database.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be written to the database.
    table_name : str
        Name of the table in the database.
    db : str
        Path to the sqlite database.
    if_exists : str
        How to behave if table already exists. Options 'fail', 'replace', 'append'.
    gz : bool
        If True the json strings are gzipped.
    **kwargs
        Additional keyword arguments passed on to pandas.DataFrame.to_sql.

    Returns
    -------
    None
    """
    se_df = _serialize_df(df, gz=gz)
    con = sqlite3.connect(db)
    try:
        se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs)
    finally:
        # Close the connection even if the write raises (e.g. if_exists='fail'
        # and the table already exists), so the database handle is not leaked.
        con.close()

Write DataFrame including Obs or Corr valued columns to sqlite database.

Parameters
  • df (pandas.DataFrame): Dataframe to be written to the database.
  • table_name (str): Name of the table in the database.
  • db (str): Path to the sqlite database.
  • if_exists (str): How to behave if the table already exists. Options 'fail', 'replace', 'append'.
  • gz (bool): If True the json strings are gzipped.
Returns
  • None
def read_sql(sql, db, auto_gamma=False, **kwargs):
def read_sql(sql, db, auto_gamma=False, **kwargs):
    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    db : str
        Path to the sqlite database.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    **kwargs
        Additional keyword arguments passed on to pandas.read_sql.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the sqlite database.
    """
    con = sqlite3.connect(db)
    try:
        extract_df = pd.read_sql(sql, con, **kwargs)
    finally:
        # Close the connection even if the query fails, so the database
        # handle is not leaked.
        con.close()
    return _deserialize_df(extract_df, auto_gamma=auto_gamma)

Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

Parameters
  • sql (str): SQL query to be executed.
  • db (str): Path to the sqlite database.
  • auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
Returns
  • data (pandas.DataFrame): Dataframe with the content of the sqlite database.
def dump_df(df, fname, gz=True):
def dump_df(df, fname, gz=True):
    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
    json format of pyerrors.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be dumped to a file.
    fname : str
        Filename of the output file. The suffix '.csv' (and '.gz' if gz is True) is appended
        when missing; a name that already ends with '.csv.gz' is used unchanged if gz is True.
    gz : bool
        If True, the output is a gzipped csv file. If False, the output is a csv file.

    Returns
    -------
    None
    """
    # The json strings themselves are not gzipped; compression (if any) is
    # applied to the csv file as a whole.
    out = _serialize_df(df, gz=False)

    if gz is True:
        # Accept a name that already carries the complete '.csv.gz' suffix
        # instead of turning it into '<name>.csv.gz.csv.gz', which load_df
        # would not find under the name the caller passed in.
        if not fname.endswith('.csv.gz'):
            if not fname.endswith('.csv'):
                fname += '.csv'
            fname += '.gz'
        out.to_csv(fname, index=False, compression='gzip')
    else:
        if not fname.endswith('.csv'):
            fname += '.csv'
        out.to_csv(fname, index=False)

Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

Before making use of pandas to_csv functionality Obs objects are serialized via the standardized json format of pyerrors.

Parameters
  • df (pandas.DataFrame): Dataframe to be dumped to a file.
  • fname (str): Filename of the output file.
  • gz (bool): If True, the output is a gzipped csv file. If False, the output is a csv file.
Returns
  • None
def load_df(fname, auto_gamma=False, gz=True):
 93def load_df(fname, auto_gamma=False, gz=True):
 94    """Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.
 95
 96    Parameters
 97    ----------
 98    fname : str
 99        Filename of the input file.
100    auto_gamma : bool
101        If True applies the gamma_method to all imported Obs objects with the default parameters for
102        the error analysis. Default False.
103    gz : bool
104        If True, assumes that data is gzipped. If False, assumes JSON file.
105
106    Returns
107    -------
108    data : pandas.DataFrame
109        Dataframe with the content of the sqlite database.
110    """
111    if not fname.endswith('.csv') and not fname.endswith('.gz'):
112        fname += '.csv'
113
114    if gz is True:
115        if not fname.endswith('.gz'):
116            fname += '.gz'
117        with gzip.open(fname) as f:
118            re_import = pd.read_csv(f)
119    else:
120        if fname.endswith('.gz'):
121            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
122        re_import = pd.read_csv(fname)
123
124    return _deserialize_df(re_import, auto_gamma=auto_gamma)

Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.

Parameters
  • fname (str): Filename of the input file.
  • auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
  • gz (bool): If True, assumes that the csv file is gzipped. If False, assumes a plain csv file.
Returns
  • data (pandas.DataFrame): Dataframe with the content of the csv file.