pyerrors.input.pandas
import warnings
import gzip
import sqlite3
import pandas as pd
from ..obs import Obs
from ..correlators import Corr
from .json import create_json_string, import_json_string


def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
    """Write DataFrame including Obs or Corr valued columns to sqlite database.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be written to the database.
    table_name : str
        Name of the table in the database.
    db : str
        Path to the sqlite database.
    if_exists : str
        How to behave if table already exists. Options 'fail', 'replace', 'append'.
    gz : bool
        If True the json strings are gzipped.
    **kwargs
        Additional keyword arguments passed on to pandas.DataFrame.to_sql.
    """
    se_df = _serialize_df(df, gz=gz)
    con = sqlite3.connect(db)
    try:
        se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs)
    finally:
        # Release the database handle even if the write raises.
        con.close()


def read_sql(sql, db, auto_gamma=False, **kwargs):
    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    db : str
        Path to the sqlite database.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    **kwargs
        Additional keyword arguments passed on to pandas.read_sql.
    """
    con = sqlite3.connect(db)
    try:
        extract_df = pd.read_sql(sql, con, **kwargs)
    finally:
        # Release the database handle even if the query raises.
        con.close()
    return _deserialize_df(extract_df, auto_gamma=auto_gamma)


def dump_df(df, fname, gz=True):
    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
    json format of pyerrors.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be dumped to a file.
    fname : str
        Filename of the output file. Missing '.csv' / '.csv.gz' extensions are appended.
    gz : bool
        If True, the output is a gzipped csv file. If False, the output is a csv file.
    """
    # The json strings themselves stay uncompressed; compression is applied to the whole file.
    out = _serialize_df(df, gz=False)

    if gz is True:
        # A name that already ends with '.csv.gz' is complete; appending to it
        # would yield '.csv.gz.csv.gz', which load_df cannot find under the same name.
        if not fname.endswith('.csv.gz'):
            if not fname.endswith('.csv'):
                fname += '.csv'
            fname += '.gz'
        out.to_csv(fname, index=False, compression='gzip')
    else:
        if not fname.endswith('.csv'):
            fname += '.csv'
        out.to_csv(fname, index=False)


def load_df(fname, auto_gamma=False, gz=True):
    """Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes csv file.
    """
    if not fname.endswith('.csv') and not fname.endswith('.gz'):
        fname += '.csv'

    if gz is True:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname) as f:
            re_import = pd.read_csv(f)
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        re_import = pd.read_csv(fname)

    return _deserialize_df(re_import, auto_gamma=auto_gamma)


def _serialize_df(df, gz=False):
    """Serializes all Obs or Corr valued columns into json strings according to the pyerrors json specification.

    Whether a column is serialized is decided from its first entry only.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame to be serialized. It is copied, the input is left untouched.
    gz: bool
        gzip the json string representation. Default False.
    """
    out = df.copy()
    if out.empty:
        # There is no first entry to inspect, hence nothing to serialize.
        return out
    for column in out:
        # Positional access, the index does not have to contain the label 0.
        if isinstance(out[column].iloc[0], (Obs, Corr)):
            out[column] = out[column].transform(lambda x: create_json_string(x, indent=0))
            if gz is True:
                out[column] = out[column].transform(lambda x: gzip.compress(x.encode('utf-8')))
    return out


def _deserialize_df(df, auto_gamma=False):
    """Deserializes all pyerrors json strings into Obs or Corr objects according to the pyerrors json specification.

    The columns of the passed DataFrame are replaced in place and the same DataFrame is returned.
    Whether a column is deserialized is decided from its first entry only.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame to be deserialized.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.

    Notes:
    ------
    In case any column of the DataFrame is gzipped it is gunzipped in the process.
    """
    if df.empty:
        # E.g. a query without matching rows, there is no first entry to inspect.
        return df
    for column in df.select_dtypes(include="object"):
        # Positional access, the index does not have to contain the label 0.
        if isinstance(df[column].iloc[0], bytes):
            # Only decompress entries that start with the gzip header bytes.
            if df[column].iloc[0].startswith(b"\x1f\x8b\x08\x00"):
                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
        # Inspect the first entry again, it may just have been gunzipped into a string.
        if isinstance(df[column].iloc[0], str):
            if '"program":' in df[column].iloc[0][:20]:
                df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False))
                if auto_gamma is True:
                    # Called for its side effect on the objects only.
                    for entry in df[column]:
                        entry.gamma_method()
    return df
def
to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
    """Write DataFrame including Obs or Corr valued columns to sqlite database.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be written to the database.
    table_name : str
        Name of the table in the database.
    db : str
        Path to the sqlite database.
    if_exists : str
        How to behave if table already exists. Options 'fail', 'replace', 'append'.
    gz : bool
        If True the json strings are gzipped.
    **kwargs
        Additional keyword arguments passed on to pandas.DataFrame.to_sql.
    """
    se_df = _serialize_df(df, gz=gz)
    con = sqlite3.connect(db)
    try:
        se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs)
    finally:
        # Release the database handle even if the write raises.
        con.close()
Write DataFrame including Obs or Corr valued columns to sqlite database.
Parameters
- df (pandas.DataFrame): Dataframe to be written to the database.
- table_name (str): Name of the table in the database.
- db (str): Path to the sqlite database.
- if_exists (str): How to behave if table already exists. Options 'fail', 'replace', 'append'.
- gz (bool): If True the json strings are gzipped.
def
read_sql(sql, db, auto_gamma=False, **kwargs):
def read_sql(sql, db, auto_gamma=False, **kwargs):
    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    db : str
        Path to the sqlite database.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    **kwargs
        Additional keyword arguments passed on to pandas.read_sql.
    """
    con = sqlite3.connect(db)
    try:
        extract_df = pd.read_sql(sql, con, **kwargs)
    finally:
        # Release the database handle even if the query raises.
        con.close()
    return _deserialize_df(extract_df, auto_gamma=auto_gamma)
Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.
Parameters
- sql (str): SQL query to be executed.
- db (str): Path to the sqlite database.
- auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
def
dump_df(df, fname, gz=True):
def dump_df(df, fname, gz=True):
    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
    json format of pyerrors.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be dumped to a file.
    fname : str
        Filename of the output file. Missing '.csv' / '.csv.gz' extensions are appended.
    gz : bool
        If True, the output is a gzipped csv file. If False, the output is a csv file.
    """
    # The json strings themselves stay uncompressed; compression is applied to the whole file.
    out = _serialize_df(df, gz=False)

    if gz is True:
        # A name that already ends with '.csv.gz' is complete; appending to it
        # would yield '.csv.gz.csv.gz', which load_df cannot find under the same name.
        if not fname.endswith('.csv.gz'):
            if not fname.endswith('.csv'):
                fname += '.csv'
            fname += '.gz'
        out.to_csv(fname, index=False, compression='gzip')
    else:
        if not fname.endswith('.csv'):
            fname += '.csv'
        out.to_csv(fname, index=False)
Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.
Before making use of pandas to_csv functionality Obs objects are serialized via the standardized json format of pyerrors.
Parameters
- df (pandas.DataFrame): Dataframe to be dumped to a file.
- fname (str): Filename of the output file.
- gz (bool): If True, the output is a gzipped csv file. If False, the output is a csv file.
def
load_df(fname, auto_gamma=False, gz=True):
def load_df(fname, auto_gamma=False, gz=True):
    """Read a csv.(gz) file with json serialized Obs objects back into a pandas DataFrame.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.csv' is appended if the name ends with neither
        '.csv' nor '.gz'; for gz=True a missing '.gz' is appended as well.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes csv file.
    """
    if not fname.endswith(('.csv', '.gz')):
        fname += '.csv'

    # Plain csv branch first, the gzipped case follows below.
    if gz is not True:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        plain_frame = pd.read_csv(fname)
        return _deserialize_df(plain_frame, auto_gamma=auto_gamma)

    if not fname.endswith('.gz'):
        fname += '.gz'
    with gzip.open(fname) as handle:
        zipped_frame = pd.read_csv(handle)
    return _deserialize_df(zipped_frame, auto_gamma=auto_gamma)
Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.
Parameters
- fname (str): Filename of the input file.
- auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
- gz (bool): If True, assumes that data is gzipped. If False, assumes csv file.