pyerrors.input.pandas
import warnings
import gzip
import sqlite3
import pandas as pd
from ..obs import Obs
from ..correlators import Corr
from .json import create_json_string, import_json_string


def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
    """Write DataFrame including Obs or Corr valued columns to sqlite database.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be written to the database.
    table_name : str
        Name of the table in the database.
    db : str
        Path to the sqlite database.
    if_exists : str
        How to behave if table already exists. Options 'fail', 'replace', 'append'.
    gz : bool
        If True the json strings are gzipped.
    **kwargs
        Forwarded unchanged to pandas.DataFrame.to_sql.

    Returns
    -------
    None
    """
    se_df = _serialize_df(df, gz=gz)
    con = sqlite3.connect(db)
    # Close the connection even if pandas raises (e.g. if_exists='fail' and
    # the table is already present), so the database handle is not leaked.
    try:
        se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs)
    finally:
        con.close()


def read_sql(sql, db, auto_gamma=False, **kwargs):
    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    db : str
        Path to the sqlite database.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    **kwargs
        Forwarded unchanged to pandas.read_sql.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the sqlite database.
    """
    con = sqlite3.connect(db)
    # Close the connection even if the query fails.
    try:
        extract_df = pd.read_sql(sql, con, **kwargs)
    finally:
        con.close()
    return _deserialize_df(extract_df, auto_gamma=auto_gamma)


def dump_df(df, fname, gz=True):
    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
    json format of pyerrors.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be dumped to a file.
    fname : str
        Filename of the output file. The suffix '.csv' (and '.gz' if gz is True) is appended
        when missing. A name that already ends with '.csv.gz' is used as is when gz is True.
    gz : bool
        If True, the output is a gzipped csv file. If False, the output is a csv file.

    Returns
    -------
    None
    """
    # The individual json strings are not gzipped; compression is applied to the csv file as a whole.
    out = _serialize_df(df, gz=False)

    if gz is True:
        # Accept a complete 'name.csv.gz' so that the same name can be handed to load_df;
        # previously this produced 'name.csv.gz.csv.gz'.
        if not fname.endswith('.csv.gz'):
            if not fname.endswith('.csv'):
                fname += '.csv'
            fname += '.gz'
        out.to_csv(fname, index=False, compression='gzip')
    else:
        if not fname.endswith('.csv'):
            fname += '.csv'
        out.to_csv(fname, index=False)


def load_df(fname, auto_gamma=False, gz=True):
    """Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.csv' is appended if the name ends with neither '.csv' nor '.gz'.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    gz : bool
        If True, assumes that the csv file is gzipped. If False, assumes a plain csv file.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the csv file.
    """
    if not fname.endswith('.csv') and not fname.endswith('.gz'):
        fname += '.csv'

    if gz is True:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname) as f:
            re_import = pd.read_csv(f)
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        re_import = pd.read_csv(fname)

    return _deserialize_df(re_import, auto_gamma=auto_gamma)


def _serialize_df(df, gz=False):
    """Serializes all Obs or Corr valued columns into json strings according to the pyerrors json specification.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame to be serialized. The input is not modified; a copy is returned.
    gz: bool
        gzip the json string representation. Default False.

    Returns
    -------
    out : pandas.DataFrame
        Copy of df in which Obs/Corr columns are replaced by json strings (or gzipped bytes).
    """
    out = df.copy()
    # Nothing to inspect in a frame without rows; avoids an index error on the type probe below.
    if len(out) == 0:
        return out
    for column in out:
        # The first entry (by position, independent of the index labels) decides
        # whether the column is treated as Obs/Corr valued.
        if isinstance(out[column].iloc[0], (Obs, Corr)):
            out[column] = out[column].transform(lambda x: create_json_string(x, indent=0))
            if gz is True:
                out[column] = out[column].transform(lambda x: gzip.compress(x.encode('utf-8')))
    return out


def _deserialize_df(df, auto_gamma=False):
    """Deserializes all pyerrors json strings into Obs or Corr objects according to the pyerrors json specification.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame to be deserialized. Matching columns are replaced in place.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.

    Returns
    -------
    df : pandas.DataFrame
        The same DataFrame object with json columns converted.

    Notes:
    ------
    In case any column of the DataFrame is gzipped it is gunzipped in the process.
    """
    # An empty result (e.g. a query that matches no rows) has no first entry to probe.
    if len(df) == 0:
        return df
    for column in df.select_dtypes(include="object"):
        first = df[column].iloc[0]
        if isinstance(first, bytes):
            # gzip header: magic bytes 1f 8b, deflate method 08, flags 00.
            if first.startswith(b"\x1f\x8b\x08\x00"):
                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
                first = df[column].iloc[0]
        if isinstance(first, str):
            # Only strings whose first 20 characters contain the "program" key are parsed as pyerrors json.
            if '"program":' in first[:20]:
                df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False))
                if auto_gamma is True:
                    df[column].apply(lambda x: x.gamma_method())
    return df
def
to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
def to_sql(df, table_name, db, if_exists='fail', gz=True, **kwargs):
    """Write DataFrame including Obs or Corr valued columns to sqlite database.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be written to the database.
    table_name : str
        Name of the table in the database.
    db : str
        Path to the sqlite database.
    if_exists : str
        How to behave if table already exists. Options 'fail', 'replace', 'append'.
    gz : bool
        If True the json strings are gzipped.
    **kwargs
        Forwarded unchanged to pandas.DataFrame.to_sql.

    Returns
    -------
    None
    """
    se_df = _serialize_df(df, gz=gz)
    con = sqlite3.connect(db)
    # Close the connection even if pandas raises (e.g. if_exists='fail' and
    # the table is already present), so the database handle is not leaked.
    try:
        se_df.to_sql(table_name, con, if_exists=if_exists, index=False, **kwargs)
    finally:
        con.close()
Write DataFrame including Obs or Corr valued columns to sqlite database.
Parameters
- df (pandas.DataFrame): Dataframe to be written to the database.
- table_name (str): Name of the table in the database.
- db (str): Path to the sqlite database.
- if_exists (str): How to behave if table already exists. Options 'fail', 'replace', 'append'.
- gz (bool): If True the json strings are gzipped.
Returns
- None
def
read_sql(sql, db, auto_gamma=False, **kwargs):
def read_sql(sql, db, auto_gamma=False, **kwargs):
    """Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.

    Parameters
    ----------
    sql : str
        SQL query to be executed.
    db : str
        Path to the sqlite database.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    **kwargs
        Forwarded unchanged to pandas.read_sql.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the sqlite database.
    """
    con = sqlite3.connect(db)
    # Close the connection even if the query fails, so the handle is not leaked.
    try:
        extract_df = pd.read_sql(sql, con, **kwargs)
    finally:
        con.close()
    return _deserialize_df(extract_df, auto_gamma=auto_gamma)
Execute SQL query on sqlite database and obtain DataFrame including Obs or Corr valued columns.
Parameters
- sql (str): SQL query to be executed.
- db (str): Path to the sqlite database.
- auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
Returns
- data (pandas.DataFrame): Dataframe with the content of the sqlite database.
def
dump_df(df, fname, gz=True):
def dump_df(df, fname, gz=True):
    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.

    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
    json format of pyerrors.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to be dumped to a file.
    fname : str
        Filename of the output file. The suffix '.csv' (and '.gz' if gz is True) is appended
        when missing. A name that already ends with '.csv.gz' is used as is when gz is True.
    gz : bool
        If True, the output is a gzipped csv file. If False, the output is a csv file.

    Returns
    -------
    None
    """
    # The individual json strings are not gzipped; compression is applied to the csv file as a whole.
    out = _serialize_df(df, gz=False)

    if gz is True:
        # Accept a complete 'name.csv.gz' so that the same name can be handed to load_df;
        # previously this produced 'name.csv.gz.csv.gz'.
        if not fname.endswith('.csv.gz'):
            if not fname.endswith('.csv'):
                fname += '.csv'
            fname += '.gz'
        out.to_csv(fname, index=False, compression='gzip')
    else:
        if not fname.endswith('.csv'):
            fname += '.csv'
        out.to_csv(fname, index=False)
Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.
Before making use of pandas to_csv functionality Obs objects are serialized via the standardized json format of pyerrors.
Parameters
- df (pandas.DataFrame): Dataframe to be dumped to a file.
- fname (str): Filename of the output file.
- gz (bool): If True, the output is a gzipped csv file. If False, the output is a csv file.
Returns
- None
def
load_df(fname, auto_gamma=False, gz=True):
def load_df(fname, auto_gamma=False, gz=True):
    """Read a csv or gzipped csv file written by dump_df back into a DataFrame.

    Columns holding pyerrors json strings are converted back into Obs or Corr objects.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.csv' is appended if the name ends with neither '.csv' nor '.gz'.
    auto_gamma : bool
        If True applies the gamma_method to all imported Obs objects with the default parameters for
        the error analysis. Default False.
    gz : bool
        If True, assumes that the csv file is gzipped. If False, assumes a plain csv file.

    Returns
    -------
    data : pandas.DataFrame
        Dataframe with the content of the csv file.
    """
    if not fname.endswith(('.csv', '.gz')):
        fname += '.csv'

    if gz is not True:
        # Plain csv requested: a '.gz' name is read as is, but is most likely a mistake.
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        frame = pd.read_csv(fname)
    else:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname) as handle:
            frame = pd.read_csv(handle)

    return _deserialize_df(frame, auto_gamma=auto_gamma)
Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.
Parameters
- fname (str): Filename of the input file.
- auto_gamma (bool): If True applies the gamma_method to all imported Obs objects with the default parameters for the error analysis. Default False.
- gz (bool): If True, assumes that the csv file is gzipped. If False, assumes a plain (uncompressed) csv file.
Returns
- data (pandas.DataFrame): Dataframe with the content of the csv file.