From 42a6dbddd423da096bb4f0472f2f7d814164d42f Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 30 Jun 2022 14:05:11 +0100
Subject: [PATCH 1/7] feat: dump and load functionality for pandas dataframes
 containing Obs objects added.

---
 pyerrors/input/__init__.py |  1 +
 pyerrors/input/pandas.py   | 72 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 pyerrors/input/pandas.py

diff --git a/pyerrors/input/__init__.py b/pyerrors/input/__init__.py
index 3b585614..e8cfff08 100644
--- a/pyerrors/input/__init__.py
+++ b/pyerrors/input/__init__.py
@@ -10,4 +10,5 @@ from . import hadrons
 from . import json
 from . import misc
 from . import openQCD
+from . import pandas
 from . import sfcf
diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py
new file mode 100644
index 00000000..b54c7617
--- /dev/null
+++ b/pyerrors/input/pandas.py
@@ -0,0 +1,72 @@
+import warnings
+import gzip
+import pandas as pd
+from ..obs import Obs
+from .json import create_json_string, import_json_string
+
+
+def dump_df(df, fname, gz=True):
+    """Exports a pandas DataFrame containing Obs valued columns to a (gzipped) csv file.
+
+    Before making use of pandas to_csv functionality Obs objects are serialized via the standardized
+    json format of pyerrors.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Dataframe to be dumped to a file.
+    fname : str
+        Filename of the output file.
+    gz : bool
+        If True, the output is a gzipped csv file. If False, the output is a csv file.
+    """
+
+    out = df.copy()
+    for column in out:
+        if isinstance(out[column][0], Obs):
+            out[column] = out[column].transform(lambda x: create_json_string(x, indent=0))
+
+    if not fname.endswith('.csv'):
+        fname += '.csv'
+
+    out.to_csv(fname)
+    if gz is True:
+        with open(fname, 'rb') as f_in, gzip.open(fname + ".gz", 'wb') as f_out:
+            f_out.writelines(f_in)
+
+
+def load_df(fname, auto_gamma=False, gz=True):
+    """Imports a pandas DataFrame from a csv.(gz) file in which Obs objects are serialized as json strings.
+
+    Parameters
+    ----------
+    fname : str
+        Filename of the input file.
+    auto_gamma : bool
+        If True applies the gamma_method to all imported Obs objects with the default parameters for
+        the error analysis. Default False.
+    gz : bool
+        If True, assumes that data is gzipped. If False, assumes a plain csv file.
+    """
+
+    if not fname.endswith('.csv') and not fname.endswith('.gz'):
+        fname += '.csv'
+
+    if gz is True:
+        if not fname.endswith('.gz'):
+            fname += '.gz'
+        with gzip.open(fname) as f:
+            re_import = pd.read_csv(f)
+    else:
+        if fname.endswith('.gz'):
+            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
+        re_import = pd.read_csv(fname)
+
+    for column in re_import.select_dtypes(include="object"):
+        if isinstance(re_import[column][0], str):
+            if re_import[column][0][:20] == '{"program":"pyerrors':
+                re_import[column] = re_import[column].transform(lambda x: import_json_string(x, verbose=False))
+                if auto_gamma is True:
+                    re_import[column].apply(Obs.gamma_method)
+
+    return re_import

From 29820f8e61a05ed8af3590ef02fa37240438ea6d Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 30 Jun 2022 14:12:51 +0100
Subject: [PATCH 2/7] fix: don't write index column to csv file in
 input.pandas.dump_df.

---
 pyerrors/input/pandas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py
index b54c7617..b3a2d3ef 100644
--- a/pyerrors/input/pandas.py
+++ b/pyerrors/input/pandas.py
@@ -29,7 +29,7 @@ def dump_df(df, fname, gz=True):
     if not fname.endswith('.csv'):
         fname += '.csv'
 
-    out.to_csv(fname)
+    out.to_csv(fname, index=False)
     if gz is True:
         with open(fname, 'rb') as f_in, gzip.open(fname + ".gz", 'wb') as f_out:
             f_out.writelines(f_in)

From f980229d5cbb1acd14f293d7ffb6fc10bef2c49d Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 30 Jun 2022 14:14:59 +0100
Subject: [PATCH 3/7] build: pandas added as dependency.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0c00aad5..33bde5bc 100644
--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,7 @@ setup(name='pyerrors',
       license="MIT",
       packages=find_packages(),
       python_requires='>=3.6.0',
-      install_requires=['numpy>=1.16', 'autograd>=1.4', 'numdifftools', 'matplotlib>=3.3', 'scipy>=1', 'iminuit>=2', 'h5py>=3', 'lxml>=4', 'python-rapidjson>=1'],
+      install_requires=['numpy>=1.16', 'autograd>=1.4', 'numdifftools', 'matplotlib>=3.3', 'scipy>=1', 'iminuit>=2', 'h5py>=3', 'lxml>=4', 'python-rapidjson>=1', 'pandas>=1.1'],
       classifiers=[
           'Development Status :: 5 - Production/Stable',
           'Intended Audience :: Science/Research',

From feab699162abe12834f8492b023533eb6f892a4e Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 30 Jun 2022 14:27:10 +0100
Subject: [PATCH 4/7] tests: basic test for pandas DataFrame export and
 re-import added.

---
 tests/pandas_test.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 tests/pandas_test.py

diff --git a/tests/pandas_test.py b/tests/pandas_test.py
new file mode 100644
index 00000000..610f980f
--- /dev/null
+++ b/tests/pandas_test.py
@@ -0,0 +1,18 @@
+import numpy as np
+import pandas as pd
+import pyerrors as pe
+
+def test_df_export_import(tmp_path):
+    for gz in [True, False]:
+        my_df = pd.DataFrame([{"int": 1,
+                            "float": -0.01,
+                            "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
+                            "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}])
+
+        pe.input.pandas.dump_df(my_df, (tmp_path / 'df_output').as_posix(), gz=gz)
+        reconstructed_df = pe.input.pandas.load_df((tmp_path / 'df_output').as_posix(), gz=gz)
+        assert np.all(my_df == reconstructed_df)
+
+        pe.input.pandas.load_df((tmp_path / 'df_output.csv').as_posix(), gz=gz)
+
+

From c7c17256673519d016307e595b5c9819add1bfe1 Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 30 Jun 2022 14:33:14 +0100
Subject: [PATCH 5/7] tests: pandas io tests extended.

---
 tests/pandas_test.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/pandas_test.py b/tests/pandas_test.py
index 610f980f..d4833656 100644
--- a/tests/pandas_test.py
+++ b/tests/pandas_test.py
@@ -3,14 +3,15 @@ import pandas as pd
 import pyerrors as pe
 
 def test_df_export_import(tmp_path):
+    my_dict = {"int": 1,
+           "float": -0.01,
+           "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
+           "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
     for gz in [True, False]:
-        my_df = pd.DataFrame([{"int": 1,
-                            "float": -0.01,
-                            "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
-                            "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}])
+        my_df = pd.DataFrame([my_dict] * 10)
 
         pe.input.pandas.dump_df(my_df, (tmp_path / 'df_output').as_posix(), gz=gz)
-        reconstructed_df = pe.input.pandas.load_df((tmp_path / 'df_output').as_posix(), gz=gz)
+        reconstructed_df = pe.input.pandas.load_df((tmp_path / 'df_output').as_posix(), auto_gamma=True, gz=gz)
         assert np.all(my_df == reconstructed_df)
 
         pe.input.pandas.load_df((tmp_path / 'df_output.csv').as_posix(), gz=gz)

From 153cc795b882399066ec65cf08e8364fe0806186 Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 30 Jun 2022 15:26:31 +0100
Subject: [PATCH 6/7] feat: pandas DataFrames with Corr columns can now also be
 imported and exported.

---
 pyerrors/input/pandas.py |  5 +++--
 tests/pandas_test.py     | 11 +++++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py
index b3a2d3ef..67bd9bbd 100644
--- a/pyerrors/input/pandas.py
+++ b/pyerrors/input/pandas.py
@@ -2,6 +2,7 @@ import warnings
 import gzip
 import pandas as pd
 from ..obs import Obs
+from ..correlators import Corr
 from .json import create_json_string, import_json_string
 
 
@@ -23,7 +24,7 @@ def dump_df(df, fname, gz=True):
 
     out = df.copy()
     for column in out:
-        if isinstance(out[column][0], Obs):
+        if isinstance(out[column][0], (Obs, Corr)):
             out[column] = out[column].transform(lambda x: create_json_string(x, indent=0))
 
     if not fname.endswith('.csv'):
@@ -67,6 +68,6 @@ def load_df(fname, auto_gamma=False, gz=True):
             if re_import[column][0][:20] == '{"program":"pyerrors':
                 re_import[column] = re_import[column].transform(lambda x: import_json_string(x, verbose=False))
                 if auto_gamma is True:
-                    re_import[column].apply(Obs.gamma_method)
+                    re_import[column].apply(lambda x: x.gamma_method())
 
     return re_import
diff --git a/tests/pandas_test.py b/tests/pandas_test.py
index d4833656..658f4375 100644
--- a/tests/pandas_test.py
+++ b/tests/pandas_test.py
@@ -17,3 +17,14 @@ def test_df_export_import(tmp_path):
         pe.input.pandas.load_df((tmp_path / 'df_output.csv').as_posix(), gz=gz)
 
 
+def test_df_Corr(tmp_path):
+
+    my_corr = pe.Corr([pe.pseudo_Obs(-0.48, 0.04, "test"), pe.pseudo_Obs(-0.154, 0.03, "test")])
+
+    my_dict = {"int": 1,
+           "float": -0.01,
+           "Corr": my_corr}
+    my_df = pd.DataFrame([my_dict] * 5)
+
+    pe.input.pandas.dump_df(my_df, (tmp_path / 'df_output').as_posix())
+    reconstructed_df = pe.input.pandas.load_df((tmp_path / 'df_output').as_posix(), auto_gamma=True)

From c6ec11045c9f7864a9193996aab00b90e7bf57f8 Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 30 Jun 2022 15:49:40 +0100
Subject: [PATCH 7/7] fix: redundant export of not gzipped pandas Dataframe
 removed.

---
 pyerrors/input/pandas.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pyerrors/input/pandas.py b/pyerrors/input/pandas.py
index 67bd9bbd..caf3e0b6 100644
--- a/pyerrors/input/pandas.py
+++ b/pyerrors/input/pandas.py
@@ -30,10 +30,12 @@ def dump_df(df, fname, gz=True):
     if not fname.endswith('.csv'):
         fname += '.csv'
 
-    out.to_csv(fname, index=False)
     if gz is True:
-        with open(fname, 'rb') as f_in, gzip.open(fname + ".gz", 'wb') as f_out:
-            f_out.writelines(f_in)
+        if not fname.endswith('.gz'):
+            fname += '.gz'
+        out.to_csv(fname, index=False, compression='gzip')
+    else:
+        out.to_csv(fname, index=False)
 
 
 def load_df(fname, auto_gamma=False, gz=True):