From d4f355e43228c7e45075f7108b1e1553e763b40b Mon Sep 17 00:00:00 2001
From: perezgonzalez-irene <iregon@noc.ac.uk>
Date: Wed, 22 Jan 2020 09:39:40 +0000
Subject: [PATCH] Fixed chunking: write output files chunk by chunk and pass chunksize through in import_data

---
 read.py               | 33 +++++++++++++++++++++++++--------
 reader/import_data.py |  4 ++--
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/read.py b/read.py
index 0af38af..01bc9c4 100644
--- a/read.py
+++ b/read.py
@@ -85,16 +85,33 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun
 
     # 6. Output to files if requested
     if out_path:
+        enlisted = False
+        if not isinstance(data,pd.io.parsers.TextFileReader):
+            data = [data]
+            valid = [valid]
+            enlisted = True
         logging.info('WRITING DATA TO FILES IN: {}'.format(out_path))
-        cols = [ x for x in data ]
-        if isinstance(cols[0],tuple):
-            header = [":".join(x) for x in cols]
-            out_atts_json = { ":".join(x):out_atts.get(x) for x in out_atts.keys() }
+
+        for i, (data_df,valid_df) in enumerate(zip(data,valid)):
+            header = False
+            mode = 'a'
+            if i == 0:
+                mode = 'w'
+                cols = [ x for x in data_df ]
+                if isinstance(cols[0],tuple):
+                    header = [":".join(x) for x in cols]
+                    out_atts_json = { ":".join(x):out_atts.get(x) for x in out_atts.keys() }
+                else:
+                    header = cols
+                    out_atts_json = out_atts
+            data_df.to_csv(os.path.join(out_path,'data.csv'), header = header, mode = mode, encoding = 'utf-8',index = True, index_label='index')
+            valid_df.to_csv(os.path.join(out_path,'valid_mask.csv'), header = header, mode = mode, encoding = 'utf-8',index = True, index_label='index')  
+        if enlisted:
+            data = data[0]
+            valid = valid[0]
         else:
-            header = cols
-            out_atts_json = out_atts
-        data.to_csv(os.path.join(out_path,'data.csv'), header = header, encoding = 'utf-8',index = True, index_label='index')
-        valid.to_csv(os.path.join(out_path,'valid_mask.csv'), header = header, encoding = 'utf-8',index = True, index_label='index')
+            data = pandas_TextParser_hdlr.restore(data.f,data.orig_options)
+            valid = pandas_TextParser_hdlr.restore(valid.f,valid.orig_options)
         with open(os.path.join(out_path,'atts.json'),'w') as fileObj:
             json.dump(out_atts_json,fileObj,indent=4)
 
diff --git a/reader/import_data.py b/reader/import_data.py
index 26b0a98..a0a3e3a 100644
--- a/reader/import_data.py
+++ b/reader/import_data.py
@@ -61,10 +61,10 @@ def import_data(source,chunksize = None, skiprows = None):
     if isinstance(source,pd.io.parsers.TextFileReader):
         return source
     elif isinstance(source, io.StringIO):
-        TextParser = to_iterable_df(source,skiprows = None, chunksize = None)
+        TextParser = to_iterable_df(source,skiprows = None, chunksize = chunksize)
         return TextParser
     elif os.path.isfile(source):
-        TextParser = to_iterable_df(source,skiprows = None, chunksize = None)
+        TextParser = to_iterable_df(source,skiprows = None, chunksize = chunksize)
         return TextParser
     else:
         print('Error')
-- 
GitLab