Fixed chunking

d4f355e4 · iregon · c28e5b5e · d4f355e4 · d4f355e4
Commit d4f355e4 authored 5 years ago by iregon
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 27 additions and 10 deletions

read.py read.py +25 -8

reader/import_data.py reader/import_data.py +2 -2

No files found.
--- a/read.py
+++ b/read.py
@@ -85,16 +85,33 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun

    # 6. Output to files if requested
    if out_path:
+        enlisted = False
+        if not isinstance(data,pd.io.parsers.TextFileReader):
+            data = [data]
+            valid = [valid]
+            enlisted = True
        logging.info('WRITING DATA TO FILES IN: {}'.format(out_path))
-        cols = [ x for x in data ]
-        if isinstance(cols[0],tuple):
-            header = [":".join(x) for x in cols]
-            out_atts_json = { ":".join(x):out_atts.get(x) for x in out_atts.keys() }
+
+        for i, (data_df,valid_df) in enumerate(zip(data,valid)):
+            header = False
+            mode = 'a'
+            if i == 0:
+                mode = 'w'
+                cols = [ x for x in data_df ]
+                if isinstance(cols[0],tuple):
+                    header = [":".join(x) for x in cols]
+                    out_atts_json = { ":".join(x):out_atts.get(x) for x in out_atts.keys() }
+                else:
+                    header = cols
+                    out_atts_json = out_atts
+            data_df.to_csv(os.path.join(out_path,'data.csv'), header = header, mode = mode, encoding = 'utf-8',index = True, index_label='index')
+            valid_df.to_csv(os.path.join(out_path,'valid_mask.csv'), header = header, mode = mode, encoding = 'utf-8',index = True, index_label='index')  
+        if enlisted:
+            data = data[0]
+            valid = valid[0]
        else:
-            header = cols
-            out_atts_json = out_atts
-        data.to_csv(os.path.join(out_path,'data.csv'), header = header, encoding = 'utf-8',index = True, index_label='index')
-        valid.to_csv(os.path.join(out_path,'valid_mask.csv'), header = header, encoding = 'utf-8',index = True, index_label='index')
+            data = pandas_TextParser_hdlr.restore(data.f,data.orig_options)
+            valid = pandas_TextParser_hdlr.restore(valid.f,valid.orig_options)
        with open(os.path.join(out_path,'atts.json'),'w') as fileObj:
            json.dump(out_atts_json,fileObj,indent=4)


--- a/reader/import_data.py
+++ b/reader/import_data.py
@@ -61,10 +61,10 @@ def import_data(source,chunksize = None, skiprows = None):
    if isinstance(source,pd.io.parsers.TextFileReader):
        return source
    elif isinstance(source, io.StringIO):
-        TextParser = to_iterable_df(source,skiprows = None, chunksize = None)
+        TextParser = to_iterable_df(source,skiprows = None, chunksize = chunksize)
        return TextParser
    elif os.path.isfile(source):
-        TextParser = to_iterable_df(source,skiprows = None, chunksize = None)
+        TextParser = to_iterable_df(source,skiprows = None, chunksize = chunksize)
        return TextParser
    else:
        print('Error')