From 96591324e42ebabb2c9e178dbf9ec07884857fb4 Mon Sep 17 00:00:00 2001
From: perezgonzalez-irene <iregon@noc.ac.uk>
Date: Wed, 12 Feb 2020 09:09:05 +0000
Subject: [PATCH] Change output dtypes to object when datetime

---
 read.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/read.py b/read.py
index de13d03..8e8ab44 100644
--- a/read.py
+++ b/read.py
@@ -73,11 +73,12 @@ def ERV(TextParser,read_sections_list, schema, code_tables_path):
     # This way it supports direct chunksize property inheritance if the input source was a pd.io.parsers.TextFileReader
     chunksize = TextParser.orig_options['chunksize'] if isinstance(TextParser,pd.io.parsers.TextFileReader) else None
     # 'datetime' is not a valid pandas dtype: Only on output (on reading) will be then converted (via parse_dates) to datetime64[ns] type,
-    # cannot specify 'datetime' (of any kind) here: would fail
+    # cannot specify 'datetime' (of any kind) here: it would fail, so change the dtype to 'object' and tell the date parser which column it is
     date_columns = [] # Needs to be the numeric index of the column, as seems not to be able to work with tupples....
     for i,element in enumerate(list(out_dtypes)):
         if out_dtypes.get(element) == 'datetime':
             date_columns.append(i)
+            out_dtypes.update({element:'object'})
 
     data = pd.read_csv(data_buffer,names = data_df.columns, chunksize = chunksize, dtype = out_dtypes, parse_dates = date_columns)
     valid = pd.read_csv(valid_buffer,names = data_df.columns, chunksize = chunksize)
-- 
GitLab