From 96591324e42ebabb2c9e178dbf9ec07884857fb4 Mon Sep 17 00:00:00 2001 From: perezgonzalez-irene <iregon@noc.ac.uk> Date: Wed, 12 Feb 2020 09:09:05 +0000 Subject: [PATCH] Change output dtypes to object when datetime --- read.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/read.py b/read.py index de13d03..8e8ab44 100644 --- a/read.py +++ b/read.py @@ -73,11 +73,12 @@ def ERV(TextParser,read_sections_list, schema, code_tables_path): # This way it supports direct chunksize property inheritance if the input source was a pd.io.parsers.TextFileReader chunksize = TextParser.orig_options['chunksize'] if isinstance(TextParser,pd.io.parsers.TextFileReader) else None # 'datetime' is not a valid pandas dtype: Only on output (on reading) will be then converted (via parse_dates) to datetime64[ns] type, - # cannot specify 'datetime' (of any kind) here: would fail + # cannot specify 'datetime' (of any kind) here: would fail, need to change to 'object' and tell the date parser where it is date_columns = [] # Needs to be the numeric index of the column, as seems not to be able to work with tupples.... for i,element in enumerate(list(out_dtypes)): if out_dtypes.get(element) == 'datetime': date_columns.append(i) + out_dtypes.update({element:'object'}) data = pd.read_csv(data_buffer,names = data_df.columns, chunksize = chunksize, dtype = out_dtypes, parse_dates = date_columns) valid = pd.read_csv(valid_buffer,names = data_df.columns, chunksize = chunksize) -- GitLab