From e2ecea16866976fe067b8ab5879f6e89896d956d Mon Sep 17 00:00:00 2001 From: perezgonzalez-irene <iregon@noc.ac.uk> Date: Thu, 27 Feb 2020 12:34:52 +0000 Subject: [PATCH] Work internally only with nullable pandas integers --- data_models/schemas.py | 5 +++++ properties.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/data_models/schemas.py b/data_models/schemas.py index 8776d5d..cd5fe77 100644 --- a/data_models/schemas.py +++ b/data_models/schemas.py @@ -115,6 +115,11 @@ def read_schema(schema_name = None, ext_schema_path = None): if not schema['sections'][section]['header'].get('field_layout'): delimiter = schema['sections'][section]['header'].get('delimiter') schema['sections'][section]['header']['field_layout'] = 'delimited' if delimiter else 'fixed_width' + for element in schema['sections'][section]['elements'].keys(): + if schema['sections'][section]['elements'][element].get('column_type') in properties.numpy_integers: + np_integer = schema['sections'][section]['elements'][element].get('column_type') + pd_integer = properties.pandas_nan_integers.get(np_integer) + schema['sections'][section]['elements'][element].update({'column_type':pd_integer}) return schema else: logging.error('Multile reports per line data model: not yet supported') diff --git a/properties.py b/properties.py index c768893..d9037c2 100644 --- a/properties.py +++ b/properties.py @@ -17,8 +17,14 @@ supported_data_models = [ os.path.basename(x).split(".")[0] for x in glob.glob(s # Data types ------------------------------------------------------------------ numpy_integers = ['int8','int16','int32','int64','uint8','uint16','uint32','uint64'] numpy_floats = ['float16','float32','float64'] + +pandas_nan_integers = {'int8':'Int8','int16':'Int16','int32':'Int32', + 'int64':'Int64','uint8':'UInt8','uint16':'UInt16', + 'uint32':'UInt32','uint64':'UInt64'} + numeric_types = numpy_integers.copy() numeric_types.extend(numpy_floats) +numeric_types.extend(pandas_nan_integers.values()) object_types = ['str','object','key','datetime'] -- GitLab