diff --git a/data_models/schemas.py b/data_models/schemas.py index 8776d5d3264a8feab0d3cd7ffb931047285e5247..cd5fe77098b2b3cfb24c23b7e2f641273f6ab500 100644 --- a/data_models/schemas.py +++ b/data_models/schemas.py @@ -115,6 +115,11 @@ def read_schema(schema_name = None, ext_schema_path = None): if not schema['sections'][section]['header'].get('field_layout'): delimiter = schema['sections'][section]['header'].get('delimiter') schema['sections'][section]['header']['field_layout'] = 'delimited' if delimiter else 'fixed_width' + for element in schema['sections'][section]['elements'].keys(): + if schema['sections'][section]['elements'][element].get('column_type') in properties.numpy_integers: + np_integer = schema['sections'][section]['elements'][element].get('column_type') + pd_integer = properties.pandas_nan_integers.get(np_integer) + schema['sections'][section]['elements'][element].update({'column_type':pd_integer}) return schema else: logging.error('Multile reports per line data model: not yet supported') diff --git a/properties.py b/properties.py index c768893ff3aafac8b52dc0b3d1eef71861ebf9cf..d9037c2f415619d42700e385e7866793610be9ec 100644 --- a/properties.py +++ b/properties.py @@ -17,8 +17,14 @@ supported_data_models = [ os.path.basename(x).split(".")[0] for x in glob.glob(s # Data types ------------------------------------------------------------------ numpy_integers = ['int8','int16','int32','int64','uint8','uint16','uint32','uint64'] numpy_floats = ['float16','float32','float64'] + +pandas_nan_integers = {'int8':'Int8','int16':'Int16','int32':'Int32', + 'int64':'Int64','uint8':'UInt8','uint16':'UInt16', + 'uint32':'UInt32','uint64':'UInt64'} + numeric_types = numpy_integers.copy() numeric_types.extend(numpy_floats) +numeric_types.extend(pandas_nan_integers.values()) object_types = ['str','object','key','datetime']