Commit a5073748 authored by iregon's avatar iregon
Browse files

Integer promotion to float on missing values is now solved by using pandas nullable integer dtypes

parent e2ecea16
......@@ -79,17 +79,13 @@ def ERV(TextParser,read_sections_list, schema, code_tables_path):
sections_df = get_sections.main(string_df, schema, read_sections_list)
# 2. Read elements from sections: along data chunks, resulting data types
# may vary if gaps, keep track of data types: add Intxx pandas classes rather than intxx to avoid this!
# 2. Read elements from sections
# Across data chunks, the resulting data types
# may vary if there are gaps; keep track of data dtypes: v1.0
# This has now been solved by working with Intxx pandas dtypes (nullable integers)
# Sections are parsed in the same order as sections_df.columns
[data_df, valid_df, out_dtypesi ] = read_sections.main(sections_df, schema)
if i_chunk == 0:
out_dtypes = copy.deepcopy(out_dtypesi)
for k in out_dtypesi:
if out_dtypesi in properties.numpy_floats:
out_dtypes.update({ k:out_dtypesi.get(k) })
[data_df, valid_df, out_dtypes ] = read_sections.main(sections_df, schema)
# 3. Validate data elements
......
......@@ -68,7 +68,6 @@ def read_data(section_df,section_schema):
section_valid = pd.DataFrame(index = section_df.index, columns = section_df.columns)
for element in section_dtypes.keys():
print(element)
missing = section_df[element].isna()
if element in encoded:
section_df[element] = decoders.get(section_encoding.get(element)).get(section_dtypes.get(element))(section_df[element])
......@@ -141,10 +140,8 @@ def main(sections_df, schema):
elements = [ x[1] for x in data_df.columns if x[0] == section ]
if multiindex:
out_dtypes.update({ (section,i):properties.pandas_dtypes.get(section_schema['elements'][i].get('column_type')) for i in elements } )
out_dtypes.update({ (section,i):data_df[(section,i)].dtype.name for i in elements if data_df[(section,i)].dtype.name in properties.numpy_floats})
else:
out_dtypes.update({ i:properties.pandas_dtypes.get(section_schema['elements'][i].get('column_type')) for i in elements } )
out_dtypes.update({ i:data_df[i].dtype.name for i in section_elements if data_df[i].dtype.name in properties.numpy_floats})
else:
if multiindex:
out_dtypes.update({ (section,section):'object' } )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment