From a507374821a85c82c23b141a77764f359d90f5ce Mon Sep 17 00:00:00 2001
From: perezgonzalez-irene <iregon@noc.ac.uk>
Date: Thu, 27 Feb 2020 13:40:11 +0000
Subject: [PATCH] int promotion to float on missing values now solved using
 pandas nullable integers

---
 read.py                 | 14 +++++---------
 reader/read_sections.py |  3 ---
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/read.py b/read.py
index 0c0e57e..a9167e3 100644
--- a/read.py
+++ b/read.py
@@ -79,17 +79,13 @@ def ERV(TextParser,read_sections_list, schema, code_tables_path):
         
         sections_df = get_sections.main(string_df, schema, read_sections_list)
 
-        # 2. Read elements from sections: along data chunks, resulting data types
-        # may vary if gaps, keep track of data types: add Intxx pandas classes rather than intxx to avoid this!
+        # 2. Read elements from sections
+        # In v1.0, resulting dtypes could vary between data chunks when a chunk
+        # had gaps (int promoted to float on missing values), so they were tracked.
+        # This is now solved by using pandas nullable integer dtypes (Intxx).
         # Sections are parsed in the same order as sections_df.columns
         
-        [data_df, valid_df, out_dtypesi ] = read_sections.main(sections_df, schema)
-        if i_chunk == 0:
-            out_dtypes = copy.deepcopy(out_dtypesi)
-
-        for k in out_dtypesi:
-            if out_dtypesi in properties.numpy_floats:
-                out_dtypes.update({ k:out_dtypesi.get(k) })
+        [data_df, valid_df, out_dtypes ] = read_sections.main(sections_df, schema)
         
         # 3. Validate data elements
         
diff --git a/reader/read_sections.py b/reader/read_sections.py
index 3bbda03..2cdfee9 100644
--- a/reader/read_sections.py
+++ b/reader/read_sections.py
@@ -68,7 +68,6 @@ def read_data(section_df,section_schema):
     section_valid = pd.DataFrame(index = section_df.index, columns = section_df.columns)
 
     for element in section_dtypes.keys():
-        print(element)
         missing = section_df[element].isna()
         if element in encoded:
             section_df[element] = decoders.get(section_encoding.get(element)).get(section_dtypes.get(element))(section_df[element])
@@ -141,10 +140,8 @@ def main(sections_df, schema):
             elements = [ x[1] for x in data_df.columns if x[0] == section ]
             if multiindex:
                 out_dtypes.update({ (section,i):properties.pandas_dtypes.get(section_schema['elements'][i].get('column_type')) for i in elements } )
-                out_dtypes.update({ (section,i):data_df[(section,i)].dtype.name for i in elements if data_df[(section,i)].dtype.name in properties.numpy_floats})
             else:
                 out_dtypes.update({ i:properties.pandas_dtypes.get(section_schema['elements'][i].get('column_type')) for i in elements } )
-                out_dtypes.update({ i:data_df[i].dtype.name for i in section_elements if data_df[i].dtype.name in properties.numpy_floats})
         else:
             if multiindex:
                     out_dtypes.update({ (section,section):'object' } )
-- 
GitLab