Commit c0251df1 authored by iregon's avatar iregon
Browse files

Main functions as main in reader modules

parent d510abd4
......@@ -31,9 +31,9 @@ from io import StringIO as StringIO
from .data_models import schemas
from . import properties
from .common import pandas_TextParser_hdlr
from .reader import import_data
from .reader import get_sections
from .reader.read_sections import main as read_sections
#from .reader import import_data
#from .reader import get_sections
from mdf_reader.reader import import_data, get_sections, read_sections
from .validator import validate
toolPath = os.path.dirname(os.path.abspath(__file__))
......@@ -77,13 +77,13 @@ def ERV(TextParser,read_sections_list, schema, code_tables_path):
# - requested NA sections as NaN columns
# - columns(sections) order as in read_sections_list
sections_df = get_sections.get_sections(string_df, schema, read_sections_list)
sections_df = get_sections.main(string_df, schema, read_sections_list)
# 2. Read elements from sections: along data chunks, resulting data types
# may vary if gaps, keep track of data types: add Intxx pandas classes rather than intxx to avoid this!
# Sections are parsed in the same order as sections_df.columns
[data_df, valid_df, out_dtypesi ] = read_sections(sections_df, schema)
[data_df, valid_df, out_dtypesi ] = read_sections.main(sections_df, schema)
if i_chunk == 0:
out_dtypes = copy.deepcopy(out_dtypesi)
......@@ -271,7 +271,7 @@ def main(source, data_model = None, data_model_path = None, sections = None,chun
# 2.2 Homogeneize input data to an iterable with dataframes:
# a list with a single dataframe or a pd.io.parsers.TextFileReader
logging.info("Getting data string from source...")
TextParser = import_data.import_data(source, chunksize = chunksize, skiprows = skiprows)
TextParser = import_data.main(source, chunksize = chunksize, skiprows = skiprows)
# 2.3. Extract, read and validate data in same loop
logging.info("Extracting and reading sections")
......
......@@ -199,7 +199,7 @@ def extract_sections(string_df):
# ---------------------------------------------------------------------------
# MAIN
# ---------------------------------------------------------------------------
def get_sections(string_df, schema, read_sections):
def main(string_df, schema, read_sections):
global sentinals, section_lens, sentinals_lens
global parsing_order
# Proceed to split sections if more than one
......
......@@ -41,7 +41,7 @@ import os
from .. import properties
def import_data(source,chunksize = None, skiprows = None):
def main(source,chunksize = None, skiprows = None):
if os.path.isfile(source):
TextParser = pd.read_fwf(source,widths=[properties.MAX_FULL_REPORT_WIDTH],header = None, delimiter="\t", skiprows = skiprows, chunksize = chunksize)
......
......@@ -68,6 +68,7 @@ def read_data(section_df,section_schema):
section_valid = pd.DataFrame(index = section_df.index, columns = section_df.columns)
for element in section_dtypes.keys():
print(element)
missing = section_df[element].isna()
if element in encoded:
section_df[element] = decoders.get(section_encoding.get(element)).get(section_dtypes.get(element))(section_df[element])
......@@ -79,7 +80,7 @@ def read_data(section_df,section_schema):
return section_df,section_valid
def read_sections(sections_df, schema):
def main(sections_df, schema):
multiindex = True if len(sections_df.columns) > 1 or sections_df.columns[0] != properties.dummy_level else False
data_df = pd.DataFrame(index = sections_df.index)
......
Markdown is supported
Attach a file by drag & drop or click to upload. 0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment