Commit 97a1ae67 authored by iregon
Browse files

Cleaned

parent 515c10eb
...@@ -34,6 +34,7 @@ toolPath = os.path.dirname(os.path.abspath(__file__)) ...@@ -34,6 +34,7 @@ toolPath = os.path.dirname(os.path.abspath(__file__))
schema_lib = os.path.join(toolPath,'schemas','lib') schema_lib = os.path.join(toolPath,'schemas','lib')
def ERV(TextParser,read_sections_list, schema, code_tables_path): def ERV(TextParser,read_sections_list, schema, code_tables_path):
data_buffer = StringIO() data_buffer = StringIO()
valid_buffer = StringIO() valid_buffer = StringIO()
...@@ -69,7 +70,8 @@ def ERV(TextParser,read_sections_list, schema, code_tables_path): ...@@ -69,7 +70,8 @@ def ERV(TextParser,read_sections_list, schema, code_tables_path):
# (source is either pd.io.parsers.TextFileReader or a file with chunksize specified on input): # (source is either pd.io.parsers.TextFileReader or a file with chunksize specified on input):
# This way it supports direct chunksize property inheritance if the input source was a pd.io.parsers.TextFileReader # This way it supports direct chunksize property inheritance if the input source was a pd.io.parsers.TextFileReader
chunksize = TextParser.orig_options['chunksize'] if isinstance(TextParser,pd.io.parsers.TextFileReader) else None chunksize = TextParser.orig_options['chunksize'] if isinstance(TextParser,pd.io.parsers.TextFileReader) else None
# 'datetime' is not a valid pandas dtype: Only on output (on reading) will be then converted (via parse_dates) to datetime64[ns] type, cannot specify 'datetime' (of any kind) here: will fail # 'datetime' is not a valid pandas dtype: Only on output (on reading) will be then converted (via parse_dates) to datetime64[ns] type,
# cannot specify 'datetime' (of any kind) here: would fail
date_columns = [] # Needs to be the numeric index of the column, as seems not to be able to work with tupples.... date_columns = [] # Needs to be the numeric index of the column, as seems not to be able to work with tupples....
for i,element in enumerate(list(out_dtypes)): for i,element in enumerate(list(out_dtypes)):
if out_dtypes.get(element) == 'datetime': if out_dtypes.get(element) == 'datetime':
...@@ -104,7 +106,7 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun ...@@ -104,7 +106,7 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun
return return
elif not os.path.isfile(source): elif not os.path.isfile(source):
logging.error('Could not open data source file {}'.format(source)) logging.error('Could not open data source file {}'.format(source))
logging.info('Otherwise, supported in-memory data sources are {}'.format(",".join([ str(x) for x in properties.supported_sources]))) logging.info('If input source was not a file: supported in-memory data sources are {}'.format(",".join([ str(x) for x in properties.supported_sources])))
return return
if not validate_arg('sections',sections,list): if not validate_arg('sections',sections,list):
return return
...@@ -124,6 +126,7 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun ...@@ -124,6 +126,7 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun
else: else:
model_path = data_model_path model_path = data_model_path
code_tables_path = os.path.join(model_path,'code_tables') code_tables_path = os.path.join(model_path,'code_tables')
# For future use: some work already done in schema reading # For future use: some work already done in schema reading
if schema['header'].get('multiple_reports_per_line'): if schema['header'].get('multiple_reports_per_line'):
logging.error('File format not yet supported') logging.error('File format not yet supported')
...@@ -133,8 +136,7 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun ...@@ -133,8 +136,7 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun
imodel = data_model if data_model else data_model_path imodel = data_model if data_model else data_model_path
logging.info("EXTRACTING DATA FROM MODEL: {}".format(imodel)) logging.info("EXTRACTING DATA FROM MODEL: {}".format(imodel))
# 2.1. Define output # 2.1. Subset data model sections to requested sections
# Subset data model sections to requested sections
parsing_order = schema['header'].get('parsing_order') parsing_order = schema['header'].get('parsing_order')
if not sections: if not sections:
sections = [ x.get(y) for x in parsing_order for y in x ] sections = [ x.get(y) for x in parsing_order for y in x ]
...@@ -152,7 +154,7 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun ...@@ -152,7 +154,7 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun
data,valid = ERV(TextParser,read_sections_list, schema, code_tables_path) data,valid = ERV(TextParser,read_sections_list, schema, code_tables_path)
# 3. Create out data attributes # 3. Create out data attributes
logging.info("CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL(S)") logging.info("CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL")
data_columns = [ x for x in data ] if isinstance(data,pd.DataFrame) else data.orig_options['names'] data_columns = [ x for x in data ] if isinstance(data,pd.DataFrame) else data.orig_options['names']
out_atts = schemas.df_schema(data_columns, schema) out_atts = schemas.df_schema(data_columns, schema)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment