Commit 4cc0113b authored by iregon
Browse files

Removed support for inputs other than an external file

parent 6cf10acf
......@@ -13,7 +13,6 @@ import pandas as pd
# Supported formats, sources and internal data models -------------------------
schema_path = os.path.join(os.path.dirname(__file__),'schemas','lib')
supported_data_models = [ os.path.basename(x).split(".")[0] for x in glob.glob(schema_path + '/*/*.json') if os.path.basename(x).split(".")[0] == os.path.dirname(x).split("/")[-1]]
supported_sources = [pd.io.parsers.TextFileReader, io.StringIO]
# Data types ------------------------------------------------------------------
numpy_integers = ['int8','int16','int32','int64','uint8','uint16','uint32','uint64']
......
......@@ -3,7 +3,7 @@
"""
Created on Tue Apr 30 09:38:17 2019
Reads source data (file, pandas DataFrame or pd.io.parsers.TextFileReader) to
Reads source data (file) to
a pandas DataFrame. The source data model needs to be input to the module as
a named model (included in the module) or as the path to a valid data model.
......@@ -109,14 +109,9 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun
if not data_model and not data_model_path:
logging.error('A valid data model name or path to data model must be provided')
return
if not isinstance(source,tuple(properties.supported_sources)):
if not source:
logging.error('Data source is empty (first argument to read()) ')
return
elif not os.path.isfile(source):
logging.error('Could not open data source file {}'.format(source))
logging.info('If input source was not a file: supported in-memory data sources are {}'.format(",".join([ str(x) for x in properties.supported_sources])))
return
if not os.path.isfile(source):
logging.error('Can\'t find input data file {}'.format(source))
return
if not validate_arg('sections',sections,list):
return
if not validate_arg('chunksize',chunksize,int):
......
......@@ -4,14 +4,11 @@
Created on Fri Jan 10 13:17:43 2020
FUNCTION TO PREPARE SOURCE DATA TO WHAT GET_SECTIONS() EXPECTS:
AN ITERABLE WITH DATAFRMAES
AN ITERABLE WITH DATAFRAMES
INPUT IS EITHER:
- pd.io.parsers.textfilereader
- io.StringIO
- file path
INPUT IS NOW ONLY A FILE PATH
OUTPUT IS AN ITERABLE, DEPENDING ON SOURCE TYPE AND CHUNKSIZE BEING SET:
OUTPUT IS AN ITERABLE, DEPENDING ON CHUNKSIZE BEING SET:
- a single dataframe in a list
- a pd.io.parsers.textfilereader
......@@ -23,10 +20,6 @@ to be stripped
@author: iregon
DEV NOTES:
1) What this module is able to ingest needs to align with properties.supported_sources
2) Check io.StringIO input: why there, does it actually work as it is?
3) Check pd.io.parsers.textfilereader input: why there, does it actually work as it is?
OPTIONS IN OLD DEVELOPMENT:
......@@ -49,22 +42,12 @@ import io
from .. import properties
def to_iterable_df(source,skiprows = None, chunksize = None):
TextParser = pd.read_fwf(source,widths=[properties.MAX_FULL_REPORT_WIDTH],header = None, delimiter="\t", skiprows = skiprows, chunksize = chunksize)
if not chunksize:
TextParser = [TextParser]
return TextParser
def import_data(source,chunksize = None, skiprows = None):
if isinstance(source,pd.io.parsers.TextFileReader):
return source
elif isinstance(source, io.StringIO):
TextParser = to_iterable_df(source,skiprows = skiprows, chunksize = chunksize)
return TextParser
elif os.path.isfile(source):
TextParser = to_iterable_df(source,skiprows = skiprows, chunksize = chunksize)
if os.path.isfile(source):
TextParser = pd.read_fwf(source,widths=[properties.MAX_FULL_REPORT_WIDTH],header = None, delimiter="\t", skiprows = skiprows, chunksize = chunksize)
if not chunksize:
TextParser = [TextParser]
return TextParser
else:
print('Error')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment