diff --git a/properties.py b/properties.py index 2ac823303e0426e727455f43655f0fc5a414260e..e17b27330f9abc79b96746d02994e915222547b9 100644 --- a/properties.py +++ b/properties.py @@ -13,7 +13,6 @@ import pandas as pd # Supported formats, sources and internal data models ------------------------- schema_path = os.path.join(os.path.dirname(__file__),'schemas','lib') supported_data_models = [ os.path.basename(x).split(".")[0] for x in glob.glob(schema_path + '/*/*.json') if os.path.basename(x).split(".")[0] == os.path.dirname(x).split("/")[-1]] -supported_sources = [pd.io.parsers.TextFileReader, io.StringIO] # Data types ------------------------------------------------------------------ numpy_integers = ['int8','int16','int32','int64','uint8','uint16','uint32','uint64'] diff --git a/read.py b/read.py index 8e8ab44248f213368ef95e2e55a4a36427eaf97e..7a1e940f76a3230af69ef3acd482e66f4bd3189d 100644 --- a/read.py +++ b/read.py @@ -3,7 +3,7 @@ """ Created on Tue Apr 30 09:38:17 2019 -Reads source data (file, pandas DataFrame or pd.io.parsers.TextFileReader) to +Reads source data (file) to a pandas DataFrame. The source data model needs to be input to the module as a named model (included in the module) or as the path to a valid data model. @@ -109,14 +109,9 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun if not data_model and not data_model_path: logging.error('A valid data model name or path to data model must be provided') return - if not isinstance(source,tuple(properties.supported_sources)): - if not source: - logging.error('Data source is empty (first argument to read()) ') - return - elif not os.path.isfile(source): - logging.error('Could not open data source file {}'.format(source)) - logging.info('If input source was not a file: supported in-memory data sources are {}'.format(",".join([ str(x) for x in properties.supported_sources]))) - return + if not os.path.isfile(source): + logging.error('Can\'t find input data file {}'.format(source)) + return if not validate_arg('sections',sections,list): return if not validate_arg('chunksize',chunksize,int): diff --git a/reader/import_data.py b/reader/import_data.py index b298e1993225f9bf6f2e9e0eaae1617f8843efdd..7ecf0c8cc899e48e472101f10f08042ebf9abc01 100644 --- a/reader/import_data.py +++ b/reader/import_data.py @@ -4,14 +4,11 @@ Created on Fri Jan 10 13:17:43 2020 FUNCTION TO PREPARE SOURCE DATA TO WHAT GET_SECTIONS() EXPECTS: - AN ITERABLE WITH DATAFRMAES + AN ITERABLE WITH DATAFRAMES -INPUT IS EITHER: - - pd.io.parsers.textfilereader - - io.StringIO - - file path +INPUT IS EITHER NOW ONLY A FILE PATH -OUTPUT IS AN ITERABLE, DEPENDING ON SOURCE TYPE AND CHUNKSIZE BEING SET: +OUTPUT IS AN ITERABLE, DEPENDING ON CHUNKSIZE BEING SET: - a single dataframe in a list - a pd.io.parsers.textfilereader @@ -23,10 +20,6 @@ to be stripped @author: iregon -DEV NOTES: -1) What this module is able to ingest needs to align with properties.supported_sources -2) Check io.StringIO input: why there, does it actually work as it is? -3) Check pd.io.parsers.textfilereader input: why there, does it actually work as it is? OPTIONS IN OLD DEVELOPMENT: @@ -49,22 +42,12 @@ import io from .. import properties -def to_iterable_df(source,skiprows = None, chunksize = None): - TextParser = pd.read_fwf(source,widths=[properties.MAX_FULL_REPORT_WIDTH],header = None, delimiter="\t", skiprows = skiprows, chunksize = chunksize) - if not chunksize: - TextParser = [TextParser] - return TextParser - - def import_data(source,chunksize = None, skiprows = None): - if isinstance(source,pd.io.parsers.TextFileReader): - return source - elif isinstance(source, io.StringIO): - TextParser = to_iterable_df(source,skiprows = skiprows, chunksize = chunksize) - return TextParser - elif os.path.isfile(source): - TextParser = to_iterable_df(source,skiprows = skiprows, chunksize = chunksize) + if os.path.isfile(source): + TextParser = pd.read_fwf(source,widths=[properties.MAX_FULL_REPORT_WIDTH],header = None, delimiter="\t", skiprows = skiprows, chunksize = chunksize) + if not chunksize: + TextParser = [TextParser] return TextParser else: print('Error')