Removed support for inputs other than an external file

4cc0113b · iregon · 6cf10acf · 4cc0113b · 4cc0113b · 4cc0113b
Commit 4cc0113b authored 5 years ago by iregon
Hide whitespace changes
Inline Side-by-side

Showing with 11 additions and 34 deletions

properties.py properties.py +0 -1

read.py read.py +4 -9

reader/import_data.py reader/import_data.py +7 -24

No files found.
--- a/properties.py
+++ b/properties.py
@@ -13,7 +13,6 @@ import pandas as pd
 # Supported formats, sources and internal data models -------------------------
 schema_path = os.path.join(os.path.dirname(__file__),'schemas','lib')
 supported_data_models = [ os.path.basename(x).split(".")[0] for x in glob.glob(schema_path + '/*/*.json') if os.path.basename(x).split(".")[0] == os.path.dirname(x).split("/")[-1]]
-supported_sources = [pd.io.parsers.TextFileReader, io.StringIO]

 # Data types ------------------------------------------------------------------
 numpy_integers = ['int8','int16','int32','int64','uint8','uint16','uint32','uint64']

--- a/read.py
+++ b/read.py
@@ -3,7 +3,7 @@
 """
 Created on Tue Apr 30 09:38:17 2019

-Reads source data (file, pandas DataFrame or pd.io.parsers.TextFileReader) to
+Reads source data (file) to
 a pandas DataFrame. The source data model needs to be input to the module as
 a named model (included in the module) or as the path to a valid data model.

@@ -109,14 +109,9 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun
    if not data_model and not data_model_path:
        logging.error('A valid data model name or path to data model must be provided')
        return
-    if not isinstance(source,tuple(properties.supported_sources)):
-        if not source:
-            logging.error('Data source is empty (first argument to read()) ')
-            return
-        elif not os.path.isfile(source):
-            logging.error('Could not open data source file {}'.format(source))
-            logging.info('If input source was not a file: supported in-memory data sources are {}'.format(",".join([ str(x) for x in properties.supported_sources])))
-            return
+    if not os.path.isfile(source):
+        logging.error('Can\'t find input data file {}'.format(source))
+        return
    if not validate_arg('sections',sections,list):
        return
    if not validate_arg('chunksize',chunksize,int):

--- a/reader/import_data.py
+++ b/reader/import_data.py
@@ -4,14 +4,11 @@
 Created on Fri Jan 10 13:17:43 2020

 FUNCTION TO PREPARE SOURCE DATA TO WHAT GET_SECTIONS() EXPECTS:
-    AN ITERABLE WITH DATAFRMAES
+    AN ITERABLE WITH DATAFRAMES

-INPUT IS EITHER:
-    - pd.io.parsers.textfilereader
-    - io.StringIO
-    - file path
+INPUT IS NOW ONLY A FILE PATH

-OUTPUT IS AN ITERABLE, DEPENDING ON SOURCE TYPE AND CHUNKSIZE BEING SET:
+OUTPUT IS AN ITERABLE, DEPENDING ON CHUNKSIZE BEING SET:
    - a single dataframe in a list
    - a pd.io.parsers.textfilereader

@@ -23,10 +20,6 @@ to be stripped

 @author: iregon

-DEV NOTES:
-1) What this module is able to ingest needs to align with properties.supported_sources
-2) Check io.StringIO input: why there, does it actually work as it is?
-3) Check pd.io.parsers.textfilereader input: why there, does it actually work as it is?


 OPTIONS IN OLD DEVELOPMENT:
@@ -49,22 +42,12 @@ import io

 from .. import properties

-def to_iterable_df(source,skiprows = None, chunksize = None):
-    TextParser = pd.read_fwf(source,widths=[properties.MAX_FULL_REPORT_WIDTH],header = None, delimiter="\t", skiprows = skiprows, chunksize = chunksize)
-    if not chunksize:
-        TextParser = [TextParser]
-    return TextParser
-
-
 def import_data(source,chunksize = None, skiprows = None):

-    if isinstance(source,pd.io.parsers.TextFileReader):
-        return source
-    elif isinstance(source, io.StringIO):
-        TextParser = to_iterable_df(source,skiprows = skiprows, chunksize = chunksize)
-        return TextParser
-    elif os.path.isfile(source):
-        TextParser = to_iterable_df(source,skiprows = skiprows, chunksize = chunksize)
+    if os.path.isfile(source):
+        TextParser = pd.read_fwf(source,widths=[properties.MAX_FULL_REPORT_WIDTH],header = None, delimiter="\t", skiprows = skiprows, chunksize = chunksize)
+        if not chunksize:
+            TextParser = [TextParser]
        return TextParser
    else:
        print('Error')