Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
brivas
mdf_reader
Commits
4cc0113b
Commit
4cc0113b
authored
5 years ago
by
iregon
Browse files
Options
Download
Email Patches
Plain Diff
Removed support for inputs other than an external file
parent
6cf10acf
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
11 additions
and
34 deletions
+11
-34
properties.py
properties.py
+0
-1
read.py
read.py
+4
-9
reader/import_data.py
reader/import_data.py
+7
-24
No files found.
properties.py
View file @
4cc0113b
...
...
@@ -13,7 +13,6 @@ import pandas as pd
# Supported formats, sources and internal data models -------------------------
schema_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'schemas'
,
'lib'
)
supported_data_models
=
[
os
.
path
.
basename
(
x
).
split
(
"."
)[
0
]
for
x
in
glob
.
glob
(
schema_path
+
'/*/*.json'
)
if
os
.
path
.
basename
(
x
).
split
(
"."
)[
0
]
==
os
.
path
.
dirname
(
x
).
split
(
"/"
)[
-
1
]]
supported_sources
=
[
pd
.
io
.
parsers
.
TextFileReader
,
io
.
StringIO
]
# Data types ------------------------------------------------------------------
numpy_integers
=
[
'int8'
,
'int16'
,
'int32'
,
'int64'
,
'uint8'
,
'uint16'
,
'uint32'
,
'uint64'
]
...
...
This diff is collapsed.
Click to expand it.
read.py
View file @
4cc0113b
...
...
@@ -3,7 +3,7 @@
"""
Created on Tue Apr 30 09:38:17 2019
Reads source data (file
, pandas DataFrame or pd.io.parsers.TextFileReader
) to
Reads source data (file) to
a pandas DataFrame. The source data model needs to be input to the module as
a named model (included in the module) or as the path to a valid data model.
...
...
@@ -109,14 +109,9 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun
if
not
data_model
and
not
data_model_path
:
logging
.
error
(
'A valid data model name or path to data model must be provided'
)
return
if
not
isinstance
(
source
,
tuple
(
properties
.
supported_sources
)):
if
not
source
:
logging
.
error
(
'Data source is empty (first argument to read()) '
)
return
elif
not
os
.
path
.
isfile
(
source
):
logging
.
error
(
'Could not open data source file {}'
.
format
(
source
))
logging
.
info
(
'If input source was not a file: supported in-memory data sources are {}'
.
format
(
","
.
join
([
str
(
x
)
for
x
in
properties
.
supported_sources
])))
return
if
not
os
.
path
.
isfile
(
source
):
logging
.
error
(
'Can
\'
t find input data file {}'
.
format
(
source
))
return
if
not
validate_arg
(
'sections'
,
sections
,
list
):
return
if
not
validate_arg
(
'chunksize'
,
chunksize
,
int
):
...
...
This diff is collapsed.
Click to expand it.
reader/import_data.py
View file @
4cc0113b
...
...
@@ -4,14 +4,11 @@
Created on Fri Jan 10 13:17:43 2020
FUNCTION TO PREPARE SOURCE DATA TO WHAT GET_SECTIONS() EXPECTS:
AN ITERABLE WITH DATAFR
M
AES
AN ITERABLE WITH DATAFRA
M
ES
INPUT IS EITHER:
- pd.io.parsers.textfilereader
- io.StringIO
- file path
INPUT IS NOW ONLY A FILE PATH
OUTPUT IS AN ITERABLE, DEPENDING ON
SOURCE TYPE AND
CHUNKSIZE BEING SET:
OUTPUT IS AN ITERABLE, DEPENDING ON CHUNKSIZE BEING SET:
- a single dataframe in a list
- a pd.io.parsers.textfilereader
...
...
@@ -23,10 +20,6 @@ to be stripped
@author: iregon
DEV NOTES:
1) What this module is able to ingest needs to align with properties.supported_sources
2) Check io.StringIO input: why there, does it actually work as it is?
3) Check pd.io.parsers.textfilereader input: why there, does it actually work as it is?
OPTIONS IN OLD DEVELOPMENT:
...
...
@@ -49,22 +42,12 @@ import io
from
..
import
properties
def
to_iterable_df
(
source
,
skiprows
=
None
,
chunksize
=
None
):
TextParser
=
pd
.
read_fwf
(
source
,
widths
=
[
properties
.
MAX_FULL_REPORT_WIDTH
],
header
=
None
,
delimiter
=
"
\t
"
,
skiprows
=
skiprows
,
chunksize
=
chunksize
)
if
not
chunksize
:
TextParser
=
[
TextParser
]
return
TextParser
def
import_data
(
source
,
chunksize
=
None
,
skiprows
=
None
):
if
isinstance
(
source
,
pd
.
io
.
parsers
.
TextFileReader
):
return
source
elif
isinstance
(
source
,
io
.
StringIO
):
TextParser
=
to_iterable_df
(
source
,
skiprows
=
skiprows
,
chunksize
=
chunksize
)
return
TextParser
elif
os
.
path
.
isfile
(
source
):
TextParser
=
to_iterable_df
(
source
,
skiprows
=
skiprows
,
chunksize
=
chunksize
)
if
os
.
path
.
isfile
(
source
):
TextParser
=
pd
.
read_fwf
(
source
,
widths
=
[
properties
.
MAX_FULL_REPORT_WIDTH
],
header
=
None
,
delimiter
=
"
\t
"
,
skiprows
=
skiprows
,
chunksize
=
chunksize
)
if
not
chunksize
:
TextParser
=
[
TextParser
]
return
TextParser
else
:
print
(
'Error'
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment