Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
brivas
mdf_reader
Commits
c0251df1
Commit
c0251df1
authored
5 years ago
by
iregon
Browse files
Options
Download
Email Patches
Plain Diff
Main functions as main in reader modules
parent
d510abd4
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
34 additions
and
33 deletions
+34
-33
read.py
read.py
+6
-6
reader/get_sections.py
reader/get_sections.py
+1
-1
reader/import_data.py
reader/import_data.py
+1
-1
reader/read_sections.py
reader/read_sections.py
+26
-25
No files found.
read.py
View file @
c0251df1
...
...
@@ -31,9 +31,9 @@ from io import StringIO as StringIO
from
.data_models
import
schemas
from
.
import
properties
from
.common
import
pandas_TextParser_hdlr
from
.reader
import
import_data
from
.reader
import
get_sections
from
.
reader.read
_sections
import
main
as
read_sections
#
from .reader import import_data
#
from .reader import get_sections
from
mdf_
reader.read
er
import
import_data
,
get_sections
,
read_sections
from
.validator
import
validate
toolPath
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
...
...
@@ -77,13 +77,13 @@ def ERV(TextParser,read_sections_list, schema, code_tables_path):
# - requested NA sections as NaN columns
# - columns(sections) order as in read_sections_list
sections_df
=
get_sections
.
get_sections
(
string_df
,
schema
,
read_sections_list
)
sections_df
=
get_sections
.
main
(
string_df
,
schema
,
read_sections_list
)
# 2. Read elements from sections: across data chunks, the resulting data types
# may vary if there are gaps, so keep track of data types: add Intxx pandas classes rather than intxx to avoid this!
# Sections are parsed in the same order as sections_df.columns
[
data_df
,
valid_df
,
out_dtypesi
]
=
read_sections
(
sections_df
,
schema
)
[
data_df
,
valid_df
,
out_dtypesi
]
=
read_sections
.
main
(
sections_df
,
schema
)
if
i_chunk
==
0
:
out_dtypes
=
copy
.
deepcopy
(
out_dtypesi
)
...
...
@@ -271,7 +271,7 @@ def main(source, data_model = None, data_model_path = None, sections = None,chun
# 2.2 Homogenize input data into an iterable of dataframes:
# a list with a single dataframe or a pd.io.parsers.TextFileReader
logging
.
info
(
"Getting data string from source..."
)
TextParser
=
import_data
.
import_data
(
source
,
chunksize
=
chunksize
,
skiprows
=
skiprows
)
TextParser
=
import_data
.
main
(
source
,
chunksize
=
chunksize
,
skiprows
=
skiprows
)
# 2.3. Extract, read and validate data in same loop
logging
.
info
(
"Extracting and reading sections"
)
...
...
This diff is collapsed.
Click to expand it.
reader/get_sections.py
View file @
c0251df1
...
...
@@ -199,7 +199,7 @@ def extract_sections(string_df):
# ---------------------------------------------------------------------------
# MAIN
# ---------------------------------------------------------------------------
def
get_sections
(
string_df
,
schema
,
read_sections
):
def
main
(
string_df
,
schema
,
read_sections
):
global
sentinals
,
section_lens
,
sentinals_lens
global
parsing_order
# Proceed to split sections if more than one
...
...
This diff is collapsed.
Click to expand it.
reader/import_data.py
View file @
c0251df1
...
...
@@ -41,7 +41,7 @@ import os
from
..
import
properties
def
import_data
(
source
,
chunksize
=
None
,
skiprows
=
None
):
def
main
(
source
,
chunksize
=
None
,
skiprows
=
None
):
if
os
.
path
.
isfile
(
source
):
TextParser
=
pd
.
read_fwf
(
source
,
widths
=
[
properties
.
MAX_FULL_REPORT_WIDTH
],
header
=
None
,
delimiter
=
"
\t
"
,
skiprows
=
skiprows
,
chunksize
=
chunksize
)
...
...
This diff is collapsed.
Click to expand it.
reader/read_sections.py
View file @
c0251df1
...
...
@@ -68,6 +68,7 @@ def read_data(section_df,section_schema):
section_valid
=
pd
.
DataFrame
(
index
=
section_df
.
index
,
columns
=
section_df
.
columns
)
for
element
in
section_dtypes
.
keys
():
print
(
element
)
missing
=
section_df
[
element
].
isna
()
if
element
in
encoded
:
section_df
[
element
]
=
decoders
.
get
(
section_encoding
.
get
(
element
)).
get
(
section_dtypes
.
get
(
element
))(
section_df
[
element
])
...
...
@@ -79,7 +80,7 @@ def read_data(section_df,section_schema):
return
section_df
,
section_valid
def
read_sections
(
sections_df
,
schema
):
def
main
(
sections_df
,
schema
):
multiindex
=
True
if
len
(
sections_df
.
columns
)
>
1
or
sections_df
.
columns
[
0
]
!=
properties
.
dummy_level
else
False
data_df
=
pd
.
DataFrame
(
index
=
sections_df
.
index
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment