Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
brivas
mdf_reader
Commits
d4f355e4
Commit
d4f355e4
authored
5 years ago
by
iregon
Browse files
Options
Download
Email Patches
Plain Diff
Fixed chunking
parent
c28e5b5e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
10 deletions
+27
-10
read.py
read.py
+25
-8
reader/import_data.py
reader/import_data.py
+2
-2
No files found.
read.py
View file @
d4f355e4
...
...
@@ -85,16 +85,33 @@ def read(source, data_model = None, data_model_path = None, sections = None,chun
# 6. Output to files if requested
if
out_path
:
enlisted
=
False
if
not
isinstance
(
data
,
pd
.
io
.
parsers
.
TextFileReader
):
data
=
[
data
]
valid
=
[
valid
]
enlisted
=
True
logging
.
info
(
'WRITING DATA TO FILES IN: {}'
.
format
(
out_path
))
cols
=
[
x
for
x
in
data
]
if
isinstance
(
cols
[
0
],
tuple
):
header
=
[
":"
.
join
(
x
)
for
x
in
cols
]
out_atts_json
=
{
":"
.
join
(
x
):
out_atts
.
get
(
x
)
for
x
in
out_atts
.
keys
()
}
for
i
,
(
data_df
,
valid_df
)
in
enumerate
(
zip
(
data
,
valid
)):
header
=
False
mode
=
'a'
if
i
==
0
:
mode
=
'w'
cols
=
[
x
for
x
in
data_df
]
if
isinstance
(
cols
[
0
],
tuple
):
header
=
[
":"
.
join
(
x
)
for
x
in
cols
]
out_atts_json
=
{
":"
.
join
(
x
):
out_atts
.
get
(
x
)
for
x
in
out_atts
.
keys
()
}
else
:
header
=
cols
out_atts_json
=
out_atts
data_df
.
to_csv
(
os
.
path
.
join
(
out_path
,
'data.csv'
),
header
=
header
,
mode
=
mode
,
encoding
=
'utf-8'
,
index
=
True
,
index_label
=
'index'
)
valid_df
.
to_csv
(
os
.
path
.
join
(
out_path
,
'valid_mask.csv'
),
header
=
header
,
mode
=
mode
,
encoding
=
'utf-8'
,
index
=
True
,
index_label
=
'index'
)
if
enlisted
:
data
=
data
[
0
]
valid
=
valid
[
0
]
else
:
header
=
cols
out_atts_json
=
out_atts
data
.
to_csv
(
os
.
path
.
join
(
out_path
,
'data.csv'
),
header
=
header
,
encoding
=
'utf-8'
,
index
=
True
,
index_label
=
'index'
)
valid
.
to_csv
(
os
.
path
.
join
(
out_path
,
'valid_mask.csv'
),
header
=
header
,
encoding
=
'utf-8'
,
index
=
True
,
index_label
=
'index'
)
data
=
pandas_TextParser_hdlr
.
restore
(
data
.
f
,
data
.
orig_options
)
valid
=
pandas_TextParser_hdlr
.
restore
(
valid
.
f
,
valid
.
orig_options
)
with
open
(
os
.
path
.
join
(
out_path
,
'atts.json'
),
'w'
)
as
fileObj
:
json
.
dump
(
out_atts_json
,
fileObj
,
indent
=
4
)
...
...
This diff is collapsed.
Click to expand it.
reader/import_data.py
View file @
d4f355e4
...
...
@@ -61,10 +61,10 @@ def import_data(source,chunksize = None, skiprows = None):
if
isinstance
(
source
,
pd
.
io
.
parsers
.
TextFileReader
):
return
source
elif
isinstance
(
source
,
io
.
StringIO
):
TextParser
=
to_iterable_df
(
source
,
skiprows
=
None
,
chunksize
=
None
)
TextParser
=
to_iterable_df
(
source
,
skiprows
=
None
,
chunksize
=
chunksize
)
return
TextParser
elif
os
.
path
.
isfile
(
source
):
TextParser
=
to_iterable_df
(
source
,
skiprows
=
None
,
chunksize
=
None
)
TextParser
=
to_iterable_df
(
source
,
skiprows
=
None
,
chunksize
=
chunksize
)
return
TextParser
else
:
print
(
'Error'
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment