Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
brivas
mdf_reader
Commits
dc6a92f7
Commit
dc6a92f7
authored
5 years ago
by
iregon
Browse files
Options
Download
Email Patches
Plain Diff
New comments
parent
5578e606
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
22 additions
and
12 deletions
+22
-12
schemas/schemas.py
schemas/schemas.py
+22
-12
No files found.
schemas/schemas.py
View file @
dc6a92f7
...
...
@@ -3,7 +3,14 @@
"""
Created on Thu Sep 13 15:14:51 2018
Read data file format json schema to dictionary
.read_schema: read data model json schema to dictionary
.df_schema: create a simple version of the schema reflecting only relevant attributes
of the data elements after being read into a dataframe
.templates: get list of available schema file templates
.copy_templates: get a copy of a schema file template
"""
...
...
@@ -29,7 +36,8 @@ templates_path = os.path.join(schema_lib,'templates','schemas')
def
read_schema
(
schema_name
=
None
,
ext_schema_path
=
None
):
# 1. Validate input
if
schema_name
:
if
schema_name
not
in
properties
.
supported_data_models
:
print
(
'ERROR:
\n\t
Input data model "{}" not supported. See mdf_reader.properties.supported_data_models for supported data models'
.
format
(
schema_name
))
...
...
@@ -44,24 +52,27 @@ def read_schema(schema_name = None, ext_schema_path = None):
if
not
os
.
path
.
isfile
(
schema_file
):
logging
.
error
(
'Can
\'
t find input schema file {}'
.
format
(
schema_file
))
return
# 2. Get schema
with
open
(
schema_file
)
as
fileObj
:
schema
=
json
.
load
(
fileObj
)
# 3. Expand schema
# Fill in the initial schema to "full complexity": to homogenize the schema,
# explicitly add info that is implicit to given situations/data models
# ---------------------------------------------------------------------------
# FILL IN THE INITIAL SCHEMA TO "FULL COMPLEXITY" TO HOMOGEINIZE
# EXPLICITY ADD INFO THAT IS IMPLICIT TO GIVEN SITUATIONS/SUBFORMATS
# ---------------------------------------------------------------------------
# One report per record: make sure later changes are reflected in MULTIPLE
# REPORTS PER RECORD case below if we ever use it!
# Currently only suppo
e
rted case: one report per record (line)
# First check for no header case: sequential sections
# Currently only supported case: one report per record (line)
#
3.1.
First check for no header case: sequential sections
if
not
schema
[
'header'
]:
if
not
schema
[
'sections'
]:
logging
.
error
(
'
\'
sections
\'
block needs to be defined in a schema with no header. Error in data model schema file {}'
.
format
(
schema_file
))
return
schema
[
'header'
]
=
dict
()
if
not
schema
[
'header'
].
get
(
'multiple_reports_per_line'
):
# Make no section formats be 1 section format
#
3.2.
Make no section formats be
internally treated as
1 section format
if
not
schema
.
get
(
'sections'
):
if
not
schema
.
get
(
'elements'
):
logging
.
error
(
'Data elements not defined in data model schema file {} under key
\'
elements
\'
'
.
format
(
schema_file
))
...
...
@@ -73,10 +84,10 @@ def read_schema(schema_name = None, ext_schema_path = None):
schema
[
'header'
].
pop
(
'delimiter'
,
None
)
schema
[
'sections'
][
properties
.
dummy_level
][
'header'
][
'field_layout'
]
=
schema
[
'header'
].
get
(
'field_layout'
)
schema
[
'header'
].
pop
(
'field_layout'
,
None
)
# Make parsing order explicit
#
3.3.
Make parsing order explicit
if
not
schema
[
'header'
].
get
(
'parsing_order'
):
# assume sequential
schema
[
'header'
][
'parsing_order'
]
=
[{
's'
:
list
(
schema
[
'sections'
].
keys
())}]
# Make disable_read and field_layout explicit: this is ruled by delimiter
or length
being set,
#
3.4.
Make disable_read and field_layout explicit: this is ruled by delimiter being set,
# unless explicitly set
for
section
in
schema
[
'sections'
].
keys
():
if
schema
[
'sections'
][
section
][
'header'
].
get
(
'disable_read'
):
...
...
@@ -138,7 +149,6 @@ def df_schema(df_columns, schema):
return
flat_schema
def templates():
    """Return the names of the available schema file templates.

    Looks for every ``*.json`` file directly under ``templates_path`` and
    returns, for each one, its base file name truncated at the first dot
    (so the ``.json`` extension is dropped).

    Returns
    -------
    list of str
        One name per template file found, in the order reported by
        ``glob.glob``.
    """
    pattern = os.path.join(templates_path, '*.json')
    names = []
    for json_file in glob.glob(pattern):
        file_name = os.path.basename(json_file)
        # Keep only the part of the file name before its first dot
        names.append(file_name.split(".")[0])
    return names
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment