From dc6a92f7bd0095629c08817e2045d0f8779add1d Mon Sep 17 00:00:00 2001 From: perezgonzalez-irene <iregon@noc.ac.uk> Date: Mon, 24 Feb 2020 09:45:15 +0000 Subject: [PATCH] New comments --- schemas/schemas.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/schemas/schemas.py b/schemas/schemas.py index 680e548..da0bc38 100644 --- a/schemas/schemas.py +++ b/schemas/schemas.py @@ -3,7 +3,14 @@ """ Created on Thu Sep 13 15:14:51 2018 -Read data file format json schema to dictionary +.read_schema: read data model json schema to dictionary + +.df_schema: create a simple version of the schema reflecting only relevant attributes +of the data elements after being read into a dataframe + +.templates: get list of available schema file templates + +.copy_templates: get a copy of a schema file template """ @@ -29,7 +36,8 @@ templates_path = os.path.join(schema_lib,'templates','schemas') def read_schema(schema_name = None, ext_schema_path = None): - + + # 1. Validate input if schema_name: if schema_name not in properties.supported_data_models: print('ERROR: \n\tInput data model "{}" not supported. See mdf_reader.properties.supported_data_models for supported data models'.format(schema_name)) @@ -44,24 +52,27 @@ def read_schema(schema_name = None, ext_schema_path = None): if not os.path.isfile(schema_file): logging.error('Can\'t find input schema file {}'.format(schema_file)) return + + # 2. Get schema with open(schema_file) as fileObj: schema = json.load(fileObj) + + # 3. 
Expand schema + # Fill in the initial schema to "full complexity": to homogenize schema, + # explicitly add info that is implicit to given situations/data models - # --------------------------------------------------------------------------- - # FILL IN THE INITIAL SCHEMA TO "FULL COMPLEXITY" TO HOMOGEINIZE - # EXPLICITY ADD INFO THAT IS IMPLICIT TO GIVEN SITUATIONS/SUBFORMATS - # --------------------------------------------------------------------------- # One report per record: make sure later changes are reflected in MULTIPLE # REPORTS PER RECORD case below if we ever use it! - # Currently only suppoerted case: one report per record (line) - # First check for no header case: sequential sections + # Currently only supported case: one report per record (line) + # 3.1. First check for no header case: sequential sections if not schema['header']: if not schema['sections']: logging.error('\'sections\' block needs to be defined in a schema with no header. Error in data model schema file {}'.format(schema_file)) return schema['header'] = dict() + if not schema['header'].get('multiple_reports_per_line'): - # Make no section formats be 1 section format + # 3.2. Make no section formats be internally treated as 1 section format if not schema.get('sections'): if not schema.get('elements'): logging.error('Data elements not defined in data model schema file {} under key \'elements\' '.format(schema_file)) @@ -73,10 +84,10 @@ def read_schema(schema_name = None, ext_schema_path = None): schema['header'].pop('delimiter',None) schema['sections'][properties.dummy_level]['header']['field_layout'] = schema['header'].get('field_layout') schema['header'].pop('field_layout',None) - # Make parsing order explicit + # 3.3. 
Make parsing order explicit if not schema['header'].get('parsing_order'):# assume sequential schema['header']['parsing_order'] = [{'s':list(schema['sections'].keys())}] - # Make disable_read and field_layout explicit: this is ruled by delimiter or length being set, + # 3.4. Make disable_read and field_layout explicit: this is ruled by delimiter being set, # unless explicitly set for section in schema['sections'].keys(): if schema['sections'][section]['header'].get('disable_read'): @@ -138,7 +149,6 @@ def df_schema(df_columns, schema): return flat_schema - def templates(): schemas = glob.glob(os.path.join(templates_path,'*.json')) return [ os.path.basename(x).split(".")[0] for x in schemas ] -- GitLab