From dc6a92f7bd0095629c08817e2045d0f8779add1d Mon Sep 17 00:00:00 2001
From: perezgonzalez-irene <iregon@noc.ac.uk>
Date: Mon, 24 Feb 2020 09:45:15 +0000
Subject: [PATCH] New comments

---
 schemas/schemas.py | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/schemas/schemas.py b/schemas/schemas.py
index 680e548..da0bc38 100644
--- a/schemas/schemas.py
+++ b/schemas/schemas.py
@@ -3,7 +3,14 @@
 """
 Created on Thu Sep 13 15:14:51 2018
 
-Read data file format json schema to dictionary
+.read_schema: read data model json schema to dictionary
+
+.df_schema: create a simple version of the schema reflecting only relevant attributes
+of the data elements after being read into a dataframe
+
+.templates: get list of available schema file templates
+
+.copy_templates: get a copy of a schema file template
 
 """
 
@@ -29,7 +36,8 @@ templates_path = os.path.join(schema_lib,'templates','schemas')
 
 
 def read_schema(schema_name = None, ext_schema_path = None):
-
+    
+    # 1. Validate input
     if schema_name:
         if schema_name not in properties.supported_data_models:
             print('ERROR: \n\tInput data model "{}" not supported. See mdf_reader.properties.supported_data_models for supported data models'.format(schema_name))
@@ -44,24 +52,27 @@ def read_schema(schema_name = None, ext_schema_path = None):
     if not os.path.isfile(schema_file):
         logging.error('Can\'t find input schema file {}'.format(schema_file))
         return
+    
+    # 2. Get schema
     with open(schema_file) as fileObj:
         schema = json.load(fileObj)
+        
+    # 3. Expand schema
+    # Fill in the initial schema to "full complexity": to homogenize the schema,
+    # explicitly add info that is implicit in given situations/data models
 
-    #   ---------------------------------------------------------------------------
-    #   FILL IN THE INITIAL SCHEMA TO "FULL COMPLEXITY" TO HOMOGEINIZE
-    #   EXPLICITY ADD INFO THAT IS IMPLICIT TO GIVEN SITUATIONS/SUBFORMATS
-    #   ---------------------------------------------------------------------------
     # One report per record: make sure later changes are reflected in MULTIPLE
     # REPORTS PER RECORD case below if we ever use it!
-    # Currently only suppoerted case: one report per record (line)
-    # First check for no header case: sequential sections
+    # Currently only supported case: one report per record (line)
+    # 3.1. First check for no header case: sequential sections
     if not schema['header']:
         if not schema['sections']:
             logging.error('\'sections\' block needs to be defined in a schema with no header. Error in data model schema file {}'.format(schema_file))
             return
         schema['header'] = dict()
+        
     if not schema['header'].get('multiple_reports_per_line'):
-        # Make no section formats be 1 section format
+        # 3.2. Treat formats with no sections internally as single-section formats
         if not schema.get('sections'):
             if not schema.get('elements'):
                 logging.error('Data elements not defined in data model schema file {} under key \'elements\' '.format(schema_file))
@@ -73,10 +84,10 @@ def read_schema(schema_name = None, ext_schema_path = None):
             schema['header'].pop('delimiter',None)
             schema['sections'][properties.dummy_level]['header']['field_layout'] = schema['header'].get('field_layout')
             schema['header'].pop('field_layout',None)
-        # Make parsing order explicit
+        # 3.3. Make parsing order explicit
         if not schema['header'].get('parsing_order'):# assume sequential
             schema['header']['parsing_order'] = [{'s':list(schema['sections'].keys())}]
-        # Make disable_read and field_layout explicit: this is ruled by delimiter or length being set,
+        # 3.4. Make disable_read and field_layout explicit: this is ruled by delimiter being set,
         # unless explicitly set
         for section in schema['sections'].keys():
             if schema['sections'][section]['header'].get('disable_read'):
@@ -138,7 +149,6 @@ def df_schema(df_columns, schema):
 
     return flat_schema
 
-
 def templates():
     schemas = glob.glob(os.path.join(templates_path,'*.json'))
     return [ os.path.basename(x).split(".")[0] for x in schemas ]
-- 
GitLab