From 0383a95bc437bea2b2cb0f890b701337974a347c Mon Sep 17 00:00:00 2001
From: Dan Jones <danjon@noc.ac.uk>
Date: Tue, 3 Sep 2024 11:50:18 +0100
Subject: [PATCH] refactor: commit cache of remote refs

Only fetch remote refs when absent from the cache or on request.
This stops the runtime from resolving the live refs on startup.
---
 generate_schema_config.py                     | 59 ++++++++++++++++---
 remotes/README.md                             | 13 ++++
 remotes/geojson.org/schema/Feature.json       |  1 +
 .../geojson.org/schema/FeatureCollection.json |  1 +
 remotes/geojson.org/schema/LineString.json    |  1 +
 .../geojson.org/schema/MultiLineString.json   |  1 +
 remotes/geojson.org/schema/MultiPoint.json    |  1 +
 remotes/geojson.org/schema/MultiPolygon.json  |  1 +
 remotes/geojson.org/schema/Point.json         |  1 +
 remotes/geojson.org/schema/Polygon.json       |  1 +
 10 files changed, 71 insertions(+), 9 deletions(-)
 create mode 100644 remotes/README.md
 create mode 100644 remotes/geojson.org/schema/Feature.json
 create mode 100644 remotes/geojson.org/schema/FeatureCollection.json
 create mode 100644 remotes/geojson.org/schema/LineString.json
 create mode 100644 remotes/geojson.org/schema/MultiLineString.json
 create mode 100644 remotes/geojson.org/schema/MultiPoint.json
 create mode 100644 remotes/geojson.org/schema/MultiPolygon.json
 create mode 100644 remotes/geojson.org/schema/Point.json
 create mode 100644 remotes/geojson.org/schema/Polygon.json

diff --git a/generate_schema_config.py b/generate_schema_config.py
index cb3b238..09687e3 100644
--- a/generate_schema_config.py
+++ b/generate_schema_config.py
@@ -19,6 +19,7 @@ import json
 import os
 import re
 import requests
+from urllib.parse import urlparse
 
 
 # Enable running on domain sub-path
@@ -31,7 +32,10 @@ FLASK_PORT = os.getenv("FLASK_PORT", 5000)
 FLASK_DEBUG = os.getenv("FLASK_DEBUG", "False").lower() in ("true", "1", "t")
 
 
-def get_swagger_config():
+def get_swagger_config(reload=False):
+    if reload:
+        print("Reload specified: Ignoring cached refs")
+        
     swagger_config = {
         "openapi": "3.0.2",
         "swagger_ui": True,
@@ -115,7 +119,7 @@ def get_swagger_config():
             }
         },
     }
-    import_remote_refs(swagger_config)
+    import_remote_refs(swagger_config, reload)
     return swagger_config
 
 
@@ -191,7 +195,30 @@ def downgrade_schema_30x_compatible(schema):
                 downgrade_schema_30x_compatible(propConfig["items"])
 
 
-def inject_schema(schema, remote_ref):
+def get_remote_ref_cache_path(remote_ref):
+    """Map a remote ref URL to its cache path, remotes/<hostname><path>."""
+    return "remotes/{0.hostname}{0.path}".format(urlparse(remote_ref))
+
+
+def get_cached_ref(remote_ref):
+    """Return the locally cached schema for remote_ref, or None if absent."""
+    cache_file = get_remote_ref_cache_path(remote_ref)
+    if not os.path.exists(cache_file):
+        return None
+    print(f"loading cached ref: {remote_ref}")
+    with open(cache_file, 'r') as handle:
+        return json.load(handle)
+
+
+def store_cached_ref(remote_ref, definition):  # save a resolved schema as JSON
+    ref_path = get_remote_ref_cache_path(remote_ref)
+    if definition is not None:  # skip None so a cached copy never becomes null
+        os.makedirs(os.path.dirname(ref_path), mode=0o775, exist_ok=True)
+        with open(ref_path, 'w') as ref_file:
+            json.dump(definition, ref_file)
+
+
+def inject_schema(schema, remote_ref, reload=False):
     """
     Given a parent schema and a remote ref
 
@@ -204,8 +231,14 @@ def inject_schema(schema, remote_ref):
     """
     local_name = rename_ref(remote_ref)
     local_ref = f"#/components/schemas/{local_name}"
-    ref_schema = resolve_ref(remote_ref)
-    downgrade_schema_30x_compatible(ref_schema)
+    # get schema from cache if present
+    ref_schema = None if reload else get_cached_ref(remote_ref)
+    if not ref_schema:
+        print(f"ref not cached: {remote_ref}") 
+        ref_schema = resolve_ref(remote_ref)
+        downgrade_schema_30x_compatible(ref_schema)
+        store_cached_ref(remote_ref, ref_schema)
+
     if ref_schema is not None:
         nested_replace(schema, "$ref", remote_ref, local_ref)
         schema["components"]["schemas"][local_name] = ref_schema
@@ -214,7 +247,7 @@ def inject_schema(schema, remote_ref):
         return False
 
 
-def import_remote_refs(swagger_config):
+def import_remote_refs(swagger_config, reload=False):
     """
     inject the following remote refs into the schema
     and replace the remote refs with local refs
@@ -235,7 +268,7 @@ def import_remote_refs(swagger_config):
         "https://geojson.org/schema/Polygon.json",
     ]
 
-    return all([inject_schema(swagger_config, ref) for ref in ref_imports])
+    return all([inject_schema(swagger_config, ref, reload) for ref in ref_imports])
 
 
 def configure_flask(swagger_config):
@@ -339,6 +372,14 @@ def get_options():
         help="Save output to schema file",
         default=False,
     )
+    parser.add_argument(
+        "-r",
+        "--reload",
+        dest="reload_schemas",
+        action="store_true",
+        help="Overwrite local copies of remote reference schemas",
+        default=False,
+    )
     parser.add_argument("filename", nargs="?", default="project/soar/swagger.json")
     args = parser.parse_args()
     config = vars(args)
@@ -349,11 +390,11 @@ def get_options():
 
 
 if __name__ == "__main__":
-    swagger_config = get_swagger_config()
-
     # Parse script args
     config = get_options()
 
+    swagger_config = get_swagger_config(config.get('reload_schemas'))
+
     # Output compiled schema
     if config.get("output_file"):
         write_schema(swagger_config, config.get("filename"))
diff --git a/remotes/README.md b/remotes/README.md
new file mode 100644
index 0000000..ecfffd4
--- /dev/null
+++ b/remotes/README.md
@@ -0,0 +1,13 @@
+# Local store for remote refs 
+
+Remote refs should not be retrieved live, for reasons of reliability and
+change control.
+
+The schema retains the references to the remotes.
+
+The actual schemas are stored and committed locally.
+
+This means if there are breaking changes you can decide how and when to
+move to the new definitions.
+
+Validation then runs against a collection of local refs.
\ No newline at end of file
diff --git a/remotes/geojson.org/schema/Feature.json b/remotes/geojson.org/schema/Feature.json
new file mode 100644
index 0000000..13e3a55
--- /dev/null
+++ b/remotes/geojson.org/schema/Feature.json
@@ -0,0 +1 @@
+{"title": "GeoJSON Feature", "type": "object", "required": ["type", "properties", "geometry"], "properties": {"type": {"type": "string", "enum": ["Feature"]}, "id": {"oneOf": [{"type": "number"}, {"type": "string"}]}, "properties": {"oneOf": [{"type": "object"}]}, "geometry": {"oneOf": [{"title": "GeoJSON Point", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["Point"]}, "coordinates": {"type": "array", "minItems": 2, "items": {"type": "number"}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON LineString", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["LineString"]}, "coordinates": {"type": "array", "minItems": 2, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON Polygon", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["Polygon"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 4, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON MultiPoint", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiPoint"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON MultiLineString", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiLineString"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 2, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}, "bbox": {"type": "array", "minItems": 4, "items": 
{"type": "number"}}}}, {"title": "GeoJSON MultiPolygon", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiPolygon"]}, "coordinates": {"type": "array", "items": {"type": "array", "items": {"type": "array", "minItems": 4, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON GeometryCollection", "type": "object", "required": ["type", "geometries"], "properties": {"type": {"type": "string", "enum": ["GeometryCollection"]}, "geometries": {"type": "array", "items": {"oneOf": [{"title": "GeoJSON Point", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["Point"]}, "coordinates": {"type": "array", "minItems": 2, "items": {"type": "number"}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON LineString", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["LineString"]}, "coordinates": {"type": "array", "minItems": 2, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON Polygon", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["Polygon"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 4, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON MultiPoint", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiPoint"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, 
{"title": "GeoJSON MultiLineString", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiLineString"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 2, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON MultiPolygon", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiPolygon"]}, "coordinates": {"type": "array", "items": {"type": "array", "items": {"type": "array", "minItems": 4, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}]}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}]}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}
\ No newline at end of file
diff --git a/remotes/geojson.org/schema/FeatureCollection.json b/remotes/geojson.org/schema/FeatureCollection.json
new file mode 100644
index 0000000..ecdfee1
--- /dev/null
+++ b/remotes/geojson.org/schema/FeatureCollection.json
@@ -0,0 +1 @@
+{"title": "GeoJSON FeatureCollection", "type": "object", "required": ["type", "features"], "properties": {"type": {"type": "string", "enum": ["FeatureCollection"]}, "features": {"type": "array", "items": {"title": "GeoJSON Feature", "type": "object", "required": ["type", "properties", "geometry"], "properties": {"type": {"type": "string", "enum": ["Feature"]}, "id": {"oneOf": [{"type": "number"}, {"type": "string"}]}, "properties": {"oneOf": [{"type": "object"}]}, "geometry": {"oneOf": [{"title": "GeoJSON Point", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["Point"]}, "coordinates": {"type": "array", "minItems": 2, "items": {"type": "number"}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON LineString", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["LineString"]}, "coordinates": {"type": "array", "minItems": 2, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON Polygon", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["Polygon"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 4, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON MultiPoint", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiPoint"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON MultiLineString", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": 
["MultiLineString"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 2, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON MultiPolygon", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiPolygon"]}, "coordinates": {"type": "array", "items": {"type": "array", "items": {"type": "array", "minItems": 4, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON GeometryCollection", "type": "object", "required": ["type", "geometries"], "properties": {"type": {"type": "string", "enum": ["GeometryCollection"]}, "geometries": {"type": "array", "items": {"oneOf": [{"title": "GeoJSON Point", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["Point"]}, "coordinates": {"type": "array", "minItems": 2, "items": {"type": "number"}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON LineString", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["LineString"]}, "coordinates": {"type": "array", "minItems": 2, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON Polygon", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["Polygon"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 4, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON MultiPoint", "type": "object", "required": ["type", "coordinates"], "properties": {"type": 
{"type": "string", "enum": ["MultiPoint"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON MultiLineString", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiLineString"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 2, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}, {"title": "GeoJSON MultiPolygon", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiPolygon"]}, "coordinates": {"type": "array", "items": {"type": "array", "items": {"type": "array", "minItems": 4, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}]}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}]}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}
\ No newline at end of file
diff --git a/remotes/geojson.org/schema/LineString.json b/remotes/geojson.org/schema/LineString.json
new file mode 100644
index 0000000..a573e44
--- /dev/null
+++ b/remotes/geojson.org/schema/LineString.json
@@ -0,0 +1 @@
+{"title": "GeoJSON LineString", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["LineString"]}, "coordinates": {"type": "array", "minItems": 2, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}
\ No newline at end of file
diff --git a/remotes/geojson.org/schema/MultiLineString.json b/remotes/geojson.org/schema/MultiLineString.json
new file mode 100644
index 0000000..b459b30
--- /dev/null
+++ b/remotes/geojson.org/schema/MultiLineString.json
@@ -0,0 +1 @@
+{"title": "GeoJSON MultiLineString", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiLineString"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 2, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}
\ No newline at end of file
diff --git a/remotes/geojson.org/schema/MultiPoint.json b/remotes/geojson.org/schema/MultiPoint.json
new file mode 100644
index 0000000..8520dec
--- /dev/null
+++ b/remotes/geojson.org/schema/MultiPoint.json
@@ -0,0 +1 @@
+{"title": "GeoJSON MultiPoint", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiPoint"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}
\ No newline at end of file
diff --git a/remotes/geojson.org/schema/MultiPolygon.json b/remotes/geojson.org/schema/MultiPolygon.json
new file mode 100644
index 0000000..4369fef
--- /dev/null
+++ b/remotes/geojson.org/schema/MultiPolygon.json
@@ -0,0 +1 @@
+{"title": "GeoJSON MultiPolygon", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["MultiPolygon"]}, "coordinates": {"type": "array", "items": {"type": "array", "items": {"type": "array", "minItems": 4, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}
\ No newline at end of file
diff --git a/remotes/geojson.org/schema/Point.json b/remotes/geojson.org/schema/Point.json
new file mode 100644
index 0000000..8c94619
--- /dev/null
+++ b/remotes/geojson.org/schema/Point.json
@@ -0,0 +1 @@
+{"title": "GeoJSON Point", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["Point"]}, "coordinates": {"type": "array", "minItems": 2, "items": {"type": "number"}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}
\ No newline at end of file
diff --git a/remotes/geojson.org/schema/Polygon.json b/remotes/geojson.org/schema/Polygon.json
new file mode 100644
index 0000000..dedbdca
--- /dev/null
+++ b/remotes/geojson.org/schema/Polygon.json
@@ -0,0 +1 @@
+{"title": "GeoJSON Polygon", "type": "object", "required": ["type", "coordinates"], "properties": {"type": {"type": "string", "enum": ["Polygon"]}, "coordinates": {"type": "array", "items": {"type": "array", "minItems": 4, "items": {"type": "array", "minItems": 2, "items": {"type": "number"}}}}, "bbox": {"type": "array", "minItems": 4, "items": {"type": "number"}}}}
\ No newline at end of file
-- 
GitLab