diff --git a/docs/.gitignore b/docs/.gitignore
index a7084b1553e6aea5154c0ac53cda87d2d8d77e5e..e87b171c2d8d1044ff08d891350e97c55a7f2f6c 100644
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -1,3 +1,3 @@
-notebooks/.ipynb_checkpoints/
-!.gitignore
-!User_manual.docx
+*.swp
+/_build
+/doctrees
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..d4bb2cbb9eddb1bb1b4f366623044af8e4830919
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation.
+#
+# Every goal is delegated to sphinx-build's "make mode" (-M).  SPHINXOPTS and
+# SPHINXBUILD can be overridden from the command line or the environment;
+# SOURCEDIR and BUILDDIR from the command line only.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     := .
+BUILDDIR      := _build
+
+.PHONY: help Makefile
+
+# "help" stays the first real target so that a bare "make" prints the help.
+help:
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+# Catch-all: any other goal name is handed to Sphinx unchanged as the builder
+# to run.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/_static/images/elements.png b/docs/_static/images/elements.png
new file mode 100644
index 0000000000000000000000000000000000000000..581126b12d82193dafdf613ffa7063021db57e0b
Binary files /dev/null and b/docs/_static/images/elements.png differ
diff --git a/docs/_static/images/fig1.png b/docs/_static/images/fig1.png
new file mode 100644
index 0000000000000000000000000000000000000000..433494b8d4fbf9d3eb8b9ebc2d76ee202dbdba5c
Binary files /dev/null and b/docs/_static/images/fig1.png differ
diff --git a/docs/_static/images/logos_c3s/LOGO_2020_-_NOC_1_COLOUR.png b/docs/_static/images/logos_c3s/LOGO_2020_-_NOC_1_COLOUR.png
new file mode 100644
index 0000000000000000000000000000000000000000..df2e606f50341e9574068865acefa37db257d46b
Binary files /dev/null and b/docs/_static/images/logos_c3s/LOGO_2020_-_NOC_1_COLOUR.png differ
diff --git a/docs/_static/images/logos_c3s/copernicus.png b/docs/_static/images/logos_c3s/copernicus.png
new file mode 100644
index 0000000000000000000000000000000000000000..411537f894078bf843f81c8d56ca2f2671349f44
Binary files /dev/null and b/docs/_static/images/logos_c3s/copernicus.png differ
diff --git a/docs/_static/images/logos_c3s/icoadsLogo.png b/docs/_static/images/logos_c3s/icoadsLogo.png
new file mode 100644
index 0000000000000000000000000000000000000000..7f636576a8a45bcb901cd4231f5cd971882984c6
Binary files /dev/null and b/docs/_static/images/logos_c3s/icoadsLogo.png differ
diff --git a/docs/_static/images/logos_c3s/logo_c3s-392x154.png b/docs/_static/images/logos_c3s/logo_c3s-392x154.png
new file mode 100644
index 0000000000000000000000000000000000000000..353576b874ff19c66b8fcf79d2cfdc53022be281
Binary files /dev/null and b/docs/_static/images/logos_c3s/logo_c3s-392x154.png differ
diff --git a/docs/_static/images/mdf_reader_diagram.svg b/docs/_static/images/mdf_reader_diagram.svg
new file mode 100644
index 0000000000000000000000000000000000000000..63eb4a1f45a9d60452057c609358b8c30872ffeb
--- /dev/null
+++ b/docs/_static/images/mdf_reader_diagram.svg
@@ -0,0 +1 @@
+<svg id="mermaid-1620374813353" width="100%" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" height="524.193359375" style="max-width: 852.703125px;" viewBox="0 0 852.703125 524.193359375"><style>#mermaid-1620374813353{font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:16px;fill:#333;}#mermaid-1620374813353 .error-icon{fill:#552222;}#mermaid-1620374813353 .error-text{fill:#552222;stroke:#552222;}#mermaid-1620374813353 .edge-thickness-normal{stroke-width:2px;}#mermaid-1620374813353 .edge-thickness-thick{stroke-width:3.5px;}#mermaid-1620374813353 .edge-pattern-solid{stroke-dasharray:0;}#mermaid-1620374813353 .edge-pattern-dashed{stroke-dasharray:3;}#mermaid-1620374813353 .edge-pattern-dotted{stroke-dasharray:2;}#mermaid-1620374813353 .marker{fill:#333333;stroke:#333333;}#mermaid-1620374813353 .marker.cross{stroke:#333333;}#mermaid-1620374813353 svg{font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:16px;}#mermaid-1620374813353 .label{font-family:"trebuchet ms",verdana,arial,sans-serif;color:#333;}#mermaid-1620374813353 .cluster-label text{fill:#333;}#mermaid-1620374813353 .cluster-label span{color:#333;}#mermaid-1620374813353 .label text,#mermaid-1620374813353 span{fill:#333;color:#333;}#mermaid-1620374813353 .node rect,#mermaid-1620374813353 .node circle,#mermaid-1620374813353 .node ellipse,#mermaid-1620374813353 .node polygon,#mermaid-1620374813353 .node path{fill:#ECECFF;stroke:#9370DB;stroke-width:1px;}#mermaid-1620374813353 .node .label{text-align:center;}#mermaid-1620374813353 .node.clickable{cursor:pointer;}#mermaid-1620374813353 .arrowheadPath{fill:#333333;}#mermaid-1620374813353 .edgePath .path{stroke:#333333;stroke-width:1.5px;}#mermaid-1620374813353 .flowchart-link{stroke:#333333;fill:none;}#mermaid-1620374813353 .edgeLabel{background-color:#e8e8e8;text-align:center;}#mermaid-1620374813353 .edgeLabel rect{opacity:0.5;background-color:#e8e8e8;fill:#e8e8e8;}#mermaid-1620374813353 .cluster 
rect{fill:#ffffde;stroke:#aaaa33;stroke-width:1px;}#mermaid-1620374813353 .cluster text{fill:#333;}#mermaid-1620374813353 .cluster span{color:#333;}#mermaid-1620374813353 div.mermaidTooltip{position:absolute;text-align:center;max-width:200px;padding:2px;font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:12px;background:hsl(80,100%,96.2745098039%);border:1px solid #aaaa33;border-radius:2px;pointer-events:none;z-index:100;}#mermaid-1620374813353:root{--mermaid-font-family:"trebuchet ms",verdana,arial,sans-serif;}#mermaid-1620374813353 flowchart{fill:apa;}</style><g><g class="output"><g class="clusters"><g class="cluster" id="flowchart-data_models-9898" transform="translate(304.6796875,126)" style="opacity: 1;"><rect style="fill:#ffffff;stroke:#333;stroke-width:1px;font-size:20px;font-weight:500;" width="593.359375" height="236" x="-296.6796875" y="-118"></rect><g class="label" transform="translate(0, -104)" id="mermaid-1620374813353Text"><g style="text-align: center;" transform="translate(-45.5859375,-9.5)"><foreignObject width="91.171875" height="19"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;">data_models</div></foreignObject></g></g></g><g class="cluster" id="flowchart-Data-9899" transform="translate(164.4765625,398.193359375)" style="opacity: 1;"><rect style="fill:#ffffff;stroke:#333;stroke-width:1px;font-size:20px;font-weight:500;" width="312.953125" height="236" x="-156.4765625" y="-118"></rect><g class="label" transform="translate(0, -104)" id="mermaid-1620374813353Text"><g style="text-align: center;" transform="translate(-16.484375,-9.5)"><foreignObject width="32.96875" height="19"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;">Data</div></foreignObject></g></g></g></g><g class="edgePaths"><g class="edgePath LS-B LE-H" id="L-B-H" style="opacity: 1;"><path class="path" 
d="M213.90458079592597,134.806640625L231.7460048299383,124.3388671875C249.58742886395066,113.87109375,285.27027693197533,92.935546875,307.27836763265435,82.4677734375C329.2864583333333,72,337.6197916666667,72,345.953125,72C354.2864583333333,72,362.6197916666667,72,366.7864583333333,72L370.953125,72" marker-end="url(#arrowhead7605)" style="fill:none"></path><defs><marker id="arrowhead7605" viewBox="0 0 10 10" refX="9" refY="5" markerUnits="strokeWidth" markerWidth="8" markerHeight="6" orient="auto"><path d="M 0 0 L 10 5 L 0 10 z" class="arrowheadPath" style="stroke-width: 1; stroke-dasharray: 1, 0;"></path></marker></defs></g><g class="edgePath LS-B LE-E" id="L-B-E" style="opacity: 1;"><path class="path" d="M229.671875,170.55353682472915L244.88541666666666,172.12794735394095C260.0989583333333,173.70235788315276,290.5260416666667,176.85117894157636,309.90625,178.4255894707882C329.2864583333333,180,337.6197916666667,180,349.2421875,180C360.8645833333333,180,375.7760416666667,180,383.2317708333333,180L390.6875,180" marker-end="url(#arrowhead7606)" style="fill:none"></path><defs><marker id="arrowhead7606" viewBox="0 0 10 10" refX="9" refY="5" markerUnits="strokeWidth" markerWidth="8" markerHeight="6" orient="auto"><path d="M 0 0 L 10 5 L 0 10 z" class="arrowheadPath" style="stroke-width: 1; stroke-dasharray: 1, 0;"></path></marker></defs></g><g class="edgePath LS-one LE-mdf_reader" id="L-one-mdf_reader" style="opacity: 1;"><path class="path" d="M223.96875,344.193359375L240.1328125,344.193359375C256.296875,344.193359375,288.625,344.193359375,308.9557291666667,344.193359375C329.2864583333333,344.193359375,337.6197916666667,344.193359375,351.1156654719489,345.4491131738844C364.6115392772311,346.7048669727689,383.26995355446223,349.21637457053777,392.5991606930777,350.4721283694223L401.92836783169327,351.72788216830673" marker-end="url(#arrowhead7607)" style="fill:none"></path><defs><marker id="arrowhead7607" viewBox="0 0 10 10" refX="9" refY="5" markerUnits="strokeWidth" 
markerWidth="8" markerHeight="6" orient="auto"><path d="M 0 0 L 10 5 L 0 10 z" class="arrowheadPath" style="stroke-width: 1; stroke-dasharray: 1, 0;"></path></marker></defs></g><g class="edgePath LS-two LE-mdf_reader" id="L-two-mdf_reader" style="opacity: 1;"><path class="path" d="M295.953125,452.193359375L300.1197916666667,452.193359375C304.2864583333333,452.193359375,312.6197916666667,452.193359375,320.953125,452.193359375C329.2864583333333,452.193359375,337.6197916666667,452.193359375,355.2623201419845,442.6487133711512C372.9048486173024,433.1040673673024,399.85657223460476,414.01477535960476,413.33243404325594,404.470129355756L426.8082958519072,394.9254833519072" marker-end="url(#arrowhead7608)" style="fill:none"></path><defs><marker id="arrowhead7608" viewBox="0 0 10 10" refX="9" refY="5" markerUnits="strokeWidth" markerWidth="8" markerHeight="6" orient="auto"><path d="M 0 0 L 10 5 L 0 10 z" class="arrowheadPath" style="stroke-width: 1; stroke-dasharray: 1, 0;"></path></marker></defs></g><g class="edgePath LS-B LE-mdf_reader" id="L-B-mdf_reader" style="opacity: 1;"><path class="path" d="M229.671875,183.88620445666282L244.88541666666666,188.57183704721902C260.0989583333333,193.25746963777522,290.5260416666667,202.62873481888764,309.90625,207.3143674094438C329.2864583333333,212,337.6197916666667,212,356.8795177109419,229.52412812239137C376.13924375521725,247.04825624478272,406.32536251043456,282.0965124895655,421.41842188804316,299.62064061195684L436.51148126565175,317.14476873434825" marker-end="url(#arrowhead7609)" style="fill:none"></path><defs><marker id="arrowhead7609" viewBox="0 0 10 10" refX="9" refY="5" markerUnits="strokeWidth" markerWidth="8" markerHeight="6" orient="auto"><path d="M 0 0 L 10 5 L 0 10 z" class="arrowheadPath" style="stroke-width: 1; stroke-dasharray: 1, 0;"></path></marker></defs></g><g class="edgePath LS-mdf_reader LE-D" id="L-mdf_reader-D" style="opacity: 1;"><path class="path" 
d="M555.54296875,360.88671875L563.1790364583334,360.8033854166667C570.8151041666666,360.7200520833333,586.0872395833334,360.5533854166667,597.8899739583334,360.4700520833333C609.6927083333334,360.38671875,618.0260416666666,360.38671875,626.359375,360.38671875C634.6927083333334,360.38671875,643.0260416666666,360.38671875,647.1927083333334,360.38671875L651.359375,360.38671875" marker-end="url(#arrowhead7610)" style="fill:none"></path><defs><marker id="arrowhead7610" viewBox="0 0 10 10" refX="9" refY="5" markerUnits="strokeWidth" markerWidth="8" markerHeight="6" orient="auto"><path d="M 0 0 L 10 5 L 0 10 z" class="arrowheadPath" style="stroke-width: 1; stroke-dasharray: 1, 0;"></path></marker></defs></g></g><g class="edgeLabels"><g class="edgeLabel" transform="" style="opacity: 1;"><g transform="translate(0,0)" class="label"><rect rx="0" ry="0" width="0" height="0"></rect><foreignObject width="0" height="0"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;"><span id="L-L-B-H" class="edgeLabel L-LS-B' L-LE-H"></span></div></foreignObject></g></g><g class="edgeLabel" transform="" style="opacity: 1;"><g transform="translate(0,0)" class="label"><rect rx="0" ry="0" width="0" height="0"></rect><foreignObject width="0" height="0"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;"><span id="L-L-B-E" class="edgeLabel L-LS-B' L-LE-E"></span></div></foreignObject></g></g><g class="edgeLabel" transform="" style="opacity: 1;"><g transform="translate(0,0)" class="label"><rect rx="0" ry="0" width="0" height="0"></rect><foreignObject width="0" height="0"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;"><span id="L-L-one-mdf_reader" class="edgeLabel L-LS-one' L-LE-mdf_reader"></span></div></foreignObject></g></g><g class="edgeLabel" transform="" style="opacity: 1;"><g transform="translate(0,0)" class="label"><rect rx="0" ry="0" width="0" 
height="0"></rect><foreignObject width="0" height="0"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;"><span id="L-L-two-mdf_reader" class="edgeLabel L-LS-two' L-LE-mdf_reader"></span></div></foreignObject></g></g><g class="edgeLabel" transform="" style="opacity: 1;"><g transform="translate(0,0)" class="label"><rect rx="0" ry="0" width="0" height="0"></rect><foreignObject width="0" height="0"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;"><span id="L-L-B-mdf_reader" class="edgeLabel L-LS-B' L-LE-mdf_reader"></span></div></foreignObject></g></g><g class="edgeLabel" transform="" style="opacity: 1;"><g transform="translate(0,0)" class="label"><rect rx="0" ry="0" width="0" height="0"></rect><foreignObject width="0" height="0"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;"><span id="L-L-mdf_reader-D" class="edgeLabel L-LS-mdf_reader' L-LE-D"></span></div></foreignObject></g></g></g><g class="nodes"><g class="node default" id="flowchart-H-9887" transform="translate(473.65625,72)" style="opacity: 1;"><rect rx="0" ry="0" x="-102.703125" y="-29" width="205.40625" height="58" class="label-container" style="fill:#e8eaf6;stroke:#333;stroke-width:1px;font-size:20px;font-weight:500;"></rect><g class="label" transform="translate(0,0)"><g style="text-align: center;" transform="translate(-92.703125,-19)"><foreignObject width="185.40625" height="38"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;">code_tables <br/> default: ICOADS.keycodes</div></foreignObject></g></g></g><g class="node default" id="flowchart-B-9886" transform="translate(164.4765625,163.806640625)" style="opacity: 1;"><rect rx="0" ry="0" x="-65.1953125" y="-29" width="130.390625" height="58" class="label-container" style="fill:#e8eaf6;stroke:#333;stroke-width:1px;font-size:20px;font-weight:500;"></rect><g class="label" 
transform="translate(0,0)"><g style="text-align: center;" transform="translate(-55.1953125,-19)"><foreignObject width="110.390625" height="38"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;">Schema <br/> default: imma1</div></foreignObject></g></g></g><g class="node default" id="flowchart-E-9889" transform="translate(473.65625,180)" style="opacity: 1;"><rect rx="0" ry="0" x="-82.96875" y="-29" width="165.9375" height="58" class="label-container" style="fill:#e8eaf6;stroke:#333;stroke-width:1px;font-size:20px;font-weight:500;"></rect><g class="label" transform="translate(0,0)"><g style="text-align: center;" transform="translate(-72.96875,-19)"><foreignObject width="145.9375" height="38"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;">schema.json <br/> default: imma1.json</div></foreignObject></g></g></g><g class="node default" id="flowchart-one-9884" transform="translate(164.4765625,344.193359375)" style="opacity: 1;"><rect rx="0" ry="0" x="-59.4921875" y="-29" width="118.984375" height="58" class="label-container" style="fill:#e8eaf6;stroke:#333;stroke-width:1px;font-size:20px;font-weight:500;"></rect><g class="label" transform="translate(0,0)"><g style="text-align: center;" transform="translate(-49.4921875,-19)"><foreignObject width="98.984375" height="38"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;">ICOADS files <br/> .imma format</div></foreignObject></g></g></g><g class="node default" id="flowchart-two-9885" transform="translate(164.4765625,452.193359375)" style="opacity: 1;"><rect rx="0" ry="0" x="-131.4765625" y="-29" width="262.953125" height="58" class="label-container" style="fill:#e8eaf6;stroke:#333;stroke-width:1px;font-size:20px;font-weight:500;"></rect><g class="label" transform="translate(0,0)"><g style="text-align: center;" transform="translate(-121.4765625,-19)"><foreignObject width="242.953125" 
height="38"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;">any other data stored <br/> in a fix-width or delimited format</div></foreignObject></g></g></g><g class="node default" id="flowchart-mdf_reader-9891" transform="translate(473.65625,360.38671875)" style="opacity: 1;"><polygon points="81.38671875,0 162.7734375,-81.38671875 81.38671875,-162.7734375 0,-81.38671875" transform="translate(-81.38671875,81.38671875)" class="label-container" style="fill:#fcc679;stroke:#333;stroke-width:1px;font-size:20px;font-weight:100;"></polygon><g class="label" transform="translate(0,0)"><g style="text-align: center;" transform="translate(-60.9296875,-9.5)"><foreignObject width="121.859375" height="19"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;">mdf_reader.read</div></foreignObject></g></g></g><g class="node default" id="flowchart-D-9897" transform="translate(748.03125,360.38671875)" style="opacity: 1;"><rect rx="0" ry="0" x="-96.671875" y="-57.5" width="193.34375" height="115" class="label-container" style="fill:#e8eaf6;stroke:#333;stroke-width:1px;font-size:20px;font-weight:500;"></rect><g class="label" transform="translate(0,0)"><g style="text-align: center;" transform="translate(-86.671875,-47.5)"><foreignObject width="173.34375" height="95"><div xmlns="http://www.w3.org/1999/xhtml" style="display: inline-block; white-space: nowrap;">Output: <br/> <br/> pandas.Dataframe <br/> structured according to  <br/> the schema</div></foreignObject></g></g></g></g></g></g></svg>
\ No newline at end of file
diff --git a/docs/_static/images/new_schema.png b/docs/_static/images/new_schema.png
new file mode 100644
index 0000000000000000000000000000000000000000..2343601608e077078b56a51ec745afacb0a2220c
Binary files /dev/null and b/docs/_static/images/new_schema.png differ
diff --git a/docs/_static/images/schema.png b/docs/_static/images/schema.png
new file mode 100644
index 0000000000000000000000000000000000000000..bd89b8f94531bcb7c12d760d4b81c6e444f4c82a
Binary files /dev/null and b/docs/_static/images/schema.png differ
diff --git a/docs/buildDocs.sh b/docs/buildDocs.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f556857916f7dd77f4078f61862c087c8c38d154
--- /dev/null
+++ b/docs/buildDocs.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# -e: abort on the first failing command, so that a failed Sphinx build is
+#     never force-pushed to gh-pages.  -x: trace every command in the CI log.
+set -ex
+################################################################################
+# File:    buildDocs.sh
+# Purpose: Script that builds our documentation using sphinx and updates GitHub
+#          Pages. This script is executed by:
+#            .github/workflows/docs_pages_workflow.yml
+#
+# Authors: Beatriz Recinos <beatriz.recinos.rivas@noc.ac.uk>
+# Created: 2021-06-24
+# Updated: 2021-06-24
+# Version: 0.1
+################################################################################
+ 
+###################
+# INSTALL DEPENDS #
+###################
+ 
+# NOTE(review): docs/conf.py also enables autoapi.extension and
+# sphinx_autodoc_typehints, which are not installed below -- confirm that the
+# workflow provides them.
+apt-get update
+apt-get -y install git rsync python3-sphinx python3-sphinx-rtd-theme
+
+
+#####################
+# DECLARE VARIABLES #
+#####################
+ 
+pwd
+ls -lah
+# Commit time of the latest commit.  Assigned first and exported afterwards so
+# that a failing "git log" is not masked by the exit status of "export".
+SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct)
+export SOURCE_DATE_EPOCH
+ 
+##############
+# BUILD DOCS #
+##############
+ 
+# build our documentation with sphinx (see docs/conf.py)
+# * https://www.sphinx-doc.org/en/master/usage/quickstart.html#running-the-build
+make -C docs clean
+make -C docs html
+ 
+#######################
+# Update GitHub Pages #
+#######################
+ 
+git config --global user.name "${GITHUB_ACTOR}"
+git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com"
+ 
+docroot=$(mktemp -d)
+rsync -av "docs/_build/html/" "${docroot}/"
+ 
+pushd "${docroot}"
+ 
+# don't bother maintaining history; just generate fresh
+git init
+# tracing is paused here so that the token-bearing URL is not echoed to the log
+set +x
+git remote add deploy "https://token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"
+set -x
+git checkout -b gh-pages
+ 
+# add .nojekyll to the root so that github won't 404 on content added to dirs
+# that start with an underscore (_), such as our "_content" dir..
+touch .nojekyll
+ 
+# Add README
+cat > README.md <<EOF
+# GitHub Pages Cache
+ 
+Nothing to see here. The contents of this branch are essentially a cache that's not intended to be viewed on github.com.
+ 
+ 
+If you're looking to update our documentation, check the relevant development branch's 'docs/' dir.
+ 
+EOF
+ 
+# copy the resulting html pages built from sphinx above to our new git repo
+git add .
+ 
+# commit all the new files
+msg="Updating Docs for commit ${GITHUB_SHA} made on $(date -d"@${SOURCE_DATE_EPOCH}" --iso-8601=seconds) from ${GITHUB_REF} by ${GITHUB_ACTOR}"
+git commit -am "${msg}"
+ 
+# overwrite the contents of the gh-pages branch on our github.com repo
+git push deploy gh-pages --force
+ 
+popd # return to main repo sandbox root
+ 
+# exit cleanly
+exit 0
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0a850858f3c2d58d90f3d0c5e5398ca0f89ad37
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,80 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+
+# -- Project information -----------------------------------------------------
+
+project = 'mdf_reader'
+copyright = '2021, David Berry, Irene Perez Gonzalez and Beatriz Recinos'
+author = 'David Berry, Irene Perez Gonzalez and Beatriz Recinos'
+
+# The full version, including alpha/beta/rc tags
+release = 'v1.3'
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+# extensions = []
+
+# extensions.append('autoapi.extension')
+extensions = ['sphinx.ext.autodoc',
+              'sphinx.ext.autosummary',
+              'sphinx.ext.viewcode',
+              'autoapi.extension',
+              'sphinx.ext.napoleon',
+              'sphinx_autodoc_typehints']
+
+
+autoapi_type = 'python'
+autoapi_dirs = ['../']
+add_module_names = False
+autoapi_keep_files = False
+autodoc_typehints = "description"
+
+#autoapi_options = ['members', 'undoc-members', 'private-members']
+autoapi_options = ['members', 'undoc-members', 'private-members', 'show-inheritance',
+                    'show-module-summary', 'special-members', 'imported-members']
+autoapi_ignore = ['*mymodel*', '*conf*', '*gather_stats_c99.py*']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+pygments_style = 'sphinx'
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+html_theme_options = {
+    'logo_only': True,
+    'display_version': False,
+    "collapse_navigation": False,
+}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
\ No newline at end of file
diff --git a/docs/data-models.rst b/docs/data-models.rst
new file mode 100644
index 0000000000000000000000000000000000000000..fb711e568c422722919a99f5b8ee9c5065ea183e
--- /dev/null
+++ b/docs/data-models.rst
@@ -0,0 +1,215 @@
+.. mdf_reader documentation master file, created by
+   sphinx-quickstart on Fri Apr 16 14:18:24 2021.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+.. _data-models:
+
+===========
+Data Models
+===========
+
+Schema
+======
+
+The schema file gathers a collection of descriptors that enables the mdf_reader to access and extract meaningful units of information for each element.
+
+Valid schema files are JSON files that the tool accesses and stores internally as dictionaries. The basename of the schema file must be the same as the name of the data model directory, and its extension must be ``.json``.
+
+.. figure:: _static/images/schema.png
+    :width: 45%
+
+    Data model directory
+
+There are two levels of information in the schema:
+
+   1. **General** information on the data format layout, that helps the tool decide which approach to follow in order to access the data content. This information is included in the **header block** at the top of the schema (see figure below).
+
+
+   2. **Specific** information on the data elements and, optionally, on the sections, in case the data model has its report elements organised in one or multiple sections (as shown in the figure below). This information is included in the **elements block** of the schema.
+
+.. figure:: _static/images/new_schema.png
+    :width: 80%
+
+    Content inside a ``schema.json`` file.
+
+The mdf_reader supports reading and validation of both internal and external schemas:
+
+- An **internal data model** has its schema registered within the tool. To read and validate data from such a model, we only need to pass its reference name to the reader and validation modules, using the argument ``data_model``. A list of the reference names for internally supported data models can be accessed via the tool's function::
+
+   import mdf_reader
+   mdf_reader.properties.supported_data_models()
+
+- An **external data model** is a data format that is unknown to the tool. If the data model meets the specifications for which the tool was built, then a model can be built externally and fed into it for both data reading and model validation, using the argument ``data_model_path``::
+
+   model_path = '~/mdf_reader/data_models/lib/imma1_d701'
+   data_file_path = '~/mdf_reader/tests/data/069-701_1845-04_subset.imma'
+   data = mdf_reader.read(data_file_path, data_model_path= model_path)
+
+.. _code-tables:
+
+Code tables
+===========
+
+.. figure:: _static/images/elements.png
+    :width: 80%
+
+    Element content inside a ``schema.json`` file.
+
+Elements defined in the data model ``schema.json`` with an element attribute ``"column_type": "key"`` are linked to a code table in the data model through a codetable descriptor in the schema (e.g. ``"codetable": "ICOADS.C99.FORM"``). Code tables contain the ``key:value`` pairs and are stored as individual ``.json`` files in the ``data_models/schema/code_tables`` subdirectory.
+
+The content of a code table translating a ship-log report type into its real meaning (``ICOADS.C99.FORM.json``) is shown below::
+
+     {
+     " 1": "daily",
+     " 2": "reports more than once a day"
+     }
+
+This code table is part of the ``imma1_d701`` data model included in this tool.
+
+The following range of code table structures are currently supported:
+
+- Simple code tables: code tables with a list of ``key:value`` pairs.
+- Nested code tables: code tables with multiple (2 or more) keys mapping to a value (``key(1):…:key(n):value``).
+- Range-keyed code tables: code tables (simple or multi-keyed) where one or more keys are an (integer) range of values.
+
+Code tables can be imported as python dictionaries directly using the json package. To be fully read by the tool, however, keys in **range-keyed code tables** need to be expanded and access to all code tables is managed in the application through a **code table manager module**.
+
+The following commands, typed in a Python console, show how to access the code table templates used to create new code tables::
+
+      template_names = mdf_reader.code_tables.templates()
+
+To copy a template to edit::
+
+      mdf_reader.code_tables.copy_template(template_name,out_path=file_path)
+
+or::
+
+      mdf_reader.code_tables.copy_template(template_name,out_dir=dir_path)
+
+Common features
+---------------
+As code tables are stored as ``.json`` files, the json syntax rules must be met when they are generated. See the following `link <https://www.w3schools.com/js/js_json_syntax.asp>`_ to a basic introduction to json syntax.
+
+To create code tables it is important to highlight that:
+
+- String values must be written with double quotes
+- Keys must be strings
+- Values can be strings, numbers, objects (JSON objects), arrays, booleans (``true|false``) or ``null``.
+- Due to the way range keyed tables are parsed, keys cannot have the string ``range_key`` as initial substring (unless they are range keys).
+
+Simple code tables
+------------------
+
+Simple code tables are built using a single json object (enclosed in curly braces) with the ``key:value`` pairs separated by commas, as in the following example for a weather visibility indicator (file name ``visibility_ind.json``)::
+
+      {
+         " ": "Not measured",
+         "0": "Measured",
+         "1": "Fog present"
+      }
+
+Nested code tables
+------------------
+
+Nested code tables are included to deal with situations in which a coded element's encoding varies according to an indicator (contained in a different element in the data) and/or changes over time (different code table versions). Instead of storing these tables in separate files, the tool allows nested code tables to be created.
+
+The following ``.json`` file example shows a code table with 2 levels of indexing. It is built as a single **json object** in which the values of the ``key:value`` pairs of the outer indexing level are simple code tables, instead of individual values.
+
+Nested table (named: ``visibility.json``) example::
+
+      {
+         "0":
+             {"90":"<0.05 km",
+              "91":"0.05 km",
+              "92":"0.2 km",
+              "93":"0.5 km",
+              "94":"1 km",
+              "95":"2 km",
+              "96":"4 km",
+              "97":"10 km",
+              "98":"20 km",
+              "99":"50 km or more"},
+         "1":
+             {"90":"<0.05 km",
+              "91":"0.05 km",
+              "92":"0.2 km",
+              "93":"Fog present, no visibility reported",
+              "94":"1 km",
+              "95":"2 km",
+              "96":"4 km",
+              "97":"10 km",
+              "98":"20 km",
+              "99":"50 km or more"}
+      }
+
+This type of nested code table requires an additional ``.keys`` file (named ``visibility.keys``) with the following format::
+
+      {
+         "('core1','VIS')" : ["('core1','VIS I')","('core1','VIS')"]
+      }
+
+This **code_table** can be called from the ``schema.json`` by setting the element descriptor ``column_type`` to ``key`` in the following way::
+
+       "VIS": {
+                    "description": "Visibility",
+                    "field_length": 2,
+                    "column_type": "key",
+                    "codetable": "visibility"
+                }
+
+Note that only the **nested code table** ``visibility`` is referenced, not the ``.keys`` file, and that the ``.json`` extension is not required.
+
+The data file schema provides the ``element:codetable`` correspondence. However, to map the element to its value in the code table, it is necessary to know the elements in the data file from which the outer keys are derived. Each nested table ``table_name.json`` has a companion ``.json`` file ``table_name.keys`` with a set of ``key:value`` pairs. The key is the actual element the table decodes and the value is a list with the complete set of key elements, from outer to inner.
+
+As a single table can potentially be used to decode different data file elements, a key must be provided for every element that is to be decoded with a nested table (even if it is unique).
+
+Range-keyed code tables
+-----------------------
+
+Range-keyed code tables can be either simple or nested code tables. The term applies if any of the keys in its ``key:value`` pairs is a range, like a period of years (1910-1945) or simply an integer interval (1-10).
+
+Instead of building the table by repeating each of the ``key:value`` pairs for every value in the range, the corresponding range key pairs are defined as ``"range_key(init, end[, step])": value`` in the json file. The code table manager will identify this special type of key and will expand the keys in the dictionary as it is read internally.
+
+Range keys rules and use:
+
+   - Only integer ranges are currently supported
+   - Parameter step is optional. Defaults to 1.
+   - In ranges that apply to a range of years, the keyword ``yyyy`` can be used in place of the end parameter. It will expand the period to the current year.
+
+Example of a Range-key nested table named: ``ICOADS.CO.VS.json`` is shown below::
+
+      {
+         "range_key(1750,1967)":
+              {
+                "0":"0 knots;[0.0,0.0,0.0] ms-1",
+                "1":"1-3 knots;[0.51444,1.02888,1.54332] ms-1",
+                "2":"4-6 knots;[2.05776,2.5722,3.08664] ms-1",
+                "3":"7-9 knots;[3.60108,4.11552,4.62996] ms-1",
+                "4":"10-12 knots;[5.1444,5.65884,6.17328] ms-1",
+                "5":"13-15 knots;[6.68772,7.20216,7.7166] ms-1",
+                "6":"16-18 knots;[8.23104,8.74548,9.25992] ms-1",
+                "7":"19-21 knots;[9.77436,10.2888,10.8032] ms-1",
+                "8":"22-24 knots;[11.3177,11.8321,12.3466] ms-1",
+                "9":"over 24 knots;[12.3466,12.861,null] ms-1"
+              },
+         "range_key(1968,yyyy)":
+              {
+                "0":"0 knots;[0.0,0.0,0.0] ms-1",
+                "1":"1-5 knots;[0.51444,1.54332,2.5722] ms-1",
+                "2":"6-10 knots;[3.08664,4.11552,5.1444] ms-1",
+                "3":"11-15 knots;[5.65884,6.68772,7.7166] ms-1",
+                "4":"16-20 knots;[8.23104,9.25992,10.2888] ms-1",
+                "5":"21-25 knots;[10.8032,11.8321,12.861] ms-1",
+                "6":"26-30 knots;[13.3754,14.4043,15.4332] ms-1",
+                "7":"31-35 knots;[15.9476,16.9765,18.0054] ms-1",
+                "8":"36-40 knots;[18.5198,19.5487,20.5776] ms-1",
+                "9":"over 40 knots;[21.092,22.1209,null] ms-1"
+              }
+      }
+
+As the table is nested, the corresponding ``ICOADS.CO.VS.keys`` file looks as follows::
+
+      {
+         "('core','VS')" : ["('core','YR')","('core','VS')"]
+      }
diff --git a/docs/getting-started.rst b/docs/getting-started.rst
new file mode 100644
index 0000000000000000000000000000000000000000..10bb5e09b4a1457c11b9dfa96a76a1a30f7df155
--- /dev/null
+++ b/docs/getting-started.rst
@@ -0,0 +1,39 @@
+.. mdf_reader documentation master file, created by
+   sphinx-quickstart on Fri Apr 16 14:18:24 2021.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+.. _getting-started:
+
+Getting started
+===============
+
+1. Test the tool
+
+You can test the tool very easily by using a sample data set that comes with the repository. To do this, run the following code::
+
+   import sys
+   sys.path.append('/path_to_folder_directory_containing_the_mdf_reader_folder/')
+
+   import mdf_reader
+   import matplotlib.pyplot as plt
+
+   data = mdf_reader.tests.read_imma1_buoys_nosupp()
+
+2. Read an IMMA file
+
+Read a sample ``.imma`` file from the folder ``~/mdf_reader/test/data/`` via the following code::
+
+   filepath = '~/mdf_reader/test/data/069-701_1845-04_subset.imma'
+   imma_data = mdf_reader.read(filepath, data_model = 'imma1',sections = ['core','c1','c98'])
+
+
+For more details on how to run this in your python session see :py:func:`mdf_reader.read.main()`
+
+3. To call the function from a terminal type::
+
+   $ python mdf_reader_dir/read.py source data_model data_model_path sections chunksize skiprows out_path
+
+For more details and an overview of the tool check out the following python notebook:
+
+- `Test and overview of the mdf_reader tool <https://git.noc.ac.uk/brecinosrivas/mdf_reader/-/blob/master/docs/notebooks/mdf_reader_test_overview.ipynb>`_
diff --git a/docs/how-to-build-a-data-model.rst b/docs/how-to-build-a-data-model.rst
new file mode 100644
index 0000000000000000000000000000000000000000..bb3bfae546982c014e41cf23a0811f26745c810a
--- /dev/null
+++ b/docs/how-to-build-a-data-model.rst
@@ -0,0 +1,384 @@
+.. mdf_reader documentation master file, created by
+   sphinx-quickstart on Fri Apr 16 14:18:24 2021.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+.. _how-to-build-a-data-model:
+
+=========================
+How to build a data model
+=========================
+
+The main steps to create a data model (or schema) for the mdf_reader are:
+
+1. Create a valid directory tree to hold the model **(mymodel)** as shown in the figure below. The correct directory path to store your schema is ``~/mdf_reader/data_models/lib/``.
+
+.. figure:: _static/images/schema.png
+    :width: 45%
+
+    Data model directory
+
+2. Create a valid **schema** file under ``../lib/mymodel/mymodel.json``:
+
+To create the schema file, two important aspects of the schema need to be clear beforehand: i) the order and field lengths of each element in the data input string, and ii) whether the information in the data input needs to be organised into sections, like the ICOADS ``.imma`` data format. With this in mind, one can access all the schema file templates available from within the tool via::
+
+   template_names = mdf_reader.schemas.templates()
+
+These templates have been created to ease the generation of new valid schema files. They range from a basic schema format to more complex ones:
+
+- Fixed width or delimited: *fixed_width_* or *delimited_*
+- With no sections or with sections: *_basic* or *_sections*
+- More complex options include blocks of sections which in the case of ICOADS data are exclusive for certain decks (e.g. deck ``td11``) or blocks of sections that are optional: ``_complex_exc.json`` or ``_complex_opt.json``
+
+To copy a template to edit you can run the following functions::
+
+   mdf_reader.schemas.copy_template(template_name,out_path=file_path)
+
+
+3. Create valid code tables under ``../lib/mymodel/code_tables/table_name[i].json`` if the data model includes code tables.
+
+The general structure of a schema is summarised in the table below, and each attribute is described in the sections that follow:
+
++---------------+-----------------+-----------------------------------+
+|*Schema block* |*Scope*          |*Attribute*                        |
++---------------+-----------------+-----------------------------------+
+|Header         |common           |``encoding``                       |
+|               +-----------------+-----------------------------------+
+|               |no sections      |``field_layout``, ``delimiter``    |
+|               +-----------------+-----------------------------------+
+|               |sections         |``parsing_order``                  |
++---------------+-----------------+-----------------------------------+
+|Elements       |common           |``column_type``, ``description``,  |
+|               |                 |``ignore``, ``missing_value``      |
+|               +-----------------+-----------------------------------+
+|               |numeric          |``decimal_places``, ``encoding``,  |
+|               |                 |                                   |
+|               |                 |``offset``, ``scale``,  ``units``  |
+|               |                 |                                   |
+|               |                 |``valid_max``, ``valid_min``       |
+|               +-----------------+-----------------------------------+
+|               |object, str      |``disable_white_strip``            |
+|               +-----------------+-----------------------------------+
+|               |key              |``codetable``, ``encoding``,       |
+|               |                 |``disable_white_strip``            |
+|               +-----------------+-----------------------------------+
+|               |datetime         |``datetime_format``                |
+|               +-----------------+-----------------------------------+
+|               |fixed_width      |``field_length``                   |
++---------------+-----------------+-----------------------------------+
+|Sections       |common           |``delimiter``, ``disable_read``,   |
+|(header)       |                 |``field_layout``                   |
+|               +-----------------+-----------------------------------+
+|               |fixed_width      |``length``, ``sentinal``           |
++---------------+-----------------+-----------------------------------+
+
+.. _schema-header-block:
+
+Schema header block
+===================
+
+The **header** block is the first block of the schema file, and is common to all schema types, but some of its descriptors are, however, specific to certain model types.
+There is no need to declare a **header** block in data models for which sections are sequential (e.g. all elements in the data source appear in the same order as declared in the sections block).
+
+- Example of a header block for a ``.imma`` based schema::
+
+      "header": {
+           "parsing_order": [
+               {"s": ["core"]},
+               {"o": ["c1","c5","c6","c7","c8","c9","c95","c96","c97","c98"]},
+               {"s": ["c99_sentinal", "c99_data", "c99_header", "c99_qc"]}]
+       },
+
++---------------------------+-------------------+
+| Scope                     | Descriptor name   |
++===========================+===================+
+| Common                    | ``encoding``      |
++---------------------------+-------------------+
+| Data models with          | ``parsing_order`` |
+| sections (1 or Multiple)  |                   |
++---------------------------+-------------------+
+| Data models with no       | ``field_layout``, |
+| sections                  | ``delimiter``     |
++---------------------------+-------------------+
+
+
+- ``delimiter``
+      - String type descriptor that defines the field delimiter for data models.
+      - Setting this descriptor makes the default value of ``field_layout`` == ``delimited``
+      - Mainly this descriptor will be used if ``field_layout`` == ``delimited``
+      - When used together with ``field_layout`` == ``fixed_width`` the code understands that the data layout is a mixture of *delimited* and *fixed_width* strings. In this case the delimiter is removed and the section is read as a ``fixed_width`` type of section.
+      - This case has been added to overcome how pandas managed the ``c99`` section in ``.imma1`` model. e.g. Deck 704 c99 section, which is a sequence of fixed width elements separated by commas.
+      - Applies to ``delimited`` and ``fixed_width`` field layouts
+      - It is a mandatory field only in the case that ``field_layout`` == ``delimited``
+
+- ``encoding``
+      - String type descriptor that denotes the file encoding
+      - Applies to all elements
+      - It is not a mandatory field descriptor
+      - Options:
+         1. all python supported, see the following `link <https://docs.python.org/3.7/library/codecs.html#standard-encodings>`_ for all possible encodings.
+         2. defaults to `utf-8`
+
+- ``field_layout``
+      - String type descriptor that defines the layout of fields in the data model with no sections
+      - Applies to all data models with no sections
+      - Is mandatory descriptor (for data models with no sections)
+      - Options:
+         1. ``delimited`` or ``fixed_width``
+         2. Defaults to ``delimited`` if ``delimiter`` is set, but can be specified to ``fixed_width`` type together with a ``delimiter`` option.
+
+- ``parsing_order``
+      - List of dictionaries containing the order in which the tool must look for sections in a report and group the data by section block types. This field applies to those data types whose reports are divided into multiple sections, e.g. ICOADS data.
+      - Applies to all data models with multiple sections
+      - The different section block types are:
+
+         1. ``s``: *sequential*. Sections in this block appear as listed in all reports.
+         2. ``e``: *exclusive*. Among the sections listed in the block, only one of them appears in every report.
+         3. ``o``: *optional*. Any combination of sections listed in the block can be present in the report. Any order, any missing or present (but does not handle repetitions).
+
+      - Example::
+
+         "parsing_order": [{"s":["core"]}, {"o":["c1", "c99"]}]
+
+.. _schema-element-block:
+
+Schema element block
+====================
+The elements block is a feature common to all data model types. It is the second and last block of data in a schema file with no sections, while it is part of each of the sections' blocks in more complex schemas. This is an example of an element block::
+
+         "elements": {
+                      "YR": {
+                          "description": "year UTC",
+                          "field_length": 4,
+                          "column_type": "uint16",
+                          "valid_max": 2024,
+                          "valid_min": 1600,
+                          "units": "year"
+                      },
+                      "MO": {
+                          "description": "month UTC",
+                          "field_length": 2,
+                          "column_type": "uint8",
+                          "valid_max": 12,
+                          "valid_min": 1,
+                          "units": "month"
+                      },
+                      "DY": {
+                          "description": "day UTC",
+                          "field_length": 2,
+                          "column_type": "uint8",
+                          "valid_max": 31,
+                          "valid_min": 1,
+                          "units": "day"
+                      },
+                      "HR": {
+                          "description": "hour UTC",
+                          "field_length": 4,
+                          "column_type": "float32",
+                          "valid_max": 23.99,
+                          "valid_min": 0.0,
+                          "scale": 0.01,
+                          "decimal_places": 2,
+                          "units": "hour"
+                      }}
+
+Elements in the data are parsed in the order they are declared here. The element block above would define a file / section with elements named: `YR`, `MO`, `DY` and `HR`.
+All elements attributes, some of which are data type specific, are listed and detailed in the following table:
+
++---------------------------+----------------------------------------------------------------+
+| Scope                     | Descriptor name                                                |
++===========================+================================================================+
+| Common                    | ``column_type``, ``description``, ``ignore``, ``missing_value``|
++---------------------------+----------------------------------------------------------------+
+| Fixed width types         | ``field_length``                                               |
++---------------------------+----------------------------------------------------------------+
+| Numeric types             | ``decimal_places``, ``encoding``, ``offset``, ``scale``,       |
+|                           | ``valid_max``, ``valid_min``                                   |
++---------------------------+----------------------------------------------------------------+
+| Object, `str` types       | ``disable_white_strip``                                        |
++---------------------------+----------------------------------------------------------------+
+| Key type                  | ``codetable``, ``disable_white_strip``, ``encoding``           |
++---------------------------+----------------------------------------------------------------+
+| Datetime type             | ``datetime_format``                                            |
++---------------------------+----------------------------------------------------------------+
+
+
+- ``description``
+      - String type descriptor that describes the data element (e.g. free text describing the data element).
+      - Applies to all elements
+
+- ``field_length``
+      - Numeric integer descriptor that determines the field length of the elements (number of bytes or number of characters in a report string).
+      - Applies to the schema format type: ``fixed_width`` and is a mandatory field in the element block.
+      - It can be set to `null`, or not present; if the element is unique in a section whose length is unknown and if this section is the last in the data model (e.g. like it is usually the case for ICOADS supplemental data section c99). If this is the case and the length is unknown the default will be set by the function `mdf_reader.properties.MAX_FULL_REPORT_WIDTH() <https://mdf-reader.readthedocs.io/en/mdf_reader/autoapi/mdf_reader/properties/index.html#module-mdf_reader.properties>`_, which sets the ``field_length`` to 100000.
+
+- ``column_type``
+      - String type descriptor that determines the element data type.
+      - Mandatory field.
+      - Applies to all elements
+      - Options:
+         1. Numeric data types: all types interpreted by `numpy <https://numpy.org/devdocs/user/basics.types.html>`_.
+         2. Datetimes: string or ``datetime64[ns]`` object that formats dates or datetimes when read in a single field. The object must be a `datetime.datetime <https://docs.python.org/3/library/datetime.html#module-datetime>`_ valid format. Can be also read via code tables and the parameter ``key``.
+
+- ``missing_value``
+      - String type descriptor that denotes if there are additional missing values to tag for an element in a schema.
+      - Applies to all elements
+      - Default values are the same as `pandas default missing values <https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html#working-with-missing-data>`_
+
+- ``ignore``
+      - Boolean type descriptor that ignores an element on the output
+      - Options: ``True`` or ``False``, defaults to ``False``
+      - Applies to all elements
+      - Is not a mandatory field descriptor
+
+- ``units``
+      - String type descriptor that states the units of the measured data element.
+      - Applies to *column_type. [numerics]* elements.
+      - Is not a mandatory field descriptor
+      - Defaults to ``None``
+
+- ``encoding``
+      - String type descriptor added if an element needs it
+      - Is not a mandatory field
+      - Not to be confused with file ``encoding``
+      - Applies to *column_type. [numerics]* elements and *column_type. [key]* elements
+      - Defaults to ``None``
+      - Options:
+            1. ``base36``
+            2. ``signed_overpunch``
+
+- ``valid_max``
+      - Numeric type of descriptor that indicates the valid maximum value for numeric elements. This should be the valid maximum in variable declared units, after decoding and conversion (offset, scale...) and it is used for element validation.
+      - Applies to *column_type. [numerics]* elements
+      - Is not a mandatory field
+      - Defaults to *+inf*
+
+- ``valid_min``
+      - Numeric type of descriptor that indicates the valid minimum value for numeric elements. This should be the valid minimum in variable declared units, after decoding and conversion (offset, scale...) and it is used for element validation.
+      - Applies to *column_type. [numerics]* elements
+      - Is not a mandatory field
+      - Defaults to *-inf*
+
+- ``scale``
+      - Numeric type of descriptor. This scale is applied to numeric elements in order to convert the original value to the declared element units.
+      - Applies to *column_type. [numerics]* elements
+      - Is not a mandatory field
+      - Defaults to *1*
+
+- ``offset``
+      - Numeric type of descriptor. This offset is applied to numeric elements in order to convert the original value to the declared element units.
+      - Applies to *column_type. [numerics]* elements
+      - Is not a mandatory field
+      - Defaults to *0*
+
+- ``decimal_places``
+      - Numeric integer descriptor that defines the number of decimal places to which the observed value is reported.
+      - Applies to *column_type. [numeric_floats]* elements
+      - Is not a mandatory field
+      - Defaults to ``pandas.display.precision`` = 6.
+
+- ``codetable``
+      - String type of descriptor containing the key code look up table name. It is the file basename of a code table (with no ``.json`` extension) located in the ``mymodel/code_tables`` directory. See :ref:`code-tables` for more information.
+      - Applies to *column_type. [key]* elements
+      - Is mandatory if ``"column_type": "key"``.
+
+- ``disable_white_strip``
+      - Boolean or string type descriptor that modifies the default leading/trailing blank stripping.
+      - Applies to *column_type. [key, object, str]* elements
+      - Options:
+            1. *do not perform any stripping: true*
+            2. *do not perform right stripping (trailing blanks): `r`*
+            3. *do not perform left stripping (leading blanks): `l`*
+      - Is not a mandatory field
+      - Defaults to *false*
+
+- ``datetime_format``
+      - String type of descriptor that sets the format for the dates.
+      - Applies to *column_type. [datetime]* elements
+      - Is not a mandatory field
+      - Defaults to ``%Y%m%d``
+      - All python.datetime formats are valid.
+
+
+Schema section block
+====================
+
+If the data model is organized in sections then the schema has two main blocks: **the header** (see :ref:`schema-header-block`) and **the sections blocks**. The sections block has a separate block per section, with the following general layout:
+
+   - A section specific header (or sub-header) with info on how to access that specific section.
+   - The section's elements block (See :ref:`schema-element-block`)
+
+Example of a schema section block: "core" section of the ``.imma`` schema::
+
+      "sections": {
+           "core": {
+               "header": {"sentinal": null,"length": 108},
+               "elements": {
+                   "YR": {
+                       "description": "year UTC",
+                       "field_length": 4,
+                       "column_type": "uint16",
+                       "valid_max": 2024,
+                       "valid_min": 1600,
+                       "units": "year"
+                   },
+                   "MO": {
+                       "description": "month UTC",
+                       "field_length": 2,
+                       "column_type": "uint8",
+                       "valid_max": 12,
+                       "valid_min": 1,
+                       "units": "month"
+                   }
+              }
+          }
+      }
+
+
+
+Section header
+--------------
+
+- ``delimiter``
+      - String type descriptor that defines the field delimiter for the data model section.
+      - Setting this descriptor makes the default value of ``field_layout`` == ``delimited``
+      - Mainly this descriptor will be used if ``field_layout`` == ``delimited``
+      - When used together with ``field_layout`` == ``fixed_width`` the code understands that the data layout is a mixture of *delimited* and *fixed_width* strings. In this case the delimiter is removed and the section is read as a ``fixed_width`` type of section.
+      - Applies to ``delimited`` and ``fixed_width`` field layouts
+      - It is a mandatory field only in the case that ``field_layout`` == ``delimited``
+
+- ``disable_read``
+      - Boolean type descriptor that if set to True will ignore the elements of that section. This section will then be produced in the output as a single string.
+      - Options: ``True`` or ``False``
+      - Defaults to False
+
+- ``field_layout``
+      - String type descriptor that defines the layout of fields in the section of the data model
+      - Applies to all sections
+      - If field ``delimiter`` is set, then ``field_layout`` defaults to ``delimited``, else to ``fixed_width``.
+      - This descriptor does not need to be specified in the schema files in the majority of the cases. However, to account for mixed formats, like c99 section in imma1 files for deck 704, this default setting can be overridden by specifying the ``field_layout`` parameter.
+      - Options:
+         1. ``delimited`` or ``fixed_width``
+         2. Defaults to ``delimited`` if ``delimiter`` is set, else defaults to ``fixed_width``.
+
+- ``sentinal``
+      - String type of descriptor that allows the code to identify a section.
+      - Applies to sections of *format.fixed_width*
+      - It is a mandatory field if the section is unique, unique in a parsing_order block, or part of a sequential parsing_order block.
+      - Elements bearing the sentinal need to be, additionally, declared in the elements block.
+
+- ``length``
+      - Numeric integer type of descriptor that defines the length of the section (how many bytes or characters in a string).
+      - Applies to *format.fixed_width*
+      - It is a mandatory field
+      - Can also be set to ``null``, or not reported, if the section is the last one to be parsed and the length is unknown (like the c99 section of the `.imma` model).
+
+Section elements
+----------------
+
+Same as :ref:`schema-element-block`.
+
+Code Tables
+===========
+
+To learn about how to construct a code table, please read the :ref:`code-tables` section.
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..a966413759b86a78df6824c15c8a09a01cbc4170
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,70 @@
+.. mdf_reader documentation master file, created by
+   sphinx-quickstart on Fri Apr 16 14:18:24 2021.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Data reader toolbox documentation
+---------------------------------
+
+The **mdf_reader** is a `python3 <https://www.python.org/download/releases/3.0/>`_ tool designed to read data files compliant with a user specified data model.
+
+It was developed with the initial idea of reading data from the `International Comprehensive Ocean-Atmosphere Data Set (ICOADS) <https://icoads.noaa.gov/>`_ stored in the `International Maritime Meteorological Archive (IMMA) data format <https://icoads.noaa.gov/e-doc/imma/R3.0-imma1.pdf>`_.
+
+The tool has been further enhanced to account for any marine meteorological data format, provided that this data meets the following specifications:
+
+-	Data is stored in a human-readable manner: `ASCII <https://en.wikipedia.org/wiki/ASCII>`_.
+-	Data is organized in single line reports (e.g. rows of observations separated by a delimiter like .csv).
+-	Reports have a coherent internal structure that can be modelized.
+-	Reports are fixed width or field delimited types.
+-	Reports can be organized in sections, in which case each section can be of different types (fixed width or delimited).
+
+The mdf_reader uses the information provided in a `data model <https://en.wikipedia.org/wiki/Data>`_ to read meteorological data into a python `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_, with the column names and data types set according to each data element's description specified in the data model or **schema**. In addition to reading, the mdf_reader validates data elements against the **schema** provided.
+
+This tool outputs a python object with the following attributes:
+
+1.	A `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_ (DF) with the data values.
+2.	A `boolean pandas <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.bool.html>`_ DF with the data validation mask.
+3.	A `dictionary <https://realpython.com/python-dicts/>`_ with a simplified version of the input data model.
+
+The reader allows for basic transformations of the data. This feature includes `basic numeric data decoding <https://realpython.com/python-encodings-guide/#enter-unicode>`_ (base36, signed_overpunch) and numeric data conversion (scale and offset).
+
+Several data models have been added to the tool including the IMMA schema: ``~/mdf_reader/data_models/lib/imma1``.
+
+.. note:: **Data from other data models than those already available can be read, providing that this data meets the basic specifications listed above. A data model can be built externally and fed into the tool.**
+
+.. toctree::
+   :maxdepth: 2
+   :glob:
+   :hidden:
+   :caption: Guide
+
+   tool-set-up.rst
+   tool-overview.rst
+   getting-started.rst
+   data-models.rst
+   how-to-build-a-data-model.rst
+
+
+
+About
+-----
+
+:Version:
+
+:Citation:
+
+:License:
+
+:Authors:
+   David Berry, Irene Perez Gonzalez and Beatriz Recinos
+
+
+.. image:: _static/images/logos_c3s/logo_c3s-392x154.png
+    :width: 25%
+    :target: https://climate.copernicus.eu/
+.. image:: _static/images/logos_c3s/LOGO_2020_-_NOC_1_COLOUR.png
+    :width: 25%
+    :target: https://noc.ac.uk/
+.. image:: _static/images/logos_c3s/icoadsLogo.png
+    :width: 20%
+    :target: https://icoads.noaa.gov/
\ No newline at end of file
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000000000000000000000000000000000000..2119f51099bf37e4fdb6071dce9f451ea44c62dd
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/tool-overview.rst b/docs/tool-overview.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c460108627daedb4bad9db7202a29d51d0de8ce8
--- /dev/null
+++ b/docs/tool-overview.rst
@@ -0,0 +1,121 @@
+.. mdf_reader documentation master file, created by
+   sphinx-quickstart on Fri Apr 16 14:18:24 2021.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Tool overview
+=============
+
+In the tool's context, a data model is the combination of a **schema file**, with information on the file format and its contents, and, optionally, a set of code tables with ``key:value`` pairs used to translate encoded information in some data elements:
+
+   e.g. Temperature units might be stored as numeric values 1 or 2 and this translates to ``1:Celsius`` and ``2:Fahrenheit``.
+
+
+Workflow
+--------
+
+.. figure:: _static/images/mdf_reader_diagram.svg
+    :width: 100%
+
+    Simplified workflow of the main function in the tool
+
+Input data: ``.imma`` files and schemas
+---------------------------------------
+
+The tool has been created to read meteorological data from `ICOADS <https://icoads.noaa.gov/r3.html>`_ stored in the ``.imma`` format. Please read the `following guide <https://icoads.noaa.gov/e-doc/imma/R3.0-imma1.pdf>`_ for more details regarding the database and the data format.
+
+Each meteorological report in ICOADS can come from multiple countries, sources and platforms and each report has a source ID (SID) and a deck (DCK) number assigned. "Deck" originally referred to a punched card deck, but is now used as the primary field to track ICOADS data **collections**. Each deck may contain a single Source ID (SID) or a mixture of SIDs.
+
+Data in the ``.imma`` format is stored as a fixed-width and/or field-delimited file. The mdf_reader reads the data, organises it into sections and validates them against a declared data model (also referred to here as a **schema**), which can be source ID and deck dependent.
+
+The **core** meteorological variables stored in the ``.imma`` format can be read by using the general ``imma1`` schema included in this tool.
+
+**Supplemental metadata attachments** require a specific **schema** customized to read supplemental metadata from a specific source and deck ("collection"). Several **schemas** are already included in this tool in order to read 18th century ship meteorological metadata.
+
+All schemas are located under the following directory: ``~/mdf_reader/data_models/lib/``
+
+.. note:: For each SID-DCK number the data model or schema used to read supplemental metadata will be different (e.g. to read metadata from the `US Maury <https://icoads.noaa.gov/maury.html>`_ Ship data collection, SID 69 and DCK 701, we will use the schema ``imma_d701``).
+
+Output:
+-------
+
+The output of the mdf_reader is a python object with three attributes:
+
+* **data**: python `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_ with data values.
+* **atts**: `python dictionary <https://docs.python.org/3/tutorial/datastructures.html#dictionaries>`_ with attributes of each of the output elements inherited from the input data model **schema**.
+* **mask**: boolean DataFrame with the results of the validation of each of the data model elements in its columns.
+
+
+Processing of the data elements
+-------------------------------
+
+The individual data element definitions in the schema determine how each element is extracted, transformed and validated within the tool. If the data model or schema has its data elements organised in sections, the reader first identifies the string chunks corresponding to the different sections.
+
+If the data model has no sections, the reader works with the full report as a single chunk.
+
+Afterwards, data elements are extracted from each of these chunks, as shown in the figure below, where each element in the input dataframe is linked to its attributes (orange text) defined within the data model/schema (e.g. element encoding type, byte length, etc.).
+
+.. figure:: _static/images/fig1.png
+    :width: 100%
+
+    Schematic representation of the integral process of reading, transforming and validating a data element.
+
+Data elements extraction and transformation
+-------------------------------------------
+
+The data element extraction and transformation from the initial string to the output dataframe occurs mainly in 3 steps:
+
+1. **Elements extraction and missing data tagging**:
+
+   Done using `mdf_reader.import_data.main() <https://mdf-reader.readthedocs.io/en/mdf_reader/autoapi/mdf_reader/reader/import_data/index.html#module-mdf_reader.reader.import_data>`_, where individual data elements are extracted as 'objects' from the full report string and missing data is recognised as ``NA/NaN`` values in the resulting dataframe.
+
+   Strings that are recognised as missing from the source are the ``pandas`` defaults, plus:
+
+      * Those defined in the data model/schema as ``NaN`` by making use of the ``missing_value`` attribute.
+      * Those defined as blanks if ``disable_white_strip`` is not set to ``True``.
+
+2. **Unpacking of encoded elements**:
+
+   Data elements with encoding defined in the schema element attributes are decoded and cast to their declared ``column_type`` [#f1]_. Elements where the decoding fails or is not recognised by the tool are marked as ``NA/NaN`` values in the resulting dataframe.
+
+3. **Element conversion**:
+
+   Data elements are converted (and optionally transformed) to their final data types (and units) if specified in the data model/schema.
+
+   *Numeric* type elements:
+      *	Safe conversion to numeric; ``NaN`` where conversion is not possible.
+      *	There is the option of applying to each element a *scale* and an *offset*: ``offset + scale*i``
+      *	Safe conversion of ``column_type``
+
+
+   *object*, *string* and *key* type elements:
+      Leading and trailing whitespace stripping unless otherwise declared in ``disable_white_strip`` (disable all, leading or trailing blank stripping).
+
+   *datetime* type elements:
+      Safe parsing to datetime objects with `pandas.to_datetime() <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html>`_, assigning ``NaT`` where the conversion is not possible.
+
+Validation of elements against the schema or data model
+-------------------------------------------------------
+
+Data model validation is initiated after each element unpacking and conversion. New ``NA/NaN`` values in the data (not identified as missing values during extraction) are understood by the tool to have failed unpacking or conversion and are thus not validated against the data model. The resulting preliminary validation mask values are:
+
+   *	``False``: invalid decoding, conversion
+   *	``True``: missing data, rest
+
+Once elements are in the final form, *numeric* and *key* elements are validated against their corresponding attributes in the schema (``valid_max|valid_min`` and ``codetable``, respectively), with the final values in the validation mask being:
+
+   *	``False``: invalid decoding, conversion, data model values
+   *	``True``: missing data, rest
+
+Overall, the validation process exception handling is:
+
+   *	Missing values: ``True``
+   *	Numeric type elements where either upper|lower bound is missing: ``False``
+   *	key type elements where no codetable is found (or defined in the data model): ``False``
+   *	Rest: ``True``
+
+.. rubric:: Footnotes
+.. [#f1] If ``NaN`` values are present and ``column_type`` is integer, conversion to ``column_type`` will not be possible and the data type will follow the pandas casting rules (`Missing data casting rules and indexing <https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html>`_).
+
+
+
diff --git a/docs/tool-set-up.rst b/docs/tool-set-up.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c625d19764ba88b6f0e9ccd19dec6f829ac8c8e0
--- /dev/null
+++ b/docs/tool-set-up.rst
@@ -0,0 +1,78 @@
+.. mdf_reader documentation master file, created by
+   sphinx-quickstart on Fri Apr 16 14:18:24 2021.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Tool set up
+===========
+
+The mdf_reader is a pure Python package, but it has a few dependencies that rely on specific Python and module versions. The tool has been tested with Python version 3.7 on Linux and Mac OS systems.
+
+1. Clone the repository
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Clone the latest version via::
+
+      $ git clone git@git.noc.ac.uk:brecinosrivas/mdf_reader.git
+
+.. _git: https://git-scm.com/book/en/v2/Getting-Started-Installing-Git
+
+2. Install a python environment
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For this you can use and install `pyenv <https://github.com/pyenv/pyenv>`_ and create a new virtual environment
+with the Python version needed (**3.7.3**) using `pyenv-virtualenv <https://github.com/pyenv/pyenv-virtualenv>`_.
+
+If you install pyenv and pyenv-virtualenv you can create an environment with a fixed Python version::
+
+    $ pyenv install 3.7.3
+    $ pyenv virtualenv 3.7.3 mdfreader_env
+    $ pyenv activate mdfreader_env
+
+As another option you can use conda. See the `conda docs <https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands>`_
+for more information about how to create an environment from the command line.
+
+Or, as a much faster alternative, you can install `mamba <https://github.com/mamba-org/mamba>`_.
+
+3. Install dependencies
+~~~~~~~~~~~~~~~~~~~~~~~
+
+If you used **pyenv** for your environment, once activated you can install the dependencies using `pip <https://pip.pypa.io/en/stable/>`_::
+
+ $ pip install numpy==1.16.2 pandas==0.24.2 matplotlib==3.0.3
+
+Check the conda or mamba documentation to install dependencies via those tools.
+
+.. warning:: **The pandas version is particularly important since it needs to be compatible with the way of importing the json module used in the code.**
+
+4. Optional step: install jupyter notebook
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Install `jupyter notebook <https://jupyter.org/install>`_ and `IPython <https://jupyter.readthedocs.io/en/latest/install.html>`_ for an easy overview of the tool and to make use of the tutorials under ``~/mdf_reader/docs/notebooks``::
+
+    $ pip install notebook
+    $ pip install ipykernel
+
+Check the libraries documentation in the links above to install them via conda or mamba.
+
+To add a new kernel that loads your notebooks with the right environment (``mdfreader_env``), run::
+
+    $ python -m ipykernel install --user --name=mdfreader_env
+    $ jupyter notebook
+
+When you open the notebook, make sure you select the kernel or environment with the name ``mdfreader_env``. You can also
+test the notebook by adding and executing the following code in a jupyter-notebook cell::
+
+    from platform import python_version
+    import sys
+    print(python_version())
+    print(sys.executable)
+    print(sys.version)
+    print(sys.version_info)
+
+And you should see the following information for your ``mdfreader_env``::
+
+    /Users/username/.pyenv/versions/3.7.3/envs/mdfreader_env/bin/python
+    3.7.3 (default, Feb  4 2021, 14:32:54)
+    [Clang 12.0.0 (clang-1200.0.32.28)]
+    sys.version_info(major=3, minor=7, micro=3, releaselevel='final', serial=0)