{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Generating a data model for CLIWOC" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2021-01-08 15:42:25,535 - root - INFO - init basic configure of logging success\n" ] } ], "source": [ "import os\n", "import sys\n", "sys.path.append('/Users/brivas/c3s_work')\n", "import mdf_reader\n", "import json" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2021-01-08 15:42:38,945 - root - INFO - READING DATA MODEL SCHEMA FILE...\n", "2021-01-08 15:42:38,950 - root - INFO - EXTRACTING DATA FROM MODEL: imma1\n", "2021-01-08 15:42:38,951 - root - INFO - Getting data string from source...\n", "2021-01-08 15:42:38,964 - root - INFO - Extracting and reading sections\n", "2021-01-08 15:42:38,965 - root - INFO - Processing section partitioning threads\n", "2021-01-08 15:42:38,965 - root - INFO - 1000 ...\n", "2021-01-08 15:42:38,978 - root - INFO - done\n", "2021-01-08 15:42:38,979 - root - INFO - 211000 ...\n", "2021-01-08 15:42:39,000 - root - INFO - done\n", "2021-01-08 15:42:39,001 - root - INFO - 29211000 ...\n", "2021-01-08 15:42:39,016 - root - INFO - done\n", "2021-01-08 15:42:39,017 - root - INFO - 2929211000 ...\n", "2021-01-08 15:42:39,022 - root - INFO - done\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Reading section core\n", "Reading section c1\n", "Reading section c5\n", "Reading section c6\n", "Reading section c7\n", "Reading section c8\n", "Reading section c9\n", "Reading section c95\n", "Reading section c96\n", "Reading section c97\n", "Reading section c98\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2021-01-08 15:42:39,980 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 'UQZ'),('c1', 'UQA'),('c1', 
'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE')\n", "2021-01-08 15:42:39,981 - root - WARNING - Corresponding upper and/or lower bounds set to +/-inf for validation\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Reading section c99\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2021-01-08 15:42:41,021 - root - INFO - Wrapping output....\n", "2021-01-08 15:42:41,128 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n" ] } ], "source": [ "schema = 'imma1'\n", "\n", "data_file_path = '/Users/brivas/c3s_work/mdf_reader/tests/data/133-730_1776-10_subset.imma'\n", "\n", "data_raw = mdf_reader.read(data_file_path, data_model = schema)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
c99
099 0 NAN NATIONAAL ARCHIEF OF THE NETHERLA...
199 0 PRO PUBLIC RECORD OFFICE ...
299 0 AGI ARCHIVO GENERAL DE INDIAS ...
399 0 AGI ARCHIVO GENERAL DE INDIAS ...
499 0 NMM NATIONAL MARITIME MUSEUM ...
\n", "
" ], "text/plain": [ " c99\n", "0 99 0 NAN NATIONAAL ARCHIEF OF THE NETHERLA...\n", "1 99 0 PRO PUBLIC RECORD OFFICE ...\n", "2 99 0 AGI ARCHIVO GENERAL DE INDIAS ...\n", "3 99 0 AGI ARCHIVO GENERAL DE INDIAS ...\n", "4 99 0 NMM NATIONAL MARITIME MUSEUM ..." ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_raw.data['c99'].head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'99 0 NAN NATIONAAL ARCHIEF OF THE NETHERLANDS DEN HAAG NEDERLAND 1.04.02 152 VOC 5137 VOC_152_5137 DUTCH VOC_152_5137_0071 0191500N1020200E185900N 2 4TENERIFE NOORDELIJKSTE LAND NTW ZUIDELIJKSTE WTN 10 0 21776100112 3 VM 8UNKNOWN TEMPEL DUTCH VOC C. OVERBEEK ASMUS HENDRIK STERRENBERG SCHIPPER TEXEL KASSERAIJEN 0 4259.317760102NW 11.50 UNKNOWN UNKNOWN 0 DUITSE DUITSE 360 DEGREES N-NNW SLAPPE-LABBER BRAMZEILSKOELTE GOED WEER OVERDRIJVENDE DEINSIGE LUCHT 00000000CLIWOC VERSION 2.0'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "line = data_raw.data['c99'].iloc[0].values[0]\n", "line" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'/Users/brivas/c3s_work/mdf_reader/data_models/lib/imma1_d730'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path_to_folder = '/Users/brivas/c3s_work/mdf_reader/data_models/lib/'\n", "model_name = 'imma1_d730'\n", "model_path = os.path.join(path_to_folder, model_name)\n", "model_path" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2021-01-08 15:43:38,121 - root - INFO - READING DATA MODEL SCHEMA FILE...\n", "2021-01-08 15:43:38,126 - root - INFO - EXTRACTING DATA FROM MODEL: /Users/brivas/c3s_work/mdf_reader/data_models/lib/imma1_d730\n", "2021-01-08 15:43:38,127 - root - INFO - Getting data string from source...\n", 
"2021-01-08 15:43:38,139 - root - INFO - Extracting and reading sections\n", "2021-01-08 15:43:38,140 - root - INFO - Processing section partitioning threads\n", "2021-01-08 15:43:38,141 - root - INFO - 1000 ...\n", "2021-01-08 15:43:38,154 - root - INFO - done\n", "2021-01-08 15:43:38,155 - root - INFO - 211000 ...\n", "2021-01-08 15:43:38,169 - root - INFO - done\n", "2021-01-08 15:43:38,170 - root - INFO - 29211000 ...\n", "2021-01-08 15:43:38,190 - root - INFO - done\n", "2021-01-08 15:43:38,190 - root - INFO - 3029211000 ...\n", "2021-01-08 15:43:38,194 - root - INFO - done\n", "2021-01-08 15:43:38,195 - root - INFO - 303029211000 ...\n", "2021-01-08 15:43:38,199 - root - INFO - done\n", "2021-01-08 15:43:38,200 - root - INFO - 30303029211000 ...\n", "2021-01-08 15:43:38,204 - root - INFO - done\n", "2021-01-08 15:43:38,205 - root - INFO - 3030303029211000 ...\n", "2021-01-08 15:43:38,207 - root - INFO - done\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Reading section core\n", "Reading section c1\n", "Reading section c5\n", "Reading section c6\n", "Reading section c7\n", "Reading section c8\n", "Reading section c9\n", "Reading section c95\n", "Reading section c96\n", "Reading section c97\n", "Reading section c98\n", "Reading section c99_sentinal\n", "Reading section c99_logbook\n", "Reading section c99_voyage\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2021-01-08 15:43:39,541 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 'UQZ'),('c1', 'UQA'),('c1', 'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 
'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE'),('c99_voyage', 'drLatDeg'),('c99_voyage', 'drLatMin'),('c99_voyage', 'drLatSec'),('c99_voyage', 'drLonDeg'),('c99_voyage', 'drLonMin'),('c99_voyage', 'drLonSec'),('c99_voyage', 'LatDeg'),('c99_voyage', 'LatMin'),('c99_voyage', 'LatSec'),('c99_voyage', 'LonDeg'),('c99_voyage', 'LonMin'),('c99_voyage', 'LonSec'),('c99_voyage', 'LMdistance1'),('c99_voyage', 'LMdistance2'),('c99_voyage', 'LMdistance4'),('c99_voyage', 'TimeOB'),('c99_voyage', 'Glasses'),('c99_data', 'AT_outside'),('c99_data', 'SST'),('c99_data', 'attached_tem')\n", "2021-01-08 15:43:39,542 - root - WARNING - Corresponding upper and/or lower bounds set to +/-inf for validation\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Reading section c99_data\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2021-01-08 15:43:48,904 - root - INFO - Wrapping output....\n", "2021-01-08 15:43:49,012 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n" ] } ], "source": [ "data_file_path = '/Users/brivas/c3s_work/mdf_reader/tests/data/133-730_1776-10_subset.imma'\n", "\n", "data = mdf_reader.read(data_file_path, data_model_path= model_path)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
c99_sentinal
ATTIATTLBLK
0990NaN
1990NaN
2990NaN
3990NaN
4990NaN
\n", "
" ], "text/plain": [ " c99_sentinal \n", " ATTI ATTL BLK\n", "0 99 0 NaN\n", "1 99 0 NaN\n", "2 99 0 NaN\n", "3 99 0 NaN\n", "4 99 0 NaN" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "data.data[[\"c99_sentinal\"]].head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
InstAbbrInsNameInsPlaceInsLandNo_data_entryNameArchiveSetArchivePartSpecificationLogbook_idLogbook_languageImage_NoIllustr
0NANNATIONAAL ARCHIEF OF THE NETHERLANDSDEN HAAGNEDERLAND1.04.02152VOC5137VOC_152_5137DUTCHVOC_152_5137_00710
1PROPUBLIC RECORD OFFICEKEWUNITED KINGDOMNaNNaNNaNNaNPRO BH1 1602ENGLISHNaN0
2AGIARCHIVO GENERAL DE INDIASSEVILLESPAINNaNNaNNaNNaNCORREOS, 193B R8SPANISHNaN0
3AGIARCHIVO GENERAL DE INDIASSEVILLESPAINNaNNaNNaNNaNCORREOS, 193A R5SPANISHNaN0
4NMMNATIONAL MARITIME MUSEUMGREENWICHUNITED KINGDOMNaNNaNNaNNaNNMM ADM/L/S543ENGLISHNaN0
\n", "
" ], "text/plain": [ " InstAbbr InsName InsPlace InsLand \\\n", "0 NAN NATIONAAL ARCHIEF OF THE NETHERLANDS DEN HAAG NEDERLAND \n", "1 PRO PUBLIC RECORD OFFICE KEW UNITED KINGDOM \n", "2 AGI ARCHIVO GENERAL DE INDIAS SEVILLE SPAIN \n", "3 AGI ARCHIVO GENERAL DE INDIAS SEVILLE SPAIN \n", "4 NMM NATIONAL MARITIME MUSEUM GREENWICH UNITED KINGDOM \n", "\n", " No_data_entry NameArchiveSet ArchivePart Specification Logbook_id \\\n", "0 1.04.02 152 VOC 5137 VOC_152_5137 \n", "1 NaN NaN NaN NaN PRO BH1 1602 \n", "2 NaN NaN NaN NaN CORREOS, 193B R8 \n", "3 NaN NaN NaN NaN CORREOS, 193A R5 \n", "4 NaN NaN NaN NaN NMM ADM/L/S543 \n", "\n", " Logbook_language Image_No Illustr \n", "0 DUTCH VOC_152_5137_0071 0 \n", "1 ENGLISH NaN 0 \n", "2 SPANISH NaN 0 \n", "3 SPANISH NaN 0 \n", "4 ENGLISH NaN 0 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.data[[\"c99_logbook\"]].c99_logbook.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
drLatDegdrLatMindrLatSecdrLatHemdrLonDegdrLonMindrLonSecdrLonHemLatDegLatMin...voyage_toAnchored_indAnchorPlaceDASnoVoyageIniCourse_shipShip_speedDistanceEncNameEncNat
019.015.00.0N102.02.00.0E18.059.0...KASSERAIJEN0NaN4259.31776-01-02NWNaN11.50NaNNaN
1NaNNaNNaNNaN11.05.00.0W49.040.0...GRAVESEND0NaNNaN1776-09-18EBS1/4SNaN131.00NaNNaN
238.032.00.0NNaNNaNNaNNaNNaNNaN...LA CORU�A0NaNNaN1776-07-28NaNNaN212.00NaNNaN
315.051.00.0SNaNNaNNaNNaNNaNNaN...MONTEVIDEO0NaNNaN1776-08-16NaNNaN129.00NaNNaN
445.030.00.0N49.08.00.0WNaNNaN...NaN0NaNNaN1776-09-24N87WNaN92.00NaNNaN
\n", "

5 rows × 58 columns

\n", "
" ], "text/plain": [ " drLatDeg drLatMin drLatSec drLatHem drLonDeg drLonMin drLonSec \\\n", "0 19.0 15.0 0.0 N 102.0 2.0 0.0 \n", "1 NaN NaN NaN NaN 11.0 5.0 0.0 \n", "2 38.0 32.0 0.0 N NaN NaN NaN \n", "3 15.0 51.0 0.0 S NaN NaN NaN \n", "4 45.0 30.0 0.0 N 49.0 8.0 0.0 \n", "\n", " drLonHem LatDeg LatMin ... voyage_to Anchored_ind AnchorPlace \\\n", "0 E 18.0 59.0 ... KASSERAIJEN 0 NaN \n", "1 W 49.0 40.0 ... GRAVESEND 0 NaN \n", "2 NaN NaN NaN ... LA CORU�A 0 NaN \n", "3 NaN NaN NaN ... MONTEVIDEO 0 NaN \n", "4 W NaN NaN ... NaN 0 NaN \n", "\n", " DASno VoyageIni Course_ship Ship_speed Distance EncName EncNat \n", "0 4259.3 1776-01-02 NW NaN 11.50 NaN NaN \n", "1 NaN 1776-09-18 EBS1/4S NaN 131.00 NaN NaN \n", "2 NaN 1776-07-28 NaN NaN 212.00 NaN NaN \n", "3 NaN 1776-08-16 NaN NaN 129.00 NaN NaN \n", "4 NaN 1776-09-24 N87W NaN 92.00 NaN NaN \n", "\n", "[5 rows x 58 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.data[[\"c99_voyage\"]].c99_voyage.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For example, the ship type field on this deck contains many different ship types, written in different languages, and there is no code table for it. Would this information be enough (e.g. for the things that Liz is doing)?" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2 PAQUEBOT\n", "3 FRAGATA CORREO\n", "4 6TH RATE\n", "5 FRAGATA CORREO\n", "6 6TH RATE\n", "Name: Ship_type, dtype: object" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.data[[\"c99_voyage\"]].c99_voyage.Ship_type.dropna().head()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AT_reading_unitsSST_reading_unitsAP_reading_unitsBART_reading_unitsReferenceCourseReferenceWindDirectionDeclDistance_unitsDistance_units_to_landmarkDistance_units_travelledLongitude_unitsunits_of_measurementhumidity_unitswater_at_pump_unitswind_scaleBARO_typeBARO_brandAPIHumidity_methodcompas_errorcompas_correctionAT_outsideSSTAPwind_dircurrent_dircurrent_speedattached_tempump_waterHumiditywind_forceweatherprcp_descriptorsea_stateshape_couldsdir_couldsClearnesscloud_fractiongustsRainFogSnowThunderHailSea_iceTrivial_correctionRelease
0NaNNaNNaNNaNUNKNOWNUNKNOWN0NaNDUITSEDUITSE360 DEGREESNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNN-NNWNaNNaNNaNNaNNaNSLAPPE-LABBER BRAMZEILSKOELTEGOED WEERNaNNaNOVERDRIJVENDE DEINSIGE LUCHTNaNNaNNaN00000000CLIWOC VERSION 2.0
1NaNNaNNaNNaNUNKNOWNUNKNOWN-23NaNLEAGUESNM180 DEGREESNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNEBN, NE, NNE, NEBENaNNaNNaNNaNNaNPLEASANT BREEZE, FRESH BREEZENaNSPITS OF RAINNaNNaNNaNCLEAR THEN CLOUDYNaN01000000CLIWOC VERSION 2.0
2NaNNaNNaNNaNNaNUNKNOWN-17NaNNaNUNKNOWN360 DEGREESNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNENaNNaNNaNNaNNaNFRESCACH�NNaNNaNGRUESANaNNaNNaNNaN00000000CLIWOC VERSION 2.0
3NaNNaNNaNNaNNaNUNKNOWN-1NaNNaNNM360 DEGREESNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNENaNNaNNaNNaNNaNFRESQUITONaNNaNLLANANaNNaNNaNNaN00000000CLIWOC VERSION 1.0
4NaNNaNNaNNaNUNKNOWNUNKNOWN-17NaNLEAGUESNMUNKNOWNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNSSWNaNNaNNaNNaNNaNMODERATENaNNaNNaNNaNNaNFAIRNaN00000000CLIWOC VERSION 1.1
\n", "
" ], "text/plain": [ " AT_reading_units SST_reading_units AP_reading_units BART_reading_units \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " ReferenceCourse ReferenceWindDirection Decl Distance_units \\\n", "0 UNKNOWN UNKNOWN 0 NaN \n", "1 UNKNOWN UNKNOWN -23 NaN \n", "2 NaN UNKNOWN -17 NaN \n", "3 NaN UNKNOWN -1 NaN \n", "4 UNKNOWN UNKNOWN -17 NaN \n", "\n", " Distance_units_to_landmark Distance_units_travelled Longitude_units \\\n", "0 DUITSE DUITSE 360 DEGREES \n", "1 LEAGUES NM 180 DEGREES \n", "2 NaN UNKNOWN 360 DEGREES \n", "3 NaN NM 360 DEGREES \n", "4 LEAGUES NM UNKNOWN \n", "\n", " units_of_measurement humidity_units water_at_pump_units wind_scale \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " BARO_type BARO_brand API Humidity_method compas_error compas_correction \\\n", "0 NaN NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " AT_outside SST AP wind_dir current_dir current_speed \\\n", "0 NaN NaN NaN N-NNW NaN NaN \n", "1 NaN NaN NaN NEBN, NE, NNE, NEBE NaN NaN \n", "2 NaN NaN NaN E NaN NaN \n", "3 NaN NaN NaN E NaN NaN \n", "4 NaN NaN NaN SSW NaN NaN \n", "\n", " attached_tem pump_water Humidity wind_force weather \\\n", "0 NaN NaN NaN SLAPPE-LABBER BRAMZEILSKOELTE GOED WEER \n", "1 NaN NaN NaN PLEASANT BREEZE, FRESH BREEZE NaN \n", "2 NaN NaN NaN FRESCACH�N NaN \n", "3 NaN NaN NaN FRESQUITO NaN \n", "4 NaN NaN NaN MODERATE NaN \n", "\n", " prcp_descriptor sea_state shape_coulds dir_coulds \\\n", "0 NaN NaN OVERDRIJVENDE DEINSIGE LUCHT NaN \n", "1 SPITS OF RAIN NaN NaN NaN \n", "2 NaN GRUESA NaN NaN \n", "3 NaN LLANA NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " Clearness cloud_fraction gusts Rain Fog Snow Thunder Hail Sea_ice \\\n", "0 NaN NaN 0 0 0 0 0 0 0 \n", "1 CLEAR 
THEN CLOUDY NaN 0 1 0 0 0 0 0 \n", "2 NaN NaN 0 0 0 0 0 0 0 \n", "3 NaN NaN 0 0 0 0 0 0 0 \n", "4 FAIR NaN 0 0 0 0 0 0 0 \n", "\n", " Trivial_correction Release \n", "0 0 CLIWOC VERSION 2.0 \n", "1 0 CLIWOC VERSION 2.0 \n", "2 0 CLIWOC VERSION 2.0 \n", "3 0 CLIWOC VERSION 1.0 \n", "4 0 CLIWOC VERSION 1.1 " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.options.display.max_columns = None\n", "data.data[[\"c99_data\"]].c99_data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "What about the different wind force scales used in the different languages?" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 SLAPPE-LABBER BRAMZEILSKOELTE\n", "1 PLEASANT BREEZE, FRESH BREEZE\n", "2 FRESCACH�N\n", "3 FRESQUITO\n", "4 MODERATE\n", "Name: wind_force, dtype: object" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.data[[\"c99_data\"]].c99_data.wind_force.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For some of these fields I think we would have to make a nested .json file, with the language in which the information is written as the top-level key, and then use it to find the wind force scale value that corresponds to each description.\n", "\n", "More ideas on how to do this are welcome." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I have also done some of the easy key tables, including the Meridian one. You can find those in this [link under ICOADS.C99.Variable.json]() (TODO: the link target is empty; add the URL)." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }