{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generating a data model for CLIWOC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-01-08 15:42:25,535 - root - INFO - init basic configure of logging success\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "sys.path.append('/Users/brivas/c3s_work')\n",
    "import mdf_reader\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-01-08 15:42:38,945 - root - INFO - READING DATA MODEL SCHEMA FILE...\n",
      "2021-01-08 15:42:38,950 - root - INFO - EXTRACTING DATA FROM MODEL: imma1\n",
      "2021-01-08 15:42:38,951 - root - INFO - Getting data string from source...\n",
      "2021-01-08 15:42:38,964 - root - INFO - Extracting and reading sections\n",
      "2021-01-08 15:42:38,965 - root - INFO - Processing section partitioning threads\n",
      "2021-01-08 15:42:38,965 - root - INFO - 1000 ...\n",
      "2021-01-08 15:42:38,978 - root - INFO - done\n",
      "2021-01-08 15:42:38,979 - root - INFO - 211000 ...\n",
      "2021-01-08 15:42:39,000 - root - INFO - done\n",
      "2021-01-08 15:42:39,001 - root - INFO - 29211000 ...\n",
      "2021-01-08 15:42:39,016 - root - INFO - done\n",
      "2021-01-08 15:42:39,017 - root - INFO - 2929211000 ...\n",
      "2021-01-08 15:42:39,022 - root - INFO - done\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading section core\n",
      "Reading section c1\n",
      "Reading section c5\n",
      "Reading section c6\n",
      "Reading section c7\n",
      "Reading section c8\n",
      "Reading section c9\n",
      "Reading section c95\n",
      "Reading section c96\n",
      "Reading section c97\n",
      "Reading section c98\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-01-08 15:42:39,980 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 'UQZ'),('c1', 'UQA'),('c1', 'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE')\n",
      "2021-01-08 15:42:39,981 - root - WARNING - Corresponding upper and/or lower bounds set to +/-inf for validation\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading section c99\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-01-08 15:42:41,021 - root - INFO - Wrapping output....\n",
      "2021-01-08 15:42:41,128 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n"
     ]
    }
   ],
   "source": [
    "schema = 'imma1'\n",
    "\n",
    "data_file_path = '/Users/brivas/c3s_work/mdf_reader/tests/data/133-730_1776-10_subset.imma'\n",
    "\n",
    "data_raw = mdf_reader.read(data_file_path, data_model = schema)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>c99</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>99 0 NAN     NATIONAAL ARCHIEF OF THE NETHERLA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>99 0 PRO     PUBLIC RECORD OFFICE             ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>99 0 AGI     ARCHIVO GENERAL DE INDIAS        ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>99 0 AGI     ARCHIVO GENERAL DE INDIAS        ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>99 0 NMM     NATIONAL MARITIME MUSEUM         ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 c99\n",
       "0  99 0 NAN     NATIONAAL ARCHIEF OF THE NETHERLA...\n",
       "1  99 0 PRO     PUBLIC RECORD OFFICE             ...\n",
       "2  99 0 AGI     ARCHIVO GENERAL DE INDIAS        ...\n",
       "3  99 0 AGI     ARCHIVO GENERAL DE INDIAS        ...\n",
       "4  99 0 NMM     NATIONAL MARITIME MUSEUM         ..."
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_raw.data['c99'].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'99 0 NAN     NATIONAAL ARCHIEF OF THE NETHERLANDS              DEN HAAG  NEDERLAND     1.04.02        152              VOC                                    5137                           VOC_152_5137                  DUTCH  VOC_152_5137_0071      0191500N1020200E185900N         2 4TENERIFE                                   NOORDELIJKSTE LAND                                NTW                   ZUIDELIJKSTE                                      WTN                 10                                                                        0 21776100112         3         VM    8UNKNOWN        TEMPEL                        DUTCH                  VOC                               C. OVERBEEK                                            ASMUS HENDRIK STERRENBERG     SCHIPPER                                                                        TEXEL                                        KASSERAIJEN                                       0                                                  4259.317760102NW                    11.50                                                                                                        UNKNOWN        UNKNOWN           0                    DUITSE DUITSE 360 DEGREES                                                                                                                                                                                                 N-NNW                                                                                                                                                                          SLAPPE-LABBER BRAMZEILSKOELTE                                                                                                                                                                                                                     GOED WEER                                                                                                                                                                                                                                                                                                                                                                                                       OVERDRIJVENDE DEINSIGE LUCHT                                                                                                                      00000000CLIWOC VERSION 2.0'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "line = data_raw.data['c99'].iloc[0].values[0]\n",
    "line"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/Users/brivas/c3s_work/mdf_reader/data_models/lib/imma1_d730'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path_to_folder = '/Users/brivas/c3s_work/mdf_reader/data_models/lib/'\n",
    "model_name = 'imma1_d730'\n",
    "model_path = os.path.join(path_to_folder, model_name)\n",
    "model_path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-01-08 15:43:38,121 - root - INFO - READING DATA MODEL SCHEMA FILE...\n",
      "2021-01-08 15:43:38,126 - root - INFO - EXTRACTING DATA FROM MODEL: /Users/brivas/c3s_work/mdf_reader/data_models/lib/imma1_d730\n",
      "2021-01-08 15:43:38,127 - root - INFO - Getting data string from source...\n",
      "2021-01-08 15:43:38,139 - root - INFO - Extracting and reading sections\n",
      "2021-01-08 15:43:38,140 - root - INFO - Processing section partitioning threads\n",
      "2021-01-08 15:43:38,141 - root - INFO - 1000 ...\n",
      "2021-01-08 15:43:38,154 - root - INFO - done\n",
      "2021-01-08 15:43:38,155 - root - INFO - 211000 ...\n",
      "2021-01-08 15:43:38,169 - root - INFO - done\n",
      "2021-01-08 15:43:38,170 - root - INFO - 29211000 ...\n",
      "2021-01-08 15:43:38,190 - root - INFO - done\n",
      "2021-01-08 15:43:38,190 - root - INFO - 3029211000 ...\n",
      "2021-01-08 15:43:38,194 - root - INFO - done\n",
      "2021-01-08 15:43:38,195 - root - INFO - 303029211000 ...\n",
      "2021-01-08 15:43:38,199 - root - INFO - done\n",
      "2021-01-08 15:43:38,200 - root - INFO - 30303029211000 ...\n",
      "2021-01-08 15:43:38,204 - root - INFO - done\n",
      "2021-01-08 15:43:38,205 - root - INFO - 3030303029211000 ...\n",
      "2021-01-08 15:43:38,207 - root - INFO - done\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading section core\n",
      "Reading section c1\n",
      "Reading section c5\n",
      "Reading section c6\n",
      "Reading section c7\n",
      "Reading section c8\n",
      "Reading section c9\n",
      "Reading section c95\n",
      "Reading section c96\n",
      "Reading section c97\n",
      "Reading section c98\n",
      "Reading section c99_sentinal\n",
      "Reading section c99_logbook\n",
      "Reading section c99_voyage\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-01-08 15:43:39,541 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 'UQZ'),('c1', 'UQA'),('c1', 'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE'),('c99_voyage', 'drLatDeg'),('c99_voyage', 'drLatMin'),('c99_voyage', 'drLatSec'),('c99_voyage', 'drLonDeg'),('c99_voyage', 'drLonMin'),('c99_voyage', 'drLonSec'),('c99_voyage', 'LatDeg'),('c99_voyage', 'LatMin'),('c99_voyage', 'LatSec'),('c99_voyage', 'LonDeg'),('c99_voyage', 'LonMin'),('c99_voyage', 'LonSec'),('c99_voyage', 'LMdistance1'),('c99_voyage', 'LMdistance2'),('c99_voyage', 'LMdistance4'),('c99_voyage', 'TimeOB'),('c99_voyage', 'Glasses'),('c99_data', 'AT_outside'),('c99_data', 'SST'),('c99_data', 'attached_tem')\n",
      "2021-01-08 15:43:39,542 - root - WARNING - Corresponding upper and/or lower bounds set to +/-inf for validation\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading section c99_data\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-01-08 15:43:48,904 - root - INFO - Wrapping output....\n",
      "2021-01-08 15:43:49,012 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n"
     ]
    }
   ],
   "source": [
    "data_file_path = '/Users/brivas/c3s_work/mdf_reader/tests/data/133-730_1776-10_subset.imma'\n",
    "\n",
    "data = mdf_reader.read(data_file_path, data_model_path= model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">c99_sentinal</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>ATTI</th>\n",
       "      <th>ATTL</th>\n",
       "      <th>BLK</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>99</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>99</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>99</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>99</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>99</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  c99_sentinal          \n",
       "          ATTI ATTL  BLK\n",
       "0           99    0  NaN\n",
       "1           99    0  NaN\n",
       "2           99    0  NaN\n",
       "3           99    0  NaN\n",
       "4           99    0  NaN"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "data.data[[\"c99_sentinal\"]].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>InstAbbr</th>\n",
       "      <th>InsName</th>\n",
       "      <th>InsPlace</th>\n",
       "      <th>InsLand</th>\n",
       "      <th>No_data_entry</th>\n",
       "      <th>NameArchiveSet</th>\n",
       "      <th>ArchivePart</th>\n",
       "      <th>Specification</th>\n",
       "      <th>Logbook_id</th>\n",
       "      <th>Logbook_language</th>\n",
       "      <th>Image_No</th>\n",
       "      <th>Illustr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NAN</td>\n",
       "      <td>NATIONAAL ARCHIEF OF THE NETHERLANDS</td>\n",
       "      <td>DEN HAAG</td>\n",
       "      <td>NEDERLAND</td>\n",
       "      <td>1.04.02</td>\n",
       "      <td>152</td>\n",
       "      <td>VOC</td>\n",
       "      <td>5137</td>\n",
       "      <td>VOC_152_5137</td>\n",
       "      <td>DUTCH</td>\n",
       "      <td>VOC_152_5137_0071</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>PRO</td>\n",
       "      <td>PUBLIC RECORD OFFICE</td>\n",
       "      <td>KEW</td>\n",
       "      <td>UNITED KINGDOM</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>PRO BH1 1602</td>\n",
       "      <td>ENGLISH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AGI</td>\n",
       "      <td>ARCHIVO GENERAL DE INDIAS</td>\n",
       "      <td>SEVILLE</td>\n",
       "      <td>SPAIN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>CORREOS, 193B R8</td>\n",
       "      <td>SPANISH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AGI</td>\n",
       "      <td>ARCHIVO GENERAL DE INDIAS</td>\n",
       "      <td>SEVILLE</td>\n",
       "      <td>SPAIN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>CORREOS, 193A R5</td>\n",
       "      <td>SPANISH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NMM</td>\n",
       "      <td>NATIONAL MARITIME MUSEUM</td>\n",
       "      <td>GREENWICH</td>\n",
       "      <td>UNITED KINGDOM</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NMM ADM/L/S543</td>\n",
       "      <td>ENGLISH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  InstAbbr                               InsName   InsPlace         InsLand  \\\n",
       "0      NAN  NATIONAAL ARCHIEF OF THE NETHERLANDS   DEN HAAG       NEDERLAND   \n",
       "1      PRO                  PUBLIC RECORD OFFICE        KEW  UNITED KINGDOM   \n",
       "2      AGI             ARCHIVO GENERAL DE INDIAS    SEVILLE           SPAIN   \n",
       "3      AGI             ARCHIVO GENERAL DE INDIAS    SEVILLE           SPAIN   \n",
       "4      NMM              NATIONAL MARITIME MUSEUM  GREENWICH  UNITED KINGDOM   \n",
       "\n",
       "  No_data_entry NameArchiveSet ArchivePart Specification        Logbook_id  \\\n",
       "0       1.04.02            152         VOC          5137      VOC_152_5137   \n",
       "1           NaN            NaN         NaN           NaN      PRO BH1 1602   \n",
       "2           NaN            NaN         NaN           NaN  CORREOS, 193B R8   \n",
       "3           NaN            NaN         NaN           NaN  CORREOS, 193A R5   \n",
       "4           NaN            NaN         NaN           NaN    NMM ADM/L/S543   \n",
       "\n",
       "  Logbook_language           Image_No Illustr  \n",
       "0            DUTCH  VOC_152_5137_0071       0  \n",
       "1          ENGLISH                NaN       0  \n",
       "2          SPANISH                NaN       0  \n",
       "3          SPANISH                NaN       0  \n",
       "4          ENGLISH                NaN       0  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.data[[\"c99_logbook\"]].c99_logbook.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>drLatDeg</th>\n",
       "      <th>drLatMin</th>\n",
       "      <th>drLatSec</th>\n",
       "      <th>drLatHem</th>\n",
       "      <th>drLonDeg</th>\n",
       "      <th>drLonMin</th>\n",
       "      <th>drLonSec</th>\n",
       "      <th>drLonHem</th>\n",
       "      <th>LatDeg</th>\n",
       "      <th>LatMin</th>\n",
       "      <th>...</th>\n",
       "      <th>voyage_to</th>\n",
       "      <th>Anchored_ind</th>\n",
       "      <th>AnchorPlace</th>\n",
       "      <th>DASno</th>\n",
       "      <th>VoyageIni</th>\n",
       "      <th>Course_ship</th>\n",
       "      <th>Ship_speed</th>\n",
       "      <th>Distance</th>\n",
       "      <th>EncName</th>\n",
       "      <th>EncNat</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>N</td>\n",
       "      <td>102.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>E</td>\n",
       "      <td>18.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>...</td>\n",
       "      <td>KASSERAIJEN</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4259.3</td>\n",
       "      <td>1776-01-02</td>\n",
       "      <td>NW</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.50</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>W</td>\n",
       "      <td>49.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>...</td>\n",
       "      <td>GRAVESEND</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1776-09-18</td>\n",
       "      <td>EBS1/4S</td>\n",
       "      <td>NaN</td>\n",
       "      <td>131.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>LA CORU�A</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1776-07-28</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>212.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15.0</td>\n",
       "      <td>51.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>S</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>MONTEVIDEO</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1776-08-16</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>129.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>45.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>N</td>\n",
       "      <td>49.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>W</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1776-09-24</td>\n",
       "      <td>N87W</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 58 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   drLatDeg  drLatMin  drLatSec drLatHem  drLonDeg  drLonMin  drLonSec  \\\n",
       "0      19.0      15.0       0.0        N     102.0       2.0       0.0   \n",
       "1       NaN       NaN       NaN      NaN      11.0       5.0       0.0   \n",
       "2      38.0      32.0       0.0        N       NaN       NaN       NaN   \n",
       "3      15.0      51.0       0.0        S       NaN       NaN       NaN   \n",
       "4      45.0      30.0       0.0        N      49.0       8.0       0.0   \n",
       "\n",
       "  drLonHem  LatDeg  LatMin  ...    voyage_to Anchored_ind  AnchorPlace  \\\n",
       "0        E    18.0    59.0  ...  KASSERAIJEN            0          NaN   \n",
       "1        W    49.0    40.0  ...    GRAVESEND            0          NaN   \n",
       "2      NaN     NaN     NaN  ...    LA CORU�A            0          NaN   \n",
       "3      NaN     NaN     NaN  ...   MONTEVIDEO            0          NaN   \n",
       "4        W     NaN     NaN  ...          NaN            0          NaN   \n",
       "\n",
       "    DASno   VoyageIni Course_ship Ship_speed Distance EncName EncNat  \n",
       "0  4259.3  1776-01-02          NW        NaN    11.50     NaN    NaN  \n",
       "1     NaN  1776-09-18     EBS1/4S        NaN   131.00     NaN    NaN  \n",
       "2     NaN  1776-07-28         NaN        NaN   212.00     NaN    NaN  \n",
       "3     NaN  1776-08-16         NaN        NaN   129.00     NaN    NaN  \n",
       "4     NaN  1776-09-24        N87W        NaN    92.00     NaN    NaN  \n",
       "\n",
       "[5 rows x 58 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.data[[\"c99_voyage\"]].c99_voyage.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "So for the ship type on this deck for example we will have tons of ship types in different languages. There is no code table for this. Would this information will be enough (e.g. for things that Liz is doing)?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2          PAQUEBOT\n",
       "3    FRAGATA CORREO\n",
       "4          6TH RATE\n",
       "5    FRAGATA CORREO\n",
       "6          6TH RATE\n",
       "Name: Ship_type, dtype: object"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.data[[\"c99_voyage\"]].c99_voyage.Ship_type.dropna().head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AT_reading_units</th>\n",
       "      <th>SST_reading_units</th>\n",
       "      <th>AP_reading_units</th>\n",
       "      <th>BART_reading_units</th>\n",
       "      <th>ReferenceCourse</th>\n",
       "      <th>ReferenceWindDirection</th>\n",
       "      <th>Decl</th>\n",
       "      <th>Distance_units</th>\n",
       "      <th>Distance_units_to_landmark</th>\n",
       "      <th>Distance_units_travelled</th>\n",
       "      <th>Longitude_units</th>\n",
       "      <th>units_of_measurement</th>\n",
       "      <th>humidity_units</th>\n",
       "      <th>water_at_pump_units</th>\n",
       "      <th>wind_scale</th>\n",
       "      <th>BARO_type</th>\n",
       "      <th>BARO_brand</th>\n",
       "      <th>API</th>\n",
       "      <th>Humidity_method</th>\n",
       "      <th>compas_error</th>\n",
       "      <th>compas_correction</th>\n",
       "      <th>AT_outside</th>\n",
       "      <th>SST</th>\n",
       "      <th>AP</th>\n",
       "      <th>wind_dir</th>\n",
       "      <th>current_dir</th>\n",
       "      <th>current_speed</th>\n",
       "      <th>attached_tem</th>\n",
       "      <th>pump_water</th>\n",
       "      <th>Humidity</th>\n",
       "      <th>wind_force</th>\n",
       "      <th>weather</th>\n",
       "      <th>prcp_descriptor</th>\n",
       "      <th>sea_state</th>\n",
       "      <th>shape_coulds</th>\n",
       "      <th>dir_coulds</th>\n",
       "      <th>Clearness</th>\n",
       "      <th>cloud_fraction</th>\n",
       "      <th>gusts</th>\n",
       "      <th>Rain</th>\n",
       "      <th>Fog</th>\n",
       "      <th>Snow</th>\n",
       "      <th>Thunder</th>\n",
       "      <th>Hail</th>\n",
       "      <th>Sea_ice</th>\n",
       "      <th>Trivial_correction</th>\n",
       "      <th>Release</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>DUITSE</td>\n",
       "      <td>DUITSE</td>\n",
       "      <td>360 DEGREES</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N-NNW</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SLAPPE-LABBER BRAMZEILSKOELTE</td>\n",
       "      <td>GOED WEER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>OVERDRIJVENDE DEINSIGE LUCHT</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>CLIWOC VERSION 2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>-23</td>\n",
       "      <td>NaN</td>\n",
       "      <td>LEAGUES</td>\n",
       "      <td>NM</td>\n",
       "      <td>180 DEGREES</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NEBN, NE, NNE, NEBE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>PLEASANT BREEZE, FRESH BREEZE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SPITS OF RAIN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>CLEAR THEN CLOUDY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>CLIWOC VERSION 2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>-17</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>360 DEGREES</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>FRESCACH�N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>GRUESA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>CLIWOC VERSION 2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>-1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NM</td>\n",
       "      <td>360 DEGREES</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>FRESQUITO</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>LLANA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>CLIWOC VERSION 1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>-17</td>\n",
       "      <td>NaN</td>\n",
       "      <td>LEAGUES</td>\n",
       "      <td>NM</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SSW</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>MODERATE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>FAIR</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>CLIWOC VERSION 1.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  AT_reading_units SST_reading_units AP_reading_units BART_reading_units  \\\n",
       "0              NaN               NaN              NaN                NaN   \n",
       "1              NaN               NaN              NaN                NaN   \n",
       "2              NaN               NaN              NaN                NaN   \n",
       "3              NaN               NaN              NaN                NaN   \n",
       "4              NaN               NaN              NaN                NaN   \n",
       "\n",
       "  ReferenceCourse ReferenceWindDirection Decl Distance_units  \\\n",
       "0         UNKNOWN                UNKNOWN    0            NaN   \n",
       "1         UNKNOWN                UNKNOWN  -23            NaN   \n",
       "2             NaN                UNKNOWN  -17            NaN   \n",
       "3             NaN                UNKNOWN   -1            NaN   \n",
       "4         UNKNOWN                UNKNOWN  -17            NaN   \n",
       "\n",
       "  Distance_units_to_landmark Distance_units_travelled Longitude_units  \\\n",
       "0                     DUITSE                   DUITSE     360 DEGREES   \n",
       "1                    LEAGUES                       NM     180 DEGREES   \n",
       "2                        NaN                  UNKNOWN     360 DEGREES   \n",
       "3                        NaN                       NM     360 DEGREES   \n",
       "4                    LEAGUES                       NM         UNKNOWN   \n",
       "\n",
       "  units_of_measurement humidity_units water_at_pump_units wind_scale  \\\n",
       "0                  NaN            NaN                 NaN        NaN   \n",
       "1                  NaN            NaN                 NaN        NaN   \n",
       "2                  NaN            NaN                 NaN        NaN   \n",
       "3                  NaN            NaN                 NaN        NaN   \n",
       "4                  NaN            NaN                 NaN        NaN   \n",
       "\n",
       "  BARO_type BARO_brand  API Humidity_method compas_error compas_correction  \\\n",
       "0       NaN        NaN  NaN             NaN          NaN               NaN   \n",
       "1       NaN        NaN  NaN             NaN          NaN               NaN   \n",
       "2       NaN        NaN  NaN             NaN          NaN               NaN   \n",
       "3       NaN        NaN  NaN             NaN          NaN               NaN   \n",
       "4       NaN        NaN  NaN             NaN          NaN               NaN   \n",
       "\n",
       "   AT_outside  SST   AP             wind_dir current_dir current_speed  \\\n",
       "0         NaN  NaN  NaN                N-NNW         NaN           NaN   \n",
       "1         NaN  NaN  NaN  NEBN, NE, NNE, NEBE         NaN           NaN   \n",
       "2         NaN  NaN  NaN                    E         NaN           NaN   \n",
       "3         NaN  NaN  NaN                    E         NaN           NaN   \n",
       "4         NaN  NaN  NaN                  SSW         NaN           NaN   \n",
       "\n",
       "   attached_tem pump_water Humidity                     wind_force    weather  \\\n",
       "0           NaN        NaN      NaN  SLAPPE-LABBER BRAMZEILSKOELTE  GOED WEER   \n",
       "1           NaN        NaN      NaN  PLEASANT BREEZE, FRESH BREEZE        NaN   \n",
       "2           NaN        NaN      NaN                     FRESCACH�N        NaN   \n",
       "3           NaN        NaN      NaN                      FRESQUITO        NaN   \n",
       "4           NaN        NaN      NaN                       MODERATE        NaN   \n",
       "\n",
       "  prcp_descriptor sea_state                  shape_coulds dir_coulds  \\\n",
       "0             NaN       NaN  OVERDRIJVENDE DEINSIGE LUCHT        NaN   \n",
       "1   SPITS OF RAIN       NaN                           NaN        NaN   \n",
       "2             NaN    GRUESA                           NaN        NaN   \n",
       "3             NaN     LLANA                           NaN        NaN   \n",
       "4             NaN       NaN                           NaN        NaN   \n",
       "\n",
       "           Clearness cloud_fraction gusts Rain Fog Snow Thunder Hail Sea_ice  \\\n",
       "0                NaN            NaN     0    0   0    0       0    0       0   \n",
       "1  CLEAR THEN CLOUDY            NaN     0    1   0    0       0    0       0   \n",
       "2                NaN            NaN     0    0   0    0       0    0       0   \n",
       "3                NaN            NaN     0    0   0    0       0    0       0   \n",
       "4               FAIR            NaN     0    0   0    0       0    0       0   \n",
       "\n",
       "  Trivial_correction             Release  \n",
       "0                  0  CLIWOC VERSION 2.0  \n",
       "1                  0  CLIWOC VERSION 2.0  \n",
       "2                  0  CLIWOC VERSION 2.0  \n",
       "3                  0  CLIWOC VERSION 1.0  \n",
       "4                  0  CLIWOC VERSION 1.1  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.options.display.max_columns = None\n",
    "data.data[[\"c99_data\"]].c99_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What about the different scales of wind force from different languages?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    SLAPPE-LABBER BRAMZEILSKOELTE\n",
       "1    PLEASANT BREEZE, FRESH BREEZE\n",
       "2                       FRESCACH�N\n",
       "3                        FRESQUITO\n",
       "4                         MODERATE\n",
       "Name: wind_force, dtype: object"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.data[[\"c99_data\"]].c99_data.wind_force.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For some of this we would have to make a nested .json file I think ... with the key as the language in which the information is written and then find the wind force scale that corresponds to that description.\n",
    "\n",
    "More ideas on how to do this are welcome."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "I have also taken done some of the easy key tables including the Meridian one. You can find those in this [link under ICOADS.C99.Variable.json]()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}