{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generating a data model for CLIWOC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-02-04 13:58:59,837 - root - INFO - init basic configure of logging success\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "sys.path.append('/Users/brivas/c3s_work')\n",
    "import mdf_reader\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-02-04 13:58:59,859 - root - INFO - READING DATA MODEL SCHEMA FILE...\n",
      "2021-02-04 13:58:59,865 - root - INFO - EXTRACTING DATA FROM MODEL: imma1\n",
      "2021-02-04 13:58:59,866 - root - INFO - Getting data string from source...\n",
      "2021-02-04 13:58:59,878 - root - INFO - Extracting and reading sections\n",
      "2021-02-04 13:58:59,879 - root - INFO - Processing section partitioning threads\n",
      "2021-02-04 13:58:59,879 - root - INFO - 1000 ...\n",
      "2021-02-04 13:58:59,895 - root - INFO - done\n",
      "2021-02-04 13:58:59,896 - root - INFO - 211000 ...\n",
      "2021-02-04 13:58:59,911 - root - INFO - done\n",
      "2021-02-04 13:58:59,911 - root - INFO - 29211000 ...\n",
      "2021-02-04 13:58:59,927 - root - INFO - done\n",
      "2021-02-04 13:58:59,928 - root - INFO - 2929211000 ...\n",
      "2021-02-04 13:58:59,933 - root - INFO - done\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading section core\n",
      "Reading section c1\n",
      "Reading section c5\n",
      "Reading section c6\n",
      "Reading section c7\n",
      "Reading section c8\n",
      "Reading section c9\n",
      "Reading section c95\n",
      "Reading section c96\n",
      "Reading section c97\n",
      "Reading section c98\n",
      "Reading section c99\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-02-04 13:59:00,850 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 'UQZ'),('c1', 'UQA'),('c1', 'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE')\n",
      "2021-02-04 13:59:00,851 - root - WARNING - Corresponding upper and/or lower bounds set to +/-inf for validation\n",
      "2021-02-04 13:59:01,734 - root - INFO - Wrapping output....\n",
      "2021-02-04 13:59:01,826 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n"
     ]
    }
   ],
   "source": [
    "schema = 'imma1'\n",
    "\n",
    "data_file_path = '/Users/brivas/c3s_work/mdf_reader/tests/data/133-730_1776-10_subset.imma'\n",
    "\n",
    "data_raw = mdf_reader.read(data_file_path, data_model = schema)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>c99</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>99 0 NAN     NATIONAAL ARCHIEF OF THE NETHERLA...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>99 0 PRO     PUBLIC RECORD OFFICE             ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>99 0 AGI     ARCHIVO GENERAL DE INDIAS        ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>99 0 AGI     ARCHIVO GENERAL DE INDIAS        ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>99 0 NMM     NATIONAL MARITIME MUSEUM         ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 c99\n",
       "0  99 0 NAN     NATIONAAL ARCHIEF OF THE NETHERLA...\n",
       "1  99 0 PRO     PUBLIC RECORD OFFICE             ...\n",
       "2  99 0 AGI     ARCHIVO GENERAL DE INDIAS        ...\n",
       "3  99 0 AGI     ARCHIVO GENERAL DE INDIAS        ...\n",
       "4  99 0 NMM     NATIONAL MARITIME MUSEUM         ..."
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_raw.data['c99'].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'99 0 AGI     ARCHIVO GENERAL DE INDIAS                         SEVILLE   SPAIN                                                                                                               CORREOS, 193A R5              SPANISH                       0155100S               3481100E 1 2TENERIFE                                                                                                                                                                                                                                                           0 21776100112                          UNKNOWN        DILIGENCIA                    SPANISH FRAGATA CORREO                                   ANDR�S V�LEZ                  CAPITAN                                                                                                                                LA CORU�A                                    MONTEVIDEO                                        0                                                        17760816                     129.00                                                                                                                       UNKNOWN          -1                           NM     360 DEGREES                                                                                                                                                                                                 E                                                                                                                                                                              FRESQUITO                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                        LLANA                                                                                                                                                                                                                                                                                                   00000000CLIWOC VERSION 1.0'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "line = data_raw.data['c99'].iloc[3].values[0]\n",
    "line"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2401"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(line)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/Users/brivas/c3s_work/mdf_reader/data_models/lib/imma1_d730'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path_to_folder = '/Users/brivas/c3s_work/mdf_reader/data_models/lib/'\n",
    "model_name = 'imma1_d730'\n",
    "model_path = os.path.join(path_to_folder, model_name)\n",
    "model_path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-02-04 13:59:01,877 - root - INFO - READING DATA MODEL SCHEMA FILE...\n",
      "2021-02-04 13:59:01,883 - root - INFO - EXTRACTING DATA FROM MODEL: /Users/brivas/c3s_work/mdf_reader/data_models/lib/imma1_d730\n",
      "2021-02-04 13:59:01,884 - root - INFO - Getting data string from source...\n",
      "2021-02-04 13:59:01,893 - root - INFO - Extracting and reading sections\n",
      "2021-02-04 13:59:01,894 - root - INFO - Processing section partitioning threads\n",
      "2021-02-04 13:59:01,895 - root - INFO - 1000 ...\n",
      "2021-02-04 13:59:01,912 - root - INFO - done\n",
      "2021-02-04 13:59:01,912 - root - INFO - 211000 ...\n",
      "2021-02-04 13:59:01,926 - root - INFO - done\n",
      "2021-02-04 13:59:01,927 - root - INFO - 29211000 ...\n",
      "2021-02-04 13:59:01,940 - root - INFO - done\n",
      "2021-02-04 13:59:01,941 - root - INFO - 3029211000 ...\n",
      "2021-02-04 13:59:01,944 - root - INFO - done\n",
      "2021-02-04 13:59:01,945 - root - INFO - 303029211000 ...\n",
      "2021-02-04 13:59:01,948 - root - INFO - done\n",
      "2021-02-04 13:59:01,949 - root - INFO - 30303029211000 ...\n",
      "2021-02-04 13:59:01,953 - root - INFO - done\n",
      "2021-02-04 13:59:01,954 - root - INFO - 3030303029211000 ...\n",
      "2021-02-04 13:59:01,956 - root - INFO - done\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading section core\n",
      "Reading section c1\n",
      "Reading section c5\n",
      "Reading section c6\n",
      "Reading section c7\n",
      "Reading section c8\n",
      "Reading section c9\n",
      "Reading section c95\n",
      "Reading section c96\n",
      "Reading section c97\n",
      "Reading section c98\n",
      "Reading section c99_sentinal\n",
      "Reading section c99_logbook\n",
      "Reading section c99_voyage\n",
      "Reading section c99_data\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-02-04 13:59:03,193 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 'UQZ'),('c1', 'UQA'),('c1', 'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE'),('c99_voyage', 'drLatDeg'),('c99_voyage', 'drLatMin'),('c99_voyage', 'drLatSec'),('c99_voyage', 'drLonDeg'),('c99_voyage', 'drLonMin'),('c99_voyage', 'drLonSec'),('c99_voyage', 'LatDeg'),('c99_voyage', 'LatMin'),('c99_voyage', 'LatSec'),('c99_voyage', 'LonDeg'),('c99_voyage', 'LonMin'),('c99_voyage', 'LonSec'),('c99_voyage', 'LMdistance1'),('c99_voyage', 'LMdistance2'),('c99_voyage', 'LMdistance4'),('c99_voyage', 'TimeOB'),('c99_voyage', 'Glasses'),('c99_data', 'AT_outside'),('c99_data', 'SST'),('c99_data', 'attached_tem')\n",
      "2021-02-04 13:59:03,194 - root - WARNING - Corresponding upper and/or lower bounds set to +/-inf for validation\n",
      "2021-02-04 13:59:11,706 - root - INFO - Wrapping output....\n",
      "2021-02-04 13:59:11,814 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n"
     ]
    }
   ],
   "source": [
    "data_file_path = '/Users/brivas/c3s_work/mdf_reader/tests/data/133-730_1776-10_subset.imma'\n",
    "\n",
    "data = mdf_reader.read(data_file_path, data_model_path= model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">c99_sentinal</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>ATTI</th>\n",
       "      <th>ATTL</th>\n",
       "      <th>BLK</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>99</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>99</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>99</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>99</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>99</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  c99_sentinal          \n",
       "          ATTI ATTL  BLK\n",
       "0           99    0  NaN\n",
       "1           99    0  NaN\n",
       "2           99    0  NaN\n",
       "3           99    0  NaN\n",
       "4           99    0  NaN"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "data.data[[\"c99_sentinal\"]].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>InstAbbr</th>\n",
       "      <th>InsName</th>\n",
       "      <th>InsPlace</th>\n",
       "      <th>InsLand</th>\n",
       "      <th>No_data_entry</th>\n",
       "      <th>NameArchiveSet</th>\n",
       "      <th>ArchivePart</th>\n",
       "      <th>Specification</th>\n",
       "      <th>Logbook_id</th>\n",
       "      <th>Logbook_language</th>\n",
       "      <th>Image_No</th>\n",
       "      <th>Illustr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>21</td>\n",
       "      <td>21</td>\n",
       "      <td>21</td>\n",
       "      <td>21</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>21</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>17</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>AGI</td>\n",
       "      <td>ARCHIVO GENERAL DE INDIAS</td>\n",
       "      <td>SEVILLE</td>\n",
       "      <td>SPAIN</td>\n",
       "      <td>1.04.02</td>\n",
       "      <td>152</td>\n",
       "      <td>VOC</td>\n",
       "      <td>5137</td>\n",
       "      <td>COTE - 4/JJ/39</td>\n",
       "      <td>SPANISH</td>\n",
       "      <td>VOC_152_5137_0072</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>154</td>\n",
       "      <td>154</td>\n",
       "      <td>154</td>\n",
       "      <td>154</td>\n",
       "      <td>21</td>\n",
       "      <td>21</td>\n",
       "      <td>21</td>\n",
       "      <td>21</td>\n",
       "      <td>31</td>\n",
       "      <td>154</td>\n",
       "      <td>7</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       InstAbbr                    InsName InsPlace InsLand No_data_entry  \\\n",
       "count       273                        273      273     273            21   \n",
       "unique        5                          5        5       4             1   \n",
       "top         AGI  ARCHIVO GENERAL DE INDIAS  SEVILLE   SPAIN       1.04.02   \n",
       "freq        154                        154      154     154            21   \n",
       "\n",
       "       NameArchiveSet ArchivePart Specification      Logbook_id  \\\n",
       "count              21          21            21             273   \n",
       "unique              1           1             1              17   \n",
       "top               152         VOC          5137  COTE - 4/JJ/39   \n",
       "freq               21          21            21              31   \n",
       "\n",
       "       Logbook_language           Image_No Illustr  \n",
       "count               273                 21     273  \n",
       "unique                4                  5       1  \n",
       "top             SPANISH  VOC_152_5137_0072       0  \n",
       "freq                154                  7     273  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.data[[\"c99_logbook\"]].c99_logbook.describe(include = 'all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>drLatDeg</th>\n",
       "      <th>drLatMin</th>\n",
       "      <th>drLatSec</th>\n",
       "      <th>drLatHem</th>\n",
       "      <th>drLonDeg</th>\n",
       "      <th>drLonMin</th>\n",
       "      <th>drLonSec</th>\n",
       "      <th>drLonHem</th>\n",
       "      <th>LatDeg</th>\n",
       "      <th>LatMin</th>\n",
       "      <th>LatSec</th>\n",
       "      <th>LatHem</th>\n",
       "      <th>LonDeg</th>\n",
       "      <th>LonMin</th>\n",
       "      <th>LonSec</th>\n",
       "      <th>LonHem</th>\n",
       "      <th>LatInd</th>\n",
       "      <th>LonInd</th>\n",
       "      <th>ZeroMeridian</th>\n",
       "      <th>LMname1</th>\n",
       "      <th>LMdirection1</th>\n",
       "      <th>LMdistance1</th>\n",
       "      <th>LMname2</th>\n",
       "      <th>LMdirection2</th>\n",
       "      <th>LMdistance2</th>\n",
       "      <th>LMname3</th>\n",
       "      <th>LMdirection3</th>\n",
       "      <th>LMdistance4</th>\n",
       "      <th>PosCoastal</th>\n",
       "      <th>Calendar_type</th>\n",
       "      <th>logbook_date</th>\n",
       "      <th>TimeOB</th>\n",
       "      <th>Day_of_the_week</th>\n",
       "      <th>PartDay</th>\n",
       "      <th>Watch</th>\n",
       "      <th>Glasses</th>\n",
       "      <th>Start_day</th>\n",
       "      <th>ShipName</th>\n",
       "      <th>Nationality</th>\n",
       "      <th>Ship_type</th>\n",
       "      <th>Company</th>\n",
       "      <th>Name1</th>\n",
       "      <th>Rank1</th>\n",
       "      <th>Name2</th>\n",
       "      <th>Rank2</th>\n",
       "      <th>Name3</th>\n",
       "      <th>Rank3</th>\n",
       "      <th>voyage_from</th>\n",
       "      <th>voyage_to</th>\n",
       "      <th>Anchored_ind</th>\n",
       "      <th>AnchorPlace</th>\n",
       "      <th>DASno</th>\n",
       "      <th>VoyageIni</th>\n",
       "      <th>Course_ship</th>\n",
       "      <th>Ship_speed</th>\n",
       "      <th>Distance</th>\n",
       "      <th>EncName</th>\n",
       "      <th>EncNat</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>208.000000</td>\n",
       "      <td>208.000000</td>\n",
       "      <td>208.0</td>\n",
       "      <td>208</td>\n",
       "      <td>90.000000</td>\n",
       "      <td>94.000000</td>\n",
       "      <td>94.0</td>\n",
       "      <td>94</td>\n",
       "      <td>74.000000</td>\n",
       "      <td>74.000000</td>\n",
       "      <td>74.0</td>\n",
       "      <td>74</td>\n",
       "      <td>169.000000</td>\n",
       "      <td>172.000000</td>\n",
       "      <td>172.0</td>\n",
       "      <td>172</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>66</td>\n",
       "      <td>65</td>\n",
       "      <td>62.000000</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273.0</td>\n",
       "      <td>65</td>\n",
       "      <td>21</td>\n",
       "      <td>21</td>\n",
       "      <td>21.0</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>199</td>\n",
       "      <td>86</td>\n",
       "      <td>213</td>\n",
       "      <td>252</td>\n",
       "      <td>48</td>\n",
       "      <td>48</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>273</td>\n",
       "      <td>267</td>\n",
       "      <td>273</td>\n",
       "      <td>3</td>\n",
       "      <td>21</td>\n",
       "      <td>273</td>\n",
       "      <td>84</td>\n",
       "      <td>0.0</td>\n",
       "      <td>231</td>\n",
       "      <td>13</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>12</td>\n",
       "      <td>18</td>\n",
       "      <td>61</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>31</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>17</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>15</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14</td>\n",
       "      <td>9</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>16</td>\n",
       "      <td>77</td>\n",
       "      <td>0.0</td>\n",
       "      <td>142</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>E</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>TENERIFE</td>\n",
       "      <td>SANDY HOOK</td>\n",
       "      <td>N18:30E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>DEN SWARTE PAGOOD</td>\n",
       "      <td>WTN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1776-10-04</td>\n",
       "      <td>NaN</td>\n",
       "      <td>THURSDAY</td>\n",
       "      <td>3</td>\n",
       "      <td>VM</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>EL PIZARRO</td>\n",
       "      <td>SPANISH</td>\n",
       "      <td>FRAGATA CORREO</td>\n",
       "      <td>RN</td>\n",
       "      <td>DE BEGUE</td>\n",
       "      <td>CAPITAN</td>\n",
       "      <td>ASMUS HENDRIK STERRENBERG</td>\n",
       "      <td>SCHIPPER</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>LA CORU�A</td>\n",
       "      <td>LA CORU�A</td>\n",
       "      <td>0</td>\n",
       "      <td>MONTEVIDEO</td>\n",
       "      <td>4259.3</td>\n",
       "      <td>1776-10-03</td>\n",
       "      <td>EBS1/4S</td>\n",
       "      <td>NaN</td>\n",
       "      <td>123.00</td>\n",
       "      <td>ACTIVE, FALCON</td>\n",
       "      <td>BRITISH</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>150</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>67</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>74</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>139</td>\n",
       "      <td>189</td>\n",
       "      <td>132</td>\n",
       "      <td>169</td>\n",
       "      <td>21</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>267</td>\n",
       "      <td>273</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11</td>\n",
       "      <td>21</td>\n",
       "      <td>21</td>\n",
       "      <td>NaN</td>\n",
       "      <td>208</td>\n",
       "      <td>31</td>\n",
       "      <td>154</td>\n",
       "      <td>75</td>\n",
       "      <td>60</td>\n",
       "      <td>31</td>\n",
       "      <td>154</td>\n",
       "      <td>21</td>\n",
       "      <td>21</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>75</td>\n",
       "      <td>79</td>\n",
       "      <td>270</td>\n",
       "      <td>2</td>\n",
       "      <td>21</td>\n",
       "      <td>44</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>33.730770</td>\n",
       "      <td>28.971153</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>63.633335</td>\n",
       "      <td>27.691490</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>28.162163</td>\n",
       "      <td>27.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>253.467453</td>\n",
       "      <td>29.674419</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>70.467743</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>9.166976</td>\n",
       "      <td>17.226234</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>85.771774</td>\n",
       "      <td>15.633592</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.613508</td>\n",
       "      <td>17.315168</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>136.184052</td>\n",
       "      <td>17.611095</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>69.393318</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.535534</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>12.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>27.750000</td>\n",
       "      <td>14.750000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>16.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>12.250000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>71.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.750000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.250000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>35.000000</td>\n",
       "      <td>29.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52.000000</td>\n",
       "      <td>26.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>328.000000</td>\n",
       "      <td>31.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>47.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>40.250000</td>\n",
       "      <td>44.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>102.750000</td>\n",
       "      <td>39.750000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.000000</td>\n",
       "      <td>39.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>345.000000</td>\n",
       "      <td>44.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.750000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.750000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>49.000000</td>\n",
       "      <td>59.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>346.000000</td>\n",
       "      <td>59.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.000000</td>\n",
       "      <td>59.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>359.000000</td>\n",
       "      <td>59.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          drLatDeg    drLatMin  drLatSec drLatHem    drLonDeg   drLonMin  \\\n",
       "count   208.000000  208.000000     208.0      208   90.000000  94.000000   \n",
       "unique         NaN         NaN       NaN        2         NaN        NaN   \n",
       "top            NaN         NaN       NaN        N         NaN        NaN   \n",
       "freq           NaN         NaN       NaN      150         NaN        NaN   \n",
       "mean     33.730770   28.971153       0.0      NaN   63.633335  27.691490   \n",
       "std       9.166976   17.226234       0.0      NaN   85.771774  15.633592   \n",
       "min      12.000000    0.000000       0.0      NaN    1.000000   0.000000   \n",
       "25%      27.750000   14.750000       0.0      NaN    3.000000  16.000000   \n",
       "50%      35.000000   29.500000       0.0      NaN   52.000000  26.500000   \n",
       "75%      40.250000   44.000000       0.0      NaN  102.750000  39.750000   \n",
       "max      49.000000   59.000000       0.0      NaN  346.000000  59.000000   \n",
       "\n",
       "        drLonSec drLonHem     LatDeg     LatMin  LatSec LatHem      LonDeg  \\\n",
       "count       94.0       94  74.000000  74.000000    74.0     74  169.000000   \n",
       "unique       NaN        2        NaN        NaN     NaN      1         NaN   \n",
       "top          NaN        E        NaN        NaN     NaN      N         NaN   \n",
       "freq         NaN       67        NaN        NaN     NaN     74         NaN   \n",
       "mean         0.0      NaN  28.162163  27.500000     0.0    NaN  253.467453   \n",
       "std          0.0      NaN   8.613508  17.315168     0.0    NaN  136.184052   \n",
       "min          0.0      NaN  17.000000   0.000000     0.0    NaN    1.000000   \n",
       "25%          0.0      NaN  20.000000  12.250000     0.0    NaN   71.000000   \n",
       "50%          0.0      NaN  29.000000  29.000000     0.0    NaN  328.000000   \n",
       "75%          0.0      NaN  34.000000  39.500000     0.0    NaN  345.000000   \n",
       "max          0.0      NaN  49.000000  59.000000     0.0    NaN  359.000000   \n",
       "\n",
       "            LonMin  LonSec LonHem LatInd LonInd ZeroMeridian     LMname1  \\\n",
       "count   172.000000   172.0    172    273    273          273          66   \n",
       "unique         NaN     NaN      2      4      5           12          18   \n",
       "top            NaN     NaN      E      1      2     TENERIFE  SANDY HOOK   \n",
       "freq           NaN     NaN    139    189    132          169          21   \n",
       "mean     29.674419     0.0    NaN    NaN    NaN          NaN         NaN   \n",
       "std      17.611095     0.0    NaN    NaN    NaN          NaN         NaN   \n",
       "min       0.000000     0.0    NaN    NaN    NaN          NaN         NaN   \n",
       "25%      14.000000     0.0    NaN    NaN    NaN          NaN         NaN   \n",
       "50%      31.000000     0.0    NaN    NaN    NaN          NaN         NaN   \n",
       "75%      44.500000     0.0    NaN    NaN    NaN          NaN         NaN   \n",
       "max      59.000000     0.0    NaN    NaN    NaN          NaN         NaN   \n",
       "\n",
       "       LMdirection1  LMdistance1            LMname2 LMdirection2  LMdistance2  \\\n",
       "count            65    62.000000                  2            2     2.000000   \n",
       "unique           61          NaN                  2            2          NaN   \n",
       "top         N18:30E          NaN  DEN SWARTE PAGOOD          WTN          NaN   \n",
       "freq              2          NaN                  1            1          NaN   \n",
       "mean            NaN    70.467743                NaN          NaN     7.500000   \n",
       "std             NaN    69.393318                NaN          NaN     3.535534   \n",
       "min             NaN     3.000000                NaN          NaN     5.000000   \n",
       "25%             NaN    20.750000                NaN          NaN     6.250000   \n",
       "50%             NaN    47.000000                NaN          NaN     7.500000   \n",
       "75%             NaN    96.750000                NaN          NaN     8.750000   \n",
       "max             NaN   338.000000                NaN          NaN    10.000000   \n",
       "\n",
       "        LMname3  LMdirection3  LMdistance4 PosCoastal Calendar_type  \\\n",
       "count       0.0           0.0          0.0        273           273   \n",
       "unique      0.0           0.0          NaN          2             1   \n",
       "top         NaN           NaN          NaN          0             2   \n",
       "freq        NaN           NaN          NaN        267           273   \n",
       "mean        NaN           NaN          NaN        NaN           NaN   \n",
       "std         NaN           NaN          NaN        NaN           NaN   \n",
       "min         NaN           NaN          NaN        NaN           NaN   \n",
       "25%         NaN           NaN          NaN        NaN           NaN   \n",
       "50%         NaN           NaN          NaN        NaN           NaN   \n",
       "75%         NaN           NaN          NaN        NaN           NaN   \n",
       "max         NaN           NaN          NaN        NaN           NaN   \n",
       "\n",
       "       logbook_date  TimeOB Day_of_the_week PartDay Watch  Glasses Start_day  \\\n",
       "count           273   273.0              65      21    21     21.0       273   \n",
       "unique           31     NaN               7       1     1      NaN         2   \n",
       "top      1776-10-04     NaN        THURSDAY       3    VM      NaN   UNKNOWN   \n",
       "freq             12     NaN              11      21    21      NaN       208   \n",
       "mean            NaN    12.0             NaN     NaN   NaN      8.0       NaN   \n",
       "std             NaN     0.0             NaN     NaN   NaN      0.0       NaN   \n",
       "min             NaN    12.0             NaN     NaN   NaN      8.0       NaN   \n",
       "25%             NaN    12.0             NaN     NaN   NaN      8.0       NaN   \n",
       "50%             NaN    12.0             NaN     NaN   NaN      8.0       NaN   \n",
       "75%             NaN    12.0             NaN     NaN   NaN      8.0       NaN   \n",
       "max             NaN    12.0             NaN     NaN   NaN      8.0       NaN   \n",
       "\n",
       "          ShipName Nationality       Ship_type Company     Name1    Rank1  \\\n",
       "count          273         273             199      86       213      252   \n",
       "unique          17           4               6       3        15        4   \n",
       "top     EL PIZARRO     SPANISH  FRAGATA CORREO      RN  DE BEGUE  CAPITAN   \n",
       "freq            31         154              75      60        31      154   \n",
       "mean           NaN         NaN             NaN     NaN       NaN      NaN   \n",
       "std            NaN         NaN             NaN     NaN       NaN      NaN   \n",
       "min            NaN         NaN             NaN     NaN       NaN      NaN   \n",
       "25%            NaN         NaN             NaN     NaN       NaN      NaN   \n",
       "50%            NaN         NaN             NaN     NaN       NaN      NaN   \n",
       "75%            NaN         NaN             NaN     NaN       NaN      NaN   \n",
       "max            NaN         NaN             NaN     NaN       NaN      NaN   \n",
       "\n",
       "                            Name2     Rank2  Name3  Rank3 voyage_from  \\\n",
       "count                          48        48    0.0    0.0         273   \n",
       "unique                          4         3    0.0    0.0          14   \n",
       "top     ASMUS HENDRIK STERRENBERG  SCHIPPER    NaN    NaN   LA CORU�A   \n",
       "freq                           21        21    NaN    NaN          75   \n",
       "mean                          NaN       NaN    NaN    NaN         NaN   \n",
       "std                           NaN       NaN    NaN    NaN         NaN   \n",
       "min                           NaN       NaN    NaN    NaN         NaN   \n",
       "25%                           NaN       NaN    NaN    NaN         NaN   \n",
       "50%                           NaN       NaN    NaN    NaN         NaN   \n",
       "75%                           NaN       NaN    NaN    NaN         NaN   \n",
       "max                           NaN       NaN    NaN    NaN         NaN   \n",
       "\n",
       "        voyage_to Anchored_ind AnchorPlace   DASno   VoyageIni Course_ship  \\\n",
       "count         267          273           3      21         273          84   \n",
       "unique          9            2           2       1          16          77   \n",
       "top     LA CORU�A            0  MONTEVIDEO  4259.3  1776-10-03     EBS1/4S   \n",
       "freq           79          270           2      21          44           3   \n",
       "mean          NaN          NaN         NaN     NaN         NaN         NaN   \n",
       "std           NaN          NaN         NaN     NaN         NaN         NaN   \n",
       "min           NaN          NaN         NaN     NaN         NaN         NaN   \n",
       "25%           NaN          NaN         NaN     NaN         NaN         NaN   \n",
       "50%           NaN          NaN         NaN     NaN         NaN         NaN   \n",
       "75%           NaN          NaN         NaN     NaN         NaN         NaN   \n",
       "max           NaN          NaN         NaN     NaN         NaN         NaN   \n",
       "\n",
       "        Ship_speed Distance         EncName   EncNat  \n",
       "count          0.0      231              13       28  \n",
       "unique         0.0      142               3        1  \n",
       "top            NaN   123.00  ACTIVE, FALCON  BRITISH  \n",
       "freq           NaN        5               8       28  \n",
       "mean           NaN      NaN             NaN      NaN  \n",
       "std            NaN      NaN             NaN      NaN  \n",
       "min            NaN      NaN             NaN      NaN  \n",
       "25%            NaN      NaN             NaN      NaN  \n",
       "50%            NaN      NaN             NaN      NaN  \n",
       "75%            NaN      NaN             NaN      NaN  \n",
       "max            NaN      NaN             NaN      NaN  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "pd.options.display.max_columns = None\n",
    "data.data[[\"c99_voyage\"]].c99_voyage.describe(include = 'all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     TENERIFE\n",
       "1       LONDON\n",
       "2     TENERIFE\n",
       "3     TENERIFE\n",
       "4    GREENWICH\n",
       "Name: ZeroMeridian, dtype: object"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.data[[\"c99_voyage\"]].c99_voyage.ZeroMeridian.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For example, the ship types on this deck are given in many different languages. There is no code table for this variable on the CLIWOC website.\n",
    "\n",
    "Will this information be enough (e.g. for the things that Liz is doing)?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2          PAQUEBOT\n",
       "3    FRAGATA CORREO\n",
       "4          6TH RATE\n",
       "5    FRAGATA CORREO\n",
       "6          6TH RATE\n",
       "Name: Ship_type, dtype: object"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.data[[\"c99_voyage\"]].c99_voyage.Ship_type.dropna().head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AT_reading_units</th>\n",
       "      <th>SST_reading_units</th>\n",
       "      <th>AP_reading_units</th>\n",
       "      <th>BART_reading_units</th>\n",
       "      <th>ReferenceCourse</th>\n",
       "      <th>ReferenceWindDirection</th>\n",
       "      <th>Decl</th>\n",
       "      <th>Distance_units</th>\n",
       "      <th>Distance_units_to_landmark</th>\n",
       "      <th>Distance_units_travelled</th>\n",
       "      <th>Longitude_units</th>\n",
       "      <th>units_of_measurement</th>\n",
       "      <th>humidity_units</th>\n",
       "      <th>water_at_pump_units</th>\n",
       "      <th>wind_scale</th>\n",
       "      <th>BARO_type</th>\n",
       "      <th>BARO_brand</th>\n",
       "      <th>API</th>\n",
       "      <th>Humidity_method</th>\n",
       "      <th>compas_error</th>\n",
       "      <th>compas_correction</th>\n",
       "      <th>AT_outside</th>\n",
       "      <th>SST</th>\n",
       "      <th>AP</th>\n",
       "      <th>wind_dir</th>\n",
       "      <th>current_dir</th>\n",
       "      <th>current_speed</th>\n",
       "      <th>attached_tem</th>\n",
       "      <th>pump_water</th>\n",
       "      <th>Humidity</th>\n",
       "      <th>wind_force</th>\n",
       "      <th>weather</th>\n",
       "      <th>prcp_descriptor</th>\n",
       "      <th>sea_state</th>\n",
       "      <th>shape_coulds</th>\n",
       "      <th>dir_coulds</th>\n",
       "      <th>Clearness</th>\n",
       "      <th>cloud_fraction</th>\n",
       "      <th>gusts</th>\n",
       "      <th>Rain</th>\n",
       "      <th>Fog</th>\n",
       "      <th>Snow</th>\n",
       "      <th>Thunder</th>\n",
       "      <th>Hail</th>\n",
       "      <th>Sea_ice</th>\n",
       "      <th>Trivial_correction</th>\n",
       "      <th>Release</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>84</td>\n",
       "      <td>271</td>\n",
       "      <td>273</td>\n",
       "      <td>0.0</td>\n",
       "      <td>63</td>\n",
       "      <td>231</td>\n",
       "      <td>266</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>271</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>223</td>\n",
       "      <td>105</td>\n",
       "      <td>16</td>\n",
       "      <td>174</td>\n",
       "      <td>19</td>\n",
       "      <td>0.0</td>\n",
       "      <td>47</td>\n",
       "      <td>0.0</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>33</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>130</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>74</td>\n",
       "      <td>48</td>\n",
       "      <td>8</td>\n",
       "      <td>51</td>\n",
       "      <td>7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>UNKNOWN</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>LEAGUES</td>\n",
       "      <td>NM</td>\n",
       "      <td>360 DEGREES</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>E</td>\n",
       "      <td>SOUTHERLY</td>\n",
       "      <td>STRONG</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>FRESQUITO</td>\n",
       "      <td>HORIZONTES CARGADOS</td>\n",
       "      <td>RAIN</td>\n",
       "      <td>LLANA</td>\n",
       "      <td>OVERDRIJVENDE LUCHT</td>\n",
       "      <td>NaN</td>\n",
       "      <td>CLOUDY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>CLIWOC VERSION 2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>84</td>\n",
       "      <td>271</td>\n",
       "      <td>33</td>\n",
       "      <td>NaN</td>\n",
       "      <td>61</td>\n",
       "      <td>85</td>\n",
       "      <td>180</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>19</td>\n",
       "      <td>30</td>\n",
       "      <td>6</td>\n",
       "      <td>48</td>\n",
       "      <td>10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>26</td>\n",
       "      <td>NaN</td>\n",
       "      <td>269</td>\n",
       "      <td>252</td>\n",
       "      <td>270</td>\n",
       "      <td>273</td>\n",
       "      <td>265</td>\n",
       "      <td>272</td>\n",
       "      <td>273</td>\n",
       "      <td>273</td>\n",
       "      <td>156</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        AT_reading_units  SST_reading_units  AP_reading_units  \\\n",
       "count                0.0                0.0               0.0   \n",
       "unique               0.0                0.0               0.0   \n",
       "top                  NaN                NaN               NaN   \n",
       "freq                 NaN                NaN               NaN   \n",
       "mean                 NaN                NaN               NaN   \n",
       "std                  NaN                NaN               NaN   \n",
       "min                  NaN                NaN               NaN   \n",
       "25%                  NaN                NaN               NaN   \n",
       "50%                  NaN                NaN               NaN   \n",
       "75%                  NaN                NaN               NaN   \n",
       "max                  NaN                NaN               NaN   \n",
       "\n",
       "        BART_reading_units ReferenceCourse ReferenceWindDirection Decl  \\\n",
       "count                  0.0              84                    271  273   \n",
       "unique                 0.0               1                      1   33   \n",
       "top                    NaN         UNKNOWN                UNKNOWN    0   \n",
       "freq                   NaN              84                    271   33   \n",
       "mean                   NaN             NaN                    NaN  NaN   \n",
       "std                    NaN             NaN                    NaN  NaN   \n",
       "min                    NaN             NaN                    NaN  NaN   \n",
       "25%                    NaN             NaN                    NaN  NaN   \n",
       "50%                    NaN             NaN                    NaN  NaN   \n",
       "75%                    NaN             NaN                    NaN  NaN   \n",
       "max                    NaN             NaN                    NaN  NaN   \n",
       "\n",
       "        Distance_units Distance_units_to_landmark Distance_units_travelled  \\\n",
       "count              0.0                         63                      231   \n",
       "unique             0.0                          2                        5   \n",
       "top                NaN                    LEAGUES                       NM   \n",
       "freq               NaN                         61                       85   \n",
       "mean               NaN                        NaN                      NaN   \n",
       "std                NaN                        NaN                      NaN   \n",
       "min                NaN                        NaN                      NaN   \n",
       "25%                NaN                        NaN                      NaN   \n",
       "50%                NaN                        NaN                      NaN   \n",
       "75%                NaN                        NaN                      NaN   \n",
       "max                NaN                        NaN                      NaN   \n",
       "\n",
       "       Longitude_units  units_of_measurement  humidity_units  \\\n",
       "count              266                   0.0             0.0   \n",
       "unique               3                   0.0             0.0   \n",
       "top        360 DEGREES                   NaN             NaN   \n",
       "freq               180                   NaN             NaN   \n",
       "mean               NaN                   NaN             NaN   \n",
       "std                NaN                   NaN             NaN   \n",
       "min                NaN                   NaN             NaN   \n",
       "25%                NaN                   NaN             NaN   \n",
       "50%                NaN                   NaN             NaN   \n",
       "75%                NaN                   NaN             NaN   \n",
       "max                NaN                   NaN             NaN   \n",
       "\n",
       "        water_at_pump_units  wind_scale  BARO_type  BARO_brand  API  \\\n",
       "count                   0.0         0.0        0.0         0.0  0.0   \n",
       "unique                  0.0         0.0        0.0         0.0  0.0   \n",
       "top                     NaN         NaN        NaN         NaN  NaN   \n",
       "freq                    NaN         NaN        NaN         NaN  NaN   \n",
       "mean                    NaN         NaN        NaN         NaN  NaN   \n",
       "std                     NaN         NaN        NaN         NaN  NaN   \n",
       "min                     NaN         NaN        NaN         NaN  NaN   \n",
       "25%                     NaN         NaN        NaN         NaN  NaN   \n",
       "50%                     NaN         NaN        NaN         NaN  NaN   \n",
       "75%                     NaN         NaN        NaN         NaN  NaN   \n",
       "max                     NaN         NaN        NaN         NaN  NaN   \n",
       "\n",
       "        Humidity_method  compas_error  compas_correction  AT_outside  SST  \\\n",
       "count               0.0           0.0                0.0         0.0  0.0   \n",
       "unique              0.0           0.0                0.0         NaN  NaN   \n",
       "top                 NaN           NaN                NaN         NaN  NaN   \n",
       "freq                NaN           NaN                NaN         NaN  NaN   \n",
       "mean                NaN           NaN                NaN         NaN  NaN   \n",
       "std                 NaN           NaN                NaN         NaN  NaN   \n",
       "min                 NaN           NaN                NaN         NaN  NaN   \n",
       "25%                 NaN           NaN                NaN         NaN  NaN   \n",
       "50%                 NaN           NaN                NaN         NaN  NaN   \n",
       "75%                 NaN           NaN                NaN         NaN  NaN   \n",
       "max                 NaN           NaN                NaN         NaN  NaN   \n",
       "\n",
       "         AP wind_dir current_dir current_speed  attached_tem  pump_water  \\\n",
       "count   0.0      271           1             1           0.0         0.0   \n",
       "unique  0.0      130           1             1           NaN         0.0   \n",
       "top     NaN        E   SOUTHERLY        STRONG           NaN         NaN   \n",
       "freq    NaN       20           1             1           NaN         NaN   \n",
       "mean    NaN      NaN         NaN           NaN           NaN         NaN   \n",
       "std     NaN      NaN         NaN           NaN           NaN         NaN   \n",
       "min     NaN      NaN         NaN           NaN           NaN         NaN   \n",
       "25%     NaN      NaN         NaN           NaN           NaN         NaN   \n",
       "50%     NaN      NaN         NaN           NaN           NaN         NaN   \n",
       "75%     NaN      NaN         NaN           NaN           NaN         NaN   \n",
       "max     NaN      NaN         NaN           NaN           NaN         NaN   \n",
       "\n",
       "        Humidity wind_force              weather prcp_descriptor sea_state  \\\n",
       "count        0.0        223                  105              16       174   \n",
       "unique       0.0         74                   48               8        51   \n",
       "top          NaN  FRESQUITO  HORIZONTES CARGADOS            RAIN     LLANA   \n",
       "freq         NaN         19                   30               6        48   \n",
       "mean         NaN        NaN                  NaN             NaN       NaN   \n",
       "std          NaN        NaN                  NaN             NaN       NaN   \n",
       "min          NaN        NaN                  NaN             NaN       NaN   \n",
       "25%          NaN        NaN                  NaN             NaN       NaN   \n",
       "50%          NaN        NaN                  NaN             NaN       NaN   \n",
       "75%          NaN        NaN                  NaN             NaN       NaN   \n",
       "max          NaN        NaN                  NaN             NaN       NaN   \n",
       "\n",
       "               shape_coulds  dir_coulds Clearness  cloud_fraction gusts Rain  \\\n",
       "count                    19         0.0        47             0.0   273  273   \n",
       "unique                    7         0.0         9             0.0     2    2   \n",
       "top     OVERDRIJVENDE LUCHT         NaN    CLOUDY             NaN     0    0   \n",
       "freq                     10         NaN        26             NaN   269  252   \n",
       "mean                    NaN         NaN       NaN             NaN   NaN  NaN   \n",
       "std                     NaN         NaN       NaN             NaN   NaN  NaN   \n",
       "min                     NaN         NaN       NaN             NaN   NaN  NaN   \n",
       "25%                     NaN         NaN       NaN             NaN   NaN  NaN   \n",
       "50%                     NaN         NaN       NaN             NaN   NaN  NaN   \n",
       "75%                     NaN         NaN       NaN             NaN   NaN  NaN   \n",
       "max                     NaN         NaN       NaN             NaN   NaN  NaN   \n",
       "\n",
       "        Fog Snow Thunder Hail Sea_ice Trivial_correction             Release  \n",
       "count   273  273     273  273     273                273                 273  \n",
       "unique    2    1       2    2       1                  1                   3  \n",
       "top       0    0       0    0       0                  0  CLIWOC VERSION 2.0  \n",
       "freq    270  273     265  272     273                273                 156  \n",
       "mean    NaN  NaN     NaN  NaN     NaN                NaN                 NaN  \n",
       "std     NaN  NaN     NaN  NaN     NaN                NaN                 NaN  \n",
       "min     NaN  NaN     NaN  NaN     NaN                NaN                 NaN  \n",
       "25%     NaN  NaN     NaN  NaN     NaN                NaN                 NaN  \n",
       "50%     NaN  NaN     NaN  NaN     NaN                NaN                 NaN  \n",
       "75%     NaN  NaN     NaN  NaN     NaN                NaN                 NaN  \n",
       "max     NaN  NaN     NaN  NaN     NaN                NaN                 NaN  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.data[[\"c99_data\"]].c99_data.describe(include = 'all')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What about the different scales for the wind force, given different languages?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    SLAPPE-LABBER BRAMZEILSKOELTE\n",
       "1    PLEASANT BREEZE, FRESH BREEZE\n",
       "2                       FRESCACH�N\n",
       "3                        FRESQUITO\n",
       "4                         MODERATE\n",
       "Name: wind_force, dtype: object"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.data[[\"c99_data\"]].c99_data.wind_force.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For some of this data there are some code tables but according to the mdf reader we need a single code table. \n",
    "\n",
    "\n",
    "We would have to make a nested .json file I think ... with the main key as the language. More ideas on how to do this are welcome."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "I have done some of the easy key tables including the Meridian one. You can find those [under  ICOADS.C99.Variable.json](https://git.noc.ac.uk/brecinosrivas/mdf_reader/-/tree/master/data_models/lib/imma1_d730/code_tables)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}