{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Generating a data model for CLIWOC" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2021-02-04 13:58:59,837 - root - INFO - init basic configure of logging success\n" ] } ], "source": [ "import os\n", "import sys\n", "sys.path.append('/Users/brivas/c3s_work')\n", "import mdf_reader\n", "import json" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2021-02-04 13:58:59,859 - root - INFO - READING DATA MODEL SCHEMA FILE...\n", "2021-02-04 13:58:59,865 - root - INFO - EXTRACTING DATA FROM MODEL: imma1\n", "2021-02-04 13:58:59,866 - root - INFO - Getting data string from source...\n", "2021-02-04 13:58:59,878 - root - INFO - Extracting and reading sections\n", "2021-02-04 13:58:59,879 - root - INFO - Processing section partitioning threads\n", "2021-02-04 13:58:59,879 - root - INFO - 1000 ...\n", "2021-02-04 13:58:59,895 - root - INFO - done\n", "2021-02-04 13:58:59,896 - root - INFO - 211000 ...\n", "2021-02-04 13:58:59,911 - root - INFO - done\n", "2021-02-04 13:58:59,911 - root - INFO - 29211000 ...\n", "2021-02-04 13:58:59,927 - root - INFO - done\n", "2021-02-04 13:58:59,928 - root - INFO - 2929211000 ...\n", "2021-02-04 13:58:59,933 - root - INFO - done\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Reading section core\n", "Reading section c1\n", "Reading section c5\n", "Reading section c6\n", "Reading section c7\n", "Reading section c8\n", "Reading section c9\n", "Reading section c95\n", "Reading section c96\n", "Reading section c97\n", "Reading section c98\n", "Reading section c99\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2021-02-04 13:59:00,850 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 
'UQZ'),('c1', 'UQA'),('c1', 'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE')\n", "2021-02-04 13:59:00,851 - root - WARNING - Corresponding upper and/or lower bounds set to +/-inf for validation\n", "2021-02-04 13:59:01,734 - root - INFO - Wrapping output....\n", "2021-02-04 13:59:01,826 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n" ] } ], "source": [ "# Read the sample file with the data model referenced by name ('imma1');\n", "# with this model the c99 supplement comes back as one raw string per report.\n", "schema = 'imma1'\n", "data_file_path = '/Users/brivas/c3s_work/mdf_reader/tests/data/133-730_1776-10_subset.imma'\n", "\n", "data_raw = mdf_reader.read(data_file_path, data_model=schema)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table 
border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>c99</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>99 0 NAN NATIONAAL ARCHIEF OF THE NETHERLA...</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>99 0 PRO PUBLIC RECORD OFFICE ...</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>99 0 AGI ARCHIVO GENERAL DE INDIAS ...</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>99 0 AGI ARCHIVO GENERAL DE INDIAS ...</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>99 0 NMM NATIONAL MARITIME MUSEUM ...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " c99\n", "0 99 0 NAN NATIONAAL ARCHIEF OF THE NETHERLA...\n", "1 99 0 PRO PUBLIC RECORD OFFICE ...\n", "2 99 0 AGI ARCHIVO GENERAL DE INDIAS ...\n", "3 99 0 AGI ARCHIVO GENERAL DE INDIAS ...\n", "4 99 0 NMM NATIONAL MARITIME MUSEUM ..." ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_raw.data['c99'].head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'99 0 AGI ARCHIVO GENERAL DE INDIAS SEVILLE SPAIN CORREOS, 193A R5 SPANISH 0155100S 3481100E 1 2TENERIFE 0 21776100112 UNKNOWN DILIGENCIA SPANISH FRAGATA CORREO ANDR�S V�LEZ CAPITAN LA CORU�A MONTEVIDEO 0 17760816 129.00 UNKNOWN -1 NM 360 DEGREES E FRESQUITO LLANA 00000000CLIWOC VERSION 1.0'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "line = data_raw.data['c99'].iloc[3].values[0]\n", "line" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2401" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(line)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'/Users/brivas/c3s_work/mdf_reader/data_models/lib/imma1_d730'" ] 
}, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Location of the CLIWOC-specific data model: <models folder>/<model name>\n", "model_name = 'imma1_d730'\n", "path_to_folder = '/Users/brivas/c3s_work/mdf_reader/data_models/lib/'\n", "model_path = os.path.join(path_to_folder, model_name)\n", "model_path" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2021-02-04 13:59:01,877 - root - INFO - READING DATA MODEL SCHEMA FILE...\n", "2021-02-04 13:59:01,883 - root - INFO - EXTRACTING DATA FROM MODEL: /Users/brivas/c3s_work/mdf_reader/data_models/lib/imma1_d730\n", "2021-02-04 13:59:01,884 - root - INFO - Getting data string from source...\n", "2021-02-04 13:59:01,893 - root - INFO - Extracting and reading sections\n", "2021-02-04 13:59:01,894 - root - INFO - Processing section partitioning threads\n", "2021-02-04 13:59:01,895 - root - INFO - 1000 ...\n", "2021-02-04 13:59:01,912 - root - INFO - done\n", "2021-02-04 13:59:01,912 - root - INFO - 211000 ...\n", "2021-02-04 13:59:01,926 - root - INFO - done\n", "2021-02-04 13:59:01,927 - root - INFO - 29211000 ...\n", "2021-02-04 13:59:01,940 - root - INFO - done\n", "2021-02-04 13:59:01,941 - root - INFO - 3029211000 ...\n", "2021-02-04 13:59:01,944 - root - INFO - done\n", "2021-02-04 13:59:01,945 - root - INFO - 303029211000 ...\n", "2021-02-04 13:59:01,948 - root - INFO - done\n", "2021-02-04 13:59:01,949 - root - INFO - 30303029211000 ...\n", "2021-02-04 13:59:01,953 - root - INFO - done\n", "2021-02-04 13:59:01,954 - root - INFO - 3030303029211000 ...\n", "2021-02-04 13:59:01,956 - root - INFO - done\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Reading section core\n", "Reading section c1\n", "Reading section c5\n", "Reading section c6\n", "Reading section c7\n", "Reading section c8\n", "Reading section c9\n", "Reading section c95\n", "Reading section c96\n", "Reading section c97\n", "Reading section c98\n", "Reading section c99_sentinal\n", "Reading 
section c99_logbook\n", "Reading section c99_voyage\n", "Reading section c99_data\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2021-02-04 13:59:03,193 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 'UQZ'),('c1', 'UQA'),('c1', 'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE'),('c99_voyage', 'drLatDeg'),('c99_voyage', 'drLatMin'),('c99_voyage', 'drLatSec'),('c99_voyage', 'drLonDeg'),('c99_voyage', 'drLonMin'),('c99_voyage', 'drLonSec'),('c99_voyage', 'LatDeg'),('c99_voyage', 'LatMin'),('c99_voyage', 'LatSec'),('c99_voyage', 'LonDeg'),('c99_voyage', 'LonMin'),('c99_voyage', 'LonSec'),('c99_voyage', 'LMdistance1'),('c99_voyage', 'LMdistance2'),('c99_voyage', 'LMdistance4'),('c99_voyage', 'TimeOB'),('c99_voyage', 'Glasses'),('c99_data', 'AT_outside'),('c99_data', 'SST'),('c99_data', 'attached_tem')\n", "2021-02-04 13:59:03,194 - root - WARNING - 
Corresponding upper and/or lower bounds set to +/-inf for validation\n", "2021-02-04 13:59:11,706 - root - INFO - Wrapping output....\n", "2021-02-04 13:59:11,814 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n" ] } ], "source": [ "# Re-read the same sample file, this time pointing mdf_reader at the model folder\n", "# in model_path, which splits c99 into the c99_sentinal/_logbook/_voyage/_data sections.\n", "data_file_path = '/Users/brivas/c3s_work/mdf_reader/tests/data/133-730_1776-10_subset.imma'\n", "data = mdf_reader.read(data_file_path, data_model_path=model_path)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead tr th {\n", " text-align: left;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr>\n", " <th></th>\n", " <th colspan=\"3\" halign=\"left\">c99_sentinal</th>\n", " </tr>\n", " <tr>\n", " <th></th>\n", " <th>ATTI</th>\n", " <th>ATTL</th>\n", " <th>BLK</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>99</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>99</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>99</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>99</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>99</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " c99_sentinal \n", " ATTI ATTL BLK\n", "0 99 0 NaN\n", "1 99 0 NaN\n", "2 99 0 NaN\n", "3 99 0 NaN\n", "4 99 0 NaN" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "data.data[[\"c99_sentinal\"]].head()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ 
"<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>InstAbbr</th>\n", " <th>InsName</th>\n", " <th>InsPlace</th>\n", " <th>InsLand</th>\n", " <th>No_data_entry</th>\n", " <th>NameArchiveSet</th>\n", " <th>ArchivePart</th>\n", " <th>Specification</th>\n", " <th>Logbook_id</th>\n", " <th>Logbook_language</th>\n", " <th>Image_No</th>\n", " <th>Illustr</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>count</th>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>21</td>\n", " <td>21</td>\n", " <td>21</td>\n", " <td>21</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>21</td>\n", " <td>273</td>\n", " </tr>\n", " <tr>\n", " <th>unique</th>\n", " <td>5</td>\n", " <td>5</td>\n", " <td>5</td>\n", " <td>4</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>17</td>\n", " <td>4</td>\n", " <td>5</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>top</th>\n", " <td>AGI</td>\n", " <td>ARCHIVO GENERAL DE INDIAS</td>\n", " <td>SEVILLE</td>\n", " <td>SPAIN</td>\n", " <td>1.04.02</td>\n", " <td>152</td>\n", " <td>VOC</td>\n", " <td>5137</td>\n", " <td>COTE - 4/JJ/39</td>\n", " <td>SPANISH</td>\n", " <td>VOC_152_5137_0072</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>freq</th>\n", " <td>154</td>\n", " <td>154</td>\n", " <td>154</td>\n", " <td>154</td>\n", " <td>21</td>\n", " <td>21</td>\n", " <td>21</td>\n", " <td>21</td>\n", " <td>31</td>\n", " <td>154</td>\n", " <td>7</td>\n", " <td>273</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " InstAbbr InsName InsPlace InsLand No_data_entry \\\n", "count 273 273 273 273 21 \n", 
"unique 5 5 5 4 1 \n", "top AGI ARCHIVO GENERAL DE INDIAS SEVILLE SPAIN 1.04.02 \n", "freq 154 154 154 154 21 \n", "\n", " NameArchiveSet ArchivePart Specification Logbook_id \\\n", "count 21 21 21 273 \n", "unique 1 1 1 17 \n", "top 152 VOC 5137 COTE - 4/JJ/39 \n", "freq 21 21 21 31 \n", "\n", " Logbook_language Image_No Illustr \n", "count 273 21 273 \n", "unique 4 5 1 \n", "top SPANISH VOC_152_5137_0072 0 \n", "freq 154 7 273 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.data[[\"c99_logbook\"]].c99_logbook.describe(include = 'all')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>drLatDeg</th>\n", " <th>drLatMin</th>\n", " <th>drLatSec</th>\n", " <th>drLatHem</th>\n", " <th>drLonDeg</th>\n", " <th>drLonMin</th>\n", " <th>drLonSec</th>\n", " <th>drLonHem</th>\n", " <th>LatDeg</th>\n", " <th>LatMin</th>\n", " <th>LatSec</th>\n", " <th>LatHem</th>\n", " <th>LonDeg</th>\n", " <th>LonMin</th>\n", " <th>LonSec</th>\n", " <th>LonHem</th>\n", " <th>LatInd</th>\n", " <th>LonInd</th>\n", " <th>ZeroMeridian</th>\n", " <th>LMname1</th>\n", " <th>LMdirection1</th>\n", " <th>LMdistance1</th>\n", " <th>LMname2</th>\n", " <th>LMdirection2</th>\n", " <th>LMdistance2</th>\n", " <th>LMname3</th>\n", " <th>LMdirection3</th>\n", " <th>LMdistance4</th>\n", " <th>PosCoastal</th>\n", " <th>Calendar_type</th>\n", " <th>logbook_date</th>\n", " <th>TimeOB</th>\n", " <th>Day_of_the_week</th>\n", " <th>PartDay</th>\n", " <th>Watch</th>\n", " <th>Glasses</th>\n", " <th>Start_day</th>\n", " 
<th>ShipName</th>\n", " <th>Nationality</th>\n", " <th>Ship_type</th>\n", " <th>Company</th>\n", " <th>Name1</th>\n", " <th>Rank1</th>\n", " <th>Name2</th>\n", " <th>Rank2</th>\n", " <th>Name3</th>\n", " <th>Rank3</th>\n", " <th>voyage_from</th>\n", " <th>voyage_to</th>\n", " <th>Anchored_ind</th>\n", " <th>AnchorPlace</th>\n", " <th>DASno</th>\n", " <th>VoyageIni</th>\n", " <th>Course_ship</th>\n", " <th>Ship_speed</th>\n", " <th>Distance</th>\n", " <th>EncName</th>\n", " <th>EncNat</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>count</th>\n", " <td>208.000000</td>\n", " <td>208.000000</td>\n", " <td>208.0</td>\n", " <td>208</td>\n", " <td>90.000000</td>\n", " <td>94.000000</td>\n", " <td>94.0</td>\n", " <td>94</td>\n", " <td>74.000000</td>\n", " <td>74.000000</td>\n", " <td>74.0</td>\n", " <td>74</td>\n", " <td>169.000000</td>\n", " <td>172.000000</td>\n", " <td>172.0</td>\n", " <td>172</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>66</td>\n", " <td>65</td>\n", " <td>62.000000</td>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>2.000000</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273.0</td>\n", " <td>65</td>\n", " <td>21</td>\n", " <td>21</td>\n", " <td>21.0</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>199</td>\n", " <td>86</td>\n", " <td>213</td>\n", " <td>252</td>\n", " <td>48</td>\n", " <td>48</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>273</td>\n", " <td>267</td>\n", " <td>273</td>\n", " <td>3</td>\n", " <td>21</td>\n", " <td>273</td>\n", " <td>84</td>\n", " <td>0.0</td>\n", " <td>231</td>\n", " <td>13</td>\n", " <td>28</td>\n", " </tr>\n", " <tr>\n", " <th>unique</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>1</td>\n", " 
<td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2</td>\n", " <td>4</td>\n", " <td>5</td>\n", " <td>12</td>\n", " <td>18</td>\n", " <td>61</td>\n", " <td>NaN</td>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>NaN</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>2</td>\n", " <td>1</td>\n", " <td>31</td>\n", " <td>NaN</td>\n", " <td>7</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>NaN</td>\n", " <td>2</td>\n", " <td>17</td>\n", " <td>4</td>\n", " <td>6</td>\n", " <td>3</td>\n", " <td>15</td>\n", " <td>4</td>\n", " <td>4</td>\n", " <td>3</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>14</td>\n", " <td>9</td>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>1</td>\n", " <td>16</td>\n", " <td>77</td>\n", " <td>0.0</td>\n", " <td>142</td>\n", " <td>3</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>top</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>N</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>E</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>N</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>E</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>TENERIFE</td>\n", " <td>SANDY HOOK</td>\n", " <td>N18:30E</td>\n", " <td>NaN</td>\n", " <td>DEN SWARTE PAGOOD</td>\n", " <td>WTN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0</td>\n", " <td>2</td>\n", " <td>1776-10-04</td>\n", " <td>NaN</td>\n", " <td>THURSDAY</td>\n", " <td>3</td>\n", " <td>VM</td>\n", " <td>NaN</td>\n", " <td>UNKNOWN</td>\n", " <td>EL PIZARRO</td>\n", " <td>SPANISH</td>\n", " <td>FRAGATA CORREO</td>\n", " <td>RN</td>\n", " <td>DE BEGUE</td>\n", " <td>CAPITAN</td>\n", " <td>ASMUS HENDRIK STERRENBERG</td>\n", " <td>SCHIPPER</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>LA CORU�A</td>\n", " <td>LA CORU�A</td>\n", " <td>0</td>\n", " <td>MONTEVIDEO</td>\n", " <td>4259.3</td>\n", " <td>1776-10-03</td>\n", " <td>EBS1/4S</td>\n", 
" <td>NaN</td>\n", " <td>123.00</td>\n", " <td>ACTIVE, FALCON</td>\n", " <td>BRITISH</td>\n", " </tr>\n", " <tr>\n", " <th>freq</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>150</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>67</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>74</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>139</td>\n", " <td>189</td>\n", " <td>132</td>\n", " <td>169</td>\n", " <td>21</td>\n", " <td>2</td>\n", " <td>NaN</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>267</td>\n", " <td>273</td>\n", " <td>12</td>\n", " <td>NaN</td>\n", " <td>11</td>\n", " <td>21</td>\n", " <td>21</td>\n", " <td>NaN</td>\n", " <td>208</td>\n", " <td>31</td>\n", " <td>154</td>\n", " <td>75</td>\n", " <td>60</td>\n", " <td>31</td>\n", " <td>154</td>\n", " <td>21</td>\n", " <td>21</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>75</td>\n", " <td>79</td>\n", " <td>270</td>\n", " <td>2</td>\n", " <td>21</td>\n", " <td>44</td>\n", " <td>3</td>\n", " <td>NaN</td>\n", " <td>5</td>\n", " <td>8</td>\n", " <td>28</td>\n", " </tr>\n", " <tr>\n", " <th>mean</th>\n", " <td>33.730770</td>\n", " <td>28.971153</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>63.633335</td>\n", " <td>27.691490</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>28.162163</td>\n", " <td>27.500000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>253.467453</td>\n", " <td>29.674419</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>70.467743</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>7.500000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>12.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>8.0</td>\n", " 
<td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>std</th>\n", " <td>9.166976</td>\n", " <td>17.226234</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>85.771774</td>\n", " <td>15.633592</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>8.613508</td>\n", " <td>17.315168</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>136.184052</td>\n", " <td>17.611095</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>69.393318</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>3.535534</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>min</th>\n", " <td>12.000000</td>\n", " <td>0.000000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>1.000000</td>\n", " <td>0.000000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>17.000000</td>\n", " <td>0.000000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>1.000000</td>\n", " <td>0.000000</td>\n", " <td>0.0</td>\n", " 
<td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>3.000000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>5.000000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>12.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>8.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>25%</th>\n", " <td>27.750000</td>\n", " <td>14.750000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>3.000000</td>\n", " <td>16.000000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>20.000000</td>\n", " <td>12.250000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>71.000000</td>\n", " <td>14.000000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>20.750000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>6.250000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>12.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>8.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " 
<td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>50%</th>\n", " <td>35.000000</td>\n", " <td>29.500000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>52.000000</td>\n", " <td>26.500000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>29.000000</td>\n", " <td>29.000000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>328.000000</td>\n", " <td>31.000000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>47.000000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>7.500000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>12.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>8.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>75%</th>\n", " <td>40.250000</td>\n", " <td>44.000000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>102.750000</td>\n", " <td>39.750000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>34.000000</td>\n", " <td>39.500000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>345.000000</td>\n", " <td>44.500000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>96.750000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>8.750000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>12.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " 
<td>NaN</td>\n", " <td>8.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>max</th>\n", " <td>49.000000</td>\n", " <td>59.000000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>346.000000</td>\n", " <td>59.000000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>49.000000</td>\n", " <td>59.000000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>359.000000</td>\n", " <td>59.000000</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>338.000000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>10.000000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>12.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>8.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " drLatDeg drLatMin drLatSec drLatHem drLonDeg drLonMin \\\n", "count 208.000000 208.000000 208.0 208 90.000000 94.000000 \n", "unique NaN NaN NaN 2 NaN NaN \n", "top NaN NaN NaN N NaN NaN \n", "freq NaN NaN NaN 150 NaN NaN \n", "mean 33.730770 28.971153 0.0 NaN 
63.633335 27.691490 \n", "std 9.166976 17.226234 0.0 NaN 85.771774 15.633592 \n", "min 12.000000 0.000000 0.0 NaN 1.000000 0.000000 \n", "25% 27.750000 14.750000 0.0 NaN 3.000000 16.000000 \n", "50% 35.000000 29.500000 0.0 NaN 52.000000 26.500000 \n", "75% 40.250000 44.000000 0.0 NaN 102.750000 39.750000 \n", "max 49.000000 59.000000 0.0 NaN 346.000000 59.000000 \n", "\n", " drLonSec drLonHem LatDeg LatMin LatSec LatHem LonDeg \\\n", "count 94.0 94 74.000000 74.000000 74.0 74 169.000000 \n", "unique NaN 2 NaN NaN NaN 1 NaN \n", "top NaN E NaN NaN NaN N NaN \n", "freq NaN 67 NaN NaN NaN 74 NaN \n", "mean 0.0 NaN 28.162163 27.500000 0.0 NaN 253.467453 \n", "std 0.0 NaN 8.613508 17.315168 0.0 NaN 136.184052 \n", "min 0.0 NaN 17.000000 0.000000 0.0 NaN 1.000000 \n", "25% 0.0 NaN 20.000000 12.250000 0.0 NaN 71.000000 \n", "50% 0.0 NaN 29.000000 29.000000 0.0 NaN 328.000000 \n", "75% 0.0 NaN 34.000000 39.500000 0.0 NaN 345.000000 \n", "max 0.0 NaN 49.000000 59.000000 0.0 NaN 359.000000 \n", "\n", " LonMin LonSec LonHem LatInd LonInd ZeroMeridian LMname1 \\\n", "count 172.000000 172.0 172 273 273 273 66 \n", "unique NaN NaN 2 4 5 12 18 \n", "top NaN NaN E 1 2 TENERIFE SANDY HOOK \n", "freq NaN NaN 139 189 132 169 21 \n", "mean 29.674419 0.0 NaN NaN NaN NaN NaN \n", "std 17.611095 0.0 NaN NaN NaN NaN NaN \n", "min 0.000000 0.0 NaN NaN NaN NaN NaN \n", "25% 14.000000 0.0 NaN NaN NaN NaN NaN \n", "50% 31.000000 0.0 NaN NaN NaN NaN NaN \n", "75% 44.500000 0.0 NaN NaN NaN NaN NaN \n", "max 59.000000 0.0 NaN NaN NaN NaN NaN \n", "\n", " LMdirection1 LMdistance1 LMname2 LMdirection2 LMdistance2 \\\n", "count 65 62.000000 2 2 2.000000 \n", "unique 61 NaN 2 2 NaN \n", "top N18:30E NaN DEN SWARTE PAGOOD WTN NaN \n", "freq 2 NaN 1 1 NaN \n", "mean NaN 70.467743 NaN NaN 7.500000 \n", "std NaN 69.393318 NaN NaN 3.535534 \n", "min NaN 3.000000 NaN NaN 5.000000 \n", "25% NaN 20.750000 NaN NaN 6.250000 \n", "50% NaN 47.000000 NaN NaN 7.500000 \n", "75% NaN 96.750000 NaN NaN 8.750000 \n", 
"max NaN 338.000000 NaN NaN 10.000000 \n", "\n", " LMname3 LMdirection3 LMdistance4 PosCoastal Calendar_type \\\n", "count 0.0 0.0 0.0 273 273 \n", "unique 0.0 0.0 NaN 2 1 \n", "top NaN NaN NaN 0 2 \n", "freq NaN NaN NaN 267 273 \n", "mean NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN \n", "\n", " logbook_date TimeOB Day_of_the_week PartDay Watch Glasses Start_day \\\n", "count 273 273.0 65 21 21 21.0 273 \n", "unique 31 NaN 7 1 1 NaN 2 \n", "top 1776-10-04 NaN THURSDAY 3 VM NaN UNKNOWN \n", "freq 12 NaN 11 21 21 NaN 208 \n", "mean NaN 12.0 NaN NaN NaN 8.0 NaN \n", "std NaN 0.0 NaN NaN NaN 0.0 NaN \n", "min NaN 12.0 NaN NaN NaN 8.0 NaN \n", "25% NaN 12.0 NaN NaN NaN 8.0 NaN \n", "50% NaN 12.0 NaN NaN NaN 8.0 NaN \n", "75% NaN 12.0 NaN NaN NaN 8.0 NaN \n", "max NaN 12.0 NaN NaN NaN 8.0 NaN \n", "\n", " ShipName Nationality Ship_type Company Name1 Rank1 \\\n", "count 273 273 199 86 213 252 \n", "unique 17 4 6 3 15 4 \n", "top EL PIZARRO SPANISH FRAGATA CORREO RN DE BEGUE CAPITAN \n", "freq 31 154 75 60 31 154 \n", "mean NaN NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN NaN \n", "\n", " Name2 Rank2 Name3 Rank3 voyage_from \\\n", "count 48 48 0.0 0.0 273 \n", "unique 4 3 0.0 0.0 14 \n", "top ASMUS HENDRIK STERRENBERG SCHIPPER NaN NaN LA CORU�A \n", "freq 21 21 NaN NaN 75 \n", "mean NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN \n", "\n", " voyage_to Anchored_ind AnchorPlace DASno VoyageIni Course_ship \\\n", "count 267 273 3 21 273 84 \n", "unique 9 2 2 1 16 77 \n", "top LA CORU�A 0 MONTEVIDEO 
4259.3 1776-10-03 EBS1/4S \n", "freq 79 270 2 21 44 3 \n", "mean NaN NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN NaN \n", "\n", " Ship_speed Distance EncName EncNat \n", "count 0.0 231 13 28 \n", "unique 0.0 142 3 1 \n", "top NaN 123.00 ACTIVE, FALCON BRITISH \n", "freq NaN 5 8 28 \n", "mean NaN NaN NaN NaN \n", "std NaN NaN NaN NaN \n", "min NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN \n", "max NaN NaN NaN NaN " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "pd.options.display.max_columns = None\n", "data.data[[\"c99_voyage\"]].c99_voyage.describe(include = 'all')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 TENERIFE\n", "1 LONDON\n", "2 TENERIFE\n", "3 TENERIFE\n", "4 GREENWICH\n", "Name: ZeroMeridian, dtype: object" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.data[[\"c99_voyage\"]].c99_voyage.ZeroMeridian.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For example, the ship types on this deck are given in many different languages. There is no code table for this variable on the CLIWOC website. \n", "\n", "Will this information be enough (e.g. for things that Liz is doing)?" 
] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2 PAQUEBOT\n", "3 FRAGATA CORREO\n", "4 6TH RATE\n", "5 FRAGATA CORREO\n", "6 6TH RATE\n", "Name: Ship_type, dtype: object" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.data[[\"c99_voyage\"]].c99_voyage.Ship_type.dropna().head()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>AT_reading_units</th>\n", " <th>SST_reading_units</th>\n", " <th>AP_reading_units</th>\n", " <th>BART_reading_units</th>\n", " <th>ReferenceCourse</th>\n", " <th>ReferenceWindDirection</th>\n", " <th>Decl</th>\n", " <th>Distance_units</th>\n", " <th>Distance_units_to_landmark</th>\n", " <th>Distance_units_travelled</th>\n", " <th>Longitude_units</th>\n", " <th>units_of_measurement</th>\n", " <th>humidity_units</th>\n", " <th>water_at_pump_units</th>\n", " <th>wind_scale</th>\n", " <th>BARO_type</th>\n", " <th>BARO_brand</th>\n", " <th>API</th>\n", " <th>Humidity_method</th>\n", " <th>compas_error</th>\n", " <th>compas_correction</th>\n", " <th>AT_outside</th>\n", " <th>SST</th>\n", " <th>AP</th>\n", " <th>wind_dir</th>\n", " <th>current_dir</th>\n", " <th>current_speed</th>\n", " <th>attached_tem</th>\n", " <th>pump_water</th>\n", " <th>Humidity</th>\n", " <th>wind_force</th>\n", " <th>weather</th>\n", " <th>prcp_descriptor</th>\n", " <th>sea_state</th>\n", " <th>shape_coulds</th>\n", " <th>dir_coulds</th>\n", " <th>Clearness</th>\n", " <th>cloud_fraction</th>\n", " <th>gusts</th>\n", " 
<th>Rain</th>\n", " <th>Fog</th>\n", " <th>Snow</th>\n", " <th>Thunder</th>\n", " <th>Hail</th>\n", " <th>Sea_ice</th>\n", " <th>Trivial_correction</th>\n", " <th>Release</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>count</th>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>84</td>\n", " <td>271</td>\n", " <td>273</td>\n", " <td>0.0</td>\n", " <td>63</td>\n", " <td>231</td>\n", " <td>266</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>271</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>223</td>\n", " <td>105</td>\n", " <td>16</td>\n", " <td>174</td>\n", " <td>19</td>\n", " <td>0.0</td>\n", " <td>47</td>\n", " <td>0.0</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>273</td>\n", " </tr>\n", " <tr>\n", " <th>unique</th>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>33</td>\n", " <td>0.0</td>\n", " <td>2</td>\n", " <td>5</td>\n", " <td>3</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0.0</td>\n", " <td>130</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>NaN</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>74</td>\n", " <td>48</td>\n", " <td>8</td>\n", " <td>51</td>\n", " <td>7</td>\n", " <td>0.0</td>\n", " <td>9</td>\n", " <td>0.0</td>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>1</td>\n", 
" <td>1</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>top</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>UNKNOWN</td>\n", " <td>UNKNOWN</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " <td>LEAGUES</td>\n", " <td>NM</td>\n", " <td>360 DEGREES</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>E</td>\n", " <td>SOUTHERLY</td>\n", " <td>STRONG</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>FRESQUITO</td>\n", " <td>HORIZONTES CARGADOS</td>\n", " <td>RAIN</td>\n", " <td>LLANA</td>\n", " <td>OVERDRIJVENDE LUCHT</td>\n", " <td>NaN</td>\n", " <td>CLOUDY</td>\n", " <td>NaN</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>CLIWOC VERSION 2.0</td>\n", " </tr>\n", " <tr>\n", " <th>freq</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>84</td>\n", " <td>271</td>\n", " <td>33</td>\n", " <td>NaN</td>\n", " <td>61</td>\n", " <td>85</td>\n", " <td>180</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>20</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>19</td>\n", " <td>30</td>\n", " <td>6</td>\n", " <td>48</td>\n", " <td>10</td>\n", " <td>NaN</td>\n", " <td>26</td>\n", " <td>NaN</td>\n", " <td>269</td>\n", " <td>252</td>\n", " <td>270</td>\n", " <td>273</td>\n", " <td>265</td>\n", " <td>272</td>\n", " <td>273</td>\n", " <td>273</td>\n", " <td>156</td>\n", " </tr>\n", " <tr>\n", " <th>mean</th>\n", 
" <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>std</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>min</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " 
<td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>25%</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>50%</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " 
<td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>75%</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>max</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " 
<td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " AT_reading_units SST_reading_units AP_reading_units \\\n", "count 0.0 0.0 0.0 \n", "unique 0.0 0.0 0.0 \n", "top NaN NaN NaN \n", "freq NaN NaN NaN \n", "mean NaN NaN NaN \n", "std NaN NaN NaN \n", "min NaN NaN NaN \n", "25% NaN NaN NaN \n", "50% NaN NaN NaN \n", "75% NaN NaN NaN \n", "max NaN NaN NaN \n", "\n", " BART_reading_units ReferenceCourse ReferenceWindDirection Decl \\\n", "count 0.0 84 271 273 \n", "unique 0.0 1 1 33 \n", "top NaN UNKNOWN UNKNOWN 0 \n", "freq NaN 84 271 33 \n", "mean NaN NaN NaN NaN \n", "std NaN NaN NaN NaN \n", "min NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN \n", "max NaN NaN NaN NaN \n", "\n", " Distance_units Distance_units_to_landmark Distance_units_travelled \\\n", "count 0.0 63 231 \n", "unique 0.0 2 5 \n", "top NaN LEAGUES NM \n", "freq NaN 61 85 \n", "mean NaN NaN NaN \n", "std NaN NaN NaN \n", "min NaN NaN NaN \n", "25% NaN NaN NaN \n", "50% NaN NaN NaN \n", "75% NaN NaN NaN \n", "max NaN NaN NaN \n", "\n", " Longitude_units units_of_measurement humidity_units \\\n", "count 266 0.0 0.0 \n", "unique 3 0.0 0.0 \n", "top 360 DEGREES NaN NaN \n", "freq 180 NaN NaN \n", "mean NaN NaN NaN \n", "std NaN NaN NaN \n", "min NaN NaN NaN \n", "25% NaN NaN NaN \n", "50% NaN NaN NaN \n", "75% NaN NaN NaN \n", "max NaN NaN NaN \n", "\n", " water_at_pump_units wind_scale BARO_type 
BARO_brand API \\\n", "count 0.0 0.0 0.0 0.0 0.0 \n", "unique 0.0 0.0 0.0 0.0 0.0 \n", "top NaN NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN NaN \n", "mean NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN \n", "\n", " Humidity_method compas_error compas_correction AT_outside SST \\\n", "count 0.0 0.0 0.0 0.0 0.0 \n", "unique 0.0 0.0 0.0 NaN NaN \n", "top NaN NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN NaN \n", "mean NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN \n", "\n", " AP wind_dir current_dir current_speed attached_tem pump_water \\\n", "count 0.0 271 1 1 0.0 0.0 \n", "unique 0.0 130 1 1 NaN 0.0 \n", "top NaN E SOUTHERLY STRONG NaN NaN \n", "freq NaN 20 1 1 NaN NaN \n", "mean NaN NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN NaN \n", "\n", " Humidity wind_force weather prcp_descriptor sea_state \\\n", "count 0.0 223 105 16 174 \n", "unique 0.0 74 48 8 51 \n", "top NaN FRESQUITO HORIZONTES CARGADOS RAIN LLANA \n", "freq NaN 19 30 6 48 \n", "mean NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN \n", "\n", " shape_coulds dir_coulds Clearness cloud_fraction gusts Rain \\\n", "count 19 0.0 47 0.0 273 273 \n", "unique 7 0.0 9 0.0 2 2 \n", "top OVERDRIJVENDE LUCHT NaN CLOUDY NaN 0 0 \n", "freq 10 NaN 26 NaN 269 252 \n", "mean NaN NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN NaN 
\n", "50% NaN NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN NaN \n", "\n", " Fog Snow Thunder Hail Sea_ice Trivial_correction Release \n", "count 273 273 273 273 273 273 273 \n", "unique 2 1 2 2 1 1 3 \n", "top 0 0 0 0 0 0 CLIWOC VERSION 2.0 \n", "freq 270 273 265 272 273 273 156 \n", "mean NaN NaN NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN NaN NaN " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.data[[\"c99_data\"]].c99_data.describe(include = 'all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "What about the different scales for the wind force, given different languages?" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 SLAPPE-LABBER BRAMZEILSKOELTE\n", "1 PLEASANT BREEZE, FRESH BREEZE\n", "2 FRESCACH�N\n", "3 FRESQUITO\n", "4 MODERATE\n", "Name: wind_force, dtype: object" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.data[[\"c99_data\"]].c99_data.wind_force.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For some of this data there are some code tables but according to the mdf reader we need a single code table. \n", "\n", "\n", "We would have to make a nested .json file I think ... with the main key as the language. More ideas on how to do this are welcome." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I have done some of the easy key tables including the Meridian one. 
You can find those [under ICOADS.C99.Variable.json](https://git.noc.ac.uk/brecinosrivas/mdf_reader/-/tree/master/data_models/lib/imma1_d730/code_tables)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }