diff --git a/docs/notebooks/CLIWOC_datamodel.ipynb b/docs/notebooks/CLIWOC_datamodel.ipynb index 14f7333b85a28905c83f215b03022d1be0ef5bf6..220a863b6a1b0efc209595ede5b5668672fc188c 100644 --- a/docs/notebooks/CLIWOC_datamodel.ipynb +++ b/docs/notebooks/CLIWOC_datamodel.ipynb @@ -16,7 +16,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2021-01-08 17:05:40,504 - root - INFO - init basic configure of logging success\n" + "2021-02-04 13:58:59,837 - root - INFO - init basic configure of logging success\n" ] } ], @@ -37,19 +37,19 @@ "name": "stderr", "output_type": "stream", "text": [ - "2021-01-08 17:05:41,566 - root - INFO - READING DATA MODEL SCHEMA FILE...\n", - "2021-01-08 17:05:41,571 - root - INFO - EXTRACTING DATA FROM MODEL: imma1\n", - "2021-01-08 17:05:41,572 - root - INFO - Getting data string from source...\n", - "2021-01-08 17:05:41,584 - root - INFO - Extracting and reading sections\n", - "2021-01-08 17:05:41,585 - root - INFO - Processing section partitioning threads\n", - "2021-01-08 17:05:41,586 - root - INFO - 1000 ...\n", - "2021-01-08 17:05:41,600 - root - INFO - done\n", - "2021-01-08 17:05:41,601 - root - INFO - 211000 ...\n", - "2021-01-08 17:05:41,614 - root - INFO - done\n", - "2021-01-08 17:05:41,615 - root - INFO - 29211000 ...\n", - "2021-01-08 17:05:41,629 - root - INFO - done\n", - "2021-01-08 17:05:41,630 - root - INFO - 2929211000 ...\n", - "2021-01-08 17:05:41,635 - root - INFO - done\n" + "2021-02-04 13:58:59,859 - root - INFO - READING DATA MODEL SCHEMA FILE...\n", + "2021-02-04 13:58:59,865 - root - INFO - EXTRACTING DATA FROM MODEL: imma1\n", + "2021-02-04 13:58:59,866 - root - INFO - Getting data string from source...\n", + "2021-02-04 13:58:59,878 - root - INFO - Extracting and reading sections\n", + "2021-02-04 13:58:59,879 - root - INFO - Processing section partitioning threads\n", + "2021-02-04 13:58:59,879 - root - INFO - 1000 ...\n", + "2021-02-04 13:58:59,895 - root - INFO - done\n", + "2021-02-04 13:58:59,896 - root - INFO - 211000 ...\n", + "2021-02-04 13:58:59,911 - root - INFO - done\n", + "2021-02-04 13:58:59,911 - root - INFO - 29211000 ...\n", + "2021-02-04 13:58:59,927 - root - INFO - done\n", + "2021-02-04 13:58:59,928 - root - INFO - 2929211000 ...\n", + "2021-02-04 13:58:59,933 - root - INFO - done\n" ] }, { @@ -74,10 +74,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2021-01-08 17:05:42,553 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 'UQZ'),('c1', 'UQA'),('c1', 'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE')\n", - "2021-01-08 17:05:42,554 - root - WARNING - Corresponding upper and/or lower bounds set to +/-inf for validation\n", - "2021-01-08 17:05:43,402 - root - INFO - Wrapping output....\n", - "2021-01-08 17:05:43,498 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n" + "2021-02-04 13:59:00,850 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 'UQZ'),('c1', 'UQA'),('c1', 'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE')\n", + "2021-02-04 13:59:00,851 - root - WARNING - Corresponding upper and/or lower bounds set to +/-inf for validation\n", + "2021-02-04 13:59:01,734 - root - INFO - Wrapping output....\n", + "2021-02-04 13:59:01,826 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n" ] } ], @@ -190,7 +190,7 @@ { "data": { "text/plain": [ - "'/Users/brivas/c3s_work/mdf_reader/data_models/lib/imma1_d730'" + "2401" ] }, "execution_count": 5, @@ -198,6 +198,26 @@ "output_type": "execute_result" } ], + "source": [ + "len(line)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/Users/brivas/c3s_work/mdf_reader/data_models/lib/imma1_d730'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "path_to_folder = '/Users/brivas/c3s_work/mdf_reader/data_models/lib/'\n", "model_name = 'imma1_d730'\n", @@ -207,32 +227,32 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2021-01-08 17:06:06,666 - root - INFO - READING DATA MODEL SCHEMA FILE...\n", - "2021-01-08 17:06:06,671 - root - INFO - EXTRACTING DATA FROM MODEL: /Users/brivas/c3s_work/mdf_reader/data_models/lib/imma1_d730\n", - "2021-01-08 17:06:06,673 - root - INFO - Getting data string from source...\n", - "2021-01-08 17:06:06,685 - root - INFO - Extracting and reading sections\n", - "2021-01-08 17:06:06,686 - root - INFO - Processing section partitioning threads\n", - "2021-01-08 17:06:06,687 - root - INFO - 1000 ...\n", - "2021-01-08 17:06:06,707 - root - INFO - done\n", - "2021-01-08 17:06:06,708 - root - INFO - 211000 ...\n", - "2021-01-08 17:06:06,723 - root - INFO - done\n", - "2021-01-08 17:06:06,724 - root - INFO - 29211000 ...\n", - "2021-01-08 17:06:06,737 - root - INFO - done\n", - "2021-01-08 17:06:06,738 - root - INFO - 3029211000 ...\n", - "2021-01-08 17:06:06,742 - root - INFO - done\n", - "2021-01-08 17:06:06,743 - root - INFO - 303029211000 ...\n", - "2021-01-08 17:06:06,746 - root - INFO - done\n", - "2021-01-08 17:06:06,747 - root - INFO - 30303029211000 ...\n", - "2021-01-08 17:06:06,750 - root - INFO - done\n", - "2021-01-08 17:06:06,751 - root - INFO - 3030303029211000 ...\n", - "2021-01-08 17:06:06,753 - root - INFO - done\n" + "2021-02-04 13:59:01,877 - root - INFO - READING DATA MODEL SCHEMA FILE...\n", + "2021-02-04 13:59:01,883 - root - INFO - EXTRACTING DATA FROM MODEL: /Users/brivas/c3s_work/mdf_reader/data_models/lib/imma1_d730\n", + "2021-02-04 13:59:01,884 - root - INFO - Getting data string from source...\n", + "2021-02-04 13:59:01,893 - root - INFO - Extracting and reading sections\n", + "2021-02-04 13:59:01,894 - root - INFO - Processing section partitioning threads\n", + "2021-02-04 13:59:01,895 - root - INFO - 1000 ...\n", + "2021-02-04 13:59:01,912 - root - INFO - done\n", + "2021-02-04 13:59:01,912 - root - INFO - 211000 ...\n", + "2021-02-04 13:59:01,926 - root - INFO - done\n", + "2021-02-04 13:59:01,927 - root - INFO - 29211000 ...\n", + "2021-02-04 13:59:01,940 - root - INFO - done\n", + "2021-02-04 13:59:01,941 - root - INFO - 3029211000 ...\n", + "2021-02-04 13:59:01,944 - root - INFO - done\n", + "2021-02-04 13:59:01,945 - root - INFO - 303029211000 ...\n", + "2021-02-04 13:59:01,948 - root - INFO - done\n", + "2021-02-04 13:59:01,949 - root - INFO - 30303029211000 ...\n", + "2021-02-04 13:59:01,953 - root - INFO - done\n", + "2021-02-04 13:59:01,954 - root - INFO - 3030303029211000 ...\n", + "2021-02-04 13:59:01,956 - root - INFO - done\n" ] }, { @@ -260,10 +280,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2021-01-08 17:06:08,027 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 'UQZ'),('c1', 'UQA'),('c1', 'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE'),('c99_voyage', 'drLatDeg'),('c99_voyage', 'drLatMin'),('c99_voyage', 'drLatSec'),('c99_voyage', 'drLonDeg'),('c99_voyage', 'drLonMin'),('c99_voyage', 'drLonSec'),('c99_voyage', 'LatDeg'),('c99_voyage', 'LatMin'),('c99_voyage', 'LatSec'),('c99_voyage', 'LonDeg'),('c99_voyage', 'LonMin'),('c99_voyage', 'LonSec'),('c99_voyage', 'LMdistance1'),('c99_voyage', 'LMdistance2'),('c99_voyage', 'LMdistance4'),('c99_voyage', 'TimeOB'),('c99_voyage', 'Glasses'),('c99_data', 'AT_outside'),('c99_data', 'SST'),('c99_data', 'attached_tem')\n", - "2021-01-08 17:06:08,028 - root - WARNING - Corresponding upper and/or lower bounds set to +/-inf for validation\n", - "2021-01-08 17:06:17,135 - root - INFO - Wrapping output....\n", - "2021-01-08 17:06:17,244 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n" + "2021-02-04 13:59:03,193 - root - WARNING - Data numeric elements with missing upper or lower threshold: ('c1', 'BSI'),('c1', 'AQZ'),('c1', 'AQA'),('c1', 'UQZ'),('c1', 'UQA'),('c1', 'VQZ'),('c1', 'VQA'),('c1', 'PQZ'),('c1', 'PQA'),('c1', 'DQZ'),('c1', 'DQA'),('c5', 'OS'),('c5', 'OP'),('c5', 'FM'),('c5', 'IMMV'),('c5', 'IX'),('c5', 'W2'),('c5', 'WMI'),('c5', 'SD2'),('c5', 'SP2'),('c5', 'IS'),('c5', 'RS'),('c5', 'IC1'),('c5', 'IC2'),('c5', 'IC3'),('c5', 'IC4'),('c5', 'IC5'),('c5', 'IR'),('c5', 'RRR'),('c5', 'TR'),('c5', 'NU'),('c5', 'QCI'),('c5', 'QI1'),('c5', 'QI2'),('c5', 'QI3'),('c5', 'QI4'),('c5', 'QI5'),('c5', 'QI6'),('c5', 'QI7'),('c5', 'QI8'),('c5', 'QI9'),('c5', 'QI10'),('c5', 'QI11'),('c5', 'QI12'),('c5', 'QI13'),('c5', 'QI14'),('c5', 'QI15'),('c5', 'QI16'),('c5', 'QI17'),('c5', 'QI18'),('c5', 'QI19'),('c5', 'QI20'),('c5', 'QI21'),('c5', 'QI22'),('c5', 'QI23'),('c5', 'QI24'),('c5', 'QI25'),('c5', 'QI26'),('c5', 'QI27'),('c5', 'QI28'),('c5', 'QI29'),('c5', 'RHI'),('c5', 'AWSI'),('c6', 'FBSRC'),('c6', 'MST'),('c7', 'OPM'),('c7', 'LOT'),('c9', 'CCe'),('c9', 'WWe'),('c9', 'Ne'),('c9', 'NHe'),('c9', 'He'),('c9', 'CLe'),('c9', 'CMe'),('c9', 'CHe'),('c9', 'SBI'),('c95', 'DPRO'),('c95', 'DPRP'),('c95', 'UFR'),('c95', 'ASIR'),('c96', 'ASII'),('c97', 'ASIE'),('c99_voyage', 'drLatDeg'),('c99_voyage', 'drLatMin'),('c99_voyage', 'drLatSec'),('c99_voyage', 'drLonDeg'),('c99_voyage', 'drLonMin'),('c99_voyage', 'drLonSec'),('c99_voyage', 'LatDeg'),('c99_voyage', 'LatMin'),('c99_voyage', 'LatSec'),('c99_voyage', 'LonDeg'),('c99_voyage', 'LonMin'),('c99_voyage', 'LonSec'),('c99_voyage', 'LMdistance1'),('c99_voyage', 'LMdistance2'),('c99_voyage', 'LMdistance4'),('c99_voyage', 'TimeOB'),('c99_voyage', 'Glasses'),('c99_data', 'AT_outside'),('c99_data', 'SST'),('c99_data', 'attached_tem')\n", + "2021-02-04 13:59:03,194 - root - WARNING - Corresponding upper and/or lower bounds set to +/-inf for validation\n", + "2021-02-04 13:59:11,706 - root - INFO - Wrapping output....\n", + "2021-02-04 13:59:11,814 - root - INFO - CREATING OUTPUT DATA ATTRIBUTES FROM DATA MODEL\n" ] } ], @@ -275,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -353,7 +373,7 @@ "4 99 0 NaN" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -366,7 +386,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -406,119 +426,101 @@ " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>0</th>\n", - " <td>NAN</td>\n", - " <td>NATIONAAL ARCHIEF OF THE NETHERLANDS</td>\n", - " <td>DEN HAAG</td>\n", - " <td>NEDERLAND</td>\n", - " <td>1.04.02</td>\n", - " <td>152</td>\n", - " <td>VOC</td>\n", - " <td>5137</td>\n", - " <td>VOC_152_5137</td>\n", - " <td>DUTCH</td>\n", - " <td>VOC_152_5137_0071</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>PRO</td>\n", - " <td>PUBLIC RECORD OFFICE</td>\n", - " <td>KEW</td>\n", - " <td>UNITED KINGDOM</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>PRO BH1 1602</td>\n", - " <td>ENGLISH</td>\n", - " <td>NaN</td>\n", - " <td>0</td>\n", + " <th>count</th>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>21</td>\n", + " <td>21</td>\n", + " <td>21</td>\n", + " <td>21</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>21</td>\n", + " <td>273</td>\n", " </tr>\n", " <tr>\n", - " <th>2</th>\n", - " <td>AGI</td>\n", - " <td>ARCHIVO GENERAL DE INDIAS</td>\n", - " <td>SEVILLE</td>\n", - " <td>SPAIN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>CORREOS, 193B R8</td>\n", - " <td>SPANISH</td>\n", - " <td>NaN</td>\n", - " <td>0</td>\n", + " <th>unique</th>\n", + " <td>5</td>\n", + " <td>5</td>\n", + " <td>5</td>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>17</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " <td>1</td>\n", " </tr>\n", " <tr>\n", - " <th>3</th>\n", + " <th>top</th>\n", " <td>AGI</td>\n", " <td>ARCHIVO GENERAL DE INDIAS</td>\n", " <td>SEVILLE</td>\n", " <td>SPAIN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>CORREOS, 193A R5</td>\n", + " <td>1.04.02</td>\n", + " <td>152</td>\n", + " <td>VOC</td>\n", + " <td>5137</td>\n", + " <td>COTE - 4/JJ/39</td>\n", " <td>SPANISH</td>\n", - " <td>NaN</td>\n", + " <td>VOC_152_5137_0072</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", - " <th>4</th>\n", - " <td>NMM</td>\n", - " <td>NATIONAL MARITIME MUSEUM</td>\n", - " <td>GREENWICH</td>\n", - " <td>UNITED KINGDOM</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NMM ADM/L/S543</td>\n", - " <td>ENGLISH</td>\n", - " <td>NaN</td>\n", - " <td>0</td>\n", + " <th>freq</th>\n", + " <td>154</td>\n", + " <td>154</td>\n", + " <td>154</td>\n", + " <td>154</td>\n", + " <td>21</td>\n", + " <td>21</td>\n", + " <td>21</td>\n", + " <td>21</td>\n", + " <td>31</td>\n", + " <td>154</td>\n", + " <td>7</td>\n", + " <td>273</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " InstAbbr InsName InsPlace InsLand \\\n", - "0 NAN NATIONAAL ARCHIEF OF THE NETHERLANDS DEN HAAG NEDERLAND \n", - "1 PRO PUBLIC RECORD OFFICE KEW UNITED KINGDOM \n", - "2 AGI ARCHIVO GENERAL DE INDIAS SEVILLE SPAIN \n", - "3 AGI ARCHIVO GENERAL DE INDIAS SEVILLE SPAIN \n", - "4 NMM NATIONAL MARITIME MUSEUM GREENWICH UNITED KINGDOM \n", + " InstAbbr InsName InsPlace InsLand No_data_entry \\\n", + "count 273 273 273 273 21 \n", + "unique 5 5 5 4 1 \n", + "top AGI ARCHIVO GENERAL DE INDIAS SEVILLE SPAIN 1.04.02 \n", + "freq 154 154 154 154 21 \n", "\n", - " No_data_entry NameArchiveSet ArchivePart Specification Logbook_id \\\n", - "0 1.04.02 152 VOC 5137 VOC_152_5137 \n", - "1 NaN NaN NaN NaN PRO BH1 1602 \n", - "2 NaN NaN NaN NaN CORREOS, 193B R8 \n", - "3 NaN NaN NaN NaN CORREOS, 193A R5 \n", - "4 NaN NaN NaN NaN NMM ADM/L/S543 \n", + " NameArchiveSet ArchivePart Specification Logbook_id \\\n", + "count 21 21 21 273 \n", + "unique 1 1 1 17 \n", + "top 152 VOC 5137 COTE - 4/JJ/39 \n", + "freq 21 21 21 31 \n", "\n", - " Logbook_language Image_No Illustr \n", - "0 DUTCH VOC_152_5137_0071 0 \n", - "1 ENGLISH NaN 0 \n", - "2 SPANISH NaN 0 \n", - "3 SPANISH NaN 0 \n", - "4 ENGLISH NaN 0 " + " Logbook_language Image_No Illustr \n", + "count 273 21 273 \n", + "unique 4 5 1 \n", + "top SPANISH VOC_152_5137_0072 0 \n", + "freq 154 7 273 " ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "data.data[[\"c99_logbook\"]].c99_logbook.head()" + "data.data[[\"c99_logbook\"]].c99_logbook.describe(include = 'all')" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -552,7 +554,44 @@ " <th>drLonHem</th>\n", " <th>LatDeg</th>\n", " <th>LatMin</th>\n", - " <th>...</th>\n", + " <th>LatSec</th>\n", + " <th>LatHem</th>\n", + " <th>LonDeg</th>\n", + " <th>LonMin</th>\n", + " <th>LonSec</th>\n", + " <th>LonHem</th>\n", + " <th>LatInd</th>\n", + " <th>LonInd</th>\n", + " <th>ZeroMeridian</th>\n", + " <th>LMname1</th>\n", + " <th>LMdirection1</th>\n", + " <th>LMdistance1</th>\n", + " <th>LMname2</th>\n", + " <th>LMdirection2</th>\n", + " <th>LMdistance2</th>\n", + " <th>LMname3</th>\n", + " <th>LMdirection3</th>\n", + " <th>LMdistance4</th>\n", + " <th>PosCoastal</th>\n", + " <th>Calendar_type</th>\n", + " <th>logbook_date</th>\n", + " <th>TimeOB</th>\n", + " <th>Day_of_the_week</th>\n", + " <th>PartDay</th>\n", + " <th>Watch</th>\n", + " <th>Glasses</th>\n", + " <th>Start_day</th>\n", + " <th>ShipName</th>\n", + " <th>Nationality</th>\n", + " <th>Ship_type</th>\n", + " <th>Company</th>\n", + " <th>Name1</th>\n", + " <th>Rank1</th>\n", + " <th>Name2</th>\n", + " <th>Rank2</th>\n", + " <th>Name3</th>\n", + " <th>Rank3</th>\n", + " <th>voyage_from</th>\n", " <th>voyage_to</th>\n", " <th>Anchored_ind</th>\n", " <th>AnchorPlace</th>\n", @@ -567,162 +606,846 @@ " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>0</th>\n", - " <td>19.0</td>\n", - " <td>15.0</td>\n", + " <th>count</th>\n", + " <td>208.000000</td>\n", + " <td>208.000000</td>\n", + " <td>208.0</td>\n", + " <td>208</td>\n", + " <td>90.000000</td>\n", + " <td>94.000000</td>\n", + " <td>94.0</td>\n", + " <td>94</td>\n", + " <td>74.000000</td>\n", + " <td>74.000000</td>\n", + " <td>74.0</td>\n", + " <td>74</td>\n", + " <td>169.000000</td>\n", + " <td>172.000000</td>\n", + " <td>172.0</td>\n", + " <td>172</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>66</td>\n", + " <td>65</td>\n", + " <td>62.000000</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>2.000000</td>\n", " <td>0.0</td>\n", - " <td>N</td>\n", - " <td>102.0</td>\n", - " <td>2.0</td>\n", " <td>0.0</td>\n", - " <td>E</td>\n", - " <td>18.0</td>\n", - " <td>59.0</td>\n", - " <td>...</td>\n", - " <td>KASSERAIJEN</td>\n", - " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273.0</td>\n", + " <td>65</td>\n", + " <td>21</td>\n", + " <td>21</td>\n", + " <td>21.0</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>199</td>\n", + " <td>86</td>\n", + " <td>213</td>\n", + " <td>252</td>\n", + " <td>48</td>\n", + " <td>48</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>273</td>\n", + " <td>267</td>\n", + " <td>273</td>\n", + " <td>3</td>\n", + " <td>21</td>\n", + " <td>273</td>\n", + " <td>84</td>\n", + " <td>0.0</td>\n", + " <td>231</td>\n", + " <td>13</td>\n", + " <td>28</td>\n", + " </tr>\n", + " <tr>\n", + " <th>unique</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2</td>\n", " <td>NaN</td>\n", - " <td>4259.3</td>\n", - " <td>1776-01-02</td>\n", - " <td>NW</td>\n", " <td>NaN</td>\n", - " <td>11.50</td>\n", " <td>NaN</td>\n", + " <td>1</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>2</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " <td>12</td>\n", + " <td>18</td>\n", + " <td>61</td>\n", + " <td>NaN</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>NaN</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", " <td>NaN</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>31</td>\n", + " <td>NaN</td>\n", + " <td>7</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>NaN</td>\n", + " <td>2</td>\n", + " <td>17</td>\n", + " <td>4</td>\n", + " <td>6</td>\n", + " <td>3</td>\n", + " <td>15</td>\n", + " <td>4</td>\n", + " <td>4</td>\n", + " <td>3</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>14</td>\n", + " <td>9</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>16</td>\n", + " <td>77</td>\n", + " <td>0.0</td>\n", + " <td>142</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", " </tr>\n", " <tr>\n", - " <th>1</th>\n", + " <th>top</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>N</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>E</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>N</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>E</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>TENERIFE</td>\n", + " <td>SANDY HOOK</td>\n", + " <td>N18:30E</td>\n", + " <td>NaN</td>\n", + " <td>DEN SWARTE PAGOOD</td>\n", + " <td>WTN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>11.0</td>\n", - " <td>5.0</td>\n", - " <td>0.0</td>\n", - " <td>W</td>\n", - " <td>49.0</td>\n", - " <td>40.0</td>\n", - " <td>...</td>\n", - " <td>GRAVESEND</td>\n", " <td>0</td>\n", + " <td>2</td>\n", + " <td>1776-10-04</td>\n", + " <td>NaN</td>\n", + " <td>THURSDAY</td>\n", + " <td>3</td>\n", + " <td>VM</td>\n", + " <td>NaN</td>\n", + " <td>UNKNOWN</td>\n", + " <td>EL PIZARRO</td>\n", + " <td>SPANISH</td>\n", + " <td>FRAGATA CORREO</td>\n", + " <td>RN</td>\n", + " <td>DE BEGUE</td>\n", + " <td>CAPITAN</td>\n", + " <td>ASMUS HENDRIK STERRENBERG</td>\n", + " <td>SCHIPPER</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>1776-09-18</td>\n", + " <td>LA CORU�A</td>\n", + " <td>LA CORU�A</td>\n", + " <td>0</td>\n", + " <td>MONTEVIDEO</td>\n", + " <td>4259.3</td>\n", + " <td>1776-10-03</td>\n", " <td>EBS1/4S</td>\n", " <td>NaN</td>\n", - " <td>131.00</td>\n", + " <td>123.00</td>\n", + " <td>ACTIVE, FALCON</td>\n", + " <td>BRITISH</td>\n", + " </tr>\n", + " <tr>\n", + " <th>freq</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>150</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>67</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>74</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>139</td>\n", + " <td>189</td>\n", + " <td>132</td>\n", + " <td>169</td>\n", + " <td>21</td>\n", + " <td>2</td>\n", + " <td>NaN</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>267</td>\n", + " <td>273</td>\n", + " <td>12</td>\n", " <td>NaN</td>\n", + " <td>11</td>\n", + " <td>21</td>\n", + " <td>21</td>\n", " <td>NaN</td>\n", + " <td>208</td>\n", + " <td>31</td>\n", + " <td>154</td>\n", + " <td>75</td>\n", + " <td>60</td>\n", + " <td>31</td>\n", + " <td>154</td>\n", + " <td>21</td>\n", + " <td>21</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>75</td>\n", + " <td>79</td>\n", + " <td>270</td>\n", + " <td>2</td>\n", + " <td>21</td>\n", + " <td>44</td>\n", + " <td>3</td>\n", + " <td>NaN</td>\n", + " <td>5</td>\n", + " <td>8</td>\n", + " <td>28</td>\n", " </tr>\n", " <tr>\n", - " <th>2</th>\n", - " <td>38.0</td>\n", - " <td>32.0</td>\n", + " <th>mean</th>\n", + " <td>33.730770</td>\n", + " <td>28.971153</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>63.633335</td>\n", + " <td>27.691490</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>28.162163</td>\n", + " <td>27.500000</td>\n", " <td>0.0</td>\n", - " <td>N</td>\n", + " <td>NaN</td>\n", + " <td>253.467453</td>\n", + " <td>29.674419</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>70.467743</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>7.500000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>12.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>8.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>...</td>\n", - " <td>LA CORU�A</td>\n", - " <td>0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>1776-07-28</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>212.00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", - " <th>3</th>\n", - " <td>15.0</td>\n", - " <td>51.0</td>\n", + " <th>std</th>\n", + " <td>9.166976</td>\n", + " <td>17.226234</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>85.771774</td>\n", + " <td>15.633592</td>\n", " <td>0.0</td>\n", - " <td>S</td>\n", + " <td>NaN</td>\n", + " <td>8.613508</td>\n", + " <td>17.315168</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>136.184052</td>\n", + " <td>17.611095</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>69.393318</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>3.535534</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>...</td>\n", - " <td>MONTEVIDEO</td>\n", - " <td>0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>1776-08-16</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>129.00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", - " <th>4</th>\n", - " <td>45.0</td>\n", - " <td>30.0</td>\n", + " <th>min</th>\n", + " <td>12.000000</td>\n", + " <td>0.000000</td>\n", " <td>0.0</td>\n", - " <td>N</td>\n", - " <td>49.0</td>\n", - " <td>8.0</td>\n", + " <td>NaN</td>\n", + " <td>1.000000</td>\n", + " <td>0.000000</td>\n", " <td>0.0</td>\n", - " <td>W</td>\n", " <td>NaN</td>\n", + " <td>17.000000</td>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", " <td>NaN</td>\n", - " <td>...</td>\n", + " <td>1.000000</td>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", " <td>NaN</td>\n", - " <td>0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>1776-09-24</td>\n", - " <td>N87W</td>\n", " <td>NaN</td>\n", - " <td>92.00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>5 rows × 58 columns</p>\n", - "</div>" - ], - "text/plain": [ - " drLatDeg drLatMin drLatSec drLatHem drLonDeg drLonMin drLonSec \\\n", - "0 19.0 15.0 0.0 N 102.0 2.0 0.0 \n", - "1 NaN NaN NaN NaN 11.0 5.0 0.0 \n", - "2 38.0 32.0 0.0 N NaN NaN NaN \n", - "3 15.0 51.0 0.0 S NaN NaN NaN \n", - "4 45.0 30.0 0.0 N 49.0 8.0 0.0 \n", + " <td>3.000000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>5.000000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>12.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>8.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25%</th>\n", + " <td>27.750000</td>\n", + " <td>14.750000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>3.000000</td>\n", + " <td>16.000000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>20.000000</td>\n", + " <td>12.250000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>71.000000</td>\n", + " <td>14.000000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>20.750000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>6.250000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>12.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>8.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>50%</th>\n", + " <td>35.000000</td>\n", + " <td>29.500000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>52.000000</td>\n", + " <td>26.500000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>29.000000</td>\n", + " <td>29.000000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>328.000000</td>\n", + " <td>31.000000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>47.000000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>7.500000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>12.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>8.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>75%</th>\n", + " <td>40.250000</td>\n", + " <td>44.000000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>102.750000</td>\n", + " <td>39.750000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>34.000000</td>\n", + " <td>39.500000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>345.000000</td>\n", + " <td>44.500000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>96.750000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>8.750000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>12.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>8.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>max</th>\n", + " <td>49.000000</td>\n", + " <td>59.000000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>346.000000</td>\n", + " <td>59.000000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>49.000000</td>\n", + " <td>59.000000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>359.000000</td>\n", + " <td>59.000000</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>338.000000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>10.000000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>12.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>8.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " drLatDeg drLatMin drLatSec drLatHem drLonDeg drLonMin \\\n", + "count 208.000000 208.000000 208.0 208 90.000000 94.000000 \n", + "unique NaN NaN NaN 2 NaN NaN \n", + "top NaN NaN NaN N NaN NaN \n", + "freq NaN NaN NaN 150 NaN NaN \n", + "mean 33.730770 28.971153 0.0 NaN 63.633335 27.691490 \n", + "std 9.166976 17.226234 0.0 NaN 85.771774 15.633592 \n", + "min 12.000000 0.000000 0.0 NaN 1.000000 0.000000 \n", + "25% 27.750000 14.750000 0.0 NaN 3.000000 16.000000 \n", + "50% 35.000000 29.500000 0.0 NaN 52.000000 26.500000 \n", + "75% 40.250000 44.000000 0.0 NaN 102.750000 39.750000 \n", + "max 49.000000 59.000000 0.0 NaN 346.000000 59.000000 \n", + "\n", + " drLonSec drLonHem LatDeg LatMin LatSec LatHem LonDeg \\\n", + "count 94.0 94 74.000000 74.000000 74.0 74 169.000000 \n", + "unique NaN 2 NaN NaN NaN 1 NaN \n", + "top NaN E NaN NaN NaN N NaN \n", + "freq NaN 67 NaN NaN NaN 74 NaN \n", + "mean 0.0 NaN 28.162163 27.500000 0.0 NaN 253.467453 \n", + "std 0.0 NaN 8.613508 17.315168 0.0 NaN 136.184052 \n", + "min 0.0 NaN 17.000000 0.000000 0.0 NaN 1.000000 \n", + "25% 0.0 NaN 20.000000 12.250000 0.0 NaN 71.000000 \n", + "50% 0.0 NaN 29.000000 29.000000 0.0 NaN 328.000000 \n", + "75% 0.0 NaN 34.000000 39.500000 0.0 NaN 345.000000 \n", + "max 0.0 NaN 49.000000 59.000000 0.0 NaN 359.000000 \n", + "\n", + " LonMin LonSec LonHem LatInd LonInd ZeroMeridian LMname1 \\\n", + "count 172.000000 172.0 172 273 273 273 66 \n", + "unique NaN NaN 2 4 5 12 18 \n", + "top NaN NaN E 1 2 TENERIFE SANDY HOOK \n", + "freq NaN NaN 139 189 132 169 21 \n", + "mean 29.674419 0.0 NaN NaN NaN NaN NaN \n", + "std 17.611095 0.0 NaN NaN NaN NaN NaN \n", + "min 0.000000 0.0 NaN NaN NaN NaN NaN \n", + "25% 14.000000 0.0 NaN NaN NaN NaN NaN \n", + "50% 31.000000 0.0 NaN NaN NaN NaN NaN \n", + "75% 44.500000 0.0 NaN NaN NaN NaN NaN \n", + "max 59.000000 0.0 NaN NaN NaN NaN NaN \n", + "\n", + " LMdirection1 LMdistance1 LMname2 LMdirection2 LMdistance2 \\\n", + "count 65 62.000000 2 2 2.000000 \n", + "unique 61 NaN 2 2 NaN \n", + "top N18:30E NaN DEN SWARTE PAGOOD WTN NaN \n", + "freq 2 NaN 1 1 NaN \n", + "mean NaN 70.467743 NaN NaN 7.500000 \n", + "std NaN 69.393318 NaN NaN 3.535534 \n", + "min NaN 3.000000 NaN NaN 5.000000 \n", + "25% NaN 20.750000 NaN NaN 6.250000 \n", + "50% NaN 47.000000 NaN NaN 7.500000 \n", + "75% NaN 96.750000 NaN NaN 8.750000 \n", + "max NaN 338.000000 NaN NaN 10.000000 \n", "\n", - " drLonHem LatDeg LatMin ... voyage_to Anchored_ind AnchorPlace \\\n", - "0 E 18.0 59.0 ... KASSERAIJEN 0 NaN \n", - "1 W 49.0 40.0 ... GRAVESEND 0 NaN \n", - "2 NaN NaN NaN ... LA CORU�A 0 NaN \n", - "3 NaN NaN NaN ... MONTEVIDEO 0 NaN \n", - "4 W NaN NaN ... NaN 0 NaN \n", + " LMname3 LMdirection3 LMdistance4 PosCoastal Calendar_type \\\n", + "count 0.0 0.0 0.0 273 273 \n", + "unique 0.0 0.0 NaN 2 1 \n", + "top NaN NaN NaN 0 2 \n", + "freq NaN NaN NaN 267 273 \n", + "mean NaN NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN NaN \n", "\n", - " DASno VoyageIni Course_ship Ship_speed Distance EncName EncNat \n", - "0 4259.3 1776-01-02 NW NaN 11.50 NaN NaN \n", - "1 NaN 1776-09-18 EBS1/4S NaN 131.00 NaN NaN \n", - "2 NaN 1776-07-28 NaN NaN 212.00 NaN NaN \n", - "3 NaN 1776-08-16 NaN NaN 129.00 NaN NaN \n", - "4 NaN 1776-09-24 N87W NaN 92.00 NaN NaN \n", + " logbook_date TimeOB Day_of_the_week PartDay Watch Glasses Start_day \\\n", + "count 273 273.0 65 21 21 21.0 273 \n", + "unique 31 NaN 7 1 1 NaN 2 \n", + "top 1776-10-04 NaN THURSDAY 3 VM NaN UNKNOWN \n", + "freq 12 NaN 11 21 21 NaN 208 \n", + "mean NaN 12.0 NaN NaN NaN 8.0 NaN \n", + "std NaN 0.0 NaN NaN NaN 0.0 NaN \n", + "min NaN 12.0 NaN NaN NaN 8.0 NaN \n", + "25% NaN 12.0 NaN NaN NaN 8.0 NaN \n", + "50% NaN 12.0 NaN NaN NaN 8.0 NaN \n", + "75% NaN 12.0 NaN NaN NaN 8.0 NaN \n", + "max NaN 12.0 NaN NaN NaN 8.0 NaN \n", "\n", - "[5 rows x 58 columns]" + " ShipName Nationality Ship_type Company Name1 Rank1 \\\n", + "count 273 273 199 86 213 252 \n", + "unique 17 4 6 3 15 4 \n", + "top EL PIZARRO SPANISH FRAGATA CORREO RN DE BEGUE CAPITAN \n", + "freq 31 154 75 60 31 154 \n", + "mean NaN NaN NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN NaN NaN \n", + "\n", + " Name2 Rank2 Name3 Rank3 voyage_from \\\n", + "count 48 48 0.0 0.0 273 \n", + "unique 4 3 0.0 0.0 14 \n", + "top ASMUS HENDRIK STERRENBERG SCHIPPER NaN NaN LA CORU�A \n", + "freq 21 21 NaN NaN 75 \n", + "mean NaN NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN NaN \n", + "\n", + " voyage_to Anchored_ind AnchorPlace DASno VoyageIni Course_ship \\\n", + "count 267 273 3 21 273 84 \n", + "unique 9 2 2 1 16 77 \n", + "top LA CORU�A 0 MONTEVIDEO 4259.3 1776-10-03 EBS1/4S \n", + "freq 79 270 2 21 44 3 \n", + "mean NaN NaN NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN NaN NaN \n", + "\n", + " Ship_speed Distance EncName EncNat \n", + "count 0.0 231 13 28 \n", + "unique 0.0 142 3 1 \n", + "top NaN 123.00 ACTIVE, FALCON BRITISH \n", + "freq NaN 5 8 28 \n", + "mean NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN " ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "data.data[[\"c99_voyage\"]].c99_voyage.head()" + "import pandas as pd\n", + "pd.options.display.max_columns = None\n", + "data.data[[\"c99_voyage\"]].c99_voyage.describe(include = 'all')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 TENERIFE\n", + "1 LONDON\n", + "2 TENERIFE\n", + "3 TENERIFE\n", + "4 GREENWICH\n", + "Name: ZeroMeridian, dtype: object" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.data[[\"c99_voyage\"]].c99_voyage.ZeroMeridian.head()" ] }, { @@ -736,7 +1459,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -750,7 +1473,7 @@ "Name: Ship_type, dtype: object" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -761,7 +1484,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -795,7 +1518,33 @@ " <th>Distance_units</th>\n", " <th>Distance_units_to_landmark</th>\n", " <th>Distance_units_travelled</th>\n", - " <th>...</th>\n", + " <th>Longitude_units</th>\n", + " <th>units_of_measurement</th>\n", + " <th>humidity_units</th>\n", + " <th>water_at_pump_units</th>\n", + " <th>wind_scale</th>\n", + " <th>BARO_type</th>\n", + " <th>BARO_brand</th>\n", + " <th>API</th>\n", + " <th>Humidity_method</th>\n", + " <th>compas_error</th>\n", + " <th>compas_correction</th>\n", + " <th>AT_outside</th>\n", + " <th>SST</th>\n", + " <th>AP</th>\n", + " <th>wind_dir</th>\n", + " <th>current_dir</th>\n", + " <th>current_speed</th>\n", + " <th>attached_tem</th>\n", + " <th>pump_water</th>\n", + " <th>Humidity</th>\n", + " <th>wind_force</th>\n", + " <th>weather</th>\n", + " <th>prcp_descriptor</th>\n", + " <th>sea_state</th>\n", + " <th>shape_coulds</th>\n", + " <th>dir_coulds</th>\n", + " <th>Clearness</th>\n", " <th>cloud_fraction</th>\n", " <th>gusts</th>\n", " <th>Rain</th>\n", @@ -810,176 +1559,698 @@ " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>0</th>\n", + " <th>count</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>84</td>\n", + " <td>271</td>\n", + " <td>273</td>\n", + " <td>0.0</td>\n", + " <td>63</td>\n", + " <td>231</td>\n", + " <td>266</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>271</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>223</td>\n", + " <td>105</td>\n", + " <td>16</td>\n", + " <td>174</td>\n", + " <td>19</td>\n", + " <td>0.0</td>\n", + " <td>47</td>\n", + " <td>0.0</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " </tr>\n", + " <tr>\n", + " <th>unique</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>33</td>\n", + " <td>0.0</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>3</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>0.0</td>\n", + " <td>130</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>NaN</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>74</td>\n", + " <td>48</td>\n", + " <td>8</td>\n", + " <td>51</td>\n", + " <td>7</td>\n", + " <td>0.0</td>\n", + " <td>9</td>\n", + " <td>0.0</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>top</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>UNKNOWN</td>\n", + " <td>UNKNOWN</td>\n", + " <td>0</td>\n", + " <td>NaN</td>\n", + " <td>LEAGUES</td>\n", + " <td>NM</td>\n", + " <td>360 DEGREES</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>E</td>\n", + " <td>SOUTHERLY</td>\n", + " <td>STRONG</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>FRESQUITO</td>\n", + " <td>HORIZONTES CARGADOS</td>\n", + " <td>RAIN</td>\n", + " <td>LLANA</td>\n", + " <td>OVERDRIJVENDE LUCHT</td>\n", + " <td>NaN</td>\n", + " <td>CLOUDY</td>\n", + " <td>NaN</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>CLIWOC VERSION 2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>freq</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>84</td>\n", + " <td>271</td>\n", + " <td>33</td>\n", + " <td>NaN</td>\n", + " <td>61</td>\n", + " <td>85</td>\n", + " <td>180</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>20</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>19</td>\n", + " <td>30</td>\n", + " <td>6</td>\n", + " <td>48</td>\n", + " <td>10</td>\n", + " <td>NaN</td>\n", + " <td>26</td>\n", + " <td>NaN</td>\n", + " <td>269</td>\n", + " <td>252</td>\n", + " <td>270</td>\n", + " <td>273</td>\n", + " <td>265</td>\n", + " <td>272</td>\n", + " <td>273</td>\n", + " <td>273</td>\n", + " <td>156</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mean</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>std</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>min</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>UNKNOWN</td>\n", - " <td>UNKNOWN</td>\n", - " <td>0</td>\n", " <td>NaN</td>\n", - " <td>DUITSE</td>\n", - " <td>DUITSE</td>\n", - " <td>...</td>\n", " <td>NaN</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>CLIWOC VERSION 2.0</td>\n", " </tr>\n", " <tr>\n", - " <th>1</th>\n", + " <th>25%</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>UNKNOWN</td>\n", - " <td>UNKNOWN</td>\n", - " <td>-23</td>\n", " <td>NaN</td>\n", - " <td>LEAGUES</td>\n", - " <td>NM</td>\n", - " <td>...</td>\n", " <td>NaN</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>CLIWOC VERSION 2.0</td>\n", " </tr>\n", " <tr>\n", - " <th>2</th>\n", + " <th>50%</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>UNKNOWN</td>\n", - " <td>-17</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>UNKNOWN</td>\n", - " <td>...</td>\n", " <td>NaN</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>CLIWOC VERSION 2.0</td>\n", " </tr>\n", " <tr>\n", - " <th>3</th>\n", + " <th>75%</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>UNKNOWN</td>\n", - " <td>-1</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>NM</td>\n", - " <td>...</td>\n", " <td>NaN</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>CLIWOC VERSION 1.0</td>\n", " </tr>\n", " <tr>\n", - " <th>4</th>\n", + " <th>max</th>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>UNKNOWN</td>\n", - " <td>UNKNOWN</td>\n", - " <td>-17</td>\n", " <td>NaN</td>\n", - " <td>LEAGUES</td>\n", - " <td>NM</td>\n", - " <td>...</td>\n", " <td>NaN</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>CLIWOC VERSION 1.1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>5 rows × 47 columns</p>\n", "</div>" ], "text/plain": [ - " AT_reading_units SST_reading_units AP_reading_units BART_reading_units \\\n", - "0 NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN \n", + " AT_reading_units SST_reading_units AP_reading_units \\\n", + "count 0.0 0.0 0.0 \n", + "unique 0.0 0.0 0.0 \n", + "top NaN NaN NaN \n", + "freq NaN NaN NaN \n", + "mean NaN NaN NaN \n", + "std NaN NaN NaN \n", + "min NaN NaN NaN \n", + "25% NaN NaN NaN \n", + "50% NaN NaN NaN \n", + "75% NaN NaN NaN \n", + "max NaN NaN NaN \n", "\n", - " ReferenceCourse ReferenceWindDirection Decl Distance_units \\\n", - "0 UNKNOWN UNKNOWN 0 NaN \n", - "1 UNKNOWN UNKNOWN -23 NaN \n", - "2 NaN UNKNOWN -17 NaN \n", - "3 NaN UNKNOWN -1 NaN \n", - "4 UNKNOWN UNKNOWN -17 NaN \n", + " BART_reading_units ReferenceCourse ReferenceWindDirection Decl \\\n", + "count 0.0 84 271 273 \n", + "unique 0.0 1 1 33 \n", + "top NaN UNKNOWN UNKNOWN 0 \n", + "freq NaN 84 271 33 \n", + "mean NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN \n", "\n", - " Distance_units_to_landmark Distance_units_travelled ... cloud_fraction \\\n", - "0 DUITSE DUITSE ... NaN \n", - "1 LEAGUES NM ... NaN \n", - "2 NaN UNKNOWN ... NaN \n", - "3 NaN NM ... NaN \n", - "4 LEAGUES NM ... NaN \n", + " Distance_units Distance_units_to_landmark Distance_units_travelled \\\n", + "count 0.0 63 231 \n", + "unique 0.0 2 5 \n", + "top NaN LEAGUES NM \n", + "freq NaN 61 85 \n", + "mean NaN NaN NaN \n", + "std NaN NaN NaN \n", + "min NaN NaN NaN \n", + "25% NaN NaN NaN \n", + "50% NaN NaN NaN \n", + "75% NaN NaN NaN \n", + "max NaN NaN NaN \n", "\n", - " gusts Rain Fog Snow Thunder Hail Sea_ice Trivial_correction \\\n", - "0 0 0 0 0 0 0 0 0 \n", - "1 0 1 0 0 0 0 0 0 \n", - "2 0 0 0 0 0 0 0 0 \n", - "3 0 0 0 0 0 0 0 0 \n", - "4 0 0 0 0 0 0 0 0 \n", + " Longitude_units units_of_measurement humidity_units \\\n", + "count 266 0.0 0.0 \n", + "unique 3 0.0 0.0 \n", + "top 360 DEGREES NaN NaN \n", + "freq 180 NaN NaN \n", + "mean NaN NaN NaN \n", + "std NaN NaN NaN \n", + "min NaN NaN NaN \n", + "25% NaN NaN NaN \n", + "50% NaN NaN NaN \n", + "75% NaN NaN NaN \n", + "max NaN NaN NaN \n", "\n", - " Release \n", - "0 CLIWOC VERSION 2.0 \n", - "1 CLIWOC VERSION 2.0 \n", - "2 CLIWOC VERSION 2.0 \n", - "3 CLIWOC VERSION 1.0 \n", - "4 CLIWOC VERSION 1.1 \n", + " water_at_pump_units wind_scale BARO_type BARO_brand API \\\n", + "count 0.0 0.0 0.0 0.0 0.0 \n", + "unique 0.0 0.0 0.0 0.0 0.0 \n", + "top NaN NaN NaN NaN NaN \n", + "freq NaN NaN NaN NaN NaN \n", + "mean NaN NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN NaN \n", "\n", - "[5 rows x 47 columns]" + " Humidity_method compas_error compas_correction AT_outside SST \\\n", + "count 0.0 0.0 0.0 0.0 0.0 \n", + "unique 0.0 0.0 0.0 NaN NaN \n", + "top NaN NaN NaN NaN NaN \n", + "freq NaN NaN NaN NaN NaN \n", + "mean NaN NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN NaN \n", + "\n", + " AP wind_dir current_dir current_speed attached_tem pump_water \\\n", + "count 0.0 271 1 1 0.0 0.0 \n", + "unique 0.0 130 1 1 NaN 0.0 \n", + "top NaN E SOUTHERLY STRONG NaN NaN \n", + "freq NaN 20 1 1 NaN NaN \n", + "mean NaN NaN NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN NaN NaN \n", + "\n", + " Humidity wind_force weather prcp_descriptor sea_state \\\n", + "count 0.0 223 105 16 174 \n", + "unique 0.0 74 48 8 51 \n", + "top NaN FRESQUITO HORIZONTES CARGADOS RAIN LLANA \n", + "freq NaN 19 30 6 48 \n", + "mean NaN NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN NaN \n", + "\n", + " shape_coulds dir_coulds Clearness cloud_fraction gusts Rain \\\n", + "count 19 0.0 47 0.0 273 273 \n", + "unique 7 0.0 9 0.0 2 2 \n", + "top OVERDRIJVENDE LUCHT NaN CLOUDY NaN 0 0 \n", + "freq 10 NaN 26 NaN 269 252 \n", + "mean NaN NaN NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN NaN NaN \n", + "\n", + " Fog Snow Thunder Hail Sea_ice Trivial_correction Release \n", + "count 273 273 273 273 273 273 273 \n", + "unique 2 1 2 2 1 1 3 \n", + "top 0 0 0 0 0 0 CLIWOC VERSION 2.0 \n", + "freq 270 273 265 272 273 273 156 \n", + "mean NaN NaN NaN NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN NaN NaN NaN " ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "data.data[[\"c99_data\"]].c99_data.head()" + "data.data[[\"c99_data\"]].c99_data.describe(include = 'all')" ] }, { @@ -991,7 +2262,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -1005,7 +2276,7 @@ "Name: wind_force, dtype: object" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" }