#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Gather field stats from CLIWOC c99

For one year of monthly ``<year>-<month>.imma`` files, count how often each
value occurs in a fixed set of ``c99_voyage`` / ``c99_data`` fields, add the
monthly counts together and pickle the totals as ``<year>.pkl``.

Usage:
    <this script> INDEX

``INDEX`` is a zero-based position in ``years`` (0 -> 1661, 233 -> 1894).
"""
import os
import pandas as pd
import numpy as np
import pickle
from collections import defaultdict
import sys

# Location from which ``mdf_reader`` is imported (see ``main``).
sys.path.append('/home/users/brecinos/c3s_work')

funPath = os.path.dirname(os.path.abspath(__file__))

# TODO: for running in jasmin we must change several things.. the year and
# data_path
data_jasmin = '/gws/nopw/j04/glamod_marine/data/datasets/ICOADS_R3.0.0T/level0/133-730'
output_path = '/home/users/brecinos/c3s_work/133-730/'

years = np.arange(1661, 1895)
months = ['{:02d}'.format(m) for m in range(1, 13)]

schema_lib = os.path.join(os.path.dirname(funPath), 'data_models', 'lib')
schema_name = 'imma1_d730'
model_path = os.path.join(schema_lib, schema_name)

# (output key, section, field) for every column whose values are tallied.
# The order here is the insertion order of the keys in the pickled dict.
# NOTE(review): 'lan_inds' looks like a typo for 'lat_inds'. The key is kept
# unchanged because existing readers of the pickles may depend on it.
FIELDS = (
    ('ship_types', 'c99_voyage', 'Ship_type'),
    ('lan_inds', 'c99_voyage', 'LatInd'),
    ('lon_inds', 'c99_voyage', 'LonInd'),
    ('at_units', 'c99_data', 'AT_reading_units'),
    ('sst_units', 'c99_data', 'SST_reading_units'),
    ('ap_units', 'c99_data', 'AP_reading_units'),
    ('bart_units', 'c99_data', 'BART_reading_units'),
    ('lon_units', 'c99_data', 'Longitude_units'),
    ('baro_types', 'c99_data', 'BARO_type'),
    ('distance_units', 'c99_data', 'Distance_units'),
    ('distance_units_to_land', 'c99_data', 'Distance_units_to_landmark'),
    ('distance_units_travelled', 'c99_data', 'Distance_units_travelled'),
    ('units_of_other_measurements', 'c99_data', 'units_of_measurement'),
    ('humidity_units', 'c99_data', 'humidity_units'),
    ('releases', 'c99_data', 'Release'),
)


def year_for_index(index):
    """Return the entry of ``years`` at zero-based position *index*.

    Raises:
        IndexError: if *index* is outside ``0 .. len(years) - 1``. Negative
            values are rejected as well, so a bad job index cannot silently
            wrap around to a year at the end of the range.
    """
    if not 0 <= index < len(years):
        raise IndexError(
            'index must be between 0 and {}'.format(len(years) - 1))
    return years[index]


def monthly_paths(data_dir, year):
    """Return the existing ``<year>-<mm>.imma`` paths in *data_dir*.

    Paths are returned in month order; months without a file are skipped.
    """
    candidates = (
        os.path.join(data_dir, '{}-{}.imma'.format(year, month))
        for month in months
    )
    return [path for path in candidates if os.path.exists(path)]


def count_values(table, section, field):
    """Count the occurrences of each value (NaN included) of one column.

    *table* is a DataFrame with two-level columns; ``table[section][field]``
    selects the column. The counts are returned as a one-column DataFrame
    indexed by the distinct values.
    """
    return table[section][field].value_counts(dropna=False).to_frame()


def sum_counts(frames):
    """Add up per-file count frames into one Series indexed by value.

    The frames are aligned side by side on their index and summed row-wise,
    so a value missing from some files still gets the total of the others.
    """
    return pd.concat(frames, axis=1).sum(axis=1)


def main(argv=None):
    """Tally the configured fields for one year and pickle the totals.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. Only the first argument (the index into
            ``years``) is used.

    Exits with an error message if the index is missing, is not an integer
    or is out of range. If the year has no input files, a message is printed
    and nothing is written.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit('usage: {} INDEX  (0..{})'.format(
            os.path.basename(sys.argv[0]), len(years) - 1))
    try:
        year = year_for_index(int(args[0]))
    except (ValueError, IndexError) as err:
        sys.exit('Invalid year index {!r}: {}'.format(args[0], err))

    print(data_jasmin)
    print(years)
    print(year)

    paths_files = monthly_paths(data_jasmin, year)
    print(paths_files)
    print(schema_lib)
    print(model_path)

    if not paths_files:
        # pd.concat() raises ValueError on an empty list, so stop here.
        print('No .imma files found for {}; nothing to write.'.format(year))
        return

    # Imported lazily: it is resolved through the sys.path entry added above,
    # and deferring it keeps the helper functions importable without it.
    import mdf_reader

    per_field = defaultdict(list)
    for path in paths_files:
        data = mdf_reader.read(path, data_model_path=model_path)
        for key, section, field in FIELDS:
            per_field[key].append(count_values(data.data, section, field))

    # Kept as a defaultdict so the pickled object has the same type as before.
    d = defaultdict(list)
    for key, _section, _field in FIELDS:
        d[key] = sum_counts(per_field[key])

    os.makedirs(output_path, exist_ok=True)
    fp = os.path.join(output_path, str(year) + '.pkl')
    with open(fp, 'wb') as f:
        pickle.dump(d, f, protocol=pickle.HIGHEST_PROTOCOL)

    print('Done!')


if __name__ == '__main__':
    main()