From cc9b84c27f6110d7a672235afea50cf3cbb48649 Mon Sep 17 00:00:00 2001
From: Beartriz Recinos Rivas <brivas@NOCS05291F.lan>
Date: Mon, 8 Feb 2021 13:51:50 +0000
Subject: [PATCH] testing cliwoc gather of var stats

---
Review notes (this section is ignored by git am):
 * line breaks and indentation restored; the patch had been collapsed
   onto three lines and could not be applied
 * the pickled key 'lan_inds' is now 'lat_inds'; readers of the .pkl
   files must use the new key
 * the "index" blob ids below are those of the originally submitted files

 tests/gather_stats_c99.py | 105 ++++++++++++++++++++++++++++++++++++++
 tests/run_plot.slurm      |  24 +++++++++
 2 files changed, 129 insertions(+)
 create mode 100755 tests/gather_stats_c99.py
 create mode 100755 tests/run_plot.slurm

diff --git a/tests/gather_stats_c99.py b/tests/gather_stats_c99.py
new file mode 100755
index 0000000..b19ea19
--- /dev/null
+++ b/tests/gather_stats_c99.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Gather field stats from CLIWOC c99
+
+For one year of the ``133-730`` data directory, read every monthly
+``.imma`` file that exists with ``mdf_reader``, count the values of a
+fixed set of ``c99_voyage`` / ``c99_data`` fields and pickle the totals.
+
+Usage: gather_stats_c99.py [YEAR_INDEX]
+
+YEAR_INDEX is an index into ``years`` (run_plot.slurm passes its array
+task id); it defaults to 1 when omitted.
+"""
+import os
+import sys
+import pandas as pd
+import numpy as np
+import mdf_reader
+import json
+import pickle
+from collections import defaultdict
+
+funPath = os.path.dirname(os.path.abspath(__file__))
+data_path = os.path.join(funPath,'data/133-730/')
+print(data_path)
+
+data_jasmin = '/group_workspaces/jasmin2/glamod_marine/data/datasets/ICOADS_R3.0.0T/level0/133-730/'
+print(data_jasmin)
+
+years = np.arange(1661,1895)
+
+output_path = '/Users/brivas/c3s_work/mdf_reader/tests/data/'
+
+# Index into ``years``: taken from the command line when given (the SLURM
+# wrapper passes $SLURM_ARRAY_TASK_ID), otherwise the original default of 1.
+i = int(sys.argv[1]) if len(sys.argv) > 1 else 1
+
+year = years[i]
+
+print(year)
+
+months = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']
+
+# TODO: for running in jasmin data_path must still be changed (data_jasmin
+# is defined above but not used yet); output_path probably too.
+paths_files = []
+for m in months:
+    path = os.path.join(data_path, str(year)+'-'+m+'.imma')
+    if os.path.exists(path):
+        paths_files.append(path)
+
+print(paths_files)
+
+schema_lib = os.path.join(os.path.dirname(funPath),'data_models','lib')
+
+print(schema_lib)
+
+schema_name = 'imma1_d730'
+
+model_path = os.path.join(schema_lib, schema_name)
+print(model_path)
+
+# pd.concat() raises "No objects to concatenate" on an empty list, so stop
+# early with a clear message when no monthly file exists for this year.
+if not paths_files:
+    sys.exit('No .imma files found for year {} in {}'.format(year, data_path))
+
+# Output key -> (section, field) whose value counts are gathered.
+fields = {
+    'ship_types': ('c99_voyage', 'Ship_type'),
+    'lat_inds': ('c99_voyage', 'LatInd'),
+    'lon_inds': ('c99_voyage', 'LonInd'),
+    'at_units': ('c99_data', 'AT_reading_units'),
+    'sst_units': ('c99_data', 'SST_reading_units'),
+    'ap_units': ('c99_data', 'AP_reading_units'),
+    'bart_units': ('c99_data', 'BART_reading_units'),
+    'lon_units': ('c99_data', 'Longitude_units'),
+    'baro_types': ('c99_data', 'BARO_type'),
+}
+
+# One list of monthly value-count frames per output key.
+monthly_counts = defaultdict(list)
+
+for path in paths_files:
+    data = mdf_reader.read(path, data_model_path= model_path)
+
+    for key, (section, field) in fields.items():
+        counts = data.data[[section]][section][field].value_counts(dropna=False)
+        monthly_counts[key].append(counts.to_frame())
+
+# Sum the monthly counts of each value over the year.
+# NOTE(review): join='inner' keeps only the values present in every month,
+# so a value missing from any month is dropped from the totals -- confirm
+# this is intended (join='outer' would keep them all).
+d = defaultdict(list)
+for key, frames in monthly_counts.items():
+    d[key] = pd.concat(frames, axis=1, join='inner').sum(axis=1)
+
+print(d)
+fp = os.path.join(output_path, str(year) + '.pkl')
+print(fp)
+
+with open(fp, 'wb') as f:
+    pickle.dump(d, f, protocol=-1)
\ No newline at end of file
diff --git a/tests/run_plot.slurm b/tests/run_plot.slurm
new file mode 100755
index 0000000..ce5a18b
--- /dev/null
+++ b/tests/run_plot.slurm
@@ -0,0 +1,24 @@
+#!/bin/bash
+#SBATCH --partition=short-serial
+#SBATCH --array=1-10
+#SBATCH --job-name=cliwoc_ct
+#SBATCH --output=slurm_log_output/cliwoc_ct_%A_%a.out
+#SBATCH --error=slurm_log_output/cliwoc_ct_%A_%a.err
+#SBATCH --mem=4000
+#SBATCH --time=00:29:00
+
+# Each array task gathers the stats of one year: the task id is passed to
+# gather_stats_c99.py as its year index.
+
+source activate ~/miniconda3/envs/sst
+
+echo "starting from $SLURM_ARRAY_TASK_ID"
+
+# "~/" (home directory) rather than "~mdf_reader", which bash expands as
+# the home directory of a user called mdf_reader.
+# NOTE(review): assumes the repository is checked out in $HOME -- confirm.
+python ~/mdf_reader/tests/gather_stats_c99.py "$SLURM_ARRAY_TASK_ID"
+
+echo "Done slurm task ID = $SLURM_ARRAY_TASK_ID"
+
+##sacct -j $SLURM_JOB_ID --format=User,JobID,Jobname,partition,state,time,start,end,elapsed,MaxRss,MaxVMSize,nnodes,ncpus,nodelist >> ../slurm_log_output/job_stats.out
\ No newline at end of file
-- 
GitLab