Commit 40a734f3 authored by Beatriz Recinos's avatar Beatriz Recinos
Browse files

new utils func and write to netcdf script

parent 73a15178
# TO RUN LOCALLY
import os
import psutil
import time
import sys
from configobj import ConfigObj
import pandas as pd
import numpy as np
import xarray as xr
import warnings
warnings.filterwarnings('ignore')
config = ConfigObj("../config.ini")
sys.path.append(config['repo_path'])
from sst_tools import utils_sst as utils
from sst_tools import workflow_sst as workflow
from sst_tools.workflow_sst import non_zero_data
from sst_tools.workflow_sst import sum_buoy_density
# Timing / memory bookkeeping for the whole run.
start = time.time()
process = psutil.Process(os.getpid())

# Input: grid resolution in degrees (1 or 2) and the pandas resampling
# frequency alias used for the time means below.
res = 2
#alias = 'QS-JAN'
alias = '1M'
#alias = '1Y'

if res == 1:
    path_coarse = config['satellite_sst_1x1']
    path_buoy = config['satellite_sst_buoy_1x1']
elif res == 2:
    path_coarse = config['satellite_sst_2x2']
    path_buoy = config['satellite_sst_buoy_2x2']
else:
    # Fail fast: continuing with None paths would only crash later
    # inside open_data_and_crop with a much less helpful error.
    sys.exit('Error please specify a resolution')

# Open both products cropped to the latitude band of interest (90S-25S).
sst_coarse = utils.open_data_and_crop(path_coarse, lat_s=-90, lat_n=-25)
sst_at_buoy = utils.open_data_and_crop(path_buoy, lat_s=-90, lat_n=-25)
# Standard error of the mean SST at the buoy locations:
# se = std / sqrt(N), where N is the buoy observation count per cell.
sst_at_buoy['analysed_sst_se'] = (sst_at_buoy.analysed_sst_std
                                  / np.sqrt(sst_at_buoy.buoy_count))

# For every time step, count how many (lat, lon) grid cells hold data.
grid_boxes_with_data = xr.apply_ufunc(
    non_zero_data,
    sst_at_buoy.analysed_sst,
    input_core_dims=[["lat", "lon"]],
    dask='allowed',
    vectorize=True,
)
grid_boxes_with_data.name = 'No_of_grid_cells_with_data'

# Report elapsed wall time (hh:mm:ss) and resident memory so far.
elapsed = time.time() - start
mins, s = divmod(elapsed, 60)
h, m = divmod(mins, 60)
print("Done reading everything: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
# Resample everything to the time resolution given by `alias`
# (e.g. monthly means: each time stamp is the last day of the month).
# Coarse product: a plain time mean per resampling window is enough.
sst_coarse_re = sst_coarse.resample(time=alias).mean()


def _buoy_weighted(data_array, var_name):
    # Moving time average of `data_array`, weighted by the number of
    # buoy observations (buoy_count) available in each grid cell.
    return workflow.calculate_time_moving_weighted_average(data_array,
                                                           sst_at_buoy.buoy_count,
                                                           time_stamp=alias,
                                                           var_name=var_name)


sst_at_buoy_sst = _buoy_weighted(sst_at_buoy.analysed_sst, 'analysed_sst')
sst_at_buoy_std = _buoy_weighted(sst_at_buoy.analysed_sst_std, 'analysed_sst_std')
sst_at_buoy_median = _buoy_weighted(sst_at_buoy.analysed_sst_median, 'analysed_sst_median')
sst_at_buoy_se = _buoy_weighted(sst_at_buoy.analysed_sst_se, 'analysed_sst_se')

# buoy_count cannot sensibly be weighted by itself, so take plain stats:
# the mean count per window ...
sst_at_buoy_buoy_count_mean = sst_at_buoy.buoy_count.resample(time=alias).mean()
# ... and the total number of buoy points per window.
sst_at_buoy_buoy_count_sum = sst_at_buoy.buoy_count.resample(time=alias).sum()
# Number of grid boxes with data in each window.
# NOTE(review): grid_boxes_with_data_re is never written to the output
# netCDF below -- confirm whether that is intentional.
grid_boxes_with_data_re = grid_boxes_with_data.resample(time=alias).sum()

m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Done resampling: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
# Coordinates and dimensions shared by all output variables.
yi = sst_at_buoy.lat.values
xi = sst_at_buoy.lon.values
# BUGFIX: this used to be named `time`, shadowing the imported `time`
# module for the remainder of the script.
time_vals = sst_at_buoy_sst.time.values

# Empty dataset carrying only the coordinates; variables attached below.
combined = xr.Dataset(coords={'lat': (['lat'], yi),
                              'lon': (['lon'], xi),
                              'time': time_vals})

# CF-style attribute dictionaries for each output variable.
dict_sst_buoy = {'long_name': 'analysed sea surface temperature at buoy',
                 'standard_name': 'sea_water_temperature',
                 'units': 'kelvin'}
dict_std = {'long_name': 'std analysed sea surface temperature at buoy',
            'standard_name': 'standard deviation',
            'units': 'kelvin'}
dict_median = {'long_name': 'median analysed sea surface temperature at buoy',
               'standard_name': 'median',
               'units': 'kelvin'}
dict_se = {'long_name': 'se analysed sea surface temperature at buoy',
           'standard_name': 'standard error',
           'units': 'kelvin'}
dict_coarse = {'long_name': 'analysed sea surface temperature coarse',
               'standard_name': 'sea_water_temperature',
               'units': 'kelvin'}
dict_buoy_mean = {'long_name': 'mean_buoy_count_per_grid_cell',
                  'standard_name': 'buoy_count',
                  'units': 'No of buoys on a grid cell'}
dict_buoy_sum = {'long_name': 'sum_buoy_count_per_grid_cell',
                 'standard_name': 'buoy_count_total',
                 'units': 'No of buoys on a grid cell'}

# Attach each resampled variable with its attributes.
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_at_buoy_sst,
                                              'analysed_sst_at_buoy',
                                              dict_sst_buoy)
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_at_buoy_std,
                                              'analysed_sst_at_buoy_std',
                                              dict_std)
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_at_buoy_median,
                                              'analysed_sst_at_buoy_median',
                                              dict_median)
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_at_buoy_se,
                                              'analysed_sst_at_buoy_se',
                                              dict_se)
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_coarse_re.analysed_sst,
                                              'analysed_sst_coarse',
                                              dict_coarse)
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_at_buoy_buoy_count_mean,
                                              'buoy_count_mean',
                                              dict_buoy_mean)
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_at_buoy_buoy_count_sum,
                                              'buoy_count_sum',
                                              dict_buoy_sum)

combined = utils.add_global_attrs(combined, res)

# File name encodes the time alias and the spatial resolution,
# e.g. 1M_2x2.nc
combined.to_netcdf(os.path.join(config['combined_data'],
                                alias + '_' + str(res)+'x'+str(res) + '.nc'))
print('Done saving to netcdf')
......@@ -37,8 +37,8 @@ start_day = int(sys.argv[3])
month = int(sys.argv[4])
day_to_plot = int(sys.argv[5]) # From SLURM ARRAY
lat_bnds = [-50, -25]
lon_bnds = [100, 180]
lat_bnds = [-90, -25]
lon_bnds = [-150, 150]
extent = np.append(lon_bnds, lat_bnds)
print(extent)
......
......@@ -223,3 +223,52 @@ def open_data_and_crop(path_to_data, lat_s, lat_n):
data_cropped = xr.open_mfdataset(file_paths).sel(lat=slice(lat_s, lat_n))
return data_cropped
def write_variable_to_xr_dataset(data_set, var, var_name, dict_attrs):
    """
    Attach a (time, lat, lon) variable to an xarray Dataset.

    Parameters
    ----------
    data_set: xr.Dataset that the variable is added to.
    var: array-like (e.g. a dask-backed xr.DataArray) with a .load()
        method; it is loaded into memory before being stored.
    var_name: name under which the variable is stored in the dataset.
    dict_attrs: dictionary of attributes set on the new variable.

    Returns
    -------
    The same dataset, with the variable and its attributes attached.
    """
    loaded = var.load()  # force computation so the data is in memory
    data_set[var_name] = (['time', 'lat', 'lon'], loaded)
    data_set[var_name].attrs = dict_attrs
    return data_set
def add_global_attrs(data_set, res):
    """
    Set the global (dataset-level) attributes of an xarray Dataset.

    Parameters
    ----------
    data_set: xr.Dataset whose global attributes are replaced.
    res: grid resolution in degrees; recorded in the resolution attrs.

    Returns
    -------
    The same dataset, with its global attributes set.
    """
    # Replaces (not merges) any previously set global attributes.
    data_set.attrs = dict(
        title='ESA SST CCI OSTIA L4 product re-sampled',
        gds_version_id='2.0',
        spatial_resolution=str(res) + ' degree',
        geospatial_lat_resolution=res,
        geospatial_lat_units='degrees_north',
        geospatial_lon_resolution=res,
        geospatial_lon_units='degrees_east',
        creator_name='Beatriz Recinos (NOC)',
    )
    return data_set
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment