Commit 40a734f3 authored by Beatriz Recinos's avatar Beatriz Recinos
Browse files

new utils func and write to netcdf script

parent 73a15178
# TO RUN LOCALLY
import os
import psutil
import time
import sys
from configobj import ConfigObj
import pandas as pd
import numpy as np
import xarray as xr
import warnings
warnings.filterwarnings('ignore')
config = ConfigObj("../config.ini")
sys.path.append(config['repo_path'])
from sst_tools import utils_sst as utils
from sst_tools import workflow_sst as workflow
from sst_tools.workflow_sst import non_zero_data
from sst_tools.workflow_sst import sum_buoy_density
# Timing / memory bookkeeping for the whole run.
start = time.time()
process = psutil.Process(os.getpid())

# Input: grid resolution in degrees (1 or 2) and the pandas resampling
# frequency alias used for the time means below.
res = 2
#alias = 'QS-JAN'
alias = '1M'
#alias = '1Y'

if res == 1:
    path_coarse = config['satellite_sst_1x1']
    path_buoy = config['satellite_sst_buoy_1x1']
elif res == 2:
    path_coarse = config['satellite_sst_2x2']
    path_buoy = config['satellite_sst_buoy_2x2']
else:
    # Fail fast: continuing with None paths would only crash later
    # inside open_data_and_crop with a much less helpful error.
    sys.exit('Error please specify a resolution')

# Open both products cropped to the latitude band of interest (90S-25S).
sst_coarse = utils.open_data_and_crop(path_coarse, lat_s=-90, lat_n=-25)
sst_at_buoy = utils.open_data_and_crop(path_buoy, lat_s=-90, lat_n=-25)
# Standard error of the mean SST at the buoy locations:
# se = std / sqrt(N), where N is the buoy observation count per cell.
sst_at_buoy['analysed_sst_se'] = (sst_at_buoy.analysed_sst_std
                                  / np.sqrt(sst_at_buoy.buoy_count))

# For every time step, count how many (lat, lon) grid cells hold data.
grid_boxes_with_data = xr.apply_ufunc(
    non_zero_data,
    sst_at_buoy.analysed_sst,
    input_core_dims=[["lat", "lon"]],
    dask='allowed',
    vectorize=True,
)
grid_boxes_with_data.name = 'No_of_grid_cells_with_data'

# Report elapsed wall time (hh:mm:ss) and resident memory so far.
elapsed = time.time() - start
mins, s = divmod(elapsed, 60)
h, m = divmod(mins, 60)
print("Done reading everything: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
# Resample everything to the time resolution given by `alias`
# (e.g. monthly means: each time stamp is the last day of the month).
# Coarse product: a plain time mean per resampling window is enough.
sst_coarse_re = sst_coarse.resample(time=alias).mean()


def _buoy_weighted(data_array, var_name):
    # Moving time average of `data_array`, weighted by the number of
    # buoy observations (buoy_count) available in each grid cell.
    return workflow.calculate_time_moving_weighted_average(data_array,
                                                           sst_at_buoy.buoy_count,
                                                           time_stamp=alias,
                                                           var_name=var_name)


sst_at_buoy_sst = _buoy_weighted(sst_at_buoy.analysed_sst, 'analysed_sst')
sst_at_buoy_std = _buoy_weighted(sst_at_buoy.analysed_sst_std, 'analysed_sst_std')
sst_at_buoy_median = _buoy_weighted(sst_at_buoy.analysed_sst_median, 'analysed_sst_median')
sst_at_buoy_se = _buoy_weighted(sst_at_buoy.analysed_sst_se, 'analysed_sst_se')

# buoy_count cannot sensibly be weighted by itself, so take plain stats:
# the mean count per window ...
sst_at_buoy_buoy_count_mean = sst_at_buoy.buoy_count.resample(time=alias).mean()
# ... and the total number of buoy points per window.
sst_at_buoy_buoy_count_sum = sst_at_buoy.buoy_count.resample(time=alias).sum()
# Number of grid boxes with data in each window.
# NOTE(review): grid_boxes_with_data_re is never written to the output
# netCDF below -- confirm whether that is intentional.
grid_boxes_with_data_re = grid_boxes_with_data.resample(time=alias).sum()

m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Done resampling: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
# Coordinates and dimensions shared by all output variables.
yi = sst_at_buoy.lat.values
xi = sst_at_buoy.lon.values
# BUGFIX: this used to be named `time`, shadowing the imported `time`
# module for the remainder of the script.
time_vals = sst_at_buoy_sst.time.values

# Empty dataset carrying only the coordinates; variables attached below.
combined = xr.Dataset(coords={'lat': (['lat'], yi),
                              'lon': (['lon'], xi),
                              'time': time_vals})

# CF-style attribute dictionaries for each output variable.
dict_sst_buoy = {'long_name': 'analysed sea surface temperature at buoy',
                 'standard_name': 'sea_water_temperature',
                 'units': 'kelvin'}
dict_std = {'long_name': 'std analysed sea surface temperature at buoy',
            'standard_name': 'standard deviation',
            'units': 'kelvin'}
dict_median = {'long_name': 'median analysed sea surface temperature at buoy',
               'standard_name': 'median',
               'units': 'kelvin'}
dict_se = {'long_name': 'se analysed sea surface temperature at buoy',
           'standard_name': 'standard error',
           'units': 'kelvin'}
dict_coarse = {'long_name': 'analysed sea surface temperature coarse',
               'standard_name': 'sea_water_temperature',
               'units': 'kelvin'}
dict_buoy_mean = {'long_name': 'mean_buoy_count_per_grid_cell',
                  'standard_name': 'buoy_count',
                  'units': 'No of buoys on a grid cell'}
dict_buoy_sum = {'long_name': 'sum_buoy_count_per_grid_cell',
                 'standard_name': 'buoy_count_total',
                 'units': 'No of buoys on a grid cell'}

# Attach each resampled variable with its attributes.
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_at_buoy_sst,
                                              'analysed_sst_at_buoy',
                                              dict_sst_buoy)
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_at_buoy_std,
                                              'analysed_sst_at_buoy_std',
                                              dict_std)
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_at_buoy_median,
                                              'analysed_sst_at_buoy_median',
                                              dict_median)
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_at_buoy_se,
                                              'analysed_sst_at_buoy_se',
                                              dict_se)
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_coarse_re.analysed_sst,
                                              'analysed_sst_coarse',
                                              dict_coarse)
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_at_buoy_buoy_count_mean,
                                              'buoy_count_mean',
                                              dict_buoy_mean)
combined = utils.write_variable_to_xr_dataset(combined,
                                              sst_at_buoy_buoy_count_sum,
                                              'buoy_count_sum',
                                              dict_buoy_sum)

combined = utils.add_global_attrs(combined, res)

# File name encodes the time alias and the spatial resolution,
# e.g. 1M_2x2.nc
combined.to_netcdf(os.path.join(config['combined_data'],
                                alias + '_' + str(res)+'x'+str(res) + '.nc'))
print('Done saving to netcdf')
......@@ -37,8 +37,8 @@ start_day = int(sys.argv[3])
month = int(sys.argv[4])
day_to_plot = int(sys.argv[5]) # From SLURM ARRAY
lat_bnds = [-50, -25]
lon_bnds = [100, 180]
lat_bnds = [-90, -25]
lon_bnds = [-150, 150]
extent = np.append(lon_bnds, lat_bnds)
print(extent)
......
......@@ -223,3 +223,52 @@ def open_data_and_crop(path_to_data, lat_s, lat_n):
data_cropped = xr.open_mfdataset(file_paths).sel(lat=slice(lat_s, lat_n))
return data_cropped
def write_variable_to_xr_dataset(data_set, var, var_name, dict_attrs):
    """
    Attach a (time, lat, lon) variable to an xarray Dataset.

    Parameters
    ----------
    data_set: xr.Dataset that the variable is added to.
    var: array-like (e.g. a dask-backed xr.DataArray) with a .load()
        method; it is loaded into memory before being stored.
    var_name: name under which the variable is stored in the dataset.
    dict_attrs: dictionary of attributes set on the new variable.

    Returns
    -------
    The same dataset, with the variable and its attributes attached.
    """
    loaded = var.load()  # force computation so the data is in memory
    data_set[var_name] = (['time', 'lat', 'lon'], loaded)
    data_set[var_name].attrs = dict_attrs
    return data_set
def add_global_attrs(data_set, res):
    """
    Set the global (dataset-level) attributes of an xarray Dataset.

    Parameters
    ----------
    data_set: xr.Dataset whose global attributes are replaced.
    res: grid resolution in degrees; recorded in the resolution attrs.

    Returns
    -------
    The same dataset, with its global attributes set.
    """
    # Replaces (not merges) any previously set global attributes.
    data_set.attrs = dict(
        title='ESA SST CCI OSTIA L4 product re-sampled',
        gds_version_id='2.0',
        spatial_resolution=str(res) + ' degree',
        geospatial_lat_resolution=res,
        geospatial_lat_units='degrees_north',
        geospatial_lon_resolution=res,
        geospatial_lon_units='degrees_east',
        creator_name='Beatriz Recinos (NOC)',
    )
    return data_set
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment