Commit 6842de0c authored by Beatriz Recinos

added time series script

parent b6be34c5
@@ -15,3 +15,4 @@ satellite_sst_buoy_1x1 = '/gws/nopw/j04/orchestra/ESAcci_sst_v2.1_coarse/v2.1_at
satellite_sst_buoy_2x2 = '/gws/nopw/j04/orchestra/ESAcci_sst_v2.1_coarse/v2.1_atbuoy_2x2/'
time_series = '/gws/nopw/j04/orchestra/ESAcci_sst_v2.1_coarse/time_series_ESAcci_sst/'
# TO RUN LOCALLY
import os
import psutil
import time
import sys
from configobj import ConfigObj
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
config = ConfigObj("../config.ini")
sys.path.append(config['repo_path'])
from sst_tools import utils_sst as utils
from sst_tools import workflow_sst as workflow
# Track run time and memory usage of the script
start = time.time()
process = psutil.Process(os.getpid())
res = 2
if res == 1:
path_coarse = config['satellite_sst_1x1']
path_buoy = config['satellite_sst_buoy_1x1']
elif res == 2:
path_coarse = config['satellite_sst_2x2']
path_buoy = config['satellite_sst_buoy_2x2']
else:
    sys.exit('Error: please specify a resolution of 1 or 2')
# Open data
sst_coarse = utils.open_data_and_crop(path_coarse, lat_s=-90, lat_n=-25)
sst_at_buoy = utils.open_data_and_crop(path_buoy, lat_s=-90, lat_n=-25)
m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Done reading everything: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
# Calculate time-stamp means by resampling the data to a different
# time resolution depending on the alias given
# Monthly means: each time stamp corresponds to the last day of the month
sst_coarse_m = sst_coarse.resample(time='1M').mean()
sst_at_buoy_m = sst_at_buoy.resample(time='1M').mean()
sst_at_buoy_m_buoy_density = sst_at_buoy.buoy_count.resample(time='1M').sum()
# Yearly means: each time stamp corresponds to the end of the year (31 Dec)
sst_coarse_y = sst_coarse.resample(time='1Y').mean()
sst_at_buoy_y = sst_at_buoy.resample(time='1Y').mean()
sst_at_buoy_y_buoy_density = sst_at_buoy.buoy_count.resample(time='1Y').sum()
# Seasonal (quarterly) means: each time stamp corresponds to the start of a quarter (JAN, APR, JUL, OCT)
sst_coarse_s = sst_coarse.resample(time='QS-JAN').mean()
sst_at_buoy_s = sst_at_buoy.resample(time='QS-JAN').mean()
sst_at_buoy_s_buoy_density = sst_at_buoy.buoy_count.resample(time='QS-JAN').sum()
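# Aside (a minimal sketch on toy data, not part of the pipeline): the offset aliases
# above follow the pandas conventions, so '1M' labels each mean with the month end,
# '1Y' with the year end, and 'QS-JAN' starts quarters in Jan, Apr, Jul and Oct.
import numpy as np
import xarray as xr
_toy = xr.DataArray(np.arange(365.),
                    coords={'time': pd.date_range('2010-01-01', periods=365)},
                    dims='time')
print(_toy.resample(time='QS-JAN').mean().time.values)  # 2010-01-01, -04-01, -07-01, -10-01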
m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Done resampling: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
# Calculate cell by cell differences of analysed_sst and time series statistics
diff_m = workflow.calculate_time_series_diff(sst_coarse_m, sst_at_buoy_m)
diff_y = workflow.calculate_time_series_diff(sst_coarse_y, sst_at_buoy_y)
diff_s = workflow.calculate_time_series_diff(sst_coarse_s, sst_at_buoy_s)
m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Done calculating time series diff: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
# Calculate Southern Ocean (S.O.) means of bin statistics
stats_m = workflow.calculate_spatial_avg_of_stats(sst_at_buoy_m)
stats_y = workflow.calculate_spatial_avg_of_stats(sst_at_buoy_y)
stats_s = workflow.calculate_spatial_avg_of_stats(sst_at_buoy_s)
m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Done stats: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
# Estimate total buoy count
count_m = workflow.calculate_total_buoy_density(sst_at_buoy_m_buoy_density)
count_y = workflow.calculate_total_buoy_density(sst_at_buoy_y_buoy_density)
count_s = workflow.calculate_total_buoy_density(sst_at_buoy_s_buoy_density)
m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Done count: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
result_m = pd.concat([diff_m, stats_m, count_m], axis=1)
result_y = pd.concat([diff_y, stats_y, count_y], axis=1)
result_s = pd.concat([diff_s, stats_s, count_s], axis=1)
result_m.to_csv(os.path.join(config['time_series'], 'monthly_' + str(res)+'x'+str(res) + '.csv'))
result_y.to_csv(os.path.join(config['time_series'], 'yearly_' + str(res)+'x'+str(res) + '.csv'))
result_s.to_csv(os.path.join(config['time_series'], 'seasonal_' + str(res)+'x'+str(res) + '.csv'))
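# Sanity check (a sketch, assuming pandas' default to_csv formatting): the saved
# files are indexed by the resampled time stamps and can be read straight back.
check = pd.read_csv(os.path.join(config['time_series'],
                                 'monthly_' + str(res) + 'x' + str(res) + '.csv'),
                    index_col='time', parse_dates=True)
print(check.columns.tolist())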
# Log
m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Saved everything: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
from sst_tools.utils_sst import *
from sst_tools.plot_sst import *
from sst_tools.workflow_sst import *
@@ -75,7 +75,6 @@ def plot_sst_on_salem_map(ds, t, lon_xinterval=0.5, lat_yinterval=0.5):
sm = ds.isel(time=t).salem.get_map(countries=True)
sm.set_data(ds.analysed_sst.isel(time=t))
sm.set_lonlat_contours(xinterval=lon_xinterval, yinterval=lat_yinterval)
sm.set_cmap('RdBu_r')
return sm
@@ -99,13 +98,12 @@ def plot_stats_on_salem_map(ds, var, t, lon_xinterval=0.5, lat_yinterval=0.5):
if var == 'buoy_count':
cmap = 'Purples'
elif var == 'analysed_sst_std':
cmap = 'Greys'
else:
cmap = 'RdBu_r'
sm = ds.isel(time=t).salem.get_map(countries=True)
sm.set_data(ds[var].isel(time=t))
sm.set_lonlat_contours(xinterval=lon_xinterval, yinterval=lat_yinterval)
sm.set_cmap(cmap)
return sm
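# Usage sketch (hypothetical call, assuming salem's Map API): the returned map
# object can then be rendered, e.g.
#   sm = plot_stats_on_salem_map(sst_at_buoy_m, 'buoy_count', t=0)
#   sm.visualize()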
@@ -4,6 +4,7 @@ import re
import logging
import pyreadr
import pandas as pd
import xarray as xr
# Module logger
log = logging.getLogger(__name__)
@@ -99,6 +100,30 @@ def find_nc_files_specific_days(main_path, dates):
return files_sst, files_anomaly
def find_all_coarse_daily_files(main_path):
""" Finds all coarse cci .nc files for a
given resolution or re-griding method and
returns a list of paths order by date.
Parameters
----------
main_path: Folder for the re-gridded data configuration
(e.g. config['satellite_sst_1x1'] see config file)
Returns
-------
sst_paths
"""
sst_paths = []
for root, dirs, files in os.walk(main_path):
    # Sort sub-directories and file names in place so the walk returns
    # the date-stamped files in chronological order
    dirs.sort()
    files.sort()
    for file in files:
        sst_paths.append(os.path.join(root, file))
return sst_paths
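# Usage sketch (hypothetical, the path comes from config.ini):
#   paths = find_all_coarse_daily_files(config['satellite_sst_2x2'])
# The chronological ordering relies on the daily file names embedding the date,
# so that a lexical sort matches a sort by date.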
def find_drifter_files(main_path, months, year):
""" Finds all .Rda drifters files for a list of months and years
in the MFILES_DRIF sub directories.
@@ -176,3 +201,25 @@ def read_rda_as_pandas(path_to_file, keep_globe=False):
return df.reset_index(drop=True)
def open_data_and_crop(path_to_data, lat_s, lat_n):
"""
Opens all netcdf files in a path and crops the data to a latitude band.
Parameters
----------
path_to_data: path to a folder of netcdf files
lat_s: southern latitude bound
lat_n: northern latitude bound
Returns
-------
data_cropped: dask-backed dataset of the entire data set cropped
to the given latitude band
"""
# Reading data set
file_paths = find_all_coarse_daily_files(path_to_data)
data_cropped = xr.open_mfdataset(file_paths).sel(lat=slice(lat_s, lat_n))
return data_cropped
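# Usage sketch (bounds as used in the time series script): crop the coarse data
# to the Southern Ocean only.
#   so_sst = open_data_and_crop(config['satellite_sst_2x2'], lat_s=-90, lat_n=-25)
# open_mfdataset returns a lazily loaded (dask-backed) dataset, and the slice
# assumes the lat coordinate is stored in ascending order.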
@@ -5,6 +5,7 @@ import xarray as xr
from scipy.stats import binned_statistic_2d
from datetime import datetime
from collections import defaultdict
# Module logger
log = logging.getLogger(__name__)
@@ -290,3 +291,130 @@ def grid_buoy_data(x, y, z, resolution, data):
return buoy
def calculate_time_series_diff(sst_coarse, sst_at_buoy):
"""
Calculates spatial means of SST differences between two re-gridded
data sets of ESA-cci-sst at a given resolution.
Parameters
----------
sst_coarse: coarse cci-sst data (1 or 2 degree)
sst_at_buoy: cci-sst data interpolated to buoy coordinates
and re-gridded into 1 or 2 degree grid cells
Returns
--------
dt: data frame with the time series of spatially averaged sst differences between the two data sets
and some general statistics of those differences
"""
# Calculate sst differences between data sets cell by cell
diff_ts = sst_at_buoy.analysed_sst - sst_coarse.analysed_sst
# Calculate a spatial average of the differences for the entire data set,
# weighted by grid cell area via the cosine of latitude
weights = np.cos(np.deg2rad(diff_ts.lat))
weights.name = 'weights'
dt_w = diff_ts.weighted(weights).mean(('lon', 'lat')).load()
dt_w.name = 'weighted_analysed_sst_diff_mean'
dt_w = dt_w.to_dataframe()
std = diff_ts.std(dim=('lon', 'lat')).load()
std.name = 'analysed_sst_diff_std'
std = std.to_dataframe()
median = diff_ts.median(dim=('lon', 'lat')).load()
median.name = 'analysed_sst_diff_median'
median = median.to_dataframe()
dt = diff_ts.mean(('lon', 'lat')).load()
dt = dt.to_dataframe()
dt['weighted_analysed_sst_diff_mean'] = dt_w['weighted_analysed_sst_diff_mean'].copy()
dt['analysed_sst_diff_std'] = std['analysed_sst_diff_std'].copy()
dt['analysed_sst_diff_median'] = median['analysed_sst_diff_median'].copy()
return dt
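# Aside (toy numbers, not part of the workflow): the cos(lat) weighting above is a
# proxy for grid cell area, so high-latitude cells contribute less to the spatial mean.
#   lat = xr.DataArray([0., 60.], dims='lat', name='lat')
#   da = xr.DataArray([[1., 3.]], coords={'lon': [0.], 'lat': lat}, dims=('lon', 'lat'))
#   da.weighted(np.cos(np.deg2rad(lat))).mean(('lon', 'lat'))  # ~1.67, vs 2.0 unweighted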
# TODO: this function is very ugly!! needs re-thinking
def calculate_spatial_avg_of_stats(sst_at_buoy):
"""
Calculates spatial means of the bin statistics from the interpolated data set:
the std and median of sst inside each grid cell, and the buoy count.
Parameters
----------
sst_at_buoy: cci-sst data interpolated to buoy coordinates and re-gridded
into 1 or 2 degree grid cells; this contains the bin statistics (std and median
of the sst values inside each grid cell, and the buoy count)
Returns
--------
out: data frame with time series of the mean value of each bin statistic (std, median, buoy_count)
"""
# weighted by latitude grid cell area
weights = np.cos(np.deg2rad(sst_at_buoy.lat))
weights.name = 'weights'
std_w = sst_at_buoy.analysed_sst_std.weighted(weights).mean(('lon', 'lat')).load()
std_w.name = 'weighted_mean_analysed_sst_std'
out = std_w.to_dataframe()
median_w = sst_at_buoy.analysed_sst_median.weighted(weights).mean(('lon', 'lat')).load()
median_w.name = 'weighted_mean_analysed_sst_median'
median_w = median_w.to_dataframe()
out['weighted_mean_analysed_sst_median'] = median_w['weighted_mean_analysed_sst_median'].copy()
count_w = sst_at_buoy.buoy_count.weighted(weights).mean(('lon', 'lat')).load()
count_w.name = 'weighted_mean_buoy_count'
count_w = count_w.to_dataframe()
out['weighted_mean_buoy_count'] = count_w['weighted_mean_buoy_count'].copy()
std = sst_at_buoy.analysed_sst_std.mean(('lon', 'lat')).load()
std.name = 'mean_analysed_sst_std'
std = std.to_dataframe()
out['mean_analysed_sst_std'] = std['mean_analysed_sst_std'].copy()
median = sst_at_buoy.analysed_sst_median.mean(('lon', 'lat')).load()
median.name = 'mean_analysed_sst_median'
median = median.to_dataframe()
out['mean_analysed_sst_median'] = median['mean_analysed_sst_median'].copy()
count = sst_at_buoy.buoy_count.mean(('lon', 'lat')).load()
count.name = 'mean_buoy_count'
count = count.to_dataframe()
out['mean_buoy_count'] = count['mean_buoy_count'].copy()
return out
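# One possible tidier shape for the TODO above (a sketch only, producing the same
# column names, assuming the three variables are present in sst_at_buoy):
#   stats = sst_at_buoy[['analysed_sst_std', 'analysed_sst_median', 'buoy_count']]
#   w = stats.weighted(weights).mean(('lon', 'lat'))
#   w = w.rename({v: 'weighted_mean_' + v for v in w.data_vars})
#   p = stats.mean(('lon', 'lat'))
#   p = p.rename({v: 'mean_' + v for v in p.data_vars})
#   out = xr.merge([w, p]).load().to_dataframe()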
def calculate_total_buoy_density(buoy_count):
"""
Calculates the total buoy density over latitude and longitude for each time step.
Parameters
----------
buoy_count: total count per grid cell (summed over the resampling period) of
cci-sst data points interpolated to buoy coordinates
Returns
--------
out: data frame with time series of the total count (plain and cos(lat)-weighted sums)
of cci-sst data points interpolated to buoy coordinates
"""
weights = np.cos(np.deg2rad(buoy_count.lat))
weights.name = 'weights'
count_w = buoy_count.weighted(weights).sum(('lon', 'lat')).load()
count_w.name = 'weighted_sum_buoy_count'
out = count_w.to_dataframe()
count = buoy_count.sum(('lon', 'lat')).load()
count.name = 'sum_buoy_count'
count = count.to_dataframe()
out['sum_buoy_count'] = count['sum_buoy_count'].copy()
return out
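# Usage sketch (mirrors the time series script above):
#   count_m = calculate_total_buoy_density(sst_at_buoy.buoy_count.resample(time='1M').sum())
#   result_m = pd.concat([diff_m, stats_m, count_m], axis=1)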