Commit 6842de0c authored by Beatriz Recinos

added time series script

parent b6be34c5
@@ -15,3 +15,4 @@ satellite_sst_buoy_1x1 = '/gws/nopw/j04/orchestra/ESAcci_sst_v2.1_coarse/v2.1_at
satellite_sst_buoy_2x2 = '/gws/nopw/j04/orchestra/ESAcci_sst_v2.1_coarse/v2.1_atbuoy_2x2/'
time_series = '/gws/nopw/j04/orchestra/ESAcci_sst_v2.1_coarse/time_series_ESAcci_sst/'
# TO RUN LOCALLY
import os
import psutil
import time
import sys
from configobj import ConfigObj
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
config = ConfigObj("../config.ini")
sys.path.append(config['repo_path'])
from sst_tools import utils_sst as utils
from sst_tools import workflow_sst as workflow
# Track run time and memory usage of the script
start = time.time()
process = psutil.Process(os.getpid())
res = 2
if res == 1:
path_coarse = config['satellite_sst_1x1']
path_buoy = config['satellite_sst_buoy_1x1']
elif res == 2:
path_coarse = config['satellite_sst_2x2']
path_buoy = config['satellite_sst_buoy_2x2']
else:
    sys.exit('Error: please specify a resolution of 1 or 2')
# Open data
sst_coarse = utils.open_data_and_crop(path_coarse, lat_s=-90, lat_n=-25)
sst_at_buoy = utils.open_data_and_crop(path_buoy, lat_s=-90, lat_n=-25)
m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Done reading everything: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
# Calculate time-stamp means by resampling the data to a different
# time resolution depending on the alias given
# Monthly means: each time stamp corresponds to the last day of the month
sst_coarse_m = sst_coarse.resample(time='1M').mean()
sst_at_buoy_m = sst_at_buoy.resample(time='1M').mean()
sst_at_buoy_m_buoy_density = sst_at_buoy.buoy_count.resample(time='1M').sum()
# Yearly means: each time stamp corresponds to the end of the year (31 Dec)
sst_coarse_y = sst_coarse.resample(time='1Y').mean()
sst_at_buoy_y = sst_at_buoy.resample(time='1Y').mean()
sst_at_buoy_y_buoy_density = sst_at_buoy.buoy_count.resample(time='1Y').sum()
# Seasonal (quarterly) means: each time stamp corresponds to the start of a quarter (JAN, APR, JUL, OCT)
sst_coarse_s = sst_coarse.resample(time='QS-JAN').mean()
sst_at_buoy_s = sst_at_buoy.resample(time='QS-JAN').mean()
sst_at_buoy_s_buoy_density = sst_at_buoy.buoy_count.resample(time='QS-JAN').sum()
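# Aside (a minimal sketch on toy data, not part of the pipeline): the offset aliases
# above follow the pandas conventions, so '1M' labels each mean with the month end,
# '1Y' with the year end, and 'QS-JAN' starts quarters in Jan, Apr, Jul and Oct.
import numpy as np
import xarray as xr
_toy = xr.DataArray(np.arange(365.),
                    coords={'time': pd.date_range('2010-01-01', periods=365)},
                    dims='time')
print(_toy.resample(time='QS-JAN').mean().time.values)  # 2010-01-01, -04-01, -07-01, -10-01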
m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Done resampling: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
# Calculate cell by cell differences of analysed_sst and time series statistics
diff_m = workflow.calculate_time_series_diff(sst_coarse_m, sst_at_buoy_m)
diff_y = workflow.calculate_time_series_diff(sst_coarse_y, sst_at_buoy_y)
diff_s = workflow.calculate_time_series_diff(sst_coarse_s, sst_at_buoy_s)
m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Done calculating time series diff: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
# Calculate Southern Ocean (S.O.) means of bin statistics
stats_m = workflow.calculate_spatial_avg_of_stats(sst_at_buoy_m)
stats_y = workflow.calculate_spatial_avg_of_stats(sst_at_buoy_y)
stats_s = workflow.calculate_spatial_avg_of_stats(sst_at_buoy_s)
m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Done stats: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
# Estimate total buoy count
count_m = workflow.calculate_total_buoy_density(sst_at_buoy_m_buoy_density)
count_y = workflow.calculate_total_buoy_density(sst_at_buoy_y_buoy_density)
count_s = workflow.calculate_total_buoy_density(sst_at_buoy_s_buoy_density)
m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Done count: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
result_m = pd.concat([diff_m, stats_m, count_m], axis=1)
result_y = pd.concat([diff_y, stats_y, count_y], axis=1)
result_s = pd.concat([diff_s, stats_s, count_s], axis=1)
result_m.to_csv(os.path.join(config['time_series'], 'monthly_' + str(res)+'x'+str(res) + '.csv'))
result_y.to_csv(os.path.join(config['time_series'], 'yearly_' + str(res)+'x'+str(res) + '.csv'))
result_s.to_csv(os.path.join(config['time_series'], 'seasonal_' + str(res)+'x'+str(res) + '.csv'))
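# Sanity check (a sketch, assuming pandas' default to_csv formatting): the saved
# files are indexed by the resampled time stamps and can be read straight back.
check = pd.read_csv(os.path.join(config['time_series'],
                                 'monthly_' + str(res) + 'x' + str(res) + '.csv'),
                    index_col='time', parse_dates=True)
print(check.columns.tolist())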
# Log
m, s = divmod(time.time() - start, 60)
h, m = divmod(m, 60)
print("Saved everything: %02d:%02d:%02d" % (h, m, s))
print(process.memory_info().rss)
from sst_tools.utils_sst import *
from sst_tools.plot_sst import *
from sst_tools.workflow_sst import *
@@ -75,7 +75,6 @@ def plot_sst_on_salem_map(ds, t, lon_xinterval=0.5, lat_yinterval=0.5):
sm = ds.isel(time=t).salem.get_map(countries=True)
sm.set_data(ds.analysed_sst.isel(time=t))
sm.set_lonlat_contours(xinterval=lon_xinterval, yinterval=lat_yinterval)
sm.set_cmap('RdBu_r')
return sm
@@ -99,13 +98,12 @@ def plot_stats_on_salem_map(ds, var, t, lon_xinterval=0.5, lat_yinterval=0.5):
if var == 'buoy_count':
cmap = 'Purples'
elif var == 'analysed_sst_std':
cmap = 'Greys'
else:
cmap = 'RdBu_r'
sm = ds.isel(time=t).salem.get_map(countries=True)
sm.set_data(ds[var].isel(time=t))
sm.set_lonlat_contours(xinterval=lon_xinterval, yinterval=lat_yinterval)
sm.set_cmap(cmap)
return sm
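# Usage sketch (hypothetical call, assuming salem's Map API): the returned map
# object can then be rendered, e.g.
#   sm = plot_stats_on_salem_map(sst_at_buoy_m, 'buoy_count', t=0)
#   sm.visualize()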
@@ -4,6 +4,7 @@ import re
import logging
import pyreadr
import pandas as pd
import xarray as xr
# Module logger
log = logging.getLogger(__name__)
@@ -99,6 +100,30 @@ def find_nc_files_specific_days(main_path, dates):
return files_sst, files_anomaly
def find_all_coarse_daily_files(main_path):
""" Finds all coarse cci .nc files for a
given resolution or re-griding method and
returns a list of paths order by date.
Parameters
----------
main_path: Folder for the re-gridded data configuration
(e.g. config['satellite_sst_1x1'] see config file)
Returns
-------
sst_paths
"""
sst_paths = []
for root, dirs, files in os.walk(main_path):
    # Sort sub-directories and file names in place so the walk returns
    # the date-stamped files in chronological order
    dirs.sort()
    files.sort()
    for file in files:
        sst_paths.append(os.path.join(root, file))
return sst_paths
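# Usage sketch (hypothetical, the path comes from config.ini):
#   paths = find_all_coarse_daily_files(config['satellite_sst_2x2'])
# The chronological ordering relies on the daily file names embedding the date,
# so that a lexical sort matches a sort by date.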
def find_drifter_files(main_path, months, year):
""" Finds all .Rda drifters files for a list of months and years
in the MFILES_DRIF sub directories.
@@ -176,3 +201,25 @@ def read_rda_as_pandas(path_to_file, keep_globe=False):
return df.reset_index(drop=True)
def open_data_and_crop(path_to_data, lat_s, lat_n):
"""
Opens all netcdf files in a path and crops the data to a latitude band.
Parameters
----------
path_to_data: path to a folder of netcdf files
lat_s: southern latitude bound
lat_n: northern latitude bound
Returns
-------
data_cropped: dask-backed dataset of the entire data set cropped
to the given latitude band
"""
# Reading data set
file_paths = find_all_coarse_daily_files(path_to_data)
data_cropped = xr.open_mfdataset(file_paths).sel(lat=slice(lat_s, lat_n))
return data_cropped
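# Usage sketch (bounds as used in the time series script): crop the coarse data
# to the Southern Ocean only.
#   so_sst = open_data_and_crop(config['satellite_sst_2x2'], lat_s=-90, lat_n=-25)
# open_mfdataset returns a lazily loaded (dask-backed) dataset, and the slice
# assumes the lat coordinate is stored in ascending order.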
@@ -5,6 +5,7 @@ import xarray as xr
from scipy.stats import binned_statistic_2d
from datetime import datetime
from collections import defaultdict
# Module logger
log = logging.getLogger(__name__)
@@ -290,3 +291,130 @@ def grid_buoy_data(x, y, z, resolution, data):
return buoy
def calculate_time_series_diff(sst_coarse, sst_at_buoy):
"""
Calculates spatial means of SST differences between two re-gridded
data sets of ESA-cci-sst at a given resolution.
Parameters
----------
sst_coarse: coarse cci-sst data (1 or 2 degree)
sst_at_buoy: cci-sst data interpolated to buoy coordinates
and re-gridded into 1 or 2 degree grid cells
Returns
--------
dt: data frame with the time series of spatially averaged sst differences between the two data sets
and some general statistics of those differences
"""
# Calculate sst differences between data sets cell by cell
diff_ts = sst_at_buoy.analysed_sst - sst_coarse.analysed_sst
# Calculate a spatial average of the differences for the entire data set,
# weighted by grid cell area via the cosine of latitude
weights = np.cos(np.deg2rad(diff_ts.lat))
weights.name = 'weights'
dt_w = diff_ts.weighted(weights).mean(('lon', 'lat')).load()
dt_w.name = 'weighted_analysed_sst_diff_mean'
dt_w = dt_w.to_dataframe()
std = diff_ts.std(dim=('lon', 'lat')).load()
std.name = 'analysed_sst_diff_std'
std = std.to_dataframe()
median = diff_ts.median(dim=('lon', 'lat')).load()
median.name = 'analysed_sst_diff_median'
median = median.to_dataframe()
dt = diff_ts.mean(('lon', 'lat')).load()
dt = dt.to_dataframe()
dt['weighted_analysed_sst_diff_mean'] = dt_w['weighted_analysed_sst_diff_mean'].copy()
dt['analysed_sst_diff_std'] = std['analysed_sst_diff_std'].copy()
dt['analysed_sst_diff_median'] = median['analysed_sst_diff_median'].copy()
return dt
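# Aside (toy numbers, not part of the workflow): the cos(lat) weighting above is a
# proxy for grid cell area, so high-latitude cells contribute less to the spatial mean.
#   lat = xr.DataArray([0., 60.], dims='lat', name='lat')
#   da = xr.DataArray([[1., 3.]], coords={'lon': [0.], 'lat': lat}, dims=('lon', 'lat'))
#   da.weighted(np.cos(np.deg2rad(lat))).mean(('lon', 'lat'))  # ~1.67, vs 2.0 unweighted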
# TODO: this function is very ugly!! needs re-thinking
def calculate_spatial_avg_of_stats(sst_at_buoy):
"""
Calculates spatial means of the bin statistics from the interpolated data set:
the std and median of sst inside each grid cell, and the buoy count.
Parameters
----------
sst_at_buoy: cci-sst data interpolated to buoy coordinates and re-gridded
into 1 or 2 degree grid cells; this contains the bin statistics (std and median
of the sst values inside each grid cell, and the buoy count)
Returns
--------
out: data frame with time series of the mean value of each bin statistic (std, median, buoy_count)
"""
# weighted by latitude grid cell area
weights = np.cos(np.deg2rad(sst_at_buoy.lat))
weights.name = 'weights'
std_w = sst_at_buoy.analysed_sst_std.weighted(weights).mean(('lon', 'lat')).load()
std_w.name = 'weighted_mean_analysed_sst_std'
out = std_w.to_dataframe()
median_w = sst_at_buoy.analysed_sst_median.weighted(weights).mean(('lon', 'lat')).load()
median_w.name = 'weighted_mean_analysed_sst_median'
median_w = median_w.to_dataframe()
out['weighted_mean_analysed_sst_median'] = median_w['weighted_mean_analysed_sst_median'].copy()
count_w = sst_at_buoy.buoy_count.weighted(weights).mean(('lon', 'lat')).load()
count_w.name = 'weighted_mean_buoy_count'
count_w = count_w.to_dataframe()
out['weighted_mean_buoy_count'] = count_w['weighted_mean_buoy_count'].copy()
std = sst_at_buoy.analysed_sst_std.mean(('lon', 'lat')).load()
std.name = 'mean_analysed_sst_std'
std = std.to_dataframe()
out['mean_analysed_sst_std'] = std['mean_analysed_sst_std'].copy()
median = sst_at_buoy.analysed_sst_median.mean(('lon', 'lat')).load()
median.name = 'mean_analysed_sst_median'
median = median.to_dataframe()
out['mean_analysed_sst_median'] = median['mean_analysed_sst_median'].copy()
count = sst_at_buoy.buoy_count.mean(('lon', 'lat')).load()
count.name = 'mean_buoy_count'
count = count.to_dataframe()
out['mean_buoy_count'] = count['mean_buoy_count'].copy()
return out
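# One possible tidier shape for the TODO above (a sketch only, producing the same
# column names, assuming the three variables are present in sst_at_buoy):
#   stats = sst_at_buoy[['analysed_sst_std', 'analysed_sst_median', 'buoy_count']]
#   w = stats.weighted(weights).mean(('lon', 'lat'))
#   w = w.rename({v: 'weighted_mean_' + v for v in w.data_vars})
#   p = stats.mean(('lon', 'lat'))
#   p = p.rename({v: 'mean_' + v for v in p.data_vars})
#   out = xr.merge([w, p]).load().to_dataframe()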
def calculate_total_buoy_density(buoy_count):
"""
Calculates the total buoy density over latitude and longitude for each time step.
Parameters
----------
buoy_count: total count per grid cell (summed over the resampling period) of
cci-sst data points interpolated to buoy coordinates
Returns
--------
out: data frame with time series of the total count (plain and cos(lat)-weighted sums)
of cci-sst data points interpolated to buoy coordinates
"""
weights = np.cos(np.deg2rad(buoy_count.lat))
weights.name = 'weights'
count_w = buoy_count.weighted(weights).sum(('lon', 'lat')).load()
count_w.name = 'weighted_sum_buoy_count'
out = count_w.to_dataframe()
count = buoy_count.sum(('lon', 'lat')).load()
count.name = 'sum_buoy_count'
count = count.to_dataframe()
out['sum_buoy_count'] = count['sum_buoy_count'].copy()
return out
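# Usage sketch (mirrors the time series script above):
#   count_m = calculate_total_buoy_density(sst_at_buoy.buoy_count.resample(time='1M').sum())
#   result_m = pd.concat([diff_m, stats_m, count_m], axis=1)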