Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
brivas
ORCHESTRA-SST
Commits
25291389
Commit
25291389
authored
4 years ago
by
Beatriz Recinos
Browse files
Options
Download
Email Patches
Plain Diff
changes to time series avg
parent
6842de0c
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
868 additions
and
68 deletions
+868
-68
notebooks/computing_time_series.ipynb
notebooks/computing_time_series.ipynb
+730
-0
scripts/sst_differences.py
scripts/sst_differences.py
+60
-28
sst_tools/workflow_sst.py
sst_tools/workflow_sst.py
+78
-40
No files found.
notebooks/computing_time_series.ipynb
0 → 100644
View file @
25291389
This source diff could not be displayed because it is too large. You can
view the blob
instead.
This diff is collapsed.
Click to expand it.
scripts/sst_differences.py
View file @
25291389
...
...
@@ -5,6 +5,8 @@ import time
import
sys
from
configobj
import
ConfigObj
import
pandas
as
pd
import
numpy
as
np
import
xarray
as
xr
import
warnings
warnings
.
filterwarnings
(
'ignore'
)
...
...
@@ -13,12 +15,16 @@ sys.path.append(config['repo_path'])
from
sst_tools
import
utils_sst
as
utils
from
sst_tools
import
workflow_sst
as
workflow
from
sst_tools.workflow_sst
import
non_zero_data
from
sst_tools.workflow_sst
import
sum_buoy_density
# Time
start
=
time
.
time
()
process
=
psutil
.
Process
(
os
.
getpid
())
res
=
2
# Input specify resolution in space and time
res
=
1
alias
=
'1M'
if
res
==
1
:
path_coarse
=
config
[
'satellite_sst_1x1'
]
...
...
@@ -36,6 +42,18 @@ else:
sst_coarse
=
utils
.
open_data_and_crop
(
path_coarse
,
lat_s
=-
90
,
lat_n
=-
25
)
sst_at_buoy
=
utils
.
open_data_and_crop
(
path_buoy
,
lat_s
=-
90
,
lat_n
=-
25
)
# Adding sst standard error to the sst_at_buoy data set
sst_at_buoy
[
'analysed_sst_se'
]
=
sst_at_buoy
.
analysed_sst_std
/
np
.
sqrt
(
sst_at_buoy
.
buoy_count
)
# Count how many grid-cells we have with data on the sst_at_buoy data set
grid_boxes_with_data
=
xr
.
apply_ufunc
(
non_zero_data
,
sst_at_buoy
.
analysed_sst
,
input_core_dims
=
[[
"lat"
,
"lon"
]],
dask
=
'allowed'
,
vectorize
=
True
)
grid_boxes_with_data
.
name
=
'No_of_grid_cells_with_data'
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
print
(
"Done reading everything: %02d:%02d:%02d"
%
(
h
,
m
,
s
))
...
...
@@ -45,29 +63,47 @@ print(process.memory_info().rss)
# time resolution depending on the alias given
# Monthly means: each time stamp correspond to the last day of the month
sst_coarse_m
=
sst_coarse
.
resample
(
time
=
'1M'
).
mean
()
sst_at_buoy_m
=
sst_at_buoy
.
resample
(
time
=
'1M'
).
mean
()
sst_at_buoy_m_buoy_density
=
sst_at_buoy
.
buoy_count
.
resample
(
time
=
'1M'
).
sum
()
sst_coarse_re
=
sst_coarse
.
resample
(
time
=
alias
).
mean
()
# For sst_at_buoy we need to calculate weighted time moving averages
# We weight each variable by the number of buoy points (buoy_density) on each grid cell
sst_at_buoy_sst
=
workflow
.
calculate_time_moving_weighted_average
(
sst_at_buoy
.
analysed_sst
,
sst_at_buoy
.
buoy_count
,
time_stamp
=
alias
,
var_name
=
'analysed_sst'
)
sst_at_buoy_std
=
workflow
.
calculate_time_moving_weighted_average
(
sst_at_buoy
.
analysed_sst_std
,
sst_at_buoy
.
buoy_count
,
time_stamp
=
alias
,
var_name
=
'analysed_sst_std'
)
sst_at_buoy_median
=
workflow
.
calculate_time_moving_weighted_average
(
sst_at_buoy
.
analysed_sst_median
,
sst_at_buoy
.
buoy_count
,
time_stamp
=
alias
,
var_name
=
'analysed_sst_median'
)
# Yearly means: each time stamp correspond to the last month of the year
sst_coarse_y
=
sst_coarse
.
resample
(
time
=
'1Y'
).
mean
()
sst_at_buoy_y
=
sst_at_buoy
.
resample
(
time
=
'1Y'
).
mean
()
sst_at_buoy_y_buoy_density
=
sst_at_buoy
.
buoy_count
.
resample
(
time
=
'1Y'
).
sum
(
)
sst_at_buoy_se
=
workflow
.
calculate_time_moving_weighted_average
(
sst_at_buoy
.
analysed_sst_se
,
sst_at_buoy
.
buoy_count
,
time_stamp
=
alias
,
var_name
=
'analysed_sst_se'
)
# Seasonal means: every 4 months year ends in JAN.
sst_coarse_s
=
sst_coarse
.
resample
(
time
=
'QS-JAN'
).
mean
()
sst_at_buoy_s
=
sst_at_buoy
.
resample
(
time
=
'QS-JAN'
).
mean
()
sst_at_buoy_s_buoy_density
=
sst_at_buoy
.
buoy_count
.
resample
(
time
=
'QS-JAN'
).
sum
()
# Last variable makes no sense to weight it by itself!
sst_at_buoy_buoy_count_mean
=
sst_at_buoy
.
buoy_count
.
resample
(
time
=
alias
).
mean
()
# We also calculate the total sum of buoy points for each month
sst_at_buoy_buoy_count_sum
=
sst_at_buoy
.
buoy_count
.
resample
(
time
=
alias
).
sum
()
# The number of gridboxes with data for every month.
grid_boxes_with_data_re
=
grid_boxes_with_data
.
resample
(
time
=
alias
).
sum
()
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
print
(
"Done resampling: %02d:%02d:%02d"
%
(
h
,
m
,
s
))
print
(
process
.
memory_info
().
rss
)
# Calculate cell by cell differences of analysed_sst and time series statistics
diff_m
=
workflow
.
calculate_time_series_diff
(
sst_coarse_m
,
sst_at_buoy_m
)
diff_y
=
workflow
.
calculate_time_series_diff
(
sst_coarse_y
,
sst_at_buoy_y
)
diff_s
=
workflow
.
calculate_time_series_diff
(
sst_coarse_s
,
sst_at_buoy_s
)
diff_mean
=
workflow
.
calculate_time_series_diff
(
sst_coarse_re
,
sst_at_buoy_sst
,
grid_boxes_with_data_re
)
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
...
...
@@ -75,9 +111,12 @@ print("Done calculating time series diff: %02d:%02d:%02d" % (h, m, s))
print
(
process
.
memory_info
().
rss
)
# Calculate S.O means of bin statistics
stats_m
=
workflow
.
calculate_spatial_avg_of_stats
(
sst_at_buoy_m
)
stats_y
=
workflow
.
calculate_spatial_avg_of_stats
(
sst_at_buoy_y
)
stats_s
=
workflow
.
calculate_spatial_avg_of_stats
(
sst_at_buoy_s
)
bin_std_mean
=
workflow
.
calculate_spatial_avg_of_stats
(
sst_at_buoy_std
,
var_name
=
'analysed_sst_std'
)
bin_median_mean
=
workflow
.
calculate_spatial_avg_of_stats
(
sst_at_buoy_median
,
var_name
=
'analysed_sst_median'
)
bin_se_mean
=
workflow
.
calculate_spatial_avg_of_stats
(
sst_at_buoy_se
,
var_name
=
'analysed_sst_se'
)
bin_buoy_count_mean
=
workflow
.
calculate_spatial_avg_of_stats
(
sst_at_buoy_buoy_count_mean
,
var_name
=
'buoy_count'
)
stats_mean
=
pd
.
concat
([
bin_std_mean
,
bin_median_mean
,
bin_se_mean
,
bin_buoy_count_mean
],
axis
=
1
)
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
...
...
@@ -85,22 +124,15 @@ print("Done stats: %02d:%02d:%02d" % (h, m, s))
print
(
process
.
memory_info
().
rss
)
# Estimate total buoy count
count_m
=
workflow
.
calculate_total_buoy_density
(
sst_at_buoy_m_buoy_density
)
count_y
=
workflow
.
calculate_total_buoy_density
(
sst_at_buoy_y_buoy_density
)
count_s
=
workflow
.
calculate_total_buoy_density
(
sst_at_buoy_s_buoy_density
)
count_sum
=
workflow
.
calculate_total_buoy_density
(
sst_at_buoy_buoy_count_sum
)
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
print
(
"Done count: %02d:%02d:%02d"
%
(
h
,
m
,
s
))
print
(
process
.
memory_info
().
rss
)
result_m
=
pd
.
concat
([
diff_m
,
stats_m
,
count_m
],
axis
=
1
)
result_y
=
pd
.
concat
([
diff_y
,
stats_y
,
count_y
],
axis
=
1
)
result_s
=
pd
.
concat
([
diff_s
,
stats_s
,
count_s
],
axis
=
1
)
result_m
.
to_csv
(
os
.
path
.
join
(
config
[
'time_series'
],
'monthly_'
+
str
(
res
)
+
'x'
+
str
(
res
)
+
'.csv'
))
result_y
.
to_csv
(
os
.
path
.
join
(
config
[
'time_series'
],
'yearly_'
+
str
(
res
)
+
'x'
+
str
(
res
)
+
'.csv'
))
result_s
.
to_csv
(
os
.
path
.
join
(
config
[
'time_series'
],
'seasonal_'
+
str
(
res
)
+
'x'
+
str
(
res
)
+
'.csv'
))
result
=
pd
.
concat
([
diff_mean
,
stats_mean
,
count_sum
],
axis
=
1
)
result
.
to_csv
(
os
.
path
.
join
(
config
[
'time_series'
],
alias
+
'_'
+
str
(
res
)
+
'x'
+
str
(
res
)
+
'.csv'
))
# Log
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
...
...
This diff is collapsed.
Click to expand it.
sst_tools/workflow_sst.py
View file @
25291389
...
...
@@ -292,25 +292,31 @@ def grid_buoy_data(x, y, z, resolution, data):
return
buoy
def
calculate_time_series_diff
(
sst_coarse
,
sst_at_buoy
):
def
calculate_time_series_diff
(
sst_coarse
,
sst_at_buoy
,
grid_boxes_with_data
):
"""
Calculates spatial means of SST differences between two re-gridded
data sets of ESA-cci-sst at a given resolution.
Parameters
----------
sst_coarse: coarse cci-sst data (1 or 2 degree)
sst_coarse: coarse cci-sst data (1 or 2 degree)
resampled to a time stamp
sst_at_buoy: interpolated cci-sst data to buoy coordinates
and re-gridded into 1 or 2 degree grid cells
and re-gridded (into 1 or 2 degree) resampled to a time stamp
grid_boxes_with_data: Number of grid boxes with data resampled to a time stamp
Returns
--------
dt: data frame with time series differences of sst between the two data sets
and some general statistics of the differences in sst
dt: data frame with time series of mean sst differences between the two data sets (buoy - coarse)
across the southern ocean and some general statistics.
dt variables:
- mean of sst diff (weighted by the grid cell area and unweighted)
- std of the sst diff
- median value of sst diff
- standard error of the sst diff: std / sqrt(Total No. of grid boxes with data for each time stamp)
"""
# Calculate sst differences between data sets cell by cell
diff_ts
=
sst_at_buoy
.
analysed_sst
-
sst_coarse
.
analysed_sst
diff_ts
=
sst_at_buoy
-
sst_coarse
.
analysed_sst
# Calculate a spatial average of the differences for the entire data set
# also weighted by latitude grid cell area
...
...
@@ -332,61 +338,45 @@ def calculate_time_series_diff(sst_coarse, sst_at_buoy):
dt
=
diff_ts
.
mean
((
'lon'
,
'lat'
)).
load
()
dt
=
dt
.
to_dataframe
()
se
=
diff_ts
.
std
(
dim
=
(
'lon'
,
'lat'
)).
load
()
/
np
.
sqrt
(
grid_boxes_with_data
.
load
())
se
.
name
=
'analysed_sst_diff_se'
se
=
se
.
to_dataframe
()
dt
[
'weighted_analysed_sst_diff_mean'
]
=
dt_w
[
'weighted_analysed_sst_diff_mean'
].
copy
()
dt
[
'analysed_sst_diff_std'
]
=
std
[
'analysed_sst_diff_std'
].
copy
()
dt
[
'analysed_sst_diff_median'
]
=
median
[
'analysed_sst_diff_median'
].
copy
()
dt
[
'analysed_sst_diff_se'
]
=
se
[
'analysed_sst_diff_se'
].
copy
()
return
dt
# TODO: this function is very ugly!! needs re-thinking
def
calculate_spatial_avg_of_stats
(
sst_at_buoy
):
def
calculate_spatial_avg_of_stats
(
sst_at_buoy_var
,
var_name
):
"""
Calculates spatial means of bins statistics these are statistics from the interpolated data set:
std and median of sst inside a grid cell and buoy count averages.
Parameters
----------
sst_at_buoy
: interpolated cci-sst data to buoy coordinates and re-gridded
into 1 or 2 degree grid cells, this contains bin statistics (std, median of
sst values inside
each grid cell and buoy count)
sst_at_buoy
_var: sst_at_buoy bin statistics resampled in a time stamp
(std, median and se of binned
sst values inside
each grid cell)
var_name: variable name
Returns
--------
out: data frame with time series of mean values for each bin statistics variable (std, median, buoy_count)
out: data frame with time series of mean values for each bin statistics variable (std, median,
se,
buoy_count)
"""
# weighted by latitude grid cell area
weights
=
np
.
cos
(
np
.
deg2rad
(
sst_at_buoy
.
lat
))
weights
=
np
.
cos
(
np
.
deg2rad
(
sst_at_buoy
_var
.
lat
))
weights
.
name
=
'weights'
std_w
=
sst_at_buoy
.
analysed_sst_std
.
weighted
(
weights
).
mean
((
'lon'
,
'lat'
)).
load
()
std_w
.
name
=
'weighted_mean_analysed_sst_std'
out
=
std_w
.
to_dataframe
()
median_w
=
sst_at_buoy
.
analysed_sst_median
.
weighted
(
weights
).
mean
((
'lon'
,
'lat'
)).
load
()
median_w
.
name
=
'weighted_mean_analysed_sst_median'
median_w
=
median_w
.
to_dataframe
()
out
[
'weighted_mean_analysed_sst_median'
]
=
median_w
[
'weighted_mean_analysed_sst_median'
].
copy
()
count_w
=
sst_at_buoy
.
buoy_count
.
weighted
(
weights
).
mean
((
'lon'
,
'lat'
)).
load
()
count_w
.
name
=
'weighted_mean_buoy_count'
count_w
=
count_w
.
to_dataframe
()
out
[
'weighted_mean_buoy_count'
]
=
count_w
[
'weighted_mean_buoy_count'
].
copy
()
std
=
sst_at_buoy
.
analysed_sst_std
.
mean
((
'lon'
,
'lat'
)).
load
()
std
.
name
=
'mean_analysed_sst_std'
std
=
std
.
to_dataframe
()
out
[
'mean_analysed_sst_std'
]
=
std
[
'mean_analysed_sst_std'
].
copy
()
median
=
sst_at_buoy
.
analysed_sst_median
.
mean
((
'lon'
,
'lat'
)).
load
()
median
.
name
=
'mean_analysed_sst_median'
median
=
median
.
to_dataframe
()
out
[
'mean_analysed_sst_median'
]
=
median
[
'mean_analysed_sst_median'
].
copy
()
var_w
=
sst_at_buoy_var
.
weighted
(
weights
).
mean
((
'lon'
,
'lat'
)).
load
()
var_w
.
name
=
var_name
+
'_weighted'
out
=
var_w
.
to_dataframe
()
count
=
sst_at_buoy
.
buoy_count
.
mean
((
'lon'
,
'lat'
)).
load
()
count
.
name
=
'mean_buoy_count
'
count
=
count
.
to_dataframe
()
out
[
'mean_buoy_count'
]
=
count
[
'mean_buoy_count
'
].
copy
()
var_m
=
sst_at_buoy
_var
.
mean
((
'lon'
,
'lat'
)).
load
()
var_m
.
name
=
var_name
+
'_unweighted
'
var_m
=
var_m
.
to_dataframe
()
out
[
var_name
+
'_unweighted'
]
=
var_m
[
var_name
+
'_unweighted
'
].
copy
()
return
out
...
...
@@ -418,3 +408,51 @@ def calculate_total_buoy_density(buoy_count):
out
[
'sum_buoy_count'
]
=
count
[
'sum_buoy_count'
].
copy
()
return
out
def
non_zero_data
(
array
):
co
=
np
.
count_nonzero
(
~
np
.
isnan
(
array
))
return
co
def
sum_buoy_density
(
array
):
rho
=
array
.
sum
()
return
rho
def
calculate_time_moving_weighted_average
(
var_to_avg
,
weights
,
time_stamp
,
var_name
):
"""
Calculates time moving weighted average for an xarray variable
Parameters
__________
var_to_avg: xarray.Variable to average
weights: xarray.Variable for weights
time_stamp: alias to resample the data: e.g. 1M for monthly average
var_name: named variable after calculating the weighted avg
Returns
_______
weighted_avg: xarray.Variable averaged
"""
# Calculates the sum of all weights on each time stamp
weights_sum
=
weights
.
resample
(
time
=
time_stamp
).
sum
()
# Multiply its variable for its own weight
multp
=
var_to_avg
*
weights
# Sum the multiply values on each time stamp
multp_sum
=
multp
.
resample
(
time
=
time_stamp
).
sum
()
# Weighted avg
weighted_avg
=
multp_sum
/
weights_sum
weighted_avg
.
name
=
var_name
return
weighted_avg
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment