Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
brivas
ORCHESTRA-SST
Commits
6842de0c
Commit
6842de0c
authored
4 years ago
by
Beatriz Recinos
Browse files
Options
Download
Email Patches
Plain Diff
added time series script
parent
b6be34c5
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
289 additions
and
3 deletions
+289
-3
config.ini
config.ini
+1
-0
scripts/sst_differences.py
scripts/sst_differences.py
+109
-0
sst_tools/__init__.py
sst_tools/__init__.py
+3
-0
sst_tools/plot_sst.py
sst_tools/plot_sst.py
+1
-3
sst_tools/utils_sst.py
sst_tools/utils_sst.py
+47
-0
sst_tools/workflow_sst.py
sst_tools/workflow_sst.py
+128
-0
No files found.
config.ini
View file @
6842de0c
...
...
@@ -15,3 +15,4 @@ satellite_sst_buoy_1x1 = '/gws/nopw/j04/orchestra/ESAcci_sst_v2.1_coarse/v2.1_at
satellite_sst_buoy_2x2
=
'/gws/nopw/j04/orchestra/ESAcci_sst_v2.1_coarse/v2.1_atbuoy_2x2/'
time_series
=
'/gws/nopw/j04/orchestra/ESAcci_sst_v2.1_coarse/time_series_ESAcci_sst/'
This diff is collapsed.
Click to expand it.
scripts/sst_differences.py
0 → 100755
View file @
6842de0c
# TO RUN LOCALLY
import
os
import
psutil
import
time
import
sys
from
configobj
import
ConfigObj
import
pandas
as
pd
import
warnings
warnings
.
filterwarnings
(
'ignore'
)
config
=
ConfigObj
(
"../config.ini"
)
sys
.
path
.
append
(
config
[
'repo_path'
])
from
sst_tools
import
utils_sst
as
utils
from
sst_tools
import
workflow_sst
as
workflow
# Time
start
=
time
.
time
()
process
=
psutil
.
Process
(
os
.
getpid
())
res
=
2
if
res
==
1
:
path_coarse
=
config
[
'satellite_sst_1x1'
]
path_buoy
=
config
[
'satellite_sst_buoy_1x1'
]
elif
res
==
2
:
path_coarse
=
config
[
'satellite_sst_2x2'
]
path_buoy
=
config
[
'satellite_sst_buoy_2x2'
]
else
:
path_coarse
=
None
path_buoy
=
None
print
(
'Error please specify a resolution'
)
# Open data
sst_coarse
=
utils
.
open_data_and_crop
(
path_coarse
,
lat_s
=-
90
,
lat_n
=-
25
)
sst_at_buoy
=
utils
.
open_data_and_crop
(
path_buoy
,
lat_s
=-
90
,
lat_n
=-
25
)
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
print
(
"Done reading everything: %02d:%02d:%02d"
%
(
h
,
m
,
s
))
print
(
process
.
memory_info
().
rss
)
# Calculate time-stamp means, by resample the data to a different
# time resolution depending on the alias given
# Monthly means: each time stamp correspond to the last day of the month
sst_coarse_m
=
sst_coarse
.
resample
(
time
=
'1M'
).
mean
()
sst_at_buoy_m
=
sst_at_buoy
.
resample
(
time
=
'1M'
).
mean
()
sst_at_buoy_m_buoy_density
=
sst_at_buoy
.
buoy_count
.
resample
(
time
=
'1M'
).
sum
()
# Yearly means: each time stamp correspond to the last month of the year
sst_coarse_y
=
sst_coarse
.
resample
(
time
=
'1Y'
).
mean
()
sst_at_buoy_y
=
sst_at_buoy
.
resample
(
time
=
'1Y'
).
mean
()
sst_at_buoy_y_buoy_density
=
sst_at_buoy
.
buoy_count
.
resample
(
time
=
'1Y'
).
sum
()
# Seasonal means: every 4 months year ends in JAN.
sst_coarse_s
=
sst_coarse
.
resample
(
time
=
'QS-JAN'
).
mean
()
sst_at_buoy_s
=
sst_at_buoy
.
resample
(
time
=
'QS-JAN'
).
mean
()
sst_at_buoy_s_buoy_density
=
sst_at_buoy
.
buoy_count
.
resample
(
time
=
'QS-JAN'
).
sum
()
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
print
(
"Done resampling: %02d:%02d:%02d"
%
(
h
,
m
,
s
))
print
(
process
.
memory_info
().
rss
)
# Calculate cell by cell differences of analysed_sst and time series statistics
diff_m
=
workflow
.
calculate_time_series_diff
(
sst_coarse_m
,
sst_at_buoy_m
)
diff_y
=
workflow
.
calculate_time_series_diff
(
sst_coarse_y
,
sst_at_buoy_y
)
diff_s
=
workflow
.
calculate_time_series_diff
(
sst_coarse_s
,
sst_at_buoy_s
)
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
print
(
"Done calculating time series diff: %02d:%02d:%02d"
%
(
h
,
m
,
s
))
print
(
process
.
memory_info
().
rss
)
# Calculate S.O means of bin statistics
stats_m
=
workflow
.
calculate_spatial_avg_of_stats
(
sst_at_buoy_m
)
stats_y
=
workflow
.
calculate_spatial_avg_of_stats
(
sst_at_buoy_y
)
stats_s
=
workflow
.
calculate_spatial_avg_of_stats
(
sst_at_buoy_s
)
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
print
(
"Done stats: %02d:%02d:%02d"
%
(
h
,
m
,
s
))
print
(
process
.
memory_info
().
rss
)
# Estimate total buoy count
count_m
=
workflow
.
calculate_total_buoy_density
(
sst_at_buoy_m_buoy_density
)
count_y
=
workflow
.
calculate_total_buoy_density
(
sst_at_buoy_y_buoy_density
)
count_s
=
workflow
.
calculate_total_buoy_density
(
sst_at_buoy_s_buoy_density
)
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
print
(
"Done count: %02d:%02d:%02d"
%
(
h
,
m
,
s
))
print
(
process
.
memory_info
().
rss
)
result_m
=
pd
.
concat
([
diff_m
,
stats_m
,
count_m
],
axis
=
1
)
result_y
=
pd
.
concat
([
diff_y
,
stats_y
,
count_y
],
axis
=
1
)
result_s
=
pd
.
concat
([
diff_s
,
stats_s
,
count_s
],
axis
=
1
)
result_m
.
to_csv
(
os
.
path
.
join
(
config
[
'time_series'
],
'monthly_'
+
str
(
res
)
+
'x'
+
str
(
res
)
+
'.csv'
))
result_y
.
to_csv
(
os
.
path
.
join
(
config
[
'time_series'
],
'yearly_'
+
str
(
res
)
+
'x'
+
str
(
res
)
+
'.csv'
))
result_s
.
to_csv
(
os
.
path
.
join
(
config
[
'time_series'
],
'seasonal_'
+
str
(
res
)
+
'x'
+
str
(
res
)
+
'.csv'
))
# Log
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
print
(
"Saved everything: %02d:%02d:%02d"
%
(
h
,
m
,
s
))
print
(
process
.
memory_info
().
rss
)
This diff is collapsed.
Click to expand it.
sst_tools/__init__.py
View file @
6842de0c
from
sst_tools.utils_sst
import
*
from
sst_tools.plot_sst
import
*
from
sst_tools.workflow_sst
import
*
This diff is collapsed.
Click to expand it.
sst_tools/plot_sst.py
View file @
6842de0c
...
...
@@ -75,7 +75,6 @@ def plot_sst_on_salem_map(ds, t, lon_xinterval=0.5, lat_yinterval=0.5):
sm
=
ds
.
isel
(
time
=
t
).
salem
.
get_map
(
countries
=
True
)
sm
.
set_data
(
ds
.
analysed_sst
.
isel
(
time
=
t
))
sm
.
set_data
(
ds
.
analysed_sst
.
isel
(
time
=
t
))
sm
.
set_lonlat_contours
(
xinterval
=
lon_xinterval
,
yinterval
=
lat_yinterval
)
sm
.
set_cmap
(
'RdBu_r'
)
return
sm
...
...
@@ -99,13 +98,12 @@ def plot_stats_on_salem_map(ds, var, t, lon_xinterval=0.5, lat_yinterval=0.5):
if
var
==
'buoy_count'
:
cmap
=
'Purples'
elif
var
==
'analysed_sst_std'
:
cmap
=
'Gre
en
s'
cmap
=
'Gre
y
s'
else
:
cmap
=
'RdBu_r'
sm
=
ds
.
isel
(
time
=
t
).
salem
.
get_map
(
countries
=
True
)
sm
.
set_data
(
ds
[
var
].
isel
(
time
=
t
))
sm
.
set_data
(
ds
[
var
].
isel
(
time
=
t
))
sm
.
set_lonlat_contours
(
xinterval
=
lon_xinterval
,
yinterval
=
lat_yinterval
)
sm
.
set_cmap
(
cmap
)
return
sm
This diff is collapsed.
Click to expand it.
sst_tools/utils_sst.py
View file @
6842de0c
...
...
@@ -4,6 +4,7 @@ import re
import
logging
import
pyreadr
import
pandas
as
pd
import
xarray
as
xr
# Module logger
log
=
logging
.
getLogger
(
__name__
)
...
...
@@ -99,6 +100,30 @@ def find_nc_files_specific_days(main_path, dates):
return
files_sst
,
files_anomaly
def
find_all_coarse_daily_files
(
main_path
):
""" Finds all coarse cci .nc files for a
given resolution or re-griding method and
returns a list of paths order by date.
Parameters
----------
main_path: Folder for the re-gridded data configuration
(e.g. config['satellite_sst_1x1'] see config file)
Returns
-------
sst_paths
"""
sst_paths
=
[]
for
root
,
dirs
,
files
in
os
.
walk
(
main_path
):
files
.
sort
()
for
file
in
files
:
sst_paths
.
append
(
os
.
path
.
join
(
root
,
file
))
return
sst_paths
def
find_drifter_files
(
main_path
,
months
,
year
):
""" Finds all .Rda drifters files for a list of months and years
in the MFILES_DRIF sub directories.
...
...
@@ -176,3 +201,25 @@ def read_rda_as_pandas(path_to_file, keep_globe=False):
return
df
.
reset_index
(
drop
=
True
)
def
open_data_and_crop
(
path_to_data
,
lat_s
,
lat_n
):
"""
Opens all netcdf files in a path and crops the data to a section
of the world only on Latitude!
Parameters
----------
path_to_data: path to several netcdf files
lat_s: latitude southern value
lat_n: latitude northern value
Returns
-------
data_cropped: dask array of the entire data set crop to a section
of the world only on latitude
"""
# Reading data set
file_paths
=
find_all_coarse_daily_files
(
path_to_data
)
data_cropped
=
xr
.
open_mfdataset
(
file_paths
).
sel
(
lat
=
slice
(
lat_s
,
lat_n
))
return
data_cropped
This diff is collapsed.
Click to expand it.
sst_tools/workflow_sst.py
View file @
6842de0c
...
...
@@ -5,6 +5,7 @@ import xarray as xr
from
scipy.stats
import
binned_statistic_2d
from
datetime
import
datetime
from
collections
import
defaultdict
# Module logger
log
=
logging
.
getLogger
(
__name__
)
...
...
@@ -290,3 +291,130 @@ def grid_buoy_data(x, y, z, resolution, data):
return
buoy
def
calculate_time_series_diff
(
sst_coarse
,
sst_at_buoy
):
"""
Calculates spatial means of SST differences between two re-gridded
data sets of ESA-cci-sst at a given resolution.
Parameters
----------
sst_coarse: coarse cci-sst data (1 or 2 degree)
sst_at_buoy: interpolated cci-sst data to buoy coordinates
and re-gridded into 1 or 2 degree grid cells
Returns
--------
dt: data frame with time series differences of sst between the two data sets
and some general statistics of the differences in sst
"""
# Calculate sst differences between data sets cell by cell
diff_ts
=
sst_at_buoy
.
analysed_sst
-
sst_coarse
.
analysed_sst
# Calculate a spatial average of the differences for the entire data set
# also weighted by latitude grid cell area
weights
=
np
.
cos
(
np
.
deg2rad
(
diff_ts
.
lat
))
weights
.
name
=
'weights'
dt_w
=
diff_ts
.
weighted
(
weights
).
mean
((
'lon'
,
'lat'
)).
load
()
dt_w
.
name
=
'weighted_analysed_sst_diff_mean'
dt_w
=
dt_w
.
to_dataframe
()
std
=
diff_ts
.
std
(
dim
=
(
'lon'
,
'lat'
)).
load
()
std
.
name
=
'analysed_sst_diff_std'
std
=
std
.
to_dataframe
()
median
=
diff_ts
.
median
(
dim
=
(
'lon'
,
'lat'
)).
load
()
median
.
name
=
'analysed_sst_diff_median'
median
=
median
.
to_dataframe
()
dt
=
diff_ts
.
mean
((
'lon'
,
'lat'
)).
load
()
dt
=
dt
.
to_dataframe
()
dt
[
'weighted_analysed_sst_diff_mean'
]
=
dt_w
[
'weighted_analysed_sst_diff_mean'
].
copy
()
dt
[
'analysed_sst_diff_std'
]
=
std
[
'analysed_sst_diff_std'
].
copy
()
dt
[
'analysed_sst_diff_median'
]
=
median
[
'analysed_sst_diff_median'
].
copy
()
return
dt
# TODO: this function is very ugly!! needs re-thinking
def
calculate_spatial_avg_of_stats
(
sst_at_buoy
):
"""
Calculates spatial means of bins statistics these are statistics from the interpolated data set:
std and median of sst inside a grid cell and buoy count averages.
Parameters
----------
sst_at_buoy: interpolated cci-sst data to buoy coordinates and re-gridded
into 1 or 2 degree grid cells, this contains bin statistics (std, median of sst values inside
each grid cell and buoy count)
Returns
--------
out: data frame with time series of mean values for each bin statistics variable (std, median, buoy_count)
"""
# weighted by latitude grid cell area
weights
=
np
.
cos
(
np
.
deg2rad
(
sst_at_buoy
.
lat
))
weights
.
name
=
'weights'
std_w
=
sst_at_buoy
.
analysed_sst_std
.
weighted
(
weights
).
mean
((
'lon'
,
'lat'
)).
load
()
std_w
.
name
=
'weighted_mean_analysed_sst_std'
out
=
std_w
.
to_dataframe
()
median_w
=
sst_at_buoy
.
analysed_sst_median
.
weighted
(
weights
).
mean
((
'lon'
,
'lat'
)).
load
()
median_w
.
name
=
'weighted_mean_analysed_sst_median'
median_w
=
median_w
.
to_dataframe
()
out
[
'weighted_mean_analysed_sst_median'
]
=
median_w
[
'weighted_mean_analysed_sst_median'
].
copy
()
count_w
=
sst_at_buoy
.
buoy_count
.
weighted
(
weights
).
mean
((
'lon'
,
'lat'
)).
load
()
count_w
.
name
=
'weighted_mean_buoy_count'
count_w
=
count_w
.
to_dataframe
()
out
[
'weighted_mean_buoy_count'
]
=
count_w
[
'weighted_mean_buoy_count'
].
copy
()
std
=
sst_at_buoy
.
analysed_sst_std
.
mean
((
'lon'
,
'lat'
)).
load
()
std
.
name
=
'mean_analysed_sst_std'
std
=
std
.
to_dataframe
()
out
[
'mean_analysed_sst_std'
]
=
std
[
'mean_analysed_sst_std'
].
copy
()
median
=
sst_at_buoy
.
analysed_sst_median
.
mean
((
'lon'
,
'lat'
)).
load
()
median
.
name
=
'mean_analysed_sst_median'
median
=
median
.
to_dataframe
()
out
[
'mean_analysed_sst_median'
]
=
median
[
'mean_analysed_sst_median'
].
copy
()
count
=
sst_at_buoy
.
buoy_count
.
mean
((
'lon'
,
'lat'
)).
load
()
count
.
name
=
'mean_buoy_count'
count
=
count
.
to_dataframe
()
out
[
'mean_buoy_count'
]
=
count
[
'mean_buoy_count'
].
copy
()
return
out
def
calculate_total_buoy_density
(
buoy_count
):
"""
Calculates total buoy density along lat and long and time
Parameters
----------
buoy_count: total count (weighted sum)
per grid cell of cci-sst data points interpolated to buoy coordinates
Returns
--------
out: data frame with time series of the total count of cci-sst data points interpolated to buoy
coordinates
"""
weights
=
np
.
cos
(
np
.
deg2rad
(
buoy_count
.
lat
))
weights
.
name
=
'weights'
count_w
=
buoy_count
.
weighted
(
weights
).
sum
((
'lon'
,
'lat'
)).
load
()
count_w
.
name
=
'weighted_sum_buoy_count'
out
=
count_w
.
to_dataframe
()
count
=
buoy_count
.
sum
((
'lon'
,
'lat'
)).
load
()
count
.
name
=
'sum_buoy_count'
count
=
count
.
to_dataframe
()
out
[
'sum_buoy_count'
]
=
count
[
'sum_buoy_count'
].
copy
()
return
out
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment