Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
brivas
ORCHESTRA-SST
Commits
40a734f3
Commit
40a734f3
authored
4 years ago
by
Beatriz Recinos
Browse files
Options
Download
Email Patches
Plain Diff
new utils func and write to netcdf script
parent
73a15178
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
235 additions
and
2 deletions
+235
-2
scripts/combined_data_to_netcdf.py
scripts/combined_data_to_netcdf.py
+184
-0
scripts/plot_cci_buoy_error.py
scripts/plot_cci_buoy_error.py
+2
-2
sst_tools/utils_sst.py
sst_tools/utils_sst.py
+49
-0
No files found.
scripts/combined_data_to_netcdf.py
0 → 100755
View file @
40a734f3
# TO RUN LOCALLY
import
os
import
psutil
import
time
import
sys
from
configobj
import
ConfigObj
import
pandas
as
pd
import
numpy
as
np
import
xarray
as
xr
import
warnings
warnings
.
filterwarnings
(
'ignore'
)
config
=
ConfigObj
(
"../config.ini"
)
sys
.
path
.
append
(
config
[
'repo_path'
])
from
sst_tools
import
utils_sst
as
utils
from
sst_tools
import
workflow_sst
as
workflow
from
sst_tools.workflow_sst
import
non_zero_data
from
sst_tools.workflow_sst
import
sum_buoy_density
# Time
start
=
time
.
time
()
process
=
psutil
.
Process
(
os
.
getpid
())
# Input specify resolution in space and time
res
=
2
#alias = 'QS-JAN'
alias
=
'1M'
#alias = '1Y'
if
res
==
1
:
path_coarse
=
config
[
'satellite_sst_1x1'
]
path_buoy
=
config
[
'satellite_sst_buoy_1x1'
]
elif
res
==
2
:
path_coarse
=
config
[
'satellite_sst_2x2'
]
path_buoy
=
config
[
'satellite_sst_buoy_2x2'
]
else
:
path_coarse
=
None
path_buoy
=
None
print
(
'Error please specify a resolution'
)
# Open data
sst_coarse
=
utils
.
open_data_and_crop
(
path_coarse
,
lat_s
=-
90
,
lat_n
=-
25
)
sst_at_buoy
=
utils
.
open_data_and_crop
(
path_buoy
,
lat_s
=-
90
,
lat_n
=-
25
)
# Adding sst standard error to the sst_at_buoy data set
sst_at_buoy
[
'analysed_sst_se'
]
=
sst_at_buoy
.
analysed_sst_std
/
np
.
sqrt
(
sst_at_buoy
.
buoy_count
)
# Count how many grid-cells we have with data on the sst_at_buoy data set
grid_boxes_with_data
=
xr
.
apply_ufunc
(
non_zero_data
,
sst_at_buoy
.
analysed_sst
,
input_core_dims
=
[[
"lat"
,
"lon"
]],
dask
=
'allowed'
,
vectorize
=
True
)
grid_boxes_with_data
.
name
=
'No_of_grid_cells_with_data'
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
print
(
"Done reading everything: %02d:%02d:%02d"
%
(
h
,
m
,
s
))
print
(
process
.
memory_info
().
rss
)
# Calculate time-stamp means, by resample the data to a different
# time resolution depending on the alias given
# Monthly means: each time stamp correspond to the last day of the month
sst_coarse_re
=
sst_coarse
.
resample
(
time
=
alias
).
mean
()
# For sst_at_buoy we need to calculate weighted time moving averages
# We weight each variable by the number of buoy points (buoy_density) on each grid cell
sst_at_buoy_sst
=
workflow
.
calculate_time_moving_weighted_average
(
sst_at_buoy
.
analysed_sst
,
sst_at_buoy
.
buoy_count
,
time_stamp
=
alias
,
var_name
=
'analysed_sst'
)
sst_at_buoy_std
=
workflow
.
calculate_time_moving_weighted_average
(
sst_at_buoy
.
analysed_sst_std
,
sst_at_buoy
.
buoy_count
,
time_stamp
=
alias
,
var_name
=
'analysed_sst_std'
)
sst_at_buoy_median
=
workflow
.
calculate_time_moving_weighted_average
(
sst_at_buoy
.
analysed_sst_median
,
sst_at_buoy
.
buoy_count
,
time_stamp
=
alias
,
var_name
=
'analysed_sst_median'
)
sst_at_buoy_se
=
workflow
.
calculate_time_moving_weighted_average
(
sst_at_buoy
.
analysed_sst_se
,
sst_at_buoy
.
buoy_count
,
time_stamp
=
alias
,
var_name
=
'analysed_sst_se'
)
# Last variable makes no sense to weight it by itself!
sst_at_buoy_buoy_count_mean
=
sst_at_buoy
.
buoy_count
.
resample
(
time
=
alias
).
mean
()
# We also calculate the total sum of buoy points for each month
sst_at_buoy_buoy_count_sum
=
sst_at_buoy
.
buoy_count
.
resample
(
time
=
alias
).
sum
()
# The number of gridboxes with data for every month.
grid_boxes_with_data_re
=
grid_boxes_with_data
.
resample
(
time
=
alias
).
sum
()
m
,
s
=
divmod
(
time
.
time
()
-
start
,
60
)
h
,
m
=
divmod
(
m
,
60
)
print
(
"Done resampling: %02d:%02d:%02d"
%
(
h
,
m
,
s
))
print
(
process
.
memory_info
().
rss
)
# Getting coordinates and dimensions for the data set
yi
=
sst_at_buoy
.
lat
.
values
xi
=
sst_at_buoy
.
lon
.
values
time
=
sst_at_buoy_sst
.
time
.
values
# Ready to build the xarray.dataframe
combined
=
xr
.
Dataset
(
coords
=
{
'lat'
:
([
'lat'
],
yi
),
'lon'
:
([
'lon'
],
xi
),
'time'
:
time
})
dict_sst_buoy
=
{
'long_name'
:
'analysed sea surface temperature at buoy'
,
'standard_name'
:
'sea_water_temperature'
,
'units'
:
'kelvin'
}
dict_std
=
{
'long_name'
:
'std analysed sea surface temperature at buoy'
,
'standard_name'
:
'standard deviation'
,
'units'
:
'kelvin'
}
dict_median
=
{
'long_name'
:
'median analysed sea surface temperature at buoy'
,
'standard_name'
:
'median'
,
'units'
:
'kelvin'
}
dict_se
=
{
'long_name'
:
'se analysed sea surface temperature at buoy'
,
'standard_name'
:
'standard error'
,
'units'
:
'kelvin'
}
dict_coarse
=
{
'long_name'
:
'analysed sea surface temperature coarse'
,
'standard_name'
:
'sea_water_temperature'
,
'units'
:
'kelvin'
}
dict_buoy_mean
=
{
'long_name'
:
'mean_buoy_count_per_grid_cell'
,
'standard_name'
:
'buoy_count'
,
'units'
:
'No of buoys on a grid cell'
}
dict_buoy_sum
=
{
'long_name'
:
'sum_buoy_count_per_grid_cell'
,
'standard_name'
:
'buoy_count_total'
,
'units'
:
'No of buoys on a grid cell'
}
combined
=
utils
.
write_variable_to_xr_dataset
(
combined
,
sst_at_buoy_sst
,
'analysed_sst_at_buoy'
,
dict_sst_buoy
)
combined
=
utils
.
write_variable_to_xr_dataset
(
combined
,
sst_at_buoy_std
,
'analysed_sst_at_buoy_std'
,
dict_std
)
combined
=
utils
.
write_variable_to_xr_dataset
(
combined
,
sst_at_buoy_median
,
'analysed_sst_at_buoy_median'
,
dict_median
)
combined
=
utils
.
write_variable_to_xr_dataset
(
combined
,
sst_at_buoy_se
,
'analysed_sst_at_buoy_se'
,
dict_se
)
combined
=
utils
.
write_variable_to_xr_dataset
(
combined
,
sst_coarse_re
.
analysed_sst
,
'analysed_sst_coarse'
,
dict_coarse
)
combined
=
utils
.
write_variable_to_xr_dataset
(
combined
,
sst_at_buoy_buoy_count_mean
,
'buoy_count_mean'
,
dict_buoy_mean
)
combined
=
utils
.
write_variable_to_xr_dataset
(
combined
,
sst_at_buoy_buoy_count_sum
,
'buoy_count_sum'
,
dict_buoy_sum
)
combined
=
utils
.
add_global_attrs
(
combined
,
res
)
combined
.
to_netcdf
(
os
.
path
.
join
(
config
[
'combined_data'
],
alias
+
'_'
+
str
(
res
)
+
'x'
+
str
(
res
)
+
'.nc'
))
print
(
'Done saving to netcdf'
)
This diff is collapsed.
Click to expand it.
scripts/plot_cci_buoy_error.py
View file @
40a734f3
...
...
@@ -37,8 +37,8 @@ start_day = int(sys.argv[3])
month
=
int
(
sys
.
argv
[
4
])
day_to_plot
=
int
(
sys
.
argv
[
5
])
# From SLURM ARRAY
lat_bnds
=
[
-
5
0
,
-
25
]
lon_bnds
=
[
10
0
,
1
8
0
]
lat_bnds
=
[
-
9
0
,
-
25
]
lon_bnds
=
[
-
15
0
,
1
5
0
]
extent
=
np
.
append
(
lon_bnds
,
lat_bnds
)
print
(
extent
)
...
...
This diff is collapsed.
Click to expand it.
sst_tools/utils_sst.py
View file @
40a734f3
...
...
@@ -223,3 +223,52 @@ def open_data_and_crop(path_to_data, lat_s, lat_n):
data_cropped
=
xr
.
open_mfdataset
(
file_paths
).
sel
(
lat
=
slice
(
lat_s
,
lat_n
))
return
data_cropped
def
write_variable_to_xr_dataset
(
data_set
,
var
,
var_name
,
dict_attrs
):
"""
Write variable to a netcdf file
Parameters
----------
data_set: xr.Dataset to write the variables to.
var: xr.dask.array to write to the xr.Dataset
var_name: Name of variable to save
dict_attrs: A dictionary with attributes
Returns
-------
xr_dataset: xr.Dataset with the variables written
"""
data_set
[
var_name
]
=
([
'time'
,
'lat'
,
'lon'
],
var
.
load
())
data_set
[
var_name
].
attrs
=
dict_attrs
return
data_set
def
add_global_attrs
(
data_set
,
res
):
"""
Write global attrs to a netcdf file
Parameters
----------
data_set: xr.Dataset to write the attributes to.
res: resolution
Returns
-------
xr_dataset: xr.Dataset with global attributes written
"""
# Global netcdf attributes
dic
=
{
'title'
:
'ESA SST CCI OSTIA L4 product re-sampled'
,
'gds_version_id'
:
'2.0'
,
'spatial_resolution'
:
str
(
res
)
+
' degree'
,
'geospatial_lat_resolution'
:
res
,
'geospatial_lat_units'
:
'degrees_north'
,
'geospatial_lon_resolution'
:
res
,
'geospatial_lon_units'
:
'degrees_east'
,
'creator_name'
:
'Beatriz Recinos (NOC)'
}
data_set
.
attrs
=
dic
return
data_set
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment