Commit cb4791d3 authored by Richard Cornes's avatar Richard Cornes
Browse files

Removed hard-coded paths from sripts and now uses config file

parent e9c3d20f
#!/usr/bin/env Rscript
# code written by Elizabeth Kent, National Oceanography Centre
# code written for the NERC GloSAT project
# Description:
## IMPORTANT: all paths to all data are defined in config.yml
## input_data:
# 1. arguments (e.g. 1990)
# 2. output_data/HOOVER_RUN_ZERO
# 3. output_data/DISJ/stat
## code written by Elizabeth Kent, National Oceanography Centre
## code written for the NERC GloSAT project
## my_args[1] is start year
## my_args[2] is config file
library(icoads.utils)
## Description:
#### IMPORTANT: all paths to all data are defined in config.yml
#### input_data:
## 1. arguments (e.g. 1990)
## 2. output_data/HOOVER_RUN_ZERO
## 3. output_data/DISJ/stat
Sys.setenv(TZ='GMT')
options(warnings=1)
exit <- function() { invokeRestart("abort") }
my_args = commandArgs(trailingOnly=TRUE)
if(length(my_args)== 0) {my_args<-my_args2}
if(length(my_args)!= 1) {stop("wrong number of input my_args or my_args2 not set")}
require(icoads.utils)
config<-config::get(file = "config.yml")
print.comm <- TRUE
exit <- function() { invokeRestart("abort") }
my_args <- commandArgs(trailingOnly=TRUE)
if(length(my_args)== 0) {my_args <- my_args2}
if(length(my_args)!= 2) {stop("wrong number of input my_args or my_args2 not set")}
# my_args[1] is start year
config <- config::get(file = my_args[2])
print.comm <- TRUE
iyr<-my_args[1]
iyr <- my_args[1]
#########################################################################################
##################################################################################################################################################################################
cat('--------------------------------------------- \n')
cat('running diurnal checks for',iyr,'\n')
cat('--------------------------------------------- \n')
dir.out <- paste0(config$output_dir,"DIURNAL_FAIL_IDS/")
if (!dir.exists(dir.out) ) dir.create(dir.out)
dir.in <- paste0(config$output_dir,"FINAL_PROC/")
df<-icoads.utils::read_trackmonth(dir.in,syr=iyr)
if(!is.null(df)) {
YRMO <- split(df, data.frame(df$yr, df$mo), drop=TRUE)
if(length(YRMO) > 0 ) {
names(YRMO) <- gsub("\\.","_", names(YRMO))
# empty list for output
sp.op<-vector(mode = "list", length = length(YRMO))
names(sp.op)<-names(YRMO)
for ( imo in 1:length(YRMO) ) {
#cat(imo,length(sp.op),"\n")
data<-YRMO[[imo]]
sp<-split(data,data$final.id)
sp<-sp[sapply(sp,nrow)>=10]
source("~eck/bad_daynightcheck.R")
op<-lapply(sp,function(X) bad_daynight_check(X$date,X$yr,X$lon,X$lat,X$dck,X$at))
op<-op[sapply(op,function(X) X==1)]
if(is.null(op)) op<-""
#print(names(unlist(op)))
sp.op[[imo]]<-names(unlist(op))
}
filenames<-paste0(dir.out,names(sp.op),".Rda")
jj<-mapply(saveRDS, sp.op, file = filenames)
nn<-names(table(unlist(sp.op)))
df$baddn<-ifelse(df$final.id %in% nn,1,0)
cat('--------------------------------------------- \n')
cat("Diurnal check output:",iyr,"1=fail","\n")
cat('--------------------------------------------- \n')
print(table(df$dck,df$baddn))
cat('--------------------------------------------- \n')
}
} # switch for no input data (df = NULL)
##dir.out <- paste0(config$output_dir,"DIURNAL_FAIL_IDS/")
dir.out <- config$dir_flags_dir
if (!dir.exists(dir.out) ) dir.create(dir.out)
##dir.in <- paste0(config$output_dir,"FINAL_PROC/")
dir.in <- config$data_dir
df <- icoads.utils::read_trackmonth(dir.in,syr=iyr)
if(!is.null(df)) {
YRMO <- split(df, data.frame(df$yr, df$mo), drop=TRUE)
if(length(YRMO) > 0 ) {
names(YRMO) <- gsub("\\.","_", names(YRMO))
## empty list for output
sp.op <- vector(mode = "list", length = length(YRMO))
names(sp.op) <- names(YRMO)
for ( imo in 1:length(YRMO) ) {
##cat(imo,length(sp.op),"\n")
data <- YRMO[[imo]]
sp <- split(data,data$final.id)
sp <- sp[sapply(sp,nrow)>=10]
op <- lapply(sp,function(X) bad_daynight_check(X$date,X$yr,X$lon,X$lat,X$dck,X$at))
op <- op[sapply(op,function(X) X==1)]
if(is.null(op)) op <- ""
##print(names(unlist(op)))
sp.op[[imo]] <- names(unlist(op))
}
filenames <- paste0(dir.out,names(sp.op),".Rda")
jj <- mapply(saveRDS, sp.op, file = filenames)
nn <- names(table(unlist(sp.op)))
df$baddn <- ifelse(df$final.id %in% nn,1,0)
cat('--------------------------------------------- \n')
cat("Diurnal check output:",iyr,"1=fail","\n")
cat('--------------------------------------------- \n')
print(table(df$dck,df$baddn))
cat('--------------------------------------------- \n')
}
} ## switch for no input data (df = NULL)
#!/usr/bin/env Rscript
my_args = commandArgs(trailingOnly=TRUE)
##!/usr/bin/env Rscript
#code_dir="/gws/nopw/j04/glosat/NMAT/ECK/HEIGHT_ENS/"
code_dir="/noc/mpoc/surface_data/ICOADS_R3_PROC/"
#data_dir="/gws/nopw/j04/glosat/development/data/interim/HOSTACE_PROC/FINAL_PROC"
data_dir="/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/FINAL_PROC"
#pub47_dir="/gws/nopw/j04/glosat/development/data/interim/HOSTACE_PROC/HOSTACE_PROC_ANC_INFO/PUB47byMONTH"
#out_dir="./ENS_HTS/"
out_dir="."
out_grid_dir="/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/GRID_HTS/"
library(icoads.utils)
data(basin_name)
##-------------------------------------------------------------------
## my_args[1] = start year
## my_args[2] = end year
## my_args[3] = config file
##-------------------------------------------------------------------
#source("/gws/nopw/j04/glosat/NMAT/ECK/version_2/get_basin.R")
source(paste0(code_dir,"get_basin.R"))
#--------------------------------------------------------------------------------
##--------------------------------------------------------------------------------
Sys.setenv(TZ='GMT')
#--------------------------------------------------------------------------------
options(warn=1)
if ( length(my_args) < 2 ) {my_args<-my_args2}
my_args <- commandArgs(trailingOnly=TRUE)
##--------------------------------------------------------------------------------
##data_dir="/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/FINAL_PROC"
##pub47_dir="/gws/nopw/j04/glosat/development/data/interim/HOSTACE_PROC/HOSTACE_PROC_ANC_INFO/PUB47byMONTH"
##out_grid_dir="/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/GRID_HTS/"
config <- config::get(file = my_args[2])
data_dir <- config$data_dir
pub47_dir <- config$pub_47_by_month_path
out_dir <- config$ens_hts_dir
out_grid_dir <- config$ens_grid_hts_dir
if ( length(my_args) < 2 ) {my_args <- my_args2}
for ( year in my_args[1]:my_args[2] ) {
cat("--------------------------------------------------","\n")
cat("Generating default grids for",year,"\n")
cat("--------------------------------------------------","\n")
flist<-list.files(data_dir,pattern=as.character(year),full.names=T)
if ( length(flist) == 0 ) {
data_dir2<-gsub("FINAL_PROC","HOOVER_RUNZERO",data_dir)
flist<-list.files(data_dir2,pattern=as.character(year),full.names=T)
data$final.id<-data$track.id
}
sp<-lapply(flist,readRDS)
data<-do.call(rbind,sp)
#data<-subset(data,mo==12)
fmiss<-round(sum(is.na(data$final.id))/nrow(data)*100,1)
cat("n.good=",nrow(data),"miss id=",fmiss,"%\t","\n")
data$lon180<-ifelse(data$lon>=180,data$lon-360,data$lon)
data$ix10<-floor((data$lon180+180)*0.1)+1
data$iy10<-floor((data$lat+90)*0.1)+1
data$ix5<-floor((data$lon180+180)*0.2)+1
data$iy5<-floor((data$lat+90)*0.2)+1
data$ix2<-floor((data$lon180+180)*0.5)+1
data$iy2<-floor((data$lat+90)*0.5)+1
data$ix1<-floor((data$lon180+180)*1.0)+1
data$iy1<-floor((data$lat+90)*1.0)+1
ind<-array(NA,dim=c(nrow(data),2))
ind[,1]<-ifelse(data$ix1<=360,data$ix1,360)
ind[,2]<-ifelse(data$iy1<=180,data$iy1,180)
data$basin<-basin_name[ind]
data$basin<-ifelse(is.na(data$basin),"coastal",data$basin)
basin5<-array(NA,c(72,36))
basin5n<-array(NA,c(72,36))
for ( ix in seq(1,359,5) ) {
for ( iy in seq(1,179,5) ) {
#sub<-basin_name[ix:(ix+4),iy:(iy+4)]
sub<-basin[ix:(ix+4),iy:(iy+4)]
subn<-basin_name[ix:(ix+4),iy:(iy+4)]
if(sum(!is.na(sub)) == 0 ) next
if(length(table(sub))>1) {
tt<-sort(table(sub))
if(tt[1]/sum(!is.na(sub))<0.25 & length(tt) == 2 ) {
#print(tt)
basin5[ceiling(ix/5),ceiling(iy/5)]<-as.numeric(names(tt)[2])
basin5n[ceiling(ix/5),ceiling(iy/5)]<-names(table(subn))[2]
} else {
#print(table(sub))
cat("--------------------------------------------------","\n")
cat("Generating default grids for",year,"\n")
cat("--------------------------------------------------","\n")
flist <- list.files(data_dir,pattern=as.character(year),full.names=T)
if ( length(flist) == 0 ) {
data_dir2 <- gsub("FINAL_PROC","HOOVER_RUNZERO",data_dir)
flist <- list.files(data_dir2,pattern=as.character(year),full.names=T)
data$final.id <- data$track.id
}
sp <- lapply(flist,readRDS)
data <- do.call(rbind,sp)
##data <- subset(data,mo==12)
fmiss <- round(sum(is.na(data$final.id))/nrow(data)*100,1)
cat("n.good=",nrow(data),"miss id=",fmiss,"%\t","\n")
data$lon180 <- ifelse(data$lon>=180,data$lon-360,data$lon)
data$ix10 <- floor((data$lon180+180)*0.1)+1
data$iy10 <- floor((data$lat+90)*0.1)+1
data$ix5 <- floor((data$lon180+180)*0.2)+1
data$iy5 <- floor((data$lat+90)*0.2)+1
data$ix2 <- floor((data$lon180+180)*0.5)+1
data$iy2 <- floor((data$lat+90)*0.5)+1
data$ix1 <- floor((data$lon180+180)*1.0)+1
data$iy1 <- floor((data$lat+90)*1.0)+1
ind <- array(NA,dim=c(nrow(data),2))
ind[,1] <- ifelse(data$ix1<=360,data$ix1,360)
ind[,2] <- ifelse(data$iy1<=180,data$iy1,180)
data$basin <- basin_name[ind]
data$basin <- ifelse(is.na(data$basin),"coastal",data$basin)
basin5 <- array(NA,c(72,36))
basin5n <- array(NA,c(72,36))
for ( ix in seq(1,359,5) ) {
for ( iy in seq(1,179,5) ) {
##sub <- basin_name[ix:(ix+4),iy:(iy+4)]
sub <- basin[ix:(ix+4),iy:(iy+4)]
subn <- basin_name[ix:(ix+4),iy:(iy+4)]
if(sum(!is.na(sub)) == 0 ) next
if(length(table(sub))>1) {
tt <- sort(table(sub))
if(tt[1]/sum(!is.na(sub))<0.25 & length(tt) == 2 ) {
##print(tt)
basin5[ceiling(ix/5),ceiling(iy/5)] <- as.numeric(names(tt)[2])
basin5n[ceiling(ix/5),ceiling(iy/5)] <- names(table(subn))[2]
} else {
##print(table(sub))
next
}
} else {
basin5[ceiling(ix/5),ceiling(iy/5)] <- as.numeric(names(table(sub)))
basin5n[ceiling(ix/5),ceiling(iy/5)] <- names(table(subn))
}
}
}
cat("got basins","\n")
##hts.by.id <- read.table(paste0("ENS_HTS/htbyid_",year,".psv.gz"),header=F,sep="|")
hts_file_id <- file.path(out_dir,sprintf("htbyid_%s.psv",year))
if(file.exists(hts_file_id)){
hts.by.id <- read.table(hts_file_id,header=F,sep="|",comment.char="",quote=FALSE)
cat("got ID hts","\n")
} else {
cat('no height file, skipping',year,"\n")
next
}
} else {
basin5[ceiling(ix/5),ceiling(iy/5)]<-as.numeric(names(table(sub)))
basin5n[ceiling(ix/5),ceiling(iy/5)]<-names(table(subn))
}
}
}
cat("got basins","\n")
#hts.by.id <- read.table(paste0("ENS_HTS/htbyid_",year,".psv.gz"),header=F,sep="|")
if ( file.exists(paste0("/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/ENS_HTS/htbyid_",year,".psv")) ) {
hts.by.id <- read.table(paste0("/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/ENS_HTS/htbyid_",year,".psv"),header=F,sep="|",comment.char="",quote=FALSE)
cat("got ID hts","\n")
} else {
cat('no height file, skipping',year,"\n")
next
}
#hts.by.uid <- read.table(paste0("ENS_HTS/htbyuid_",year,".psv.gz"),header=F,sep="|")
hts.by.uid <- read.table(paste0("/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/ENS_HTS/htbyuid_",year,".psv"),header=F,sep="|")
cat("got UID hts","\n")
h.id<-data.frame(final.id=hts.by.id$V1,ht=apply(hts.by.id[,2:ncol(hts.by.id)],c(1),mean),ht.sd=apply(hts.by.id[,2:ncol(hts.by.id)],c(1),sd))
h.uid<-data.frame(uid=unique(hts.by.uid$V1),ht=NA,ht.sd=NA)
sp<-split(hts.by.uid,hts.by.uid$V1)
h.uid$ht<-sapply(sp,function(X) mean(X$V4))
h.uid$ht.sd<-sapply(sp,function(X) sd(X$V4))
data<-merge(data,h.id,all.x=T)
data.noid<-subset(data,is.na(ht))
data.noid$ht<-NULL
data.noid$ht.sd<-NULL
data.id<-subset(data,!is.na(ht))
data.noid<-merge(data.noid,h.uid,all.x=T)
data<-rbind(data.id,data.noid)
# hts by 5 deg box
data$tmp<-paste0(data$ix5,"_",data$iy5)
sp<-split(data,data$tmp)
ht<-data.frame(ht=sapply(sp,function(X) round(mean(X$ht,na.rm=T),2)))
ht$tmp<-rownames(ht)
#ht.sd<-data.frame(sd=sapply(sp,function(X) round(sd(X$at.ht,na.rm=T),2)))
ht.sd<-data.frame(sd=sapply(sp,function(X) round((quantile(X$ht,5/6,na.rm=T)-quantile(X$ht,1/6,na.rm=T))*0.5,2)))
ht.num<-data.frame(num=sapply(sp,function(X) sum(!is.na(X$ht))))
stri<-".ix5.iy5"
ht.app<-data.frame(tmp=ht$tmp,ht=ht$ht,sd=ht.sd$sd,num=ht.num$num)
ht.app<-subset(ht.app,!is.na(ht.app$ht))
ht.app<-subset(ht.app,ht.app$num>50)
names(ht.app)[2:4]<-paste0(names(ht.app)[2:4],stri)
ht.5x5<-ht.app
tmp<-strsplit(ht.5x5$tmp,"_")
want<-array(NA,c(nrow(ht.5x5),2))
want[,1]<-sapply(tmp,function(X) as.numeric(X[1]))
want[,2]<-sapply(tmp,function(X) as.numeric(X[2]))
# start off with 5x5 values
op.ht<-array(NA,c(72,36))
op.ht[want]<-ht.5x5$ht.ix5.iy5
op.sd<-array(NA,c(72,36))
op.sd[want]<-ht.5x5$sd.ix5.iy5
op.num<-array(NA,c(72,36))
op.num[want]<-ht.5x5$num.ix5.iy5
#op.ht<-ifelse(is.na(op.ht),bas10.ht,op.ht)
#op.ht<-ifelse(is.na(op.ht),bas.ht,op.ht)
#op.ht<-ifelse(is.na(op.ht),lat10.ht,op.ht)
#op.sd<-ifelse(is.na(op.sd),bas10.sd,op.sd)
#op.sd<-ifelse(is.na(op.sd),bas.sd,op.sd)
#op.sd<-ifelse(is.na(op.sd),lat10.sd,op.sd)
#
# infill southern ocean
for ( i in 35:1 ) {
if(sum(!is.na(op.ht[,i]))==0 ) {
#cat(i,sum(!is.na(op.ht[,i])),"\n")
op.ht[,i]<-op.ht[,i+1]
op.sd[,i]<-min(op.sd[,i+1],2)
hts_file_uid <- file.path(out_dir,sprintf("htbyuid_%s.psv",year))
hts.by.uid <- read.table(hts_file_uid,header=F,sep="|")
cat("got UID hts","\n")
h.id <- data.frame(final.id=hts.by.id$V1,ht=apply(hts.by.id[,2:ncol(hts.by.id)],c(1),mean),ht.sd=apply(hts.by.id[,2:ncol(hts.by.id)],c(1),sd))
h.uid <- data.frame(uid=unique(hts.by.uid$V1),ht=NA,ht.sd=NA)
sp <- split(hts.by.uid,hts.by.uid$V1)
h.uid$ht <- sapply(sp,function(X) mean(X$V4))
h.uid$ht.sd <- sapply(sp,function(X) sd(X$V4))
data <- merge(data,h.id,all.x=T)
data.noid <- subset(data,is.na(ht))
data.noid$ht <- NULL
data.noid$ht.sd <- NULL
data.id <- subset(data,!is.na(ht))
data.noid <- merge(data.noid,h.uid,all.x=T)
data <- rbind(data.id,data.noid)
## hts by 5 deg box
data$tmp <- paste0(data$ix5,"_",data$iy5)
sp <- split(data,data$tmp)
ht <- data.frame(ht=sapply(sp,function(X) round(mean(X$ht,na.rm=T),2)))
ht$tmp <- rownames(ht)
##ht.sd <- data.frame(sd=sapply(sp,function(X) round(sd(X$at.ht,na.rm=T),2)))
ht.sd <- data.frame(sd=sapply(sp,function(X) round((quantile(X$ht,5/6,na.rm=T)-quantile(X$ht,1/6,na.rm=T))*0.5,2)))
ht.num <- data.frame(num=sapply(sp,function(X) sum(!is.na(X$ht))))
stri <- ".ix5.iy5"
ht.app <- data.frame(tmp=ht$tmp,ht=ht$ht,sd=ht.sd$sd,num=ht.num$num)
ht.app <- subset(ht.app,!is.na(ht.app$ht))
ht.app <- subset(ht.app,ht.app$num>50)
names(ht.app)[2:4] <- paste0(names(ht.app)[2:4],stri)
ht.5x5 <- ht.app
tmp <- strsplit(ht.5x5$tmp,"_")
want <- array(NA,c(nrow(ht.5x5),2))
want[,1] <- sapply(tmp,function(X) as.numeric(X[1]))
want[,2] <- sapply(tmp,function(X) as.numeric(X[2]))
## start off with 5x5 values
op.ht <- array(NA,c(72,36))
op.ht[want] <- ht.5x5$ht.ix5.iy5
op.sd <- array(NA,c(72,36))
op.sd[want] <- ht.5x5$sd.ix5.iy5
op.num <- array(NA,c(72,36))
op.num[want] <- ht.5x5$num.ix5.iy5
##op.ht <- ifelse(is.na(op.ht),bas10.ht,op.ht)
##op.ht <- ifelse(is.na(op.ht),bas.ht,op.ht)
##op.ht <- ifelse(is.na(op.ht),lat10.ht,op.ht)
##op.sd <- ifelse(is.na(op.sd),bas10.sd,op.sd)
##op.sd <- ifelse(is.na(op.sd),bas.sd,op.sd)
##op.sd <- ifelse(is.na(op.sd),lat10.sd,op.sd)
##
## infill southern ocean
for ( i in 35:1 ) {
if(sum(!is.na(op.ht[,i]))==0 ) {
##cat(i,sum(!is.na(op.ht[,i])),"\n")
op.ht[,i] <- op.ht[,i+1]
op.sd[,i] <- min(op.sd[,i+1],2)
}
}
}
saveRDS(op.ht,paste0(out_grid_dir,"deg5_ht_",year,".Rda"))
saveRDS(op.sd,paste0(out_grid_dir,"deg5_sd_",year,".Rda"))
saveRDS(op.num,paste0(out_grid_dir,"deg5_sd_",year,".Rda"))
saveRDS(op.ht,paste0(out_grid_dir,"deg5_ht_",year,".Rda"))
saveRDS(op.sd,paste0(out_grid_dir,"deg5_sd_",year,".Rda"))
saveRDS(op.num,paste0(out_grid_dir,"deg5_sd_",year,".Rda"))
} # end of loop over years
} ## end of loop over years
This diff is collapsed.
......@@ -36,3 +36,14 @@ default:
track_root_athena: "/pgdata/metman/TRACK"
track_input: "/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/TRACK_INPUT/"
track_output: "/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/GTRACK_OUT/"
# Final Processing Dirs
data_dir: "/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/FINAL_PROC"
# Height Files/Dirs
ens_hts_dir: "/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/ENS_HTS/"
ens_grid_hts_dir: "/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/GRID_HTS/"
def_hts_file: "/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/default_height_ensemble.Rda"
# Diurnal dirs
dir_flags_dir: "/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/DIURNAL_FAIL_IDS/"
\ No newline at end of file
......@@ -43,11 +43,11 @@ set y = $1
Rscript $codedir/run_joins.R $y $configfile >> ${log_output_dir}/dop.$1.$2
echo 'XXX getting height ensemble' >> ${log_output_dir}/dop.$1.$2
Rscript $codedir/get_height_ens.R $y $y >> ${log_output_dir}/dop.$1.$2
Rscript $codedir/get_height_ens.R $y $y $configfile >> ${log_output_dir}/dop.$1.$2
echo 'XXX generating gridded heights' >> ${log_output_dir}/dop.$1.$2
Rscript $codedir/get_gridded_vals.R $y $y >> ${log_output_dir}/dop.$1.$2
Rscript $codedir/get_gridded_vals.R $y $y $configfile >> ${log_output_dir}/dop.$1.$2
echo 'XXX getting ships with wrong diurnal cycle' >> ${log_output_dir}/dop.$1.$2
Rscript $codedir/add_diurnal_fails.R $y >> ${log_output_dir}/dop.$1.$2
Rscript $codedir/add_diurnal_fails.R $y $configfile >> ${log_output_dir}/dop.$1.$2
@ y = $y + 1
end
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment