Commit e122b788 authored by Richard Cornes

Added disjoint tracking scripts and data

parent f7e99fda
#!/usr/bin/env Rscript
# code written by Elizabeth Kent, National Oceanography Centre
# code written for the NERC GloSAT project
# Description:
## IMPORTANT: all paths to all data are defined in config.yml
## input_data:
# 1. arguments (e.g. 1990)
# 2. output_data/FINAL_PROC
## Outputs: output_data/DIURNAL_FAIL_IDS
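## config.yml is not included in this commit; an illustrative (assumed) layout,
## based only on the keys these scripts read, would be:
##   default:
##     output_dir: "/path/to/RUN_ZERO/output_data/"
##     pub_47_by_month_path: "/path/to/PUB47byMONTH"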
Sys.setenv(TZ='GMT')
options(warn=1)
exit <- function() { invokeRestart("abort") }
my_args = commandArgs(trailingOnly=TRUE)
if(length(my_args)== 0) {my_args<-my_args2}
if(length(my_args)!= 1) {stop("wrong number of input my_args or my_args2 not set")}
require(icoads.utils)
config<-config::get(file = "config.yml")
print.comm <- TRUE
# my_args[1] is start year
iyr<-my_args[1]
#########################################################################################
cat('--------------------------------------------- \n')
cat('running diurnal checks for',iyr,'\n')
cat('--------------------------------------------- \n')
dir.out <- paste0(config$output_dir,"DIURNAL_FAIL_IDS/")
if (!dir.exists(dir.out) ) dir.create(dir.out)
dir.in <- paste0(config$output_dir,"FINAL_PROC/")
df<-icoads.utils::read_trackmonth(dir.in,syr=iyr)
if(!is.null(df)) {
YRMO <- split(df, data.frame(df$yr, df$mo), drop=TRUE)
if(length(YRMO) > 0 ) {
names(YRMO) <- gsub("\\.","_", names(YRMO))
# empty list for output
sp.op<-vector(mode = "list", length = length(YRMO))
names(sp.op)<-names(YRMO)
for ( imo in 1:length(YRMO) ) {
#cat(imo,length(sp.op),"\n")
data<-YRMO[[imo]]
sp<-split(data,data$final.id)
sp<-sp[sapply(sp,nrow)>=10]
source("~eck/bad_daynightcheck.R")
op<-lapply(sp,function(X) bad_daynight_check(X$date,X$yr,X$lon,X$lat,X$dck,X$at))
op<-op[sapply(op,function(X) X==1)]
if(is.null(op)) op<-""
#print(names(unlist(op)))
sp.op[[imo]]<-names(unlist(op))
}
filenames<-paste0(dir.out,names(sp.op),".Rda")
jj<-mapply(saveRDS, sp.op, file = filenames)
nn<-names(table(unlist(sp.op)))
df$baddn<-ifelse(df$final.id %in% nn,1,0)
cat('--------------------------------------------- \n')
cat("Diurnal check output:",iyr,"1=fail","\n")
cat('--------------------------------------------- \n')
print(table(df$dck,df$baddn))
cat('--------------------------------------------- \n')
}
} # switch for no input data (df = NULL)
assign_country <- function (data) {
#source("add_ituco.R")
print.comm <- TRUE
icoads_cc <- c( "Netherlands", "Norway", "US", "UK", "France", "Denmark", "Italy", "India", "HongKong", "NewZealand", "Ireland", "Philippines", "Egypt", "Canada", "Belgium", "SouthAfrica", "Australia", "Japan", "Pakistan", "Argentina", "Sweden", "FRG", "Iceland", "Israel", "Malaysia", "USSR", "Finland", "RepKorea", "NewCaledonia", "Portugal", "Spain", "Thailand", "Yugoslavia", "Poland", "Brazil", "Singapore", "Kenya", "Tanzania", "Uganda", "Mexico", "GDR")
icoads_cc <- data.frame(as.character(seq(0,40)),icoads_cc)
names(icoads_cc) <- c("cc.num","country")
icoads_cc$cc.num[which(nchar(icoads_cc$cc.num)==1)] <- paste0("0",icoads_cc$cc.num[which(nchar(icoads_cc$cc.num)==1)])
d705t7_cc <- c( "AS", "OS","AU", "BX", "CH", "CI", "CN", "CU", "CZ", "DD","DL",
"DN", "DR", "FI", "FR", "GR", "HK", "HO", "IY", "JP",
"MX", "NK", "NL", "NO", "PH", "PL", "PM", "PO", "RA","RS",
"SN", "SP", "SW", "TH", "TU", "UK", "US", "YG",
"BR", "BZ", "NU", "BE", "NZ")
d705t7_co <- c( "Austria", "Austria", "Australia", "Belgium", "Chile", "China", "Canada", "Cuba", "Czechoslovakia", "Germany", "Germany", "Denmark", "DominicanRepublic", "Finland", "France", "Greece", "HongKong", "Honduras", "Italy", "Japan", "Mexico", "Nicaragua", "Netherlands", "Norway", "Philippines", "Poland", "Panama", "Portugal", "USSR", "USSR", "Sweden", "Spain", "Switzerland", "Thailand", "Turkey", "UK", "US", "Yugoslavia","UK","Brazil", "NU", "Bermuda", "NewZealand")
d705t7_cc<-data.frame(d705t7_cc,d705t7_co)
year<-data$yr[1]
if ( year >= 1956 ) {
itudata <- get_ituco(data)
data <- merge(data,itudata,all.x=T)
} else {
data$ituco<-NA
}
tmp<-data
iyear<-year
if ( print.comm ) cat("got ituco \n")
#stop()
tmp$best_co <- NA
if(!("recruiting_country" %in% names(tmp))) tmp$recruiting_country<-NA
#if(!("country" %in% names(tmp))) tmp$country<-NA
if(!("ituco" %in% names(tmp))) tmp$ituco<-NA
tmp$best_co <- ifelse(!is.na(tmp$c1),tmp$c1,tmp$recruiting_country)
tmp$best_co <- ifelse(is.na(tmp$best_co),tmp$recruiting_country,tmp$best_co)
coded.id<-"^[AMO|BAR|BAT|EUC|IDD|MIN|SCA|TBW|MPD]{3}[AA|UK|FR|DE|EU|HK]{2}[0-9]{2}$"
tmp$id.ok <- ifelse(grepl(coded.id,tmp$track.id),"coded",tmp$id.ok)
tmp$best_co <- ifelse(is.na(tmp$best_co) & tmp$id.ok=="coded",substr(tmp$track.id,4,5),tmp$best_co)
if ( print.comm ) cat("about to merge 1 \n")
print(names(tmp))
print(names(icoads_cc))
tmp<-merge(tmp,icoads_cc,by.x=c("c1"),by.y=c("cc.num"),all.x=T)
if ( print.comm ) cat("merged 1 \n")
tmp$best_co<-ifelse(!is.na(tmp$country),tmp$country,tmp$best_co)
tmp$country<-NULL
tmp$best_co[which(tmp$best_co=="99")] <- NA
tmp$best_co<-ifelse(is.na(tmp$best_co) & !is.na(tmp$ituco),tmp$ituco,tmp$best_co)
#tmp$best_co<-ifelse(tmp$dck== 780 | tmp$dck==782, tmp$wod_country, tmp$best_co)
tmp$best_co<-ifelse(tmp$dck %in% c(249,204,205,211,216,229,239,245) & is.na(tmp$best_co), "UK", tmp$best_co)
tmp$best_co<-ifelse(tmp$dck %in% c(110,116,195,281,667,703,708,710,874) & is.na(tmp$best_co), "US", tmp$best_co)
tmp$best_co<-ifelse((tmp$dck %in% c(186) ) & is.na(tmp$best_co), "USSR", tmp$best_co)
tmp$best_co<-ifelse((tmp$dck %in% c(731,733,735) ) & is.na(tmp$best_co), "Russia", tmp$best_co)
tmp$best_co<-ifelse((tmp$dck %in% c(188,702) ) & is.na(tmp$best_co), "Norway", tmp$best_co)
tmp$best_co<-ifelse((tmp$dck %in% c(750,900) ) & is.na(tmp$best_co), "Australia", tmp$best_co)
tmp$best_co<-ifelse((tmp$dck %in% c(899) ) & is.na(tmp$best_co), "SouthAfrica", tmp$best_co)
tmp$best_co<-ifelse((tmp$dck %in% c(118,119,761,762) ) & is.na(tmp$best_co), "Japan", tmp$best_co)
tmp$best_co[which(tmp$best_co == "Unknown")] <- NA
tmp$best_co[which(tmp$best_co=="DE")] <- "Germany"
tmp$best_co[which(tmp$best_co=="SE")] <- "Sweden"
tmp$best_co[which(tmp$best_co=="RU")] <- "Russia"
tmp$best_co[which(tmp$best_co=="CA")] <- "Canada"
tmp$best_co[which(tmp$best_co=="DK")] <- "Denmark"
tmp$best_co[which(tmp$best_co=="GB")] <- "UK"
tmp$best_co[which(tmp$best_co=="IE")] <- "Ireland"
tmp$best_co[which(tmp$best_co=="IN")] <- "India"
tmp$best_co[which(tmp$best_co=="ES")] <- "Spain"
tmp$best_co[which(tmp$best_co=="IT")] <- "Italy"
tmp$best_co[which(tmp$best_co=="LT")] <- "Lithuania"
tmp$best_co[which(tmp$best_co=="PT")] <- "Portugal"
tmp$best_co[which(tmp$best_co=="KR")] <- "RepKorea"
tmp$best_co[which(tmp$best_co=="NI")] <- "Nicaragua"
tmp$best_co[which(tmp$best_co=="ZA")] <- "SouthAfrica"
tmp$best_co[which(tmp$best_co=="YU")] <- "Yugoslavia"
tmp$best_co[which(tmp$best_co=="CD")] <- "DRCongo"
tmp$best_co[which(tmp$best_co=="AR")] <- "Argentina"
tmp$best_co[which(tmp$best_co=="IL")] <- "Israel"
tmp$best_co[which(tmp$best_co=="IS")] <- "Iceland"
tmp$best_co[which(tmp$best_co=="VE")] <- "Venezuela"
tmp$best_co[which(tmp$best_co=="MC")] <- "Monoco"
tmp$best_co[which(tmp$best_co=="EC")] <- "Ecuador"
tmp$best_co[which(tmp$best_co=="MG")] <- "Madagascar"
tmp$best_co[which(tmp$best_co=="RO")] <- "Romania"
tmp$best_co[which(tmp$best_co=="PE")] <- "Peru"
tmp$best_co[which(tmp$best_co=="CR")] <- "CostaRica"
tmp$best_co[which(tmp$best_co=="PA")] <- "Panama"
tmp$best_co[which(tmp$best_co=="AT")] <- "Austria"
tmp$best_co[which(tmp$best_co=="BN")] <- "Brunei"
tmp$best_co[which(tmp$best_co=="TR")] <- "Turkey"
tmp$best_co[which(tmp$best_co=="UA")] <- "Ukraine"
tmp$best_co[which(tmp$best_co=="GH")] <- "Ghana"
tmp$best_co[which(tmp$best_co=="TW")] <- "Taiwan"
tmp$best_co[which(tmp$best_co=="ID")] <- "Indonesia"
tmp$best_co[which(tmp$best_co=="NC")] <- "NewCaledonia"
tmp$best_co[which(tmp$best_co=="CY")] <- "Cyprus"
tmp$best_co[which(tmp$best_co=="LV")] <- "Latvia"
tmp$best_co[which(tmp$best_co=="EE")] <- "Estonia"
tmp$best_co[which(tmp$best_co=="SG")] <- "Singapore"
tmp$best_co[which(tmp$best_co=="LB")] <- "Lebanon"
tmp$best_co[which(tmp$best_co=="LR")] <- "Liberia"
tmp$best_co[which(tmp$best_co=="AG")] <- "AntiguaBarbuda"
tmp$best_co[which(tmp$best_co=="BS")] <- "Bahamas"
tmp$best_co[which(tmp$best_co=="MY")] <- "Malaysia"
tmp$best_co[which(tmp$best_co=="HR")] <- "Croatia"
tmp$best_co[which(tmp$best_co=="Argentine Republic")] <- "Argentina"
tmp$best_co[which(tmp$best_co=="Russian Federation")] <- "Russia"
tmp$best_co<-trimws(tmp$best_co)
tmp$best_co<-gsub(" ","",tmp$best_co)
data <- tmp
data$best_co[which(data$best_co == "NZ")] <- "NewZealand"
data$best_co[which(data$best_co == "GR")] <- "Greece"
data$best_co[which(data$best_co == "AU")] <- "Australia"
data$best_co[which(data$best_co == "NL")] <- "Netherlands"
data$best_co[which(data$best_co == "HK")] <- "HongKong"
data$best_co[which(data$best_co == "JP")] <- "Japan"
data$best_co[which(data$best_co == "NO")] <- "Norway"
data$best_co[which(data$best_co == "CL")] <- "Chile"
data$best_co[which(data$best_co == "FR")] <- "France"
data$best_co[which(data$best_co == "BR")] <- "Brazil"
data$best_co[which(data$best_co == "PL")] <- "Poland"
data$best_co[which(data$best_co == "BE")] <- "Belgium"
data$best_co[which(data$best_co == "CN")] <- "China"
data$best_co[which(data$best_co == "FI")] <- "Finland"
data$best_co[which(data$best_co == "PH")] <- "Philippines"
data$best_co[which(data$best_co == "CH")] <- "Switzerland"
data$best_co[which(data$best_co == "CU")] <- "Cuba"
data$best_co[which(data$best_co == "TZ")] <- "Tanzania"
data$best_co[which(data$best_co == "GDR")] <- "EastGermany"
data$best_co[which(data$best_co == "FRG")] <- "WestGermany"
data$best_co[which(data$best_co == "TH")] <- "Thailand"
data$best_co[which(data$best_co == "LK")] <- "SriLanka"
data$best_co[which(data$best_co == "RussianFederation")] <- "Russia"
data$best_co[which(data$best_co == "CS")] <- NA
data$best_co[which(data$best_co == "SA")] <- "SaudiArabia"
data$best_co[which(data$best_co == "KE")] <- "Kenya"
data$best_co[which(data$best_co == "DD")] <- "EastGermany"
data$best_co[which(data$best_co == "PM")] <- "StPandM"
data$best_co[which(data$best_co == "PK")] <- "Pakistan"
data$best_co[which(data$best_co == "BD")] <- "Bangladesh"
data$best_co[which(data$best_co == "BG")] <- "Bulgaria"
data$best_co[which(data$best_co == "XX")] <- NA
data$best_co[which(data$best_co == "OT")] <- NA
data$best_co[which(data$best_co == "NU")] <- NA
data$best_co[which(data$best_co == "EA")] <- NA
data$best_co[which(data$best_co == "ZY")] <- NA
data$best_co[which(data$best_co == "ZZ")] <- NA
data$best_co[which(data$best_co == "SovietUnion")] <- "USSR"
data$best_co[which(data$best_co == "Russia" & iyear < 1991 & iyear > 1922)] <- "USSR"
data$best_co[which(data$best_co == "Ukraine" & iyear < 1991 & iyear > 1922)] <- "USSR"
data$best_co[which(data$best_co == "Latvia" & iyear < 1991 & iyear > 1922)] <- "USSR"
data$best_co[which(data$best_co == "Belarus" & iyear < 1991 & iyear > 1922)] <- "USSR"
data$best_co[which(data$best_co == "Estonia" & iyear < 1991 & iyear > 1922)] <- "USSR"
data$best_co[which(data$best_co == "Kazakhstan" & iyear < 1991 & iyear > 1922)] <- "USSR"
data$best_co[which(data$best_co == "Moldova" & iyear < 1991 & iyear > 1922)] <- "USSR"
data$best_co[which(data$best_co == "Uzbekistan" & iyear < 1991 & iyear > 1922)] <- "USSR"
data$best_co[which(data$best_co == "Lithuania" & iyear < 1991 & iyear > 1922)] <- "USSR"
data$best_co[which(data$best_co == "USSR" & iyear < 1922)] <- "Russia"
data$best_co[which(data$best_co == "Ukraine" & iyear < 1922)] <- "Russia"
data$best_co[which(data$best_co == "WestGermany" & iyear < 1945 )] <- "Germany"
data$best_co[which(data$best_co == "EastGermany" & iyear < 1945 )] <- "Germany"
data$best_co[which(data$best_co == "RepKorea" & iyear < 1945 )] <- "Korea"
data$best_co[which(data$best_co == "DPRK" & iyear < 1945 )] <- "Korea"
data$best_co[which(data$best_co == "WestGermany" & iyear > 1990 )] <- "Germany"
data$best_co[which(data$best_co == "EastGermany" & iyear > 1990 )] <- "Germany"
data$best_co[which(data$best_co == "Namibia" & data$dck == 780 )] <- NA
data$best_co[which(data$best_co == "Namibia" & data$dck == 782 )] <- NA
data$best_co[which(data$best_co == "" )] <- NA
# }
if ( FALSE ) {
#print(table(data$best_co,data$dck,useNA='ifany'))
cat("-----------------------------------------------","\n")
#print(addmargins(table(data$dck,data$best_co,useNA='ifany')))
if ( sum(!is.na(data$best_co)) > 0 ) {
#print(addmargins(table(data$best_co)))
#print(table(!is.na(data$best_co)))
cat(iyear,"percent with cc: ",round(sum(!is.na(data$best_co))/dim(data)[1],2)*100,'\n')
cat("-----------------------------------------------","\n")
cat("best_co","\n")
cat("-----------------------------------------------","\n")
print(table(data$best_co,useNA='ifany'))
cat("-----------------------------------------------","\n")
cat("c1","\n")
cat("-----------------------------------------------","\n")
print(table(data$c1,useNA='ifany'))
cat("-----------------------------------------------","\n")
cat("recruiting_country","\n")
cat("-----------------------------------------------","\n")
print(table(data$recruiting_country,useNA='ifany'))
cat("-----------------------------------------------","\n")
cat("dck","\n")
cat("-----------------------------------------------","\n")
print(table(data$dck,useNA='ifany'))
}
cat("-----------------------------------------------","\n")
cat("-----------------------------------------------","\n")
}
data$cflag <- NA
data$cflag <- ifelse ( !is.na(data$c1) & !(data$c1 %in% c("XX","OT","NU","EA","ZY","ZZ","99")),1,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & !is.na(data$recruiting_country),2,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck == 780 ,3,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck == 782 ,3,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck == 705 ,3,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck == 706 ,3,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck == 707 ,3,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck %in% c(249,204,205,211,216,229,239,245) ,4,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck %in% c(110,116,195,667,703,708,710,874) ,4,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck %in% c(186) ,4,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck %in% c(731,733,735) ,4,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck %in% c(188,702) ,4,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck %in% c(750,900) ,4,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck %in% c(899) ,4,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & data$dck %in% c(118,119,761,762) ,4,data$cflag)
data$cflag <- ifelse ( is.na(data$cflag) & !is.na(data$ituco),9,data$cflag)
# remove country designation if # rep < 30 per month & only from callsign
few.co <- names(table(data$best_co))[which(table(data$best_co) < 30)]
few <- subset(data,best_co %in% few.co)
data$best_co <- ifelse(data$best_co %in% few.co & data$cflag == 9, NA, data$best_co)
# remove country designation if # rep < 10 per month
few.co <- names(table(data$best_co))[which(table(data$best_co) < 10)]
few <- subset(data,best_co %in% few.co)
data$best_co <- ifelse(data$best_co %in% few.co, NA, data$best_co)
data$best_co <- ifelse(data$best_co == "GF", "FrenchGuiana", data$best_co)
data$best_co <- ifelse(is.na(data$best_co) & data$id.ok=="callsign" & substr(data$track.id,1,2) == "9V", "Malta", data$best_co)
data$cflag <- ifelse ( is.na(data$best_co),NA,data$cflag)
#print(table(data$track.id[data$id.ok=="callsign" & is.na(data$best_co)]))
#fn.out <- paste0("/noc/mpoc/surface_data/TRACKING/FinalTrack/COUNTRY/",iyear,".",imonth,".Rda")
#for.out <- data[,c("yr","mo","dck","sid","uid","c1","c1m","recruiting_country","id","track.id","best_co","cflag")]
#for.out <- unique(for.out)
#saveRDS(for.out,fn.out)
return(data[,c("uid","best_co","cflag")])
}
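# Assumed usage (illustrative; monthly_df is a hypothetical data frame of reports):
# cc<-assign_country(monthly_df)
# monthly_df<-merge(monthly_df,cc,by="uid",all.x=TRUE)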
check_squares <- function(dfg, dfb, nmin=1, do.plots=FALSE) {
vnames<-c("sst","at","slp","dpt")
v2<- c("f","f2","nm")
nn<-c(paste0(rep(vnames,each=length(v2)),rep(v2,times=length(vnames))))
op<-data.frame(t(rep(0,times=length(nn))))
names(op)<-nn
maskb<-dfb$dck==732 & is.na(dfb$os)
if(sum(maskb) == 0 ) return(op)
if(sum(!maskb) == 0 ) return(op)
maskb2<-dfb$dck==732 & !is.na(dfb$os)
maskg<-dfg$dck==732
if(do.plots)par(mfrow=c(2,2))
for ( vname in vnames ) {
op[paste0(vname,"nm")]<-min(sum(!is.na(dfb[maskb,vname])),sum(!is.na(dfb[!maskb,vname])))
# doesn't work if both distributions have zero range
if ( sum(!is.na(dfb[maskb,vname])) > 0 & sum(!is.na(dfb[!maskb,vname])) > 0 ) {
rr1<-range(dfb[maskb,vname],na.rm=T)[2]-range(dfb[maskb,vname],na.rm=T)[1]
rr2<-range(dfb[!maskb,vname],na.rm=T)[2]-range(dfb[!maskb,vname],na.rm=T)[1]
} else {
rr1<-0
rr2<-0
}
nn1<-sum(!is.na(dfb[maskb,vname]))
nn2<-sum(!is.na(dfb[!maskb,vname]))
if ( rr1 > 0 | rr2 > 0 ) {
if ( min(nn1,nn2) > 1 ) {
test.t<-t.test(dfb[maskb,vname],dfb[!maskb,vname])
fail.t<-if ( test.t$conf.int[2] < 0 | test.t$conf.int[1] > 0 ) 1 else 0
} else {
fail.t <- (-9)
}
test.wil<-wilcox.test(dfb[maskb,vname],dfb[!maskb,vname],exact=F,conf.int=T)
fail.wil<- if ( test.wil$conf.int[2] < 0 | test.wil$conf.int[1] > 0 ) 1 else 0
if ( do.plots ) {
if ( min(nn1,nn2) > 1 ) {
boxplot(dfb[vname]~maskb,varwidth=T,main=paste(vname,fail.t,fail.wil,sum(!is.na(dfb$sst[maskb]))))
} else {
plot.new()
}
}
} else {
fail.t <- (-9)
fail.wil <- (-9)
if ( do.plots ) plot.new()
}
op[paste0(vname,"f")]<-fail.t
op[paste0(vname,"f2")]<-fail.wil
}
return(op)
}
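# A minimal usage sketch with synthetic data (illustrative values only; wrapped in
# if ( FALSE ) so it never runs as part of the processing):
if ( FALSE ) {
set.seed(1)
dfb_demo<-data.frame(dck=rep(c(732,555),each=20),os=NA,
sst=c(rnorm(20,20),rnorm(20,15)),at=rnorm(40,18),
slp=rnorm(40,1013),dpt=rnorm(40,12))
dfg_demo<-data.frame(dck=rep(732,20),sst=rnorm(20,20),at=rnorm(20,18),
slp=rnorm(20,1013),dpt=rnorm(20,12))
# sstf should come back as 1 (means differ by ~5 degC); the other flags should be 0
print(check_squares(dfg_demo,dfb_demo))
}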
y5<-seq(-90,90,5)
y5.2<-seq(-87.5,87.5,5)
x5<-seq(0,360,5)
x5.2<-seq(2.5,358,5)
#mboxes<-readRDS("squares_mormet.Rda")
#mboxes$source<-"M"
iboxes<-readRDS("squares_icoads.Rda")
iboxes$source<-"I"
sure<-subset(iboxes,dck!="m732")
sure$lon.grad<-ifelse(sure$lon.grad %in% c(-1,1),sure$lon.grad,NA)
sure$lat.grad<-ifelse(sure$lat.grad %in% c(-1,1),sure$lat.grad,NA)
amb<-subset(iboxes,dck=="m732")
source("check_squares.R")
if ( TRUE ) {
#df.full<-icoads.utils::read_trackmonth("/noc/mpoc/surface_data/D732pairs/NEW3",syr=1950,eyr=2020)
ff<-list.files("/noc/mpoc/surface_data/D732pairs/NEW3",full.names=T)
sp<-lapply(ff,readRDS)
sp<-sp[sapply(sp,nrow)>0]
sp<-sp[sapply(sp,ncol)==55]
df.full<-do.call(rbind,sp)
y5<-seq(-90,90,5)
y5.2<-seq(-87.5,87.5,5)
x5<-seq(0,360,5)
x5.2<-seq(2.5,358,5)
#df.full$ppos<-paste(df.full$lat5,df.full$i.lat5,df.full$lon5,df.full$i.lon5,sep="_")
pp<-subset(df.full,lat5!=i.lat5 | lon5!=i.lon5)
}
if ( FALSE ) {
# this to check whether we identify any new boxes using
# the original mormet data - seems not
mor.amb<-mboxes
for ( ib in 1:nrow(sure) ) {
a<-which((mboxes$lat5==sure$lat5[ib] & mboxes$lon5==sure$lon5[ib]) | (mboxes$lat5==sure$i.lat5[ib] & mboxes$lon5==sure$i.lon5[ib]))
b<-which((mboxes$i.lat5==sure$lat5[ib] & mboxes$i.lon5==sure$lon5[ib]) | (mboxes$i.lat5==sure$i.lat5[ib] & mboxes$i.lon5==sure$i.lon5[ib]))
irow<-a[which(b%in%a)]
cat(ib,irow,"\n")
mor.amb<-mor.amb[-irow,]
}
}
for ( iyear in min(iboxes$syr):max(iboxes$eyr) ) {
#for ( iyear in 1974 ) {
#for ( iyear in 1967:1974 ) {
#for ( iyear in 1962:max(iboxes$eyr) ) {
#for ( iyear in 1975:max(iboxes$eyr)) {
cat(iyear," ")
for ( imo in 1:12) {
cat(imo," ")
cmo<-if(imo<10)paste0("0",imo) else imo
#cat(imo,"\n")
#for ( imo in 1) {
fn<-paste0("MormetWithUid/",iyear,"_",cmo,".Rda")
df<-readRDS(fn)
df$lat5<-y5.2[findInterval(df$lat,y5,all.inside=T)]
df$lon5<-x5.2[findInterval(df$lon,x5,all.inside=T)]
df$dlat5<-df$lat-df$lat5
df$dlon5<-df$lon-df$lon5
df$uid<-ifelse(is.na(df$uid),df$mor.uid,df$uid)
fn2<-paste0("/noc/mpoc/surface_data/ICOADS_R3_PROC/INPUT_Rda/r3.0.0/ICOADS_R3.0.0T_",iyear,"-",cmo,".Rda")
ico<-readRDS(fn2)
ico<-subset(ico,!(dck %in% c(731,732)))
df<-icoads.utils::rbind_all_columns(df,ico)
num<-sum(df$dck==732 & is.na(df$os))
fill<-rep(-9,times=num)
if ( length(df$uid[df$dck==732 & is.na(df$os)]) == 0 ) next
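# fill = -9 marks checks that could not be evaluated (too few reports or zero-range
# distributions); the reject logic further down treats -9 differently from 0/1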
flags<-data.frame(uid=df$uid[df$dck==732 & is.na(df$os)],badBox=fill,susBox=fill,badDate=fill,match=fill,regExl=fill,diff.sst=fill,diff.at=fill,diff.slp=fill,diff.dpt=fill,reject=fill,new.lat=NA,new.lon=NA)
sub<-subset(df,dck==732 & is.na(os))
nmin<-5
# for these we know which box is the correct one
for ( ib in 1:nrow(sure) ) {
flags$badBox<-ifelse(sub$lat5==sure$i.lat5[ib] & sub$lon5==sure$i.lon5[ib],1,flags$badBox)
flags$badDate<-ifelse(sub$lat5==sure$i.lat5[ib] & sub$lon5==sure$i.lon5[ib] & sub$yr >= sure$syr[ib] & sub$yr <= sure$eyr[ib],1,flags$badBox)
test.bad<-subset(df,lat5==sure$i.lat5[ib]&lon5==sure$i.lon5[ib])
mm<-test.bad$dck==732 & is.na(test.bad$os)
if ( max(sum(mm),sum(!mm)) < nmin ) next
test.bad$new.lat<-sure$lat5[ib]+test.bad$dlat*sure$lat.grad[ib]
test.bad$new.lon<-sure$lon5[ib]+test.bad$dlon*sure$lon.grad[ib]
test.move<-subset(df,lat5==sure$lat5[ib]&lon5==sure$lon5[ib])
#cat(ib,"sure",sure$i.lat5[ib],sure$i.lon5[ib],"\n")
#print(table(test.bad$dck,test.bad$os,useNA='ifany'))
#if(nrow(test.move) > 20 & nrow(test.bad) > 20 ) stop()
dum<-check_squares(test.move,test.bad,nmin=nmin,do.plots=FALSE)
#if ( max(dum) > 0 ) print(dum)
if(dum$sstf%in% c(0,1)) flags$diff.sst <- ifelse(flags$uid %in% test.bad$uid, dum$sstf, flags$diff.sst)
if(dum$atf%in% c(0,1)) flags$diff.at <- ifelse(flags$uid %in% test.bad$uid, dum$atf, flags$diff.at)
if(dum$slpf%in% c(0,1)) flags$diff.slp <- ifelse(flags$uid %in% test.bad$uid, dum$slpf, flags$diff.slp)
if(dum$dptf%in% c(0,1)) flags$diff.dpt <- ifelse(flags$uid %in% test.bad$uid, dum$dptf, flags$diff.dpt)
#if(max(dum)==0)next
#flags<-merge(flags,test.bad[,c("uid","new.lat","new.lon")],by="uid",all.x=T)
for ( iuid in test.bad$uid[test.bad$dck==732&is.na(test.bad$os)] ) {
mmf<-which(flags$uid==iuid)
mmb<-which(test.bad$uid==iuid)
flags$new.lat[mmf]<-test.bad$new.lat[mmb]
flags$new.lon[mmf]<-test.bad$new.lon[mmb]
}
}
# problem region in Indian Ocean
flags$regExl<-ifelse(sub$lon >= 70 & sub$lon <= 80 & sub$yr >= 1975 & sub$yr <= 1981, 1, flags$regExl)
# flag if match in pp file
flags$match<-ifelse(flags$uid %in% pp$i.uid,1,flags$match)
flags$match<-ifelse(flags$uid %in% pp$uid,1,flags$match)
#flags$reject<-ifelse(flags$badBox==1|flags$susBox==1|flags$badDate==1|flags$match==1|flags$regExl==1|flags$susDate==1,1,0)
#flags$reject<-ifelse(flags$badDate==1|flags$match==1|flags$regExl==1|flags$susDate==1,1,0)
flags$reject<-ifelse(flags$match==1|flags$regExl==1,1,0)
flags$reject<-ifelse(flags$diff.sst==1|flags$diff.at==1|flags$diff.slp==1|flags$diff.dpt==1,1,flags$reject)
flags$reject<-ifelse((flags$diff.sst==-9|flags$diff.at==-9|flags$diff.slp==-9|flags$diff.dpt==-9)&(flags$badBox==1|flags$susBox==1|flags$badDate==1),1,flags$reject)
saveRDS(flags,paste0("testD732flags/",iyear,"_",imo,".Rda"))
} # end loop over month
cat("\n")
} # end loop over year
y5<-seq(-90,90,5)
y5.2<-seq(-87.5,87.5,5)
x5<-seq(0,360,5)
x5.2<-seq(2.5,358,5)
#for ( iyear in 1888:1995 ) {
#for ( iyear in 1888:1995 ) {
for ( iyear in 1940:1995 ) {
cat(iyear,": ")
for ( imo in 1:12 ) {
cat(imo," ")
#fn<-paste0("/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/HOOVER_RUNZERO/",iyear,".",imo,".Rda")
#if(!file.exists(fn))next
#pig<-readRDS(fn)
cmo<-if(imo<10)paste0("0",imo) else imo
fn.mor<-paste0("/noc/users/eck/TESTING/D732/MormetWithUid/",iyear,"_",cmo,".Rda")
if ( !file.exists(fn.mor)) next
mor<-readRDS(fn.mor)
mor$dck<-paste0("m",mor$dck)
fn2<-paste0("/noc/mpoc/surface_data/ICOADS_R3_PROC/INPUT_Rda/r3.0.0/ICOADS_R3.0.0T_",iyear,"-",cmo,".Rda")
pig<-readRDS(fn2)
#if(!("os" %in% names(tmp)))tmp$os<-NA
#if(!("op" %in% names(tmp)))tmp$op<-NA
#pig<-merge(pig,tmp[,c("uid","os","op")])
pig<-subset(pig,!(dck %in% c(731,732)))
pig$dck<-paste0("d",pig$dck)
pig2<-icoads.utils::rbind_all_columns(pig,mor)
pig2<-icoads.utils::add_date2(pig2)
#function (df.in, key.list, close.list, tol.id = 2, tol.pos = 1,
# tol.yr = 0, tol.mo = 0, tol.dy = 0, tol.hr = 0, tol.sst = 1,
# tol.slp = 1, tol.at = 1, tol.dpt = 1, tol.w = 1, tol.d = 1,
# tol.ww = 0, tol.w1 = 0, tol.vv = 0, tol.n = 0)
#pig2<-pig[,c("date","uid","yr","mo","dy","hr","lat","lon","dck","orig.id","track.id","sst","slp","at","w","os","op")]
pig2<-pig2[,c("date","uid","yr","mo","dy","hr","lat","lon","dck","id","sst","slp","at","w","d","os","op","tape","mor.uid")]
#v.want<-c("date","uid","yr","mo","dy","hr","lat","lat5","lon","lon5","dck","id","sst","slp","at","w","lon.pos5","lat.pos5","os","op")
pig2$lat5<-y5.2[findInterval(pig2$lat,y5,all.inside=T)]
pig2$lon5<-x5.2[findInterval(pig2$lon,x5,all.inside=T)]
pig2$lat.pos5<-pig2$lat-pig2$lat5
pig2$lon.pos5<-pig2$lon-pig2$lon5
pig2$alat.pos5<-abs(pig2$lat.pos5)
pig2$alon.pos5<-abs(pig2$lon.pos5)
sp<-split(pig2,format(pig2$date,"%j"))
#op<-lapply(sp,icoads.utils::get_mis,key.list=c("yr","mo","dy","hr"),close.list=c("sst","slp","at","w"),tol.sst=0.6,tol.at=0.6,tol.slp=0.6,tol.w=3.0)
op<-lapply(sp,icoads.utils::get_mis,key.list=c("yr","mo","dy","hr","alat.pos5","alon.pos5"),close.list=c("sst","slp","at","w"),tol.sst=0.6,tol.at=0.6,tol.slp=0.6,tol.w=3.0)
fn.out<-paste0("/noc/mpoc/surface_data/D732pairs/NEW3/",iyear,"_",imo,".Rda")
op<-op[sapply(op,nrow)>0]
if ( length(op) == 0 ) {
if (file.exists(fn.out)) file.remove(fn.out)
next
}
pp<-do.call(rbind,op)
if (nrow(pp) == 0 ) {
if (file.exists(fn.out)) file.remove(fn.out)
next
}
pp2<-subset(pp,dck=="m732"|i.dck=="m732")
if (nrow(pp2) == 0 ) {
if (file.exists(fn.out)) file.remove(fn.out)
next
}
rownames(pp2)<-NULL
cat("saving",fn.out,"\n")
saveRDS(pp2,fn.out)
#stop()
}
cat("\n")
}
require(ncdf4)
#nc<-nc_open("/gws/nopw/j04/glosat/development/data/results/NMAT/ECK/version_2/OCEAN_BASIN/data.nc")
nc<-nc_open("/noc/mpoc/surface_data/ICOADS_R3_PROC/OCEAN_BASIN/data.nc")
Xlon<-ncvar_get(nc,"X")
Ylat<-ncvar_get(nc,"Y")
Z<-ncvar_get(nc,"Z")
basin<-ncvar_get(nc,"basin")
bb<-ncatt_get(nc,"basin","CLIST")$value
nc_close(nc)
bb<-gsub("\\[","",bb)
bb<-gsub("\\]","",bb)
bb<-gsub("\\) \\(",",",bb)
bb<-gsub(" ","",bb)
bb<-gsub("\\(","",bb)
bb<-gsub("\\)","",bb)
bb<-read.table(text=bb,sep=",",header=F)
names(bb)<-bb[1,]
bb<-names(bb)
tmp<-basin[,,1]
basin<-basin[,,1]
basin[1:180,]<-tmp[181:360,]
basin[181:360,]<-tmp[1:180,]
basin_name<-basin
basin_name<-ifelse(!is.na(basin),bb[basin],NA)
#!/usr/bin/env Rscript
my_args = commandArgs(trailingOnly=TRUE)
#code_dir="/gws/nopw/j04/glosat/NMAT/ECK/HEIGHT_ENS/"
code_dir="/noc/mpoc/surface_data/ICOADS_R3_PROC/"
#data_dir="/gws/nopw/j04/glosat/development/data/interim/HOSTACE_PROC/FINAL_PROC"
data_dir="/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/FINAL_PROC"
#pub47_dir="/gws/nopw/j04/glosat/development/data/interim/HOSTACE_PROC/HOSTACE_PROC_ANC_INFO/PUB47byMONTH"
#out_dir="./ENS_HTS/"
out_dir="."
out_grid_dir="/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/GRID_HTS/"
#source("/gws/nopw/j04/glosat/NMAT/ECK/version_2/get_basin.R")
source(paste0(code_dir,"get_basin.R"))
#--------------------------------------------------------------------------------
Sys.setenv(TZ='GMT')
#--------------------------------------------------------------------------------
options(warn=1)
if ( length(my_args) < 2 ) {my_args<-my_args2}
for ( year in my_args[1]:my_args[2] ) {
cat("--------------------------------------------------","\n")
cat("Generating default grids for",year,"\n")
cat("--------------------------------------------------","\n")
flist<-list.files(data_dir,pattern=as.character(year),full.names=T)
if ( length(flist) == 0 ) {
data_dir2<-gsub("FINAL_PROC","HOOVER_RUNZERO",data_dir)
flist<-list.files(data_dir2,pattern=as.character(year),full.names=T)
data$final.id<-data$track.id
}
sp<-lapply(flist,readRDS)
data<-do.call(rbind,sp)
#data<-subset(data,mo==12)
fmiss<-round(sum(is.na(data$final.id))/nrow(data)*100,1)
cat("n.good=",nrow(data),"miss id=",fmiss,"%\t","\n")
data$lon180<-ifelse(data$lon>=180,data$lon-360,data$lon)
data$ix10<-floor((data$lon180+180)*0.1)+1
data$iy10<-floor((data$lat+90)*0.1)+1
data$ix5<-floor((data$lon180+180)*0.2)+1
data$iy5<-floor((data$lat+90)*0.2)+1
data$ix2<-floor((data$lon180+180)*0.5)+1
data$iy2<-floor((data$lat+90)*0.5)+1
data$ix1<-floor((data$lon180+180)*1.0)+1
data$iy1<-floor((data$lat+90)*1.0)+1
ind<-array(NA,dim=c(nrow(data),2))
ind[,1]<-ifelse(data$ix1<=360,data$ix1,360)
ind[,2]<-ifelse(data$iy1<=180,data$iy1,180)
data$basin<-basin_name[ind]
data$basin<-ifelse(is.na(data$basin),"coastal",data$basin)
basin5<-array(NA,c(72,36))
basin5n<-array(NA,c(72,36))
for ( ix in seq(1,359,5) ) {
for ( iy in seq(1,179,5) ) {
#sub<-basin_name[ix:(ix+4),iy:(iy+4)]
sub<-basin[ix:(ix+4),iy:(iy+4)]
subn<-basin_name[ix:(ix+4),iy:(iy+4)]
if(sum(!is.na(sub)) == 0 ) next
if(length(table(sub))>1) {
tt<-sort(table(sub))
if(tt[1]/sum(!is.na(sub))<0.25 & length(tt) == 2 ) {
#print(tt)
basin5[ceiling(ix/5),ceiling(iy/5)]<-as.numeric(names(tt)[2])
basin5n[ceiling(ix/5),ceiling(iy/5)]<-names(table(subn))[2]
} else {
#print(table(sub))
next
}
} else {
basin5[ceiling(ix/5),ceiling(iy/5)]<-as.numeric(names(table(sub)))
basin5n[ceiling(ix/5),ceiling(iy/5)]<-names(table(subn))
}
}
}
cat("got basins","\n")
#hts.by.id <- read.table(paste0("ENS_HTS/htbyid_",year,".psv.gz"),header=F,sep="|")
if ( file.exists(paste0("/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/ENS_HTS/htbyid_",year,".psv")) ) {
hts.by.id <- read.table(paste0("/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/ENS_HTS/htbyid_",year,".psv"),header=F,sep="|",comment.char="",quote=FALSE)
cat("got ID hts","\n")
} else {
cat('no height file, skipping',year,"\n")
next
}
#hts.by.uid <- read.table(paste0("ENS_HTS/htbyuid_",year,".psv.gz"),header=F,sep="|")
hts.by.uid <- read.table(paste0("/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/output_data/ENS_HTS/htbyuid_",year,".psv"),header=F,sep="|")
cat("got UID hts","\n")
h.id<-data.frame(final.id=hts.by.id$V1,ht=apply(hts.by.id[,2:ncol(hts.by.id)],c(1),mean),ht.sd=apply(hts.by.id[,2:ncol(hts.by.id)],c(1),sd))
h.uid<-data.frame(uid=unique(hts.by.uid$V1),ht=NA,ht.sd=NA)
sp<-split(hts.by.uid,hts.by.uid$V1)
h.uid$ht<-sapply(sp,function(X) mean(X$V4))
h.uid$ht.sd<-sapply(sp,function(X) sd(X$V4))
data<-merge(data,h.id,all.x=T)
data.noid<-subset(data,is.na(ht))
data.noid$ht<-NULL
data.noid$ht.sd<-NULL
data.id<-subset(data,!is.na(ht))
data.noid<-merge(data.noid,h.uid,all.x=T)
data<-rbind(data.id,data.noid)
# hts by 5 deg box
data$tmp<-paste0(data$ix5,"_",data$iy5)
sp<-split(data,data$tmp)
ht<-data.frame(ht=sapply(sp,function(X) round(mean(X$ht,na.rm=T),2)))
ht$tmp<-rownames(ht)
#ht.sd<-data.frame(sd=sapply(sp,function(X) round(sd(X$at.ht,na.rm=T),2)))
ht.sd<-data.frame(sd=sapply(sp,function(X) round((quantile(X$ht,5/6,na.rm=T)-quantile(X$ht,1/6,na.rm=T))*0.5,2)))
ht.num<-data.frame(num=sapply(sp,function(X) sum(!is.na(X$ht))))
stri<-".ix5.iy5"
ht.app<-data.frame(tmp=ht$tmp,ht=ht$ht,sd=ht.sd$sd,num=ht.num$num)
ht.app<-subset(ht.app,!is.na(ht.app$ht))
ht.app<-subset(ht.app,ht.app$num>50)
names(ht.app)[2:4]<-paste0(names(ht.app)[2:4],stri)
ht.5x5<-ht.app
tmp<-strsplit(ht.5x5$tmp,"_")
want<-array(NA,c(nrow(ht.5x5),2))
want[,1]<-sapply(tmp,function(X) as.numeric(X[1]))
want[,2]<-sapply(tmp,function(X) as.numeric(X[2]))
# start off with 5x5 values
op.ht<-array(NA,c(72,36))
op.ht[want]<-ht.5x5$ht.ix5.iy5
op.sd<-array(NA,c(72,36))
op.sd[want]<-ht.5x5$sd.ix5.iy5
op.num<-array(NA,c(72,36))
op.num[want]<-ht.5x5$num.ix5.iy5
#op.ht<-ifelse(is.na(op.ht),bas10.ht,op.ht)
#op.ht<-ifelse(is.na(op.ht),bas.ht,op.ht)
#op.ht<-ifelse(is.na(op.ht),lat10.ht,op.ht)
#op.sd<-ifelse(is.na(op.sd),bas10.sd,op.sd)
#op.sd<-ifelse(is.na(op.sd),bas.sd,op.sd)
#op.sd<-ifelse(is.na(op.sd),lat10.sd,op.sd)
#
# infill southern ocean
for ( i in 35:1 ) {
if(sum(!is.na(op.ht[,i]))==0 ) {
#cat(i,sum(!is.na(op.ht[,i])),"\n")
op.ht[,i]<-op.ht[,i+1]
op.sd[,i]<-min(op.sd[,i+1],2)
}
}
saveRDS(op.ht,paste0(out_grid_dir,"deg5_ht_",year,".Rda"))
saveRDS(op.sd,paste0(out_grid_dir,"deg5_sd_",year,".Rda"))
saveRDS(op.num,paste0(out_grid_dir,"deg5_num_",year,".Rda"))
} # end of loop over years
#!/usr/bin/env Rscript
# code written by Elizabeth Kent, National Oceanography Centre
# code written for the NERC GloSAT project
# Description:
## IMPORTANT: all paths to all data are defined in config.yml
## input_data:
# 1. arguments (e.g. c(1990, 1990, "pre", "none"))
# 2. output_data/HOOVER_RUNZERO/
# my_args[1] is start year
# my_args[2] is end year
# my_args[3] is "pre" or "post" tracking
# my_args[4] dcks to select, either "none" or e.g. "123,124"
## Outputs:
# output_data/DISJTR_NEW (tr_out and stat_out files)
# and, optionally, duplicate text files to
# output_data/PAIR_DUPFILES
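## Example invocation (illustrative; matches the csh driver at the end of this
## commit): Rscript run_disj_track2.R 1950 1950 "post" "none"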
my_args = commandArgs(trailingOnly=TRUE)
source("~eck/keeping_going2.R")
if(length(my_args)<2)my_args<-my_args2
n.args<-length(my_args)
if(n.args< 2) {my_args<-my_args2}
n.args<-length(my_args)
if(n.args < 2 | n.args> 4) {stop("wrong number of input my_args or my_args2 not set")}
if(n.args == 2 & my_args[1] >= 1982 ) my_args[3] <- "pre"
if(n.args == 2 ) my_args[3] <- "post"
if(n.args < 4) my_args[4] <- "none"
config<-config::get(file = "config.yml")
exit <- function() { invokeRestart("abort") }
source("/noc/users/eck/Rscripts/read_rdsfiles.R")
if (!dir.exists(paste0(config$output_dir,"DISJTR_NEW"))) dir.create(paste0(config$output_dir,"DISJTR_NEW"))
print.comm <- FALSE
writeOutText = FALSE
read.in<-TRUE
keep.going<-TRUE
save.data<-TRUE
gen.plots<-FALSE
tot.rep<-12
f.add<-1
vv<-c("date","lon","lat","dck","sid","d","w","at","sst","slp","n")
vv2<-c(vv,"uid","orig.id","track.id","testid","id.ok")
min.len1 <- 12
min.len2<-ceiling(0.5*min.len1)
min.len3<-ceiling(0.3*min.len1)
min.len.use<-min.len1
stop.loop<-FALSE
syr<-as.numeric(my_args[1])
eyr<-as.numeric(my_args[2])
cat('---------------------------------------------------------',"\n")
cat("running grouping for",my_args[1],"to",my_args[2],my_args[3],"tracking \n")
cat('---------------------------------------------------------',"\n")
data<-icoads.utils::read_trackmonth(paste0(config$output_dir,"HOOVER_RUNZERO/"),syr=syr,eyr=eyr)
if ( is.null(data) ) {
cat('no data for',my_args[1],my_args[2],'skipping \n')
if ( my_args[1] != my_args[2] ) exit()
}
if ( nrow(data) == 0 ) {
cat('no data for',my_args[1],my_args[2],'skipping \n')
if ( my_args[1] != my_args[2] ) exit()
}
# needed if "pre"
if(!("orig.id") %in% names(data)) data$orig.id<-data$id
# fix some dates ...
for ( iv in names(data) ) {
if ( "POSIXlt" %in% class(data[,iv])) data[,iv]<-as.POSIXct(data[,iv])
#cat(iv,class(data[,iv]),"\n")
}
if ( my_args[4] != "none" ) {
dck2sel<-read.table(text=my_args[4],sep=",")
data<-subset(data,dck %in% dck2sel)
cat("using data dcks", my_args[4], "\n")
} else {
cat("using data from all dcks \n")
}
if(!("id.ok" %in% names(data) )) data$id.ok<-data$id.type
if(!("track.id" %in% names(data) )) data$track.id<-data$new.id
data$lon360<-data$lon
# disjoint groups needs lon +/- 180
data$lon<-ifelse(data$lon>=180,data$lon-360,data$lon)
data<-icoads.utils::calc_local(data)
if ( my_args[3] == "pre" ) {
data2<-subset(data,!(id.ok %in% c("callsign","name","coded","valid","assigned","fill","track","trackfill")))
} else {
data2<-subset(data,!(id.ok %in% c("callsign","name","coded","valid","assigned","fill")))
}
if ( max(data$yr) > 2000 ) {
data2<-subset(data,hr%%1 == 0)
data2<-subset(data2,format(data2$date,"%M")=="00")
data2<-subset(data2,format(data2$date,"%S")=="00")
} else {
data2<-data
}
n.tot<-nrow(data2)
#if(my_args[1] == "2020") data2<-subset(data2,mo==2 & dy < 15)
#data2<-subset(data2,mo==2)
cat('---------------------------------------------------------',"\n")
cat(my_args[1],"sampling type","\n")
print(addmargins(table(data2$dck,data2$samp.flag)))
cat('---------------------------------------------------------',"\n")
togroup<-subset(data2,!(id.ok %in% c("callsign","name","coded","valid","assigned","fill")))
if ( max(data2$yr) < 1900 ) togroup<-subset(togroup,!(dck %in% c(701,721,730) & id.ok=="few"))
if ( nrow(togroup) <= 4 ) {
cat('no data to track for',my_args[1],my_args[2],'skipping \n')
if ( my_args[1] != my_args[2] ) exit()
#next
}
if ( (syr < 1970 & length(my_args) == 2) | my_args[3] == "post" ) {
togroup<-subset(togroup,!(id.ok %in% c("track","trackfill")))
cat("removing data already tracked","\n")
if ( nrow(togroup) == 0 ) {
cat("no untracked data \n")
exit()
}
print(table(togroup$dck,togroup$id.ok))
}
stat<-icoads.utils::find_stations(togroup)
if ( !is.null(stat) ) {
cat("got",nrow(stat),"reports in",length(unique(stat$stat_loc)),"station positions \n ")
} else {
cat("no stations \n")
}
if(!is.null(stat)) togroup<-subset(togroup,!(uid %in% stat$uid))
#togroup<-subset(togroup,mo<=3)
cat("grouping",nrow(togroup),"reports \n")
togroup.old<-togroup
# subset to whole hour reports
togroup<-subset(togroup,as.numeric(format(togroup$date,"%H"))%%1==0)
# and remove hours with too much data (some days have huge # obs)
tt<-table(togroup$date)
lim<-quantile(tt,c(0.99))*2
tt<-tt[tt<=lim]
if ( length(tt) > 0 ) togroup<-subset(togroup,date %in% as.POSIXct(names(tt)))
togroup.early<-subset(togroup,yr<1855)
togroup.gmt<-subset(togroup,samp.flag!="L" & yr >= 1855)
togroup.loc<-subset(togroup,samp.flag=="L" & yr >= 1855)
# subset local data to 4 hours
togroup.loc<-subset(togroup.loc,as.numeric(format(togroup.loc$local,"%H"))%%4==0)
# now get midday d192 reports
togroup.mid<-subset(togroup.loc,dck %in% c(192) & as.numeric(format(togroup.loc$local,"%H"))==12)
togroup.loc<-subset(togroup.loc,!(uid %in% togroup.mid$uid))
if ( nrow(togroup.loc) > 0 ) togroup.loc<-togroup.loc[order(togroup.loc$loc),]
#stop()
poss.fill<-togroup
if(exists("tracked.check"))rm(tracked.check)
len.old<-0
num.grouped<-0
###############################################
## togroup is the dataframe of reports to group
###############################################
togroup.keep<-togroup
samp.type <- c("gmt","loc","mid","early")
#samp.type <- c("gmt","loc")
grouped.data<-data.frame(uid="",testid="")
months<-0
if ( syr >1950 & syr <= 1989 ) months <- seq(1,12)
#months<-2
for ( imo in months ) {
if ( imo != 0 ) {
cat('==================',"running for month",imo,"\n")
togroup<-subset(togroup.keep,mo==imo)
togroup.early<-subset(togroup,yr<1855)
togroup.gmt<-subset(togroup,samp.flag!="L" & yr >= 1855)
togroup.loc<-subset(togroup,samp.flag=="L" & yr >= 1855)
# subset local data to 4 hours
togroup.loc<-subset(togroup.loc,as.numeric(format(togroup.loc$local,"%H"))%%4==0)
# now get midday d192 reports
togroup.mid<-subset(togroup.loc,dck %in% c(192) & as.numeric(format(togroup.loc$local,"%H"))==12)
togroup.loc<-subset(togroup.loc,!(uid %in% togroup.mid$uid))
if ( nrow(togroup.loc) > 0 ) togroup.loc<-togroup.loc[order(togroup.loc$loc),]
poss.fill<-togroup
} else {
cat('------------------',"running for full year","\n")
}
for ( isamp in 1:length(samp.type) ) {
sampt<-samp.type[isamp]
if ( sampt == "gmt" & nrow(togroup.gmt) == 0 ) next
if ( sampt == "gmt" ) cat("got GMT reports",nrow(togroup.gmt),"\n")
if ( sampt == "loc" & nrow(togroup.loc) == 0 ) next
if ( sampt == "loc" ) cat("got LOCAL reports",nrow(togroup.loc),"\n")
if ( sampt == "mid" & nrow(togroup.mid) == 0 ) next
if ( sampt == "mid" ) cat("got Midday reports",nrow(togroup.mid),"\n")
if(syr >= 1990 & sampt == "loc" ) { if ( nrow(togroup.loc) < 30) next }
if(syr >= 1990 & sampt == "mid" ) { if ( nrow(togroup.mid) < 30) next }
if ( sampt == "early" & nrow(togroup.early) != 0 ) cat("got early reports",nrow(togroup.early),"\n")
if ( sampt == "early" & nrow(togroup.early) == 0 ) next
for ( irep in 1:tot.rep ) {
if(irep == 1) {
min.len.use<-min.len1
keep.going<-TRUE
} else if (f.add<0.02 ) { # only added 2% of missing data
if ( min.len.use == min.len1 ) {
min.len.use <- min.len2
} else if ( min.len.use == min.len2 ) {
min.len.use <- min.len3
} else if ( min.len.use == min.len3 ) {
keep.going<-FALSE
}
} else if ( irep == tot.rep -1 & keep.going == TRUE ) {
min.len.use <- min(min.len.use,min.len2)
} else if ( irep == tot.rep & keep.going == TRUE ) {
min.len.use <- min(min.len.use,min.len3)
}
#if(!keep.going) next
#for ( irep in 1:1 ) {
if(!keep.going) {
cat("iteration",irep,"no new info","\n")
next
}
kg.op<-NULL
cat('---------------------------------------------------------',"\n")
if ( imo != 0 ) {
cat(my_args[1],"month",imo,'Sampling type',sampt,"iteration",irep,'minimum reports',min.len.use,"\n")
} else {
cat(my_args[1],'Sampling type',sampt,"iteration",irep,'minimum reports',min.len.use,"\n")
}
if ( irep > 1 ) cat("number already grouped =",nrow(grouped.data),"\n")
cat('---------------------------------------------------------',"\n")
if ( sampt == "gmt" ) {
gmt.loop<-1
# track 6 hourly and fill in remainder
test<-subset(togroup.gmt,!(uid %in% grouped.data$uid))
if(nrow(test) < min.len.use ) {
#kg.op<-icoads.utils::keeping_going(min.len.use,min.len1,min.len2,min.len3,gmt.loop,sampt,keep.going)
kg.op<-keeping_going(min.len.use,min.len1,min.len2,min.len3,keep.going)
min.len.use<-kg.op[[1]]
#gmt.loop<-kg.op[[2]]
#keep.going<-kg.op[[3]]
#keep.going<-kg.op[[2]]
next
}
prfix<-paste0("gmt",irep,"_")
use.int<-6
mg<-6
op<-icoads.utils::loop_disj_func(test,"gmt",mg,use.int,min.len.use)
} else if ( sampt == "mid" ) {
# dck 192 has daily reports at midday
test<-subset(togroup.mid, !(uid %in% grouped.data$uid))
if(nrow(test) < min.len.use ) {
#kg.op<-icoads.utils::keeping_going(min.len.use,min.len1,min.len2,min.len3,gmt.loop,sampt,keep.going)
kg.op<-keeping_going(min.len.use,min.len1,min.len2,min.len3,keep.going)
min.len.use<-kg.op[[1]]
#gmt.loop<-kg.op[[2]]
#keep.going<-kg.op[[3]]
#keep.going<-kg.op[[2]]
next
}
prfix<-paste0("d192_",irep,"_")
use.int<-24
mg<-3
op<-icoads.utils::loop_disj_func(test,"loc",mg,use.int,min.len.use)
} else if ( sampt == "early" ) {
# pre 1855 reports
test<-togroup.early
prfix<-paste0(sampt,irep,"_")
use.int<-1
mg<-50
min.len.use<-min.len3
op<-icoads.utils::loop_disj_func(test,"loc",mg,use.int,min.len.use)
keep.going<-FALSE
} else if ( sampt == "loc" ) {
# local time reports
test<-subset(togroup.loc, !(uid %in% grouped.data$uid))
if(nrow(test) < min.len.use ) {
#kg.op<-icoads.utils::keeping_going(min.len.use,min.len1,min.len2,min.len3,gmt.loop,sampt,keep.going)
kg.op<-keeping_going(min.len.use,min.len1,min.len2,min.len3,keep.going)
min.len.use<-kg.op[[1]]
#gmt.loop<-kg.op[[2]]
#keep.going<-kg.op[[3]]
#keep.going<-kg.op[[2]]
next
}
prfix<-paste0("loc",irep,"_")
if(irep == 1) min.len.use<-min.len1
use.int<-4
mg<-9
op<-icoads.utils::loop_disj_func(test,"loc",mg,use.int,min.len.use)
}
if ( !is.null(op) ) {
if ( class(op) == "character" ) {
if ( op == "noData" | op == "datesNotOK" ) {
#kg.op<-icoads.utils::keeping_going(min.len.use,min.len1,min.len2,min.len3,gmt.loop,sampt,keep.going)
kg.op<-keeping_going(min.len.use,min.len1,min.len2,min.len3,keep.going)
min.len.use<-kg.op[[1]]
#gmt.loop<-kg.op[[2]]
#keep.going<-kg.op[[3]]
#keep.going<-kg.op[[2]]
next
}
}
#print(op)
if ( nrow(op) > 0 ) {
cat("about to evaluate tracks ...","\n")
#e.op<-evaluate_tracks(test, op, poss.fill, sampt, prfix, min.len.use)
e.op<-icoads.utils::evaluate_tracks(test, op, poss.fill, sampt, prfix, 2)
if ( !is.null(e.op) ) {
# keep only candidate tracks with at least min.len.use reports
e.op<-subset(e.op,nobs>=min.len.use)
#print(table(e.op$nobs))
e.op$nobs<-NULL
}
cat("done \n")
if (!is.null(e.op) ) {
grouped.data<-rbind(grouped.data,e.op)
#source("plot_tr_diag.R")
cat('# candidates',nrow(op),'# added',nrow(e.op),"# total",nrow(grouped.data),'\n')
# record fraction added to see whether to continue
f.add<-nrow(e.op)/nrow(subset(test,!(uid %in% e.op$uid)))
cat("fraction added",round(f.add,3),"\n")
if(nrow(e.op) < min.len.use) {
#kg.op<-icoads.utils::keeping_going(min.len.use,min.len1,min.len2,min.len3,gmt.loop,sampt,keep.going)
kg.op<-keeping_going(min.len.use,min.len1,min.len2,min.len3,keep.going)
min.len.use<-kg.op[[1]]
#gmt.loop<-kg.op[[2]]
#keep.going<-kg.op[[3]]
#keep.going<-kg.op[[2]]
}
} else {
cat('no data to add this time around \n')
#kg.op<-icoads.utils::keeping_going(min.len.use,min.len1,min.len2,min.len3,gmt.loop,sampt,keep.going)
kg.op<-keeping_going(min.len.use,min.len1,min.len2,min.len3,keep.going)
min.len.use<-kg.op[[1]]
#gmt.loop<-kg.op[[2]]
#keep.going<-kg.op[[3]]
#keep.going<-kg.op[[2]]
}
} else {
#kg.op<-icoads.utils::keeping_going(min.len.use,min.len1,min.len2,min.len3,gmt.loop,sampt,keep.going)
kg.op<-keeping_going(min.len.use,min.len1,min.len2,min.len3,keep.going)
min.len.use<-kg.op[[1]]
#gmt.loop<-kg.op[[2]]
#keep.going<-kg.op[[3]]
#keep.going<-kg.op[[2]]
}
} else {
#kg.op<-icoads.utils::keeping_going(min.len.use,min.len1,min.len2,min.len3,gmt.loop,sampt,keep.going)
kg.op<-keeping_going(min.len.use,min.len1,min.len2,min.len3,keep.going)
if(is.null(kg.op)) {
cat('here \n')
#keep.going<-FALSE
next
}
min.len.use<-kg.op[[1]]
#gmt.loop<-kg.op[[2]]
#keep.going<-kg.op[[3]]
#keep.going<-kg.op[[2]]
}
} # end loop over irep
cat('---------------------------------------------------------',"\n")
num.grouped<-len.old
} # repeat over type (gmt, local, mid)
} # end loop over months
cat("--------------------------------------------------","\n")
cat("ids before spline","\n")
tracked.check<-merge(togroup.keep,grouped.data)
tracked.check.old<-tracked.check
#print(table(grouped.data$testid))
print(table(table(grouped.data$testid)))
cat("--------------------------------------------------","\n")
#fn.out<-paste0("tr_outprespline_",my_args[1],"_",my_args[2],"_",my_args[3],".Rda")
#saveRDS(tracked.check[,vv2],fn.out)
#stop()
source("/noc/users/eck/nocgit/icoads-r-hostace/rscripts/spline_qc.R")
cat("--------------------------------------------------","\n")
cat("ids after spline","\n")
print(table(grouped.data$testid))
#print(table(table(tracked.check$testid)))
cat("--------------------------------------------------","\n")
if ( gen.plots) source("plot_overall.R")
if ( save.data ) {
fn.out<-paste0(config$output_dir,"DISJTR_NEW/tr_out_",my_args[1],"_",my_args[2],"_",my_args[3],".Rda")
saveRDS(tracked.check[,vv2],fn.out)
if ( !is.null(stat) ) {
if ( nrow(stat) > 0 ) {
fn.out<-paste0(config$output_dir,"DISJTR_NEW/stat_out_",my_args[1],"_",my_args[2],"_",my_args[3],".Rda")
saveRDS(stat,fn.out)
}
}
}
not.tracked<-subset(togroup.old,!(uid %in% tracked.check$uid))
if(!is.null(stat)) not.tracked<-subset(not.tracked,!(uid %in% stat$uid))
n.tot<-nrow(data)
n.ok<-sum(data2$id.ok %in% c("callsign","name","coded","valid","assigned","fill"))
n.oldtrack<-sum(data2$id.ok %in% c("track","trackfill"))
n.group<-nrow(tracked.check)
n.stat<-if(!is.null(stat)) nrow(stat) else 0
n.newtrack<-n.stat+n.group
cat("-------------------------------------------------------","\n")
cat("total reports ",nrow(data),'\n')
cat("OK reports ",n.ok,'\n')
cat("oldtr reports ",n.oldtrack,'\n')
cat('grouped reports ',n.group,"\n")
cat('not tracked old ',n.tot-n.ok-n.oldtrack,"\n")
if(my_args[3] == "pre" ) {
cat('not using old tracking',"\n")
cat('not tracked ',n.tot-n.ok-n.group-n.stat,"\n")
cat('% tracked ',round((n.ok+n.group+n.stat)/n.tot*100,2),"\n")
} else {
cat('using old tracking',"\n")
cat('not tracked ',n.tot-n.ok-n.oldtrack-n.group-n.stat,"\n")
cat('% tracked ',round((n.ok+n.oldtrack+n.group+n.stat)/n.tot*100,2),"\n")
}
# cat("-------------------------------------------------------","\n")
#!/usr/bin/env Rscript
# code written by Elizabeth Kent, National Oceanography Centre
# code written for the NERC GloSAT project
# Description:
## IMPORTANT: all paths to all data are defined in config.yml
## input_data:
# 1. arguments (e.g. 1990)
# 2. output_data/HOOVER_RUNZERO
# 3. output_data/DISJTR_NEW (tr_out and stat_out files)
# 4. PUB47byMONTH (config$pub_47_by_month_path)
## Outputs: output_data/FINAL_PROC
Sys.setenv(TZ='GMT')
options(warn=1)
exit <- function() { invokeRestart("abort") }
my_args = commandArgs(trailingOnly=TRUE)
if(length(my_args)== 0) {my_args<-my_args2}
if(length(my_args)!= 1) {stop("wrong number of input my_args or my_args2 not set")}
require(icoads.utils)
config<-config::get(file = "config.yml")
print.comm <- TRUE
dck.nodup<-c(701)
p47vars<-c("ship_callsign","ship_name","vessel_length","vessel_breadth","vessel_freeboard","vessel_draught","recruiting_country","visual_observing_height","thermometer1_height_above_summer_load_line","anemometer1_height_above_summer_load_line","barometer1_height_above_summer_load_line","sea_thermometer1_height_above_summer_load_line","vessel_type2","p47.uid")
#source("/noc/users/eck/nocgit/icoads.utils/R/check_track_func.R")
#source("/noc/users/eck/nocgit/icoads.utils/R/read_rdsfiles.R")
#source("/noc/users/eck/nocgit/icoads.utils/R/get_breaks_func.R")
#source("/noc/users/eck/nocgit/icoads.utils/R/identify_dups.R")
# my_args[1] is start year
iyr<-my_args[1]
#########################################################################################
cat('--------------------------------------------- \n')
cat('running final joining prog for',iyr,'\n')
cat('--------------------------------------------- \n')
dir.out <- paste0(config$output_dir,"FINAL_PROC/")
if (!dir.exists(dir.out) ) dir.create(dir.out)
dir.in <- paste0(config$output_dir,"HOOVER_RUNZERO/")
df<-icoads.utils::read_trackmonth(dir.in,syr=iyr)
pub47dir <- config$pub_47_by_month_path
#pub_47_by_month_path: "/noc/mpoc/surface_data/ICOADS_R3_PROC/HOSTACE_PROC_ANC_INFO/PUB47byMONTH"
if(!is.null(df)) {
disj.in <- paste0(config$output_dir,"DISJTR_NEW/")
if ( iyr < 1982 ) {
fn.op<-paste0(disj.in,"tr_out_",iyr,"_",iyr,"_post.Rda")
} else {
fn.op<-paste0(disj.in,"tr_out_",iyr,"_",iyr,"_pre.Rda")
}
cat(fn.op,"\n")
if(file.exists(fn.op)) {
op<-readRDS(fn.op)
cat("got group output",nrow(op),"\n")
} else {
if ( iyr >= 1855 ) {
cat("no disj data, stopping")
stop()
} else {
op<-NULL
}
cat("no disj track data","\n")
}
if ( !is.null(op ) ) {if(nrow(op) == 0) op<-NULL}
if ( !is.null(op) ) {
# some uids end up in more than one group: keep each uid in its largest (most frequent) group
tt<-as.data.frame(table(op$testid))
names(tt)[1]<-"testid"
op<-merge(op,tt)
dup<-op$uid[duplicated(op$uid)]
sub<-subset(op,uid %in% dup)
op<-subset(op,!(uid %in% dup))
sp<-split(sub,sub$uid)
ff<-function(X) {
X<-X[order(-X$Freq),]
X<-X[1,]
}
sp<-lapply(sp,ff)
sub<-do.call(rbind,sp)
op<-rbind(op,sub)
# end dup fix
}
st<-NULL
if ( iyr < 1982 ) {
fn.st<-paste0(disj.in,"stat_out_",iyr,"_",iyr,"_post.Rda")
} else {
fn.st<-paste0(disj.in,"stat_out_",iyr,"_",iyr,"_pre.Rda")
}
if(file.exists(fn.st) ) {
st<-readRDS(fn.st)
cat("got station data","\n")
} else {
cat("no station data","\n")
st<-NULL
}
#cat('read in grouped and station data \n')
#########################################################################################
if ( !is.null(op) ) {
tojoin<-data.frame(ship1=NA,ship2=NA)
tojoin<-na.omit(tojoin)
#pdf(paste0('PDF_JOIN/check_joins_',iyr,'.pdf'),height=9,width=9)
#if(!is.null(st)) {
# tmp<-subset(df,uid %in% op$uid | uid %in% st$uid)
#} else {
# tmp<-subset(df,uid %in% op$uid )
#}
tmp<-op
tab.all<-table(tmp$testid,as.numeric(format(tmp$date,"%j")))
jjs<-unique(as.numeric(format(tmp$date,"%j")))
if(length(jjs)<366) {
cols<-setdiff(seq(1,366),jjs)
extra<-array(0,dim=c(nrow(tab.all),length(cols)))
colnames(extra)<-cols
tab.all<-cbind(tab.all,extra)
tab.all<-tab.all[,order(as.numeric(colnames(tab.all)))]
}
for ( ship in rownames(tab.all) ) {
#a<-sp.gr[[igr]]
a<-subset(tmp,testid==ship)
if(nrow(a) < 6 ) next
a<-a[order(a$date),]
j1<-unique(as.numeric(format(a$date,"%j")))
mmin<-max(min(j1-2),1)
mmax<-min(max(j1+2),ncol(tab.all))
pp<-tab.all[,mmin:mmax]
pp<-pp[rowSums(pp)>0,]
cat(ship," ")
cat('no months',length(unique(a$mo)),'no days',length(unique(format(a$date,"%j")))," ")
cat('no candidate tracks',nrow(pp),"\n")
if(length(unique(a$mo)) >= 6 ) {
cat('more than 6 months, skipping','\n')
next # long track, takes ages
}
if(length(unique(format(a$date,"%j"))) >= 100 ) {
cat('more than 100 days, skipping','\n')
next # long track, takes ages
}
#for ( name.track in rownames(pp) ) {
# b<-subset(sub.tr,newid==name.track)
for ( name.add in rownames(pp) ) {
b<-subset(tmp,testid==name.add)
if(name.add==ship)next
if(nrow(b) < 6 ) next
b<-b[order(b$date),]
dd<-c(a$date,b$date)
max.gap<-max(abs(diff(dd)))
units(max.gap)<-"days"
if(max.gap>2) next
toch<-rbind(a,b)
toch<-toch[order(toch$date),]
ch<-icoads.utils::check_track_func(toch)
if(is.null(ch$t.diff))next
if(max(ch$distance)>300) next
if(length(ch$uid.speed) > 0.2*nrow(toch) ) next
if(max(ch$t.diff) > 24*3)next
seg<-icoads.utils::get_breaks_func(toch)
if(is.null(seg))next
if(max(seg$seg)>0)next
cat('---------------------------------------------------------',"\n")
print(table(toch$dck,toch$testid))
cat('---------------------------------------------------------',"\n")
if ( FALSE ) {
par(mfrow=c(2,2))
nameg<-a$testid[1]
namet<-b$testid[1]
plot(toch$lon,toch$lat,col=as.factor(toch$testid),main=paste(nameg,namet))
maps::map(add=T,col="orange",interior=F)
plot(toch$lon,toch$lat,col=as.factor(toch$testid),xlim=c(-180,180),ylim=c(-90,90),main=iyr)
maps::map(add=T,col="orange",interior=F)
plot(toch$date,toch$lat,col=as.factor(toch$testid),main=paste(names(table(toch$dck)),collapse="/ "))
plot(toch$date,toch$lon,col=as.factor(toch$testid),main=paste(nrow(a),nrow(b),nrow(toch)))
tojoin<-rbind(tojoin,data.frame(ship1=nameg,ship2=namet))
#readline(prompt="Press [enter] to continue")
#stop()
}
}
}
# combine ids
if ( nrow(tojoin) > 0 ) {
for ( tj in 1:nrow(tojoin) ) {
tmp$testid<-ifelse(tmp$testid == tojoin$ship1[tj] | tmp$testid == tojoin$ship2[tj],paste0(tojoin$ship1[tj],"_",tojoin$ship1[tj]),tmp$testid)
}
}
tt<-table(tmp$testid)
tt<-as.data.frame(tt)
names(tt)[1]<-"testid"
tmp<-merge(tmp,tt)
tmp<-subset(tmp,Freq>=1)
} # end of is.null(op)
df$lon<-ifelse(df$lon>180,df$lon-360,df$lon)
cat("got data",nrow(df),"\n")
if ( !is.null(op) ) {
df<-merge(df,tmp[,c("uid","testid")],all.x=T)
cat('merged grouped data',"\n")
} else {
df$testid<-NA
}
if ( !is.null(st) ) {
df<-merge(df,st[,c("uid","stat_loc")],all.x=T)
cat("merged station data","\n")
} else {
df$stat_loc<-NA
}
df$id.ok<-ifelse(!is.na(df$testid),"assigned",df$id.ok)
df$final.id<-ifelse(!is.na(df$testid),df$testid,df$track.id)
df$id.ok<-ifelse(!is.na(df$stat_loc),"assigned",df$id.ok)
df$final.id<-ifelse(is.na(df$final.id),df$stat_loc,df$final.id)
df$id.ok<-ifelse(is.na(df$final.id),"missing",df$id.ok)
sp.good<-split(df,df$final.id)
nn<-sapply(sp.good,nrow)
sp.good<-sp.good[nn>4]
df.bad<-subset(df,!(final.id %in% names(sp.good)) | is.na(df$final.id))
list.dup<-lapply(sp.good,icoads.utils::identify_dups)
dup.uids<-as.vector(unlist(list.dup))
sp.good<-lapply(sp.good,function(X) X<-subset(X,!(uid %in% dup.uids)))
list.ch<-lapply(sp.good,icoads.utils::check_track_func)
spd.uids<-lapply(list.ch,function(X) X<-X$uid.speed)
spd.uids<-as.vector(unlist(spd.uids))
list.seg<-lapply(sp.good,icoads.utils::get_breaks_func)
f.good<-function(X) {
X<-data.frame(uid=X$uid,t.diff=X$t.diff,ship.speed=X$speed,ship.course=X$course,speed.flag=ifelse(X$uid %in% X$uid.speed,1,0),max.speed=rep(X$max.speed,times=X$num),Freq=rep(X$num,times=X$num))
}
f.bad<-function(X) {
nna<-rep(NA,times=X$num)
X<-data.frame(uid=X$uid,t.diff=nna,ship.speed=nna,ship.course=nna,speed.flag=nna,max.speed=nna,Freq=rep(X$num,times=X$num))
}
good.bad<-sapply(list.ch,function(X) !is.null(X$max.speed))
list.ch.good<-list.ch[good.bad]
list.ch.bad<-list.ch[!good.bad]
list.ch.df.good<-lapply(list.ch.good, f.good)
list.ch.df.bad<-lapply(list.ch.bad, f.bad)
df.app<-do.call(rbind,c(list.ch.df.good,list.ch.df.bad))
df.seg<-do.call(rbind,list.seg)
#df$Freq.old<-df$Freq
#df$Freq<-NULL
df<-merge(df,df.app,all.x=T)
df<-merge(df,df.seg,all.x=T)
df$proc.flag<-""
df$proc.flag<-ifelse(df$uid %in% dup.uids,"dup",df$proc.flag)
df$proc.flag<-ifelse(df$uid %in% spd.uids,"spd",df$proc.flag)
if ( FALSE ) {
for ( ib in 1:length(list.seg) ) {
ship<-names(list.seg)[ib]
if(max(list.seg[[ib]]$seg) <= -1 ) {
} else {
sub<-subset(df,final.id==ship)
par(mfrow=c(2,2))
plot(sub$date,sub$lat,col=as.factor(sub$seg),main=ship)
plot(sub$date,sub$lon,col=as.factor(sub$seg))
plot(sub$lon,sub$lat,col=as.factor(sub$seg))
maps::map(add=T,col="orange",interior=F)
}
}
}
YRMO <- split(df, data.frame(df$yr, df$mo), drop=TRUE)
if(length(YRMO) > 0 ) {
names(YRMO) <- gsub("\\.","_", names(YRMO))
# OK to index in this way as there are only missing months pre-Pub47 and at the end
for ( imo in 1:length(YRMO) ) {
fn47<-paste0(pub47dir,"/pub47_",names(YRMO)[imo],".Rda")
if ( file.exists(fn47) ) {
tmp<-YRMO[[imo]]
p47<-icoads.utils::get_pub47(files=FALSE,iyr,imo)
#p47<-subset(p47,preferred==1) # where duplicate entries for a month
#p47$p47.uid<-p47$id
#p47$id<-NULL
to.drop<-names(tmp)[names(tmp) %in% names(p47)]
tmp[,to.drop]<-NULL
# these things need to be fixed; they should have been removed earlier
tmp<-subset(tmp,dflag<3)
to.drop<-c("odv","odz","opcv","opcz","rrr","tr","qc26","qc21","c1m","eoh","sim","hop","osv","osz","osiv","osiz","onv","onz","ophv","ophz","ocv","ocz","oav","oaz","oov","ooz","opv","opz","lov","hob")
tmp[,to.drop]<-NULL
YRMO[[imo]]<-merge(tmp,p47[,p47vars],by.x="final.id",by.y="ship_callsign",all.x=T)
} else {
YRMO[[imo]][,p47vars]<-NA
YRMO[[imo]]$ship_callsign<-NULL
}
}
filenames<-paste0(dir.out,names(YRMO),".Rda")
jj<-mapply(saveRDS, YRMO, file = filenames)
}
} # switch for no input data
#saveRDS(tmp[,c("uid","newid")],paste0(disj.in,"djt_joins_",iyr,".Rda"))
#dev.off()
#!/bin/csh
# Immediately exit a script when it encounters an error
#set -e
# start year = $1
# end year = $2
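# Usage (assumed): csh <this script> <start year> <end year>, e.g. 1950 1960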
#conda activate ~/miniconda3/envs/r_env
source /noc/users/metman/SETUPS/setup_icproc
#basedir="/gws/nopw/j04/glosat/development/data/interim/HOSTACE_PROC/TEST_CONFIG/"
set basedir="/noc/mpoc/surface_data/ICOADS_R3_PROC/RUN_ZERO/"
set codedir="/noc/users/eck/nocgit/icoads-r-hostace/"
if ( ! -d ${basedir}OPFILES ) then
#echo Exists
#else
mkdir ${basedir}OPFILES
mkdir ${basedir}output_data
endif
#Adjust your log_output dir as needed
set log_output_dir="${basedir}OPFILES"
touch ${log_output_dir}/dop.$1.$2
/bin/rm ${log_output_dir}/dop.$1.$2
touch ${log_output_dir}/dop.$1.$2
echo "logfile" ${log_output_dir}/dop.$1.$2
echo 'running disjoint tracking' >> ${log_output_dir}/dop.$1.$2
echo 'start year' $1 >> ${log_output_dir}/dop.$1.$2
echo 'end year' $2 >> ${log_output_dir}/dop.$1.$2
echo 'start_at_disj' >> ${log_output_dir}/dop.$1.$2
echo "---------------" >> ${log_output_dir}/dop.$1.$2
set y = $1
while ( $y <= $2 )
echo "XXX getting disjoint" $y $y >> ${log_output_dir}/dop.$1.$2
if ( $y <= 1981 ) then
#Rscript $codedir/rscripts/run_disj_track.R $y $y "post" "none" >> ${log_output_dir}/dop.$1.$2
Rscript /noc/users/eck/run_disj_track2.R $y $y "post" "none" >> ${log_output_dir}/dop.$1.$2
else
#Rscript $codedir/rscripts/run_disj_track.R $y $y "pre" "none" >> ${log_output_dir}/dop.$1.$2
Rscript /noc/users/eck/run_disj_track2.R $y $y "pre" "none" >> ${log_output_dir}/dop.$1.$2
endif
echo 'XXX running joining' >> ${log_output_dir}/dop.$1.$2
#Rscript $codedir/rscripts/run_joins.R $y >> ${log_output_dir}/dop.$1.$2
# this just looks in DISJTR_NEW rather than DISJTR
Rscript /noc/users/eck/run_joins2.R $y >> ${log_output_dir}/dop.$1.$2
echo 'XXX getting height ensemble' >> ${log_output_dir}/dop.$1.$2
Rscript get_height_ens.R $y $y >> ${log_output_dir}/dop.$1.$2
echo 'XXX generating gridded heights' >> ${log_output_dir}/dop.$1.$2
Rscript get_gridded_vals.R $y $y >> ${log_output_dir}/dop.$1.$2
echo 'XXX getting ships with wrong diurnal cycle' >> ${log_output_dir}/dop.$1.$2
Rscript /noc/users/eck/add_diurnal_fails.R $y >> ${log_output_dir}/dop.$1.$2
@ y = $y + 1
end
echo "XXX finished disjoint, joins, heights and diurnal flags" $1 $2 >> ${log_output_dir}/dop.$1.$2