############### Info: # Date: 2015-07-06 by SR # A full model search by "arma_search" will applied to personally choosen DGPS (within this model class) # the outcome can be plotted as a heatmap # Added: best criterion for all information criteria ############### Predefined functions arma_search <- function( x, min_order=c(0,0), max_order=c(5,5), include.mean=FALSE, method="mle", ic=c("AIC", "BIC", "HIC"), trace=FALSE ){ # Input: # - xx multivariate time series # - minOrder: minimal ARMA(p,q)-order allowed # - maxOrder: maximal ARMA(p,q)-order allowed # - trace: trace the alogrithm and model search # Output: # - list( aic=best_aic, fit=best_fit, model=best_model ) library(fArma) best_ic <- 1e9 len <- nrow( x ) #length of series for( p in min_order[1]:max_order[1] ) for( q in min_order[2]:max_order[2] ){ # for all allowed combinations of p,q if( p == 0 && q == 0 ){ next } formula <- as.formula( paste0( "x ~ arma(", p ,",", q , ")" ) ) fit <- tryCatch( armaFit( formula, data=x , method=method, include.mean=include.mean), error=function( err ) FALSE, warning=function( warn ) FALSE ) if( !is.logical( fit ) ){ if(ic == "AIC") { # AIC fit_ic <- fit@fit$aic }else if(ic == "BIC"){ # BIC n_params <- length(fit@fit$coef)+1 # +1 for variance n_obs <- length(fit@data$x) fit_ic <- -2*fit@fit$loglik + n_params * log(n_obs) }else if(ic == "HIC"){ # HIC n_params <- length(fit@fit$coef)+1 # +1 for variance n_obs <- length(fit@data$x) fit_ic <- -2*fit@fit$loglik +2* n_params * log(log(n_obs)) }else{print( "No IC chosen" ) } if( fit_ic < best_ic ){ best_ic <- fit_ic best_fit <- fit best_model <- c( p, q ) } if( trace ){ ss <- paste0("(", p, ",", q, "): ", ic ," = ", fit_ic ) print( ss ) } } else{ if( trace ){ ss <- paste( sep="", "(", p, ",", q, "): None" ) print( ss ) } } } if( best_ic < 1e9 ){ #return( list( ic=best_ic, fit=best_fit, model=best_model ) ) return( c(best_model, best_fit@fit$coef) ) } return( FALSE ) } ic_heatmap <- function(model_matrix, ic, p,q){ # Input: # - model_matrix: pxq matrix with appearances choosen in the model search # - ic: information criterion for title # - p,q choosen orders by model search # Output: # - heatmap for plot my_palette <- colorRampPalette(c("red", "yellow", "green"))(n = 99) # 100 percent col_breaks <- c( seq(0,10,length=40), # for red seq(11,40,length=30), # for yellow seq(41,100,length=30)) # for green hm <- heatmap.2( model_matrix, cellnote = model_matrix, # same data set for cell labels main = paste0("IC: ",ic," (DGP: ARMA(", p, ",",q,"))"), # heat map title notecol="black", # change font color of cell labels to black density.info="none", # turns off density plot inside color legend trace="none", # turns off trace lines inside the heat map #margins =c(10,10), # widens margins around plot col=my_palette, # use on color palette defined earlier breaks=col_breaks, # enable color transition at specified limits dendrogram="none", # only draw a row dendrogram Colv="NA", Rowv="NA", # turn off column and row clustering srtCol=0, adjCol = c(0.5,1), key = TRUE) return(hm) } ############### Packages library("RColorBrewer") library("dynlm") library("parallel") library("fArma") library("gplots") ############### Params and Data # setups are lists with specifications of # - reps: number of replications # - n: number of observations # - ar: vector of AR coefs with order a1, a2, ... # - ma: vector ma coefs with order b1, b2, ... # - min_order / max_order: the model set in which to search, i.e. # all models within ARMA(min_oder[1], min_order[2]) to ARMA(max_oder[1], max_order[2]) set.seed(101) min_order <- c(0,0) # p_min = 0 and q_min = 0 max_order <- c(3,3) # p_max = 3 and q_max = 3 ### Define your setup: # 1st case setup <- list(reps = 1000, ar = c(0.8, -0.2, 0.5),ma = c(NULL), var = 1, n=1000) # ARMA(1,0) with alpha_1=0.8 # 2nd case setup <- list(reps = 1000, ar = c(0.3, 0.5),ma = c(NULL), var = 1, n=1000) # 3d case setup <- list(reps = 1000, ar = c(0.3,0.5),ma = c(0.2,-0.3), var = 1, n=1000) # Test for stationarity if(max(abs(polyroot(c(1, -setup[["ar"]]))))<= 1) print("Not stationary") # reps replications of ARMA(p,q) samples with n observations ts <- replicate( setup[["reps"]], arima.sim(n = setup[["n"]], list(ar = c(setup[["ar"]]), ma=c(setup[["ma"]]), sd = sqrt(setup[["var"]])))) # for each criterion: do a full model search by function "arma_search" # for this setup for(ic in c("AIC", "BIC", "HIC")){ cl <- makeCluster(7) clusterEvalQ(cl, {library(fArma)}) # send FArma library to all Clusters clusterExport(cl, varlist=c("ts", "arma_search"), envir=environment()) # send necessary objects of global environments to all Clusters clusterEvalQ(cl, sessionInfo()) assign(ic, value = parCapply(cl, ts, arma_search, min_order=min_order , max_order=max_order, ic=ic)) # Column par Apply stopCluster(cl) } ic <- list(AIC, BIC, HIC) #save(ic, file=paste0("ic_ar",length(setup["ar"]) ,"_ma",length(setup["ar"]),".RData")) #save(ic, file=paste0("ic_ar08.RData")) #save(ic, file=paste0("ic_ar03ar05.RData")) #save(ic, file=paste0("ic_ar03ar05ma02mam03.RData")) # load old data: load("ic_ar03ar05.RData") load("ic_ar03ar05ma02mam03.RData") load("ic_ar08.RData") ### densities pdf("vergleich_kriterien.pdf", width=16, height=10) plot(density(sapply(ic_ar08[[1]], function(x) x["ar1"])), col="red", ylim=c(0,25)) abline(v=0.8, col="black", cex=2) lines(density(sapply(ic_ar08[[2]], function(x) x["ar1"])), col="blue", lwd=1) lines(density(sapply(ic_ar08[[3]], function(x) x["ar1"])), col="green", lwd=1) legend("topright", inset=0.02, legend=c("AIC","BIC","HIC"), col=c("red", "blue", "green"), lty=c(1,1,1,1), lwd=c(1,1,1,1)) dev.off() # analysis load("ic_ar08.RData") lapply(ic_ar08, function(x) sum(sapply(x, function(x) is.null(x["ar1"])))) # 0-0-0 lapply(ic_ar08, function(x) sum(sapply(x, function(x) x[1:2] == c(1,0)))/2000*100) # # 63,8% - 99,7% -91,8% load("ic_ar03ar05.RData") lapply(ic_ar08, function(x) sum(sapply(x, function(x) is.null(x["ar1"])))) # 0-0-0 lapply(ic_ar08, function(x) sum(sapply(x, function(x) x[1:2] == c(2,0)))/2000*100) # # 77.6 - 87.15 - 94.65 load("ic_ar03ar05ma02mam03.RData") lapply(ic_ar08, function(x) sum(sapply(x, function(x) is.null(x["ar1"])))) # 0-0-0 lapply(ic_ar08, function(x) sum(sapply(x, function(x) x[1:2] == c(2,2)))/2000*100) # # 38,55 - 12,15% - 24,65% ######################################################### ## Heatmaps and Model Matrix Generation ######################################################### ic <- c(1,2,3) names(ic) <-c("AIC", "BIC", "HIC") orders <- matrix(c(1,0,2,0, 2,2), ncol=2, byrow=T) counter=0 for(r_data in c("ic_ar08","ic_ar03ar05","ic_ar03ar05ma02mam03")){ # load each data set or each process counter <- counter +1 print(r_data) load(paste0(r_data, ".RData")) for(k in ic){ # for each information criterion print(names(ic)[k]) ### Generate a Model Matrix: i.e. a 4x4 matrix with percentages of chosen models model_matrix <- matrix(NA, ncol=4, nrow=4) dimnames(model_matrix)<-list(c("p=0","p=1", "p=2", "p=3"), c("q=0","q=1", "q=2", "q=3")) for(ar in min_order[1]:max_order[1]){ # for all models we have looked into for(ma in min_order[2]:max_order[2]){ print(paste0("ARMA(p,q) mit p=", ar, " und q=", ma, ": ", sum(sapply(ic_ar08[[k]], function(x) x[1]==ar && x[2]==ma),na.rm=TRUE)/1000*100)) model_matrix[paste0("p=",ar), paste0("q=",ma)] <- sum(sapply(ic_ar08[[k]], function(x) x[1]==ar && x[2]==ma),na.rm=TRUE)/1000*100 } } if(sum(model_matrix)!=100) print(paste0("ERROR; Sum was: ",sum(model_matrix))) print(paste0(names(ic)[k], "_p" , orders[counter,1] , "_q", orders[counter,2] ,"_n1000reps1000.pdf")) # Generate heatmap pdf(paste0(names(ic)[k], "_p" , orders[counter,1] , "_q", orders[counter,2] ,"_n1000reps1000.pdf"), width=16, height=10) ic_heatmap(model_matrix, ic=names(ic)[k], p=orders[counter,1], q=orders[counter,2]) dev.off() } } ######################################################### ## best criterion for all information criteria ## For all processes ARMA(0-3,0-3) ######################################################### set.seed(101) min_order <- c(3,0) # p_min = 0 and q_min = 0 max_order <- c(3,3) # p_max = 3 and q_max = 3 search_min_order <- c(0,0) # p_min = 0 and q_min = 0 search_max_order <- c(5,5) # p_max = 3 and q_max = 3 var <- 1 # variance of shocks reps <- 1000 # number of replications of time series n_obs <- 1000 # number of observations per time series summary_list <- list() for(AR in min_order[1]:max_order[1]){ for(MA in min_order[2]:max_order[2]){ setup <- list(reps = reps, ar = runif(AR, min=-1, max=1), ma = runif(MA, min=-1, max=1), var = var, n = n_obs) # ARMA(1,0) with alpha_1=0.8 # Test for stationarity: Get new DGP values if not stationary if(AR > 0 && MA == 0){ #print("here") while(max(abs(polyroot(c(1, -setup[["ar"]]))))<= 1.001){ print("ARpMA0 Reboot") setup <- list(reps = reps, ar = runif(AR, min=-1, max=1), ma = runif(MA, min=-1, max=1), var = var, n = n_obs) } } else if(AR == 0 && MA > 0){ #print("here2") while(max(abs(polyroot(c(1, -setup[["ma"]]))))<= 1.001){ print("AR0MAq Reboot") setup <- list(reps = reps, ar = runif(AR, min=-1, max=1), ma = runif(MA, min=-1, max=1), var = var, n = n_obs) } } else if(AR > 0 && MA > 0){ print("here3") while(max(abs(polyroot(c(1, -setup[["ar"]]))))<= 1.001 && max(abs(polyroot(c(1, -setup[["ma"]]))))<= 1.001){ print("ARpMAq Reboot") setup <- list(reps = reps, ar = runif(AR, min=-1, max=1), ma = runif(MA, min=-1, max=1), var = var, n = n_obs) } } print(paste0("AR (",AR,") Parameter: " , round(setup[["ar"]],2) , " und MA(",MA,") Parameter: ", round(setup[["ma"]], 2))) # reps replications of ARMA(p,q) samples with n observations ts <- replicate( setup[["reps"]], arima.sim(n = setup[["n"]], list(ar = c(setup[["ar"]]), ma=c(setup[["ma"]]), sd = sqrt(setup[["var"]])))) for(ic in c("AIC", "BIC", "HIC")){ cl <- makeCluster(7) clusterEvalQ( cl, {library(fArma)}) # send FArma library to all Clusters clusterExport(cl, varlist=c("ts", "arma_search"), envir=environment()) # send necessary objects of global environments to all Clusters clusterEvalQ(cl, sessionInfo()) assign(ic, value = parCapply(cl, ts, arma_search, min_order=search_min_order , max_order=search_max_order, ic=ic)) # Column par Apply #test = apply(ts, 2, arma_search, min_order=search_min_order , max_order=search_max_order, ic=ic) # Column par Apply # HIC stopCluster(cl) } # save the summary in a list consisting of an entry for each process summary_list[[paste0(AR,"-",MA)]] <- list(coef = c(setup[["ar"]], setup[["ma"]]), simulations = ts, ic = list(AIC, BIC, HIC)) } } save(summary_list, file="summary.RData") str(summary_list[[1]]$ic, max=1) ic=c(1,2,3) names(ic)=c("AIC", "BIC", "HIC") for(list in 1:length(summary_list)){ for(i in ic) {# information criteria model_matrix <- matrix(NA, ncol=6, nrow=6) dimnames(model_matrix)<-list(c("p=0","p=1", "p=2", "p=3", "p=4", "p=5"), c("q=0","q=1", "q=2", "q=3", "q=4", "q=5")) for(ar in search_min_order[1]:search_max_order[1]){ # for all models we have looked into for(ma in search_min_order[2]:search_max_order[2]){ print(paste0("ARMA(p,q) mit p=", ar, " und q=", ma, ": ", sum(sapply(summary_list[[list]]$ic[[i]], function(x) x[1]==ar && x[2]==ma), na.rm=TRUE)/1000*100)) model_matrix[paste0("p=",ar), paste0("q=",ma)] <- sum(sapply(summary_list[[list]]$ic[[i]], function(x) x[1]==ar && x[2]==ma), na.rm=TRUE)/1000*100 } } if(sum(model_matrix)!=100) print(paste0("ERROR; Sum was: ",sum(model_matrix))) # Generate heatmap orders <- as.numeric(unlist(strsplit(names(summary_list)[list] , "-"))) #print(paste0(names(ic)[i], "_p" , orders[1] , "_q", orders[2] ,"_n1000reps1000.pdf")) pdf(paste0(names(ic)[i], "_p" , orders[1] , "_q", orders[2] ,"_n1000reps1000.pdf"), width=16, height=10) ic_heatmap(model_matrix, ic=names(ic)[i], p=orders[1], q=orders[2]) dev.off() } }