############### Info:
# Date: 2015-07-06 by SR
# A full model search by "arma_search" will applied to personally choosen DGPS (within this model class)
# the outcome can be plotted as a heatmap
# Added: best criterion for all information criteria


############### Predefined functions
arma_search 	<- function( 	x,  min_order=c(0,0), max_order=c(5,5), include.mean=FALSE, 
								method="mle", ic=c("AIC", "BIC", "HIC"), trace=FALSE ){ 
  # Input: 
  # - xx multivariate time series
  # - minOrder: minimal ARMA(p,q)-order allowed
  # - maxOrder: maximal ARMA(p,q)-order allowed
  # - trace: trace the alogrithm and model search
  # Output:
  # - list( aic=best_aic, fit=best_fit, model=best_model )
  library(fArma)
  best_ic 		<- 1e9
  len 			<- nrow( x ) #length of series
  for( p in min_order[1]:max_order[1] ) for( q in min_order[2]:max_order[2] ){ 
    # for all allowed combinations of p,q
    if( p == 0 && q == 0 ){    
      next
    }    
    formula 		<- as.formula( paste0( "x ~ arma(", p ,",", q , ")" ) )
    
    fit 			<- tryCatch( armaFit( formula, data=x , method=method, include.mean=include.mean),
								error=function( err ) FALSE,
								warning=function( warn ) FALSE )
    if( !is.logical( fit ) ){ 
		if(ic == "AIC") { # AIC
			fit_ic 		<- fit@fit$aic
		}else if(ic == "BIC"){ # BIC
			n_params 	<- length(fit@fit$coef)+1 # +1 for variance
			n_obs 		<- length(fit@data$x)
			fit_ic 		<- -2*fit@fit$loglik + n_params * log(n_obs)
		}else if(ic == "HIC"){ # HIC
			n_params 	<- length(fit@fit$coef)+1 # +1 for variance
			n_obs 		<- length(fit@data$x)
			fit_ic 		<- -2*fit@fit$loglik +2* n_params * log(log(n_obs))
		}else{print( "No IC chosen" )
		}
      
		if( fit_ic <  best_ic ){    
			best_ic 	<- fit_ic
			best_fit 	<- fit
			best_model 	<- c( p, q )
		}    
		if( trace ){    
			ss 			<- paste0("(", p, ",", q, "): ", ic ," = ", fit_ic )
			print( ss ) 
		}	    
    } else{    
		if( trace ){    
			ss 			<- paste( sep="", "(", p, ",", q, "): None" )
			print( ss ) 
		}    
    }    
	}
	if(  best_ic < 1e9 ){
		#return( list( ic=best_ic, fit=best_fit, model=best_model ) )
		return( c(best_model, best_fit@fit$coef) )
	}
  return( FALSE )
}



ic_heatmap <- function(model_matrix, ic, p,q){
  # Input: 
  # - model_matrix: pxq matrix with appearances choosen in the model search
  # - ic: information criterion for title
  # - p,q choosen orders by model search
  # Output:
  # - heatmap for plot
	my_palette 	<- colorRampPalette(c("red", "yellow", "green"))(n = 99) # 100 percent
	col_breaks 	<- c(	seq(0,10,length=40),                # for red
						seq(11,40,length=30),               # for yellow
						seq(41,100,length=30))              # for green
  
  hm 			<- heatmap.2(	model_matrix,
								cellnote = model_matrix,  # same data set for cell labels
								main = paste0("IC: ",ic," (DGP: ARMA(", p, ",",q,"))"), # heat map title
								notecol="black",      # change font color of cell labels to black
								density.info="none",  # turns off density plot inside color legend
								trace="none",         # turns off trace lines inside the heat map
								#margins =c(10,10),     # widens margins around plot
								col=my_palette,       # use on color palette defined earlier 
								breaks=col_breaks,    # enable color transition at specified limits
								dendrogram="none",     # only draw a row dendrogram
								Colv="NA", Rowv="NA", # turn off column and row clustering
								srtCol=0,   adjCol = c(0.5,1),
								key = TRUE)
	return(hm)
}

############### Packages
library("RColorBrewer")
library("dynlm")
library("parallel")
library("fArma")
library("gplots")

############### Params and Data
# setups are lists with specifications of
# - reps: number of replications
# - n: number of observations
# - ar: vector of AR coefs with order a1, a2, ...
# - ma: vector ma coefs with order b1, b2, ...
# - min_order / max_order: the model set in which to search, i.e.
# all models within ARMA(min_oder[1], min_order[2]) to ARMA(max_oder[1], max_order[2])

set.seed(101)

min_order 		<- c(0,0) # p_min = 0 and q_min = 0
max_order 		<- c(3,3) # p_max = 3 and q_max = 3


### Define your setup:
# 1st case
setup 			<- list(reps = 1000, ar = c(0.8, -0.2, 0.5),ma = c(NULL), var = 1, n=1000)  # ARMA(1,0) with alpha_1=0.8

# 2nd case
setup 			<- list(reps = 1000, ar = c(0.3, 0.5),ma = c(NULL), var = 1, n=1000) 

# 3d case
setup 			<- list(reps = 1000, ar = c(0.3,0.5),ma = c(0.2,-0.3), var = 1, n=1000)

# Test for stationarity
if(max(abs(polyroot(c(1, -setup[["ar"]]))))<= 1) print("Not stationary")

# reps replications of ARMA(p,q) samples with n observations
ts 				<- replicate(	setup[["reps"]], 
								arima.sim(n = setup[["n"]], 
								list(ar = c(setup[["ar"]]), ma=c(setup[["ma"]]), sd = sqrt(setup[["var"]]))))

# for each criterion: do a full model search by function "arma_search"
# for this setup
for(ic in c("AIC", "BIC", "HIC")){
	cl 				<- makeCluster(7)
	clusterEvalQ(cl, {library(fArma)}) # send FArma library to all Clusters
	clusterExport(cl, varlist=c("ts", "arma_search"), envir=environment()) # send necessary objects of global environments to all Clusters
	clusterEvalQ(cl, sessionInfo())
	assign(ic, value = parCapply(cl, ts, arma_search, min_order=min_order , max_order=max_order, ic=ic)) # Column par Apply
	stopCluster(cl)
}

ic 				<- list(AIC, BIC, HIC)
#save(ic, file=paste0("ic_ar",length(setup["ar"]) ,"_ma",length(setup["ar"]),".RData"))
#save(ic, file=paste0("ic_ar08.RData"))
#save(ic, file=paste0("ic_ar03ar05.RData"))
#save(ic, file=paste0("ic_ar03ar05ma02mam03.RData"))

# load old data:
load("ic_ar03ar05.RData")
load("ic_ar03ar05ma02mam03.RData")
load("ic_ar08.RData")

### densities
pdf("vergleich_kriterien.pdf", width=16, height=10)
plot(density(sapply(ic_ar08[[1]], function(x) x["ar1"])), col="red", ylim=c(0,25))
abline(v=0.8, col="black", cex=2)
lines(density(sapply(ic_ar08[[2]], function(x) x["ar1"])), col="blue", lwd=1)
lines(density(sapply(ic_ar08[[3]], function(x) x["ar1"])), col="green", lwd=1)

legend("topright", inset=0.02,
       legend=c("AIC","BIC","HIC"),
       col=c("red", "blue", "green"),
       lty=c(1,1,1,1), lwd=c(1,1,1,1))
dev.off()


# analysis
load("ic_ar08.RData")
lapply(ic_ar08, function(x) sum(sapply(x, function(x) is.null(x["ar1"])))) # 0-0-0
lapply(ic_ar08, function(x) sum(sapply(x, function(x) x[1:2] == c(1,0)))/2000*100) # # 63,8% - 99,7% -91,8%


load("ic_ar03ar05.RData")
lapply(ic_ar08, function(x) sum(sapply(x, function(x) is.null(x["ar1"])))) # 0-0-0
lapply(ic_ar08, function(x) sum(sapply(x, function(x) x[1:2] == c(2,0)))/2000*100) # # 77.6 - 87.15 - 94.65


load("ic_ar03ar05ma02mam03.RData")
lapply(ic_ar08, function(x) sum(sapply(x, function(x) is.null(x["ar1"])))) # 0-0-0
lapply(ic_ar08, function(x) sum(sapply(x, function(x) x[1:2] == c(2,2)))/2000*100) # # 38,55 - 12,15% - 24,65%


#########################################################
## Heatmaps and Model Matrix Generation
#########################################################

ic				<- c(1,2,3)
names(ic)		<-c("AIC", "BIC", "HIC")
orders 			<- matrix(c(1,0,2,0, 2,2), ncol=2, byrow=T)

counter=0
for(r_data in c("ic_ar08","ic_ar03ar05","ic_ar03ar05ma02mam03")){ # load each data set or each process
	counter 		<- counter +1
	print(r_data)
	load(paste0(r_data, ".RData"))
	for(k in ic){  # for each information criterion
		print(names(ic)[k])  
		### Generate a Model Matrix: i.e. a 4x4 matrix with percentages of chosen models
		model_matrix <- matrix(NA, ncol=4, nrow=4)
		dimnames(model_matrix)<-list(c("p=0","p=1", "p=2", "p=3"), c("q=0","q=1", "q=2", "q=3"))
		for(ar in min_order[1]:max_order[1]){ # for all models we have looked into
			for(ma in min_order[2]:max_order[2]){
				print(paste0("ARMA(p,q) mit p=", ar, " und q=", ma, ": ", sum(sapply(ic_ar08[[k]], function(x) x[1]==ar && x[2]==ma),na.rm=TRUE)/1000*100))
				model_matrix[paste0("p=",ar), paste0("q=",ma)] <- sum(sapply(ic_ar08[[k]], function(x) x[1]==ar && x[2]==ma),na.rm=TRUE)/1000*100
			}
		}
    if(sum(model_matrix)!=100) print(paste0("ERROR; Sum was: ",sum(model_matrix))) 
    print(paste0(names(ic)[k], "_p" , orders[counter,1] , "_q", orders[counter,2] ,"_n1000reps1000.pdf"))
    # Generate heatmap
    pdf(paste0(names(ic)[k], "_p" , orders[counter,1] , "_q", orders[counter,2] ,"_n1000reps1000.pdf"), width=16, height=10)
    ic_heatmap(model_matrix, ic=names(ic)[k], p=orders[counter,1], q=orders[counter,2])
    dev.off()
  }  
}


#########################################################
## best criterion for all information criteria
## For all processes ARMA(0-3,0-3)
#########################################################


set.seed(101)

min_order        <- c(3,0) # p_min = 0 and q_min = 0
max_order        <- c(3,3) # p_max = 3 and q_max = 3
search_min_order <- c(0,0) # p_min = 0 and q_min = 0
search_max_order <- c(5,5) # p_max = 3 and q_max = 3
var              <- 1      # variance of shocks
reps             <- 1000   # number of replications of time series
n_obs            <- 1000   # number of observations per time series

summary_list <- list()

for(AR in min_order[1]:max_order[1]){
  for(MA in min_order[2]:max_order[2]){
		setup 			<- list(reps = reps, ar = runif(AR, min=-1, max=1), ma = runif(MA, min=-1, max=1), var = var, n = n_obs)  # ARMA(1,0) with alpha_1=0.8
    # Test for stationarity: Get new DGP values if not stationary
	if(AR > 0 && MA == 0){
      #print("here")
      while(max(abs(polyroot(c(1, -setup[["ar"]]))))<= 1.001){
        print("ARpMA0 Reboot")
        setup 				<- list(reps = reps, ar = runif(AR, min=-1, max=1), ma = runif(MA, min=-1, max=1), var = var, n = n_obs)        
      }   
    } else if(AR == 0 && MA > 0){
      #print("here2")
      while(max(abs(polyroot(c(1, -setup[["ma"]]))))<= 1.001){
        print("AR0MAq Reboot")
        setup 				<- list(reps = reps, ar = runif(AR, min=-1, max=1), ma = runif(MA, min=-1, max=1), var = var, n = n_obs)        
      }
    } else if(AR > 0 && MA > 0){
      print("here3")
      while(max(abs(polyroot(c(1, -setup[["ar"]]))))<= 1.001 && max(abs(polyroot(c(1, -setup[["ma"]]))))<= 1.001){
        print("ARpMAq Reboot")
        setup 				<- list(reps = reps, ar = runif(AR, min=-1, max=1), ma = runif(MA, min=-1, max=1), var = var, n = n_obs)        
      }
    }
    
    print(paste0("AR (",AR,") Parameter: " , round(setup[["ar"]],2) , " und MA(",MA,") Parameter: ",  round(setup[["ma"]], 2)))
    
    # reps replications of ARMA(p,q) samples with n observations
    ts 				<- replicate(	setup[["reps"]], 
									arima.sim(n = setup[["n"]], 
									list(ar = c(setup[["ar"]]), ma=c(setup[["ma"]]), 
									sd = sqrt(setup[["var"]]))))
    
    
    
    for(ic in c("AIC", "BIC", "HIC")){
      cl 			<- makeCluster(7)
      clusterEvalQ(	cl, {library(fArma)}) # send FArma library to all Clusters
      clusterExport(cl, varlist=c("ts", "arma_search"), envir=environment()) # send necessary objects of global environments to all Clusters
      clusterEvalQ(cl, sessionInfo())
      assign(ic, value = parCapply(cl, ts, arma_search, min_order=search_min_order , max_order=search_max_order, ic=ic)) # Column par Apply
      #test =  apply(ts, 2, arma_search, min_order=search_min_order , max_order=search_max_order, ic=ic) # Column par Apply
      # HIC
      stopCluster(cl)
    }
    
  
   # save the summary in a list consisting of an entry for each process
   summary_list[[paste0(AR,"-",MA)]] <- list(coef = c(setup[["ar"]], setup[["ma"]]),
                                             simulations = ts,
                                             ic = list(AIC, BIC, HIC))                                         
  }
}

save(summary_list, file="summary.RData")
str(summary_list[[1]]$ic, max=1)


ic=c(1,2,3)
names(ic)=c("AIC", "BIC", "HIC")

for(list in 1:length(summary_list)){
  for(i in ic) {# information criteria
    model_matrix <- matrix(NA, ncol=6, nrow=6)
    dimnames(model_matrix)<-list(c("p=0","p=1", "p=2", "p=3", "p=4", "p=5"), c("q=0","q=1", "q=2", "q=3", "q=4", "q=5"))
    for(ar in search_min_order[1]:search_max_order[1]){ # for all models we have looked into
      for(ma in search_min_order[2]:search_max_order[2]){
        print(paste0("ARMA(p,q) mit p=", ar, " und q=", ma, ": ", 
                     sum(sapply(summary_list[[list]]$ic[[i]], function(x) x[1]==ar && x[2]==ma), na.rm=TRUE)/1000*100))
        model_matrix[paste0("p=",ar), paste0("q=",ma)] <- sum(sapply(summary_list[[list]]$ic[[i]], function(x) x[1]==ar && x[2]==ma), na.rm=TRUE)/1000*100
      }
    }
    if(sum(model_matrix)!=100) print(paste0("ERROR; Sum was: ",sum(model_matrix))) 
    # Generate heatmap
    orders <- as.numeric(unlist(strsplit(names(summary_list)[list] , "-")))
    #print(paste0(names(ic)[i], "_p" , orders[1] , "_q", orders[2] ,"_n1000reps1000.pdf"))
    pdf(paste0(names(ic)[i], "_p" , orders[1] , "_q", orders[2] ,"_n1000reps1000.pdf"), width=16, height=10)
    ic_heatmap(model_matrix, ic=names(ic)[i], p=orders[1], q=orders[2])
    dev.off()
  }
}