#################################################
### AOE ARIMA Prediction
### This R script contains non-seasonal and seasonal ARIMA prediction

library("zoo")
library("dynlm")
library("forecast")
library("fpp")   # for datasets; install.packages("fpp")

#################################################
### Non-seasonal ARIMA

# a) Import BMW data and construct a vector with the adjusted price series
setwd("T:/Lehre/SS_2016/AOE/Aufgabenkatalog/data")
BMW   <- read.csv("BMWData.csv")
price <- rev(BMW$Adj.Close)
names(price) <- rev(BMW$Date)   # assign the dates as names to the elements of the price vector
lprice  <- log(price)
lreturn <- diff(lprice)
plot(lreturn, type = "l")

# b) Use auto.arima() for an automatic model fit
model_fit <- auto.arima(lreturn, max.p = 5, max.q = 5, max.order = 10, max.d = 1,
                        start.p = 2, start.q = 2, stationary = FALSE, seasonal = FALSE,
                        ic = "aicc", stepwise = TRUE, trace = TRUE,
                        allowdrift = FALSE, allowmean = TRUE)
summary(model_fit)

# c) Use the arima() function for a manual model fit:
summary(arima(lreturn, order = c(1, 0, 3), include.mean = FALSE))

# d) Predict H steps ahead using the forecast() function
H <- 5
plot(forecast(model_fit, h = H))
forecasts <- forecast(model_fit, h = H)$mean

# e) Doing a manual forecast
coef  <- model_fit$coef
y     <- c(lreturn, rep(0, times = H))
resid <- c(model_fit$residuals, rep(0, times = H))

for (h in 1:H) {   # (to step through the loop manually, set h = 1)
  T <- length(lreturn) + h - 1
  # y_{T+1} = alpha_1_hat * y_T + theta_1_hat * eps_T_hat + theta_2_hat * eps_{T-1}_hat + theta_3_hat * eps_{T-2}_hat
  # rev() puts the most recent residual first so that it is multiplied by ma1
  y[T + 1] <- coef["ar1"] * y[T] + coef[-1] %*% rev(resid[(T - 2):T])
  print(T)
  print(coef["ar1"] * y[T])
  print(resid[(T - 2):T])
  print(coef[-1] %*% rev(resid[(T - 2):T]))
  print(y[T + 1])
}

# Manual vs. functional forecast (the first forecast sits at position length(lreturn) + 1 of y):
y[(length(lreturn) + 1):(length(lreturn) + H)] - forecasts

# f) Compare different models by calculating the RMSE (root mean squared error)
#    on a hold-out sample of the last h observations
getrmse <- function(x, h, ...) {
  train.end  <- time(x)[length(x) - h]
  test.start <- time(x)[length(x) - h + 1]
  train <- window(x, end = train.end)      # training sample
  test  <- window(x, start = test.start)   # test sample
  fit <- Arima(train, ...)
  fc  <- forecast(fit, h = h)
  return(accuracy(fc, test)[2, "RMSE"])    # see ?accuracy: forecasts fc vs. realized test values
}

getrmse(lreturn, h = 24, order = c(1, 0, 3))
getrmse(lreturn, h = 24, order = c(1, 0, 2))
getrmse(lreturn, h = 24, order = c(2, 0, 3))
getrmse(lreturn, h = 24, order = c(2, 0, 2))
getrmse(lreturn, h = 24, order = c(3, 0, 3))
getrmse(lreturn, h = 24, order = c(3, 0, 2))
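# The six getrmse() calls above can also be written as a loop. This is only a sketch of the
# same comparison (the candidate orders are exactly the ones tried above, not an exhaustive
# search), so the numbers should match the individual calls.
candidates <- list(c(1, 0, 3), c(1, 0, 2), c(2, 0, 3), c(2, 0, 2), c(3, 0, 3), c(3, 0, 2))
rmse_24 <- sapply(candidates, function(ord) getrmse(lreturn, h = 24, order = ord))
names(rmse_24) <- sapply(candidates, paste, collapse = ",")
sort(rmse_24)   # smallest hold-out RMSE first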
#################################################
### Seasonal ARIMA: monthly corticosteroid (H02) drug sales in Australia, m = 12

lh02 <- log(h02)
par(mfrow = c(2, 1))
plot(h02,  ylab = "H02 sales (million scripts)", xlab = "Year")
plot(lh02, ylab = "Log H02 sales", xlab = "Year")

tsdisplay(diff(lh02, 12), main = "Seasonally differenced H02 scripts", xlab = "Year")
# In the plots of the seasonally differenced data, there are spikes in the PACF at lags 12 and 24,
# but nothing at the seasonal lags in the ACF. This may be suggestive of a seasonal AR(2) term.
# In the non-seasonal lags, there are three significant spikes in the PACF, suggesting a possible
# AR(3) term. The pattern in the ACF is not indicative of any simple model.

fit   <- Arima(h02, order = c(3, 0, 1), seasonal = c(0, 1, 2), lambda = 0)
fit_2 <- Arima(h02, order = c(3, 0, 1), seasonal = c(2, 1, 0), lambda = 0)

tsdisplay(residuals(fit))
Box.test(residuals(fit), lag = 36, fitdf = 6, type = "Ljung")
# The residual plots show significant spikes in both the ACF and PACF, and the model fails the
# Ljung-Box test. The model can still be used for forecasting, but the prediction intervals may
# not be accurate because of the correlated residuals.

plot(forecast(fit), ylab = "H02 sales (million scripts)", xlab = "Year")
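# As a rough check of the two candidate specifications, fit and fit_2 can be compared by their
# AICc values and by hold-out RMSE. This is only a sketch: it reuses the getrmse() helper defined
# above, and the 24-month test horizon is an arbitrary choice, not taken from the original exercise.
fit$aicc
fit_2$aicc
getrmse(h02, h = 24, order = c(3, 0, 1), seasonal = c(0, 1, 2), lambda = 0)
getrmse(h02, h = 24, order = c(3, 0, 1), seasonal = c(2, 1, 0), lambda = 0)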