### from here: ### http://ellisp.github.io/blog/2015/11/15/linear-model-timeseries/ getwd() http://toddwschneider.com/posts/analyzing-1-1-billion-nyc-taxi-and-uber-trips-with-a-vengeance/ http://dmlc.ml/rstats/2015/11/03/training-deep-net-with-R.html # Installation fo GraphicsMagick library(animation) library(dplyr) library(tidyr) library(ggplot2) library(gridExtra) library(showtext) #-------------set up------------- # Fonts and themes: font.add.google("Poppins", "myfont") showtext.auto() theme_set(theme_light(base_family = "myfont")) # sample and population size: n <- 200 popn <- n * 10 ar_par <- 0.99 #----------simulate data--------- set.seed(123) # Linear model with a time series random element, n * 10 in length: # DGP: y = 0.3 * x + u where u_t = 0.8 * u_t-1 + e_t and e_t ~ IID df1 <- data.frame( x = rnorm(popn)) %>% mutate(y = 0.3 * x + scale(arima.sim(list(ar = ar_par), popn)), ind = 1:popn, type = "TimeSeries") # cut back to just the first n points: df1 <- df1[1:n, ] # Same linear model, with i.i.d. white noise random element: # DGP: y = 0.3 * x + u where u_t ~ IID df2 <- data.frame( x = rnorm(n)) %>% mutate(y = 0.3 * x + rnorm(n), ind = 1:n, type = "CrossSection") # draw the time series response for DGP 1: (p0 <- df1 %>% ggplot(aes(x = ind, y = y)) + geom_line() + labs(x = "Time") + ggtitle("Y from linear model with time series random element")) # draw the time series response for DGP 2: (p0 <- df2 %>% ggplot(aes(x = ind, y = y)) + geom_line() + labs(x = "Time") + ggtitle("Y from linear model with iid random element")) df_both <- rbind(df1, df2) saveGIF({ for(i in 5:n){ # I name the images i + 1000 so alphabetical order is also numeric df1_tmp <- df1[1:i, ] df2_tmp <- df2[1:i, ] residuals1 <- data.frame(res = residuals(lm(y ~ x, data = df1_tmp)), ind = 1:i, type = "TimeSeries") residuals2 <- data.frame(res = residuals(lm(y ~ x, data = df2_tmp)), ind = 1:i, type = "CrossSection") # connected scatter plots: p1 <- ggplot(df_both[c(1:i, (n + 1) : (n + i)), ], aes(x, y, colour = ind)) + facet_wrap(~type, ncol = 2) + geom_path() + geom_point() + geom_abline( slope = 0.3) + geom_smooth(method = "lm", formula = y ~ 1 + x, se = TRUE, size = 2, colour = "red") + theme(legend.position = "none") + xlim(range(df_both$x)) + ylim(range(df_both$y)) + ggtitle(paste("Connected scatterplot showing regression on first", i, "points")) # Residuals plots p2 <- residuals1 %>% rbind(residuals2) %>% mutate(type = factor(type, levels = c("CrossSection", paste0("TimeSeries with a_1 = ", ar_par)))) %>% ggplot(aes(x = ind, y = res)) + scale_x_continuous(limits = c(0, n)) + facet_wrap(~type) + geom_line() + geom_point() + ggtitle("Residuals from regression so far") + labs(x = "Time", y = "Residuals") grid.arrange(p1, p2) }}, movie.name = "srl_with_ar_error.gif", interval = 0.1, ani.width = 600, ani.height = 600)