params <- list(EVAL = TRUE) ## ---- echo = FALSE------------------------------------------------------------ knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE, warning = FALSE, eval = if (isTRUE(exists("params"))) params$EVAL else FALSE ) ## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, dpi = 100, fig.asp = 0.8, fig.width = 6, out.width = "60%", fig.align = "center") library(mvgam) library(ggplot2) theme_set(theme_bw(base_size = 12, base_family = 'serif')) ## ----------------------------------------------------------------------------- load(url('https://github.com/atsa-es/MARSS/raw/master/data/lakeWAplankton.rda')) ## ----------------------------------------------------------------------------- outcomes <- c('Greens', 'Bluegreens', 'Diatoms', 'Unicells', 'Other.algae') ## ----------------------------------------------------------------------------- # loop across each plankton group to create the long datframe plankton_data <- do.call(rbind, lapply(outcomes, function(x){ # create a group-specific dataframe with counts labelled 'y' # and the group name in the 'series' variable data.frame(year = lakeWAplanktonTrans[, 'Year'], month = lakeWAplanktonTrans[, 'Month'], y = lakeWAplanktonTrans[, x], series = x, temp = lakeWAplanktonTrans[, 'Temp'])})) %>% # change the 'series' label to a factor dplyr::mutate(series = factor(series)) %>% # filter to only include some years in the data dplyr::filter(year >= 1965 & year < 1975) %>% dplyr::arrange(year, month) %>% dplyr::group_by(series) %>% # z-score the counts so they are approximately standard normal dplyr::mutate(y = as.vector(scale(y))) %>% # add the time indicator dplyr::mutate(time = dplyr::row_number()) %>% dplyr::ungroup() ## ----------------------------------------------------------------------------- head(plankton_data) ## ----------------------------------------------------------------------------- dplyr::glimpse(plankton_data) ## ----------------------------------------------------------------------------- plot_mvgam_series(data = plankton_data, series = 'all') ## ----------------------------------------------------------------------------- plankton_data %>% dplyr::filter(series == 'Other.algae') %>% ggplot(aes(x = time, y = temp)) + geom_line(size = 1.1) + geom_line(aes(y = y), col = 'white', size = 1.3) + geom_line(aes(y = y), col = 'darkred', size = 1.1) + ylab('z-score') + xlab('Time') + ggtitle('Temperature (black) vs Other algae (red)') ## ----------------------------------------------------------------------------- plankton_data %>% dplyr::filter(series == 'Diatoms') %>% ggplot(aes(x = time, y = temp)) + geom_line(size = 1.1) + geom_line(aes(y = y), col = 'white', size = 1.3) + geom_line(aes(y = y), col = 'darkred', size = 1.1) + ylab('z-score') + xlab('Time') + ggtitle('Temperature (black) vs Diatoms (red)') ## ----------------------------------------------------------------------------- plankton_train <- plankton_data %>% dplyr::filter(time <= 112) plankton_test <- plankton_data %>% dplyr::filter(time > 112) ## ----notrend_mod, include = FALSE, results='hide'----------------------------- notrend_mod <- mvgam(y ~ te(temp, month, k = c(4, 4)) + te(temp, month, k = c(4, 4), by = series) - 1, family = gaussian(), data = plankton_train, newdata = plankton_test, trend_model = 'None') ## ----eval=FALSE--------------------------------------------------------------- ## notrend_mod <- mvgam(y ~ ## # tensor of temp and month to capture ## # "global" seasonality ## te(temp, month, k = c(4, 4)) + ## ## # series-specific deviation tensor products ## te(temp, month, k = c(4, 4), by = series) - 1, ## family = gaussian(), ## data = plankton_train, ## newdata = plankton_test, ## trend_model = 'None') ## ## ----------------------------------------------------------------------------- plot_mvgam_smooth(notrend_mod, smooth = 1) ## ----------------------------------------------------------------------------- plot_mvgam_smooth(notrend_mod, smooth = 2) ## ----------------------------------------------------------------------------- plot_mvgam_smooth(notrend_mod, smooth = 3) ## ----------------------------------------------------------------------------- plot(notrend_mod, type = 'forecast', series = 1) ## ----------------------------------------------------------------------------- plot(notrend_mod, type = 'forecast', series = 2) ## ----------------------------------------------------------------------------- plot(notrend_mod, type = 'forecast', series = 3) ## ----------------------------------------------------------------------------- plot(notrend_mod, type = 'residuals', series = 1) ## ----------------------------------------------------------------------------- plot(notrend_mod, type = 'residuals', series = 3) ## ----------------------------------------------------------------------------- priors <- get_mvgam_priors( # observation formula, which has no terms in it y ~ -1, # process model formula, which includes the smooth functions trend_formula = ~ te(temp, month, k = c(4, 4)) + te(temp, month, k = c(4, 4), by = trend) - 1, # VAR1 model with uncorrelated process errors trend_model = VAR(), family = gaussian(), data = plankton_train) ## ----------------------------------------------------------------------------- priors[, 3] ## ----------------------------------------------------------------------------- priors[, 4] ## ----------------------------------------------------------------------------- priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2), prior(normal(0.5, 0.25), class = sigma)) ## ----var_mod, include = FALSE, results='hide'--------------------------------- var_mod <- mvgam(y ~ -1, trend_formula = ~ # tensor of temp and month should capture # seasonality te(temp, month, k = c(4, 4)) + # need to use 'trend' rather than series # here te(temp, month, k = c(4, 4), by = trend) - 1, family = gaussian(), data = plankton_train, newdata = plankton_test, trend_model = VAR(), priors = priors, adapt_delta = 0.99, burnin = 1000) ## ----eval=FALSE--------------------------------------------------------------- ## var_mod <- mvgam( ## # observation formula, which is empty ## y ~ -1, ## ## # process model formula, which includes the smooth functions ## trend_formula = ~ te(temp, month, k = c(4, 4)) + ## te(temp, month, k = c(4, 4), by = trend) - 1, ## ## # VAR1 model with uncorrelated process errors ## trend_model = VAR(), ## family = gaussian(), ## data = plankton_train, ## newdata = plankton_test, ## ## # include the updated priors ## priors = priors, ## silent = 2) ## ----------------------------------------------------------------------------- summary(var_mod, include_betas = FALSE) ## ----------------------------------------------------------------------------- plot(var_mod, 'smooths', trend_effects = TRUE) ## ----warning=FALSE, message=FALSE--------------------------------------------- A_pars <- matrix(NA, nrow = 5, ncol = 5) for(i in 1:5){ for(j in 1:5){ A_pars[i, j] <- paste0('A[', i, ',', j, ']') } } mcmc_plot(var_mod, variable = as.vector(t(A_pars)), type = 'hist') ## ----warning=FALSE, message=FALSE--------------------------------------------- Sigma_pars <- matrix(NA, nrow = 5, ncol = 5) for(i in 1:5){ for(j in 1:5){ Sigma_pars[i, j] <- paste0('Sigma[', i, ',', j, ']') } } mcmc_plot(var_mod, variable = as.vector(t(Sigma_pars)), type = 'hist') ## ----warning=FALSE, message=FALSE--------------------------------------------- mcmc_plot(var_mod, variable = 'sigma_obs', regex = TRUE, type = 'hist') ## ----------------------------------------------------------------------------- priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2), prior(normal(0.5, 0.25), class = sigma)) ## ----varcor_mod, include = FALSE, results='hide'------------------------------ varcor_mod <- mvgam(y ~ -1, trend_formula = ~ # tensor of temp and month should capture # seasonality te(temp, month, k = c(4, 4)) + # need to use 'trend' rather than series # here te(temp, month, k = c(4, 4), by = trend) - 1, family = gaussian(), data = plankton_train, newdata = plankton_test, trend_model = VAR(cor = TRUE), burnin = 1000, adapt_delta = 0.99, priors = priors) ## ----eval=FALSE--------------------------------------------------------------- ## varcor_mod <- mvgam( ## # observation formula, which remains empty ## y ~ -1, ## ## # process model formula, which includes the smooth functions ## trend_formula = ~ te(temp, month, k = c(4, 4)) + ## te(temp, month, k = c(4, 4), by = trend) - 1, ## ## # VAR1 model with correlated process errors ## trend_model = VAR(cor = TRUE), ## family = gaussian(), ## data = plankton_train, ## newdata = plankton_test, ## ## # include the updated priors ## priors = priors, ## silent = 2) ## ----warning=FALSE, message=FALSE--------------------------------------------- Sigma_pars <- matrix(NA, nrow = 5, ncol = 5) for(i in 1:5){ for(j in 1:5){ Sigma_pars[i, j] <- paste0('Sigma[', i, ',', j, ']') } } mcmc_plot(varcor_mod, variable = as.vector(t(Sigma_pars)), type = 'hist') ## ----------------------------------------------------------------------------- Sigma_post <- as.matrix(varcor_mod, variable = 'Sigma', regex = TRUE) median_correlations <- cov2cor(matrix(apply(Sigma_post, 2, median), nrow = 5, ncol = 5)) rownames(median_correlations) <- colnames(median_correlations) <- levels(plankton_train$series) round(median_correlations, 2) ## ----------------------------------------------------------------------------- irfs <- irf(varcor_mod, h = 12) ## ----------------------------------------------------------------------------- plot(irfs, series = 3) ## ----------------------------------------------------------------------------- # create forecast objects for each model fcvar <- forecast(var_mod) fcvarcor <- forecast(varcor_mod) # plot the difference in variogram scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better diff_scores <- score(fcvarcor, score = 'variogram')$all_series$score - score(fcvar, score = 'variogram')$all_series$score plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', ylim = c(-1*max(abs(diff_scores), na.rm = TRUE), max(abs(diff_scores), na.rm = TRUE)), bty = 'l', xlab = 'Forecast horizon', ylab = expression(variogram[VAR1cor]~-~variogram[VAR1])) abline(h = 0, lty = 'dashed') ## ----------------------------------------------------------------------------- # plot the difference in energy scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better diff_scores <- score(fcvarcor, score = 'energy')$all_series$score - score(fcvar, score = 'energy')$all_series$score plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', ylim = c(-1*max(abs(diff_scores), na.rm = TRUE), max(abs(diff_scores), na.rm = TRUE)), bty = 'l', xlab = 'Forecast horizon', ylab = expression(energy[VAR1cor]~-~energy[VAR1])) abline(h = 0, lty = 'dashed')