library(data.table) # Efficient data manipulation
library(HMDHFDplus) # Handle Mortality and HFDplus data
library(dplyr) # Data manipulation and transformation
library(ggplot2) # Elegant plotting
library(forecast) # Time series forecasting
library(car) # Companion to Applied Regression
library(mgcv) # Generalized additive models
library(itsadug) # Time series analysis and decomposition
Generalized Additive Forecasting Mortality
Abstract
This study introduces a novel Generalized Additive Mixed Model (GAMM) for mortality modelling, employing mortality covariates \(k_{t}\) and \(k_{ct}\) as proposed by Dastranj-Kolář (DK-LME). The GAMM effectively predicts age-specific death rates (ASDRs) in both single and multi-population contexts. Empirical evaluations using data from the Human Mortality Database (HMD) demonstrate the model’s exceptional performance in accurately capturing observed mortality rates. In the DK-LME model, the relationship between log ASDRs, and \(k_{t}\) did not provide a perfect fit. Our study shows that the GAMM addresses this limitation. Additionally, as discussed in the DK-LME model, ASDRs represent longitudinal data. The GAMM offers a suitable alternative to the DK-LME model for modelling and forecasting mortality rates. We will compare the forecast accuracy of the GAMM with both the DK-LME and Li-Lee models in multi-population scenarios, as well as with the Lee-Carter models in single population scenarios. Comparative analyses highlight the GAMM’s superior sample fitting and out-of-sample forecasting performance, positioning it as a promising tool for mortality modelling and forecasting.
Keywords: Longitudinal data analysis, Nonparametric modelling, Smoothing functions, Restricted maximum likelihood, Random walks with drift.
For more details, refer to the related paper: Generalized Additive Forecasting Mortality:
https://doi.org/10.48550/arXiv.2311.18698
Affiliation
Department of Mathematics and Statistics, Masaryk University, Kotlářská 2, 611 37 Brno, Czech Republic
Load packages
First, load necessary libraries
Dataset Import
Importing downloaded datasets from the Human Mortality Database (HMD) website
# List of Dataset Filenames
<- c("AUT.Deaths_5x1.txt", "AUT.Exposures_5x1.txt",
mylist "CZE.Deaths_5x1.txt", "CZE.Exposures_5x1.txt")
Mortality Data Processing and Rate Calculation for AUT and CZE
# Function to read HMD data and filter based on Year and Age
<- function(file_index) {
read_and_filter_data <- data.table::data.table(HMDHFDplus::readHMD(mylist[file_index]))
data 1961 <= Year & Year <= 2019 & 0 <= Age & Age <= 100]
data[
}
# Process female and male mortality data for datdx and datex
<- lapply(c(1, 3), function(i) list(
datdx Female = read_and_filter_data(i)$Female,
Male = read_and_filter_data(i)$Male)
)
<- lapply(c(2, 4), function(i) list(
datex Female = read_and_filter_data(i)$Female,
Male = read_and_filter_data(i)$Male)
)
# Calculate ratios of datdx to datex for both female and male
<- lapply(1:2, function(i) {
dat list(
Female = matrix(datdx[[i]]$Female / datex[[i]]$Female,
59, 22, byrow = TRUE),
Male = matrix(datdx[[i]]$Male / datex[[i]]$Male,
59, 22, byrow = TRUE)
)
})
# Unpack matrices for AUT
<- dat[[1]]$Female
AUT_Female <- dat[[1]]$Male
AUT_Male
# Unpack matrices for CZE
<- dat[[2]]$Female
CZE_Female <- dat[[2]]$Male
CZE_Male
<- list(AUT_Female, AUT_Male,
M0 CZE_Female, CZE_Male)
Construction of ASDRs DataFrame for Training Set
# Log-transformed Mortality Data Subset for Training
# Extract the first 50 rows of log-transformed mortality
# data for AUT and CZE females and males
<- lapply(M0, function(mat) log(mat[1:50,]))
M
# Combine matrices column-wise to create a new matrix MB
<- do.call(cbind, M)
MB
# Calculate row means of the MB matrix
<- rowMeans(MB)
l
# Replicate row means to create a vector k1, matching the
# dimensions of the training set
<- rep(l, times = 88)
k1
# Create a new vector 'k2' by squaring each element in 'k1'
<- k1^2
k2
# Define a function to process each pair of matrices
<- function(matrix1, matrix2, columns1, columns2, times1, times2)
process_matrices
{ <- rep(rowMeans(cbind(matrix1[, columns1], matrix2[, columns1])),
result1 times = times1)
<- rep(rowMeans(cbind(matrix1[, columns2], matrix2[, columns2])),
result2 times = times2)
return(rep(c(result1, result2), 2))
}
# Initialize an empty list 'kcList' to store individual elements of kc
<- list()
kcList
# Iterate through pairs of matrices in M
for (i in seq(1, length(M), by = 2)) {
# Extract matrices corresponding to the pairs
<- M[[i]]
matrix1 <- M[[i + 1]]
matrix2
# Process matrices and append to kcList
<- c(kcList, process_matrices(matrix1, matrix2, 1:10, 11:22, 10, 12))
kcList
}
# Combine all elements in kcList into a single vector 'kc1'
<- unlist(kcList)
kc1
# Create a new vector 'kc2' by squaring each element in 'kc1'
<- kc1^2
kc2
# Initialize vectors for training set
<- rep(1961:2010, times = 88)
year <- factor(c(0, 1, seq(5, 100, by = 5)))
age_levels <- rep(c(0, 1, seq(5, 100, by = 5)), each = 50, times = 4)
age <- year - age
cohort
# Initialize vectors for training set
<- c("Female", "Male")
gender_levels <- rep(gender_levels, each = 22 * 50, times = 2)
gender <- c("Group[0,40]", "Group[45,100]")
age_groups # Create a vector representing age groups for each observation
<- rep(c(rep(age_groups[1], 50 * 10), rep(age_groups[2], 50 * 12)), 4)
age_groups <- c("AUT", "CZE")
Country_levels <- rep(Country_levels, each = 44 * 50)
Country
# Combine results into data frame for training set
<- data.frame(k1, k2, kc1, kc2, cohort, y = as.vector(MB), age,
ASDRs stringsAsFactors = FALSE)
gender, Country,
# Convert factors to specified levels
$age <- factor(ASDRs$age, levels = age_levels)
ASDRs$gender <- factor(ASDRs$gender, levels = gender_levels)
ASDRs$Country <- factor(ASDRs$Country, levels = Country_levels)
ASDRs
# Create interaction variables for grouping
$Country_gender_age <- interaction(ASDRs$Country, ASDRs$gender, ASDRs$age)
ASDRs$gender_age <- interaction(ASDRs$gender, ASDRs$age) ASDRs
# Display the structure of the resulting data frame
str (ASDRs)
'data.frame': 4400 obs. of 11 variables:
$ k1 : num -4.5 -4.47 -4.5 -4.54 -4.52 ...
$ k2 : num 20.3 20 20.2 20.6 20.4 ...
$ kc1 : num -6.46 -6.5 -6.5 -6.51 -6.55 ...
$ kc2 : num 41.8 42.2 42.2 42.3 42.9 ...
$ cohort : num 1961 1962 1963 1964 1965 ...
$ y : num -3.53 -3.51 -3.57 -3.66 -3.67 ...
$ age : Factor w/ 22 levels "0","1","5","10",..: 1 1 1 1 1 1 1 1 1 1 ...
$ gender : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 1 ...
$ Country : Factor w/ 2 levels "AUT","CZE": 1 1 1 1 1 1 1 1 1 1 ...
$ Country_gender_age: Factor w/ 88 levels "AUT.Female.0",..: 1 1 1 1 1 1 1 1 1 1 ...
$ gender_age : Factor w/ 44 levels "Female.0","Male.0",..: 1 1 1 1 1 1 1 1 1 1 ...
# Display the first 10 rows of the resulting data frame
head(ASDRs, 10)
k1 k2 kc1 kc2 cohort y age gender Country
1 -4.502050 20.26846 -6.464022 41.78358 1961 -3.525682 0 Female AUT
2 -4.467448 19.95809 -6.495653 42.19351 1962 -3.511197 0 Female AUT
3 -4.498229 20.23407 -6.499591 42.24468 1963 -3.567012 0 Female AUT
4 -4.536020 20.57548 -6.506412 42.33340 1964 -3.663142 0 Female AUT
5 -4.519100 20.42227 -6.548114 42.87780 1965 -3.670532 0 Female AUT
6 -4.524642 20.47239 -6.524282 42.56625 1966 -3.713301 0 Female AUT
7 -4.504795 20.29318 -6.533959 42.69262 1967 -3.797139 0 Female AUT
8 -4.506762 20.31091 -6.572200 43.19381 1968 -3.793644 0 Female AUT
9 -4.483010 20.09738 -6.547301 42.86715 1969 -3.845562 0 Female AUT
10 -4.484632 20.11192 -6.548822 42.88707 1970 -3.825803 0 Female AUT
Country_gender_age gender_age
1 AUT.Female.0 Female.0
2 AUT.Female.0 Female.0
3 AUT.Female.0 Female.0
4 AUT.Female.0 Female.0
5 AUT.Female.0 Female.0
6 AUT.Female.0 Female.0
7 AUT.Female.0 Female.0
8 AUT.Female.0 Female.0
9 AUT.Female.0 Female.0
10 AUT.Female.0 Female.0
Data Visualization
# Create a modified dataset for visualization
<- ASDRs %>%
ASDRsnw mutate(Age = age) %>%
mutate(year = as.numeric(as.character(year)))
# Create a ggplot visualizing ASDR trends over time, colored
# by age, and faceted by gender and Country
ggplot(ASDRsnw, aes(year, y, color = Age)) +
geom_point(size = 0.7) +
geom_line(linewidth = 0.5) +
facet_grid(gender ~ Country) +
xlab("Year") + ylab("ASDR (log)") +
theme(axis.text=element_blank(), axis.ticks=element_blank()) +
theme_bw() +
guides(color = guide_legend(override.aes = list(size =3))) +
theme(axis.text.x = element_text(angle=90, hjust=1))
Plot of \(k_{t}\)
ggplot(ASDRsnw, aes(year, y, color = Age)) +
geom_point(size = 0.7) +
geom_point(aes(x = year, y = k1), color = "black", size = 1) +
geom_line(aes(x = year, y = k1), color = "black", linewidth = 0.7) +
xlab("Year") +
ylab("ASDR (log)") +
theme_bw() +
guides(color = guide_legend(override.aes = list(size = 5)))
Plot of \(k_{ct}\)
ggplot(ASDRsnw, aes(year, y, color = Age)) +
geom_point(size = 0.3) +
geom_point(aes(x = year, y = kc1, color = age_groups), size = 0.6) +
geom_line(aes(x = year, y = kc1, color = age_groups), linewidth = 0.4) +
scale_color_manual(values = c("Group[0,40]" = "red", "Group[45,100]" = "blue")) +
xlab("Year") +
ylab("ASDR (log)") +
facet_grid(~Country) +
theme_bw() +
guides(color = guide_legend(override.aes = list(size = 3))) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position = "bottom")
Fit a GAMM Using bam Function
# Fit a GAMM using bam function and measure the time taken
system.time({
<- bam(y ~ age + gender_age +
m1 # Intercept terms for age and gender-age interaction
s(kc1, bs = "ts") +
# Smooth term for kc1 using a thin-plate regression spline
s(kc1, by = gender_age, bs = "ts") +
# Smooth term for kc1 by gender-age interaction
s(cohort, bs = "ts") +
# Smooth term for cohort using a thin-plate regression spline
s(Country_gender_age, bs = "re") +
# Random effect for Country-gender-age interaction
s(k1, Country_gender_age, bs = "fs", m = 1) +
# Random factor smooth for k1 with Country-gender-age interactions
s(cohort, Country_gender_age, bs = "fs", m = 1),
# Random factor smooth for cohort with Country-gender-age interactions
data = ASDRs)
})
user system elapsed
394.25 11.20 461.86
Explanation
1. \(s(kc1, bs = "ts")\) : Smooth term for kc1 using a thin-plate regression spline
Mathematical Representation: \(f(kc1)\)
Represents a smooth term for kc1 using a thin-plate regression spline.
2. \(s(kc1, by = gender:age, bs = "ts")\) : Smooth term for kc1 by gender:age interaction
Represents a smooth term for kc1 by gender-age interaction, allowing different smooth curves for kc1 based on the combinations of gender and age. The smoothness is imposed on each combination independently.
3. \(s(k1, Country:gender:age, bs = "fs", m = 1)\) : Random factor smooth interaction for k1 with Country:gender:age interaction
Represents a random factor smooth interaction for k1 with Country-gender-age interaction using a factor-smooth basis (“fs”). This allows a common smoothness constraint across different levels of Country, gender, and age. This implies a random smooth effect for k1 shared across different levels of Country, gender, and age.
4. \(s(cohort, Country:gender:age, bs = "fs", m = 1)\) : Random factor smooth interaction for cohort with Country:gender:age interactions
Represents a random factor smooth interaction for cohort with Country-gender-age interactions using a factor smooth basis (“fs”). This implies a random smooth effect for the cohort variable shared across different levels of Country, gender, and age.
# Plot the autocorrelation function (ACF) of the model residuals
acf_resid(m1)
Check for Heteroskedasticity and Assess Normality
# Add fitted values and residuals to the ASDRs data frame
$fit <- fitted(m1)
ASDRs$res <- resid(m1)
ASDRs
# Scatter plot to check for heteroskedasticity
plot(ASDRs$fit, ASDRs$res,
main = "GAMM for the 4 Populations",
xlab = "Fitted Values", ylab = "Residuals",
pch = 1, frame = FALSE, cex = 0.5, col = "black"
)
# Quantile-quantile plot to assess normality assumption
qqPlot(ASDRs$res,
ylab = deparse(substitute())
)
[1] 2181 4351
Refine the Model
# Filter out data points with absolute residuals outside the range [-0.07, 0.07]
<- ASDRs[abs(ASDRs$res) <= 0.07, ]
ASDRs2
# Fit a GAMM to the refined dataset ASDRs2
system.time({
<- bam(y ~ age+gender_age+
m2 s(kc1,bs="ts")+
s(kc1,by=gender_age, bs="ts") +
s(cohort,bs="ts")+
s(Country_gender_age,bs="re") +
s(k1,Country_gender_age,bs="fs",m=1)+
s(cohort,Country_gender_age,bs="fs",m=1),
data=ASDRs2,
select = TRUE
# Enable selection penalties for smoother effects,
# allowing REML comparison of models
) })
user system elapsed
454.65 12.03 528.98
Check for Heteroskedasticity and Assess Normality (Refined Model)
# Add fitted values and residuals to the ASDRs2 data frame
$fit <- fitted(m2)
ASDRs2$res <- resid(m2)
ASDRs2
# Scatter plot to check for heteroskedasticity
plot(ASDRs2$fit, ASDRs2$res,
xlab = "Fitted Values", ylab = "Residuals",
pch = 1, frame = FALSE, cex = 0.09, col = "black"
)
# Quantile-quantile plot to assess normality assumption
qqPlot(ASDRs2$res,
ylab = deparse(substitute())
)
[1] 111 1119
# Print summary statistics of the GAMM
print(summary(m2))
Family: gaussian
Link function: identity
Formula:
y ~ age + gender_age + s(kc1, bs = "ts") + s(kc1, by = gender_age,
bs = "ts") + s(cohort, bs = "ts") + s(Country_gender_age,
bs = "re") + s(k1, Country_gender_age, bs = "fs", m = 1) +
s(cohort, Country_gender_age, bs = "fs", m = 1)
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.543354 0.301630 -5.117 3.31e-07 ***
age1 -3.077195 0.397813 -7.735 1.42e-14 ***
age5 -4.486662 0.332882 -13.478 < 2e-16 ***
age10 -3.979338 0.377617 -10.538 < 2e-16 ***
age15 -3.486161 0.290903 -11.984 < 2e-16 ***
age20 -3.151064 0.289236 -10.894 < 2e-16 ***
age25 -3.029308 0.324161 -9.345 < 2e-16 ***
age30 -2.989491 0.286581 -10.432 < 2e-16 ***
age35 -2.720658 0.285910 -9.516 < 2e-16 ***
age40 -2.729674 0.357736 -7.630 3.16e-14 ***
age45 -5.694335 0.344039 -16.551 < 2e-16 ***
age50 -4.537635 0.343731 -13.201 < 2e-16 ***
age55 -4.030300 0.343651 -11.728 < 2e-16 ***
age60 -3.566703 0.343871 -10.372 < 2e-16 ***
age65 -3.877051 0.344372 -11.258 < 2e-16 ***
age70 -2.808841 0.345099 -8.139 5.86e-16 ***
age75 -2.780184 0.346130 -8.032 1.38e-15 ***
age80 -1.849454 0.347396 -5.324 1.09e-07 ***
age85 -1.578434 0.348834 -4.525 6.29e-06 ***
age90 -0.917206 0.358083 -2.561 0.010475 *
age95 -0.626256 0.355871 -1.760 0.078551 .
age100 1.159337 3.165969 0.366 0.714252
gender_ageMale.0 1.375182 2.474404 0.556 0.578416
gender_ageFemale.1 0.000000 0.000000 NaN NaN
gender_ageMale.1 0.339560 0.418167 0.812 0.416848
gender_ageFemale.5 0.000000 0.000000 NaN NaN
gender_ageMale.5 1.143994 0.398782 2.869 0.004151 **
gender_ageFemale.10 -0.678034 0.351784 -1.927 0.054026 .
gender_ageMale.10 0.000000 0.000000 NaN NaN
gender_ageFemale.15 -1.089046 0.215503 -5.054 4.61e-07 ***
gender_ageMale.15 0.000000 0.000000 NaN NaN
gender_ageFemale.20 -1.136416 0.094528 -12.022 < 2e-16 ***
gender_ageMale.20 0.000000 0.000000 NaN NaN
gender_ageFemale.25 -0.987734 0.221423 -4.461 8.48e-06 ***
gender_ageMale.25 0.000000 0.000000 NaN NaN
gender_ageFemale.30 -0.904790 0.200943 -4.503 6.98e-06 ***
gender_ageMale.30 0.000000 0.000000 NaN NaN
gender_ageFemale.35 -0.604116 0.062529 -9.661 < 2e-16 ***
gender_ageMale.35 0.000000 0.000000 NaN NaN
gender_ageFemale.40 -0.779184 0.336873 -2.313 0.020794 *
gender_ageMale.40 0.000000 0.000000 NaN NaN
gender_ageFemale.45 0.000000 0.000000 NaN NaN
gender_ageMale.45 0.619801 0.048487 12.783 < 2e-16 ***
gender_ageFemale.50 -0.571564 0.154525 -3.699 0.000221 ***
gender_ageMale.50 0.000000 0.000000 NaN NaN
gender_ageFemale.55 -0.809849 0.048847 -16.579 < 2e-16 ***
gender_ageMale.55 0.000000 0.000000 NaN NaN
gender_ageFemale.60 -0.778948 0.053805 -14.477 < 2e-16 ***
gender_ageMale.60 0.000000 0.000000 NaN NaN
gender_ageFemale.65 0.000000 0.000000 NaN NaN
gender_ageMale.65 0.692174 0.079251 8.734 < 2e-16 ***
gender_ageFemale.70 -0.574908 0.070085 -8.203 3.50e-16 ***
gender_ageMale.70 0.000000 0.000000 NaN NaN
gender_ageFemale.75 0.000000 0.000000 NaN NaN
gender_ageMale.75 0.376813 0.137231 2.746 0.006073 **
gender_ageFemale.80 -0.326683 0.091539 -3.569 0.000365 ***
gender_ageMale.80 0.000000 0.000000 NaN NaN
gender_ageFemale.85 0.000000 0.000000 NaN NaN
gender_ageMale.85 0.160520 0.155277 1.034 0.301333
gender_ageFemale.90 -0.103188 0.175105 -0.589 0.555714
gender_ageMale.90 0.000000 0.000000 NaN NaN
gender_ageFemale.95 0.000000 0.000000 NaN NaN
gender_ageMale.95 -0.003264 0.202707 -0.016 0.987154
gender_ageFemale.100 0.000000 0.000000 NaN NaN
gender_ageMale.100 3.207742 5.844574 0.549 0.583157
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Approximate significance of smooth terms:
edf Ref.df F p-value
s(kc1) 4.193e+00 9 115.234 < 2e-16 ***
s(kc1):gender_ageFemale.0 9.478e-01 9 0.944 2.78e-06 ***
s(kc1):gender_ageMale.0 3.948e+00 9 3.910 < 2e-16 ***
s(kc1):gender_ageFemale.1 9.752e-01 9 0.989 1.10e-06 ***
s(kc1):gender_ageMale.1 9.632e-01 9 1.076 7.78e-07 ***
s(kc1):gender_ageFemale.5 4.075e-01 9 0.074 0.04469 *
s(kc1):gender_ageMale.5 1.007e+00 9 1.626 < 2e-16 ***
s(kc1):gender_ageFemale.10 6.421e-01 9 0.187 0.00607 **
s(kc1):gender_ageMale.10 8.612e-01 9 0.555 5.42e-05 ***
s(kc1):gender_ageFemale.15 5.015e-01 9 0.106 0.02074 *
s(kc1):gender_ageMale.15 1.825e-05 9 0.000 0.42429
s(kc1):gender_ageFemale.20 1.870e-05 9 0.000 0.79114
s(kc1):gender_ageMale.20 2.034e-05 9 0.000 0.38221
s(kc1):gender_ageFemale.25 2.747e-01 9 0.042 0.07285 .
s(kc1):gender_ageMale.25 4.377e-01 9 0.085 0.04332 *
s(kc1):gender_ageFemale.30 5.285e-01 9 0.121 0.02490 *
s(kc1):gender_ageMale.30 2.962e-04 9 0.000 0.12172
s(kc1):gender_ageFemale.35 3.613e-04 9 0.000 0.13391
s(kc1):gender_ageMale.35 5.249e-05 9 0.000 0.19760
s(kc1):gender_ageFemale.40 9.071e-01 9 0.730 6.00e-05 ***
s(kc1):gender_ageMale.40 8.017e-01 9 0.378 0.00213 **
s(kc1):gender_ageFemale.45 1.397e-05 7 0.000 0.78753
s(kc1):gender_ageMale.45 2.154e-05 7 0.000 0.53495
s(kc1):gender_ageFemale.50 5.196e-01 7 0.153 0.05172 .
s(kc1):gender_ageMale.50 1.884e-05 7 0.000 0.84382
s(kc1):gender_ageFemale.55 1.435e-05 7 0.000 0.69966
s(kc1):gender_ageMale.55 4.078e-05 7 0.000 0.25546
s(kc1):gender_ageFemale.60 7.951e-05 7 0.000 0.21250
s(kc1):gender_ageMale.60 1.916e-05 7 0.000 0.77660
s(kc1):gender_ageFemale.65 1.795e-05 7 0.000 0.80744
s(kc1):gender_ageMale.65 6.785e-02 7 0.010 0.16405
s(kc1):gender_ageFemale.70 4.662e-04 7 0.000 0.18855
s(kc1):gender_ageMale.70 2.226e-05 7 0.000 0.56798
s(kc1):gender_ageFemale.75 1.821e-05 7 0.000 0.83326
s(kc1):gender_ageMale.75 3.398e-01 7 0.073 0.10163
s(kc1):gender_ageFemale.80 1.839e-05 7 0.000 0.91198
s(kc1):gender_ageMale.80 1.843e-05 7 0.000 0.92500
s(kc1):gender_ageFemale.85 3.395e-05 7 0.000 0.31362
s(kc1):gender_ageMale.85 3.657e-01 7 0.082 0.09613 .
s(kc1):gender_ageFemale.90 3.140e-01 7 0.065 0.10780
s(kc1):gender_ageMale.90 1.568e-01 7 0.027 0.14577
s(kc1):gender_ageFemale.95 5.884e-02 7 0.009 0.15918
s(kc1):gender_ageMale.95 5.211e-01 7 0.154 0.05173 .
s(kc1):gender_ageFemale.100 2.707e+00 9 3.438 < 2e-16 ***
s(kc1):gender_ageMale.100 2.882e+00 9 5.952 < 2e-16 ***
s(cohort) 7.357e+00 9 5.733 < 2e-16 ***
s(Country_gender_age) 2.141e+00 44 0.053 < 2e-16 ***
s(k1,Country_gender_age) 3.350e+02 748 1.087 < 2e-16 ***
s(cohort,Country_gender_age) 2.839e+02 747 3.154 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Rank: 2130/2151
R-sq.(adj) = 1 Deviance explained = 100%
fREML = -6203.1 Scale est. = 0.00090126 n = 3566
# Create and display four plots based on the GAMM 'm2'
plot(m2, ylim = c(-0.6, 0.9), select = 24)
plot(m2, ylim = c(-0.6, 0.2), select = 46)
plot(m2, ylim = c(-0.094, 0.08), select = 48)
plot(m2, ylim = c(-0.64, 0.45), select = 49)
# Plot the autocorrelation function (ACF) of the model residuals
acf_resid(m2)
Building a New ASDRs DataFrame for the Test Set
# Use random walk with drift to forecast future values of k
<- rwf(
k_forecast # Time series data for forecasting
l, h = 9, # Forecast horizon (next 9 time points)
drift = TRUE, # Include a drift term in the random walk
level = c(80, 95) # Confidence levels for prediction intervals
)
# Repeat the mean forecast values 88 times
<- rep(k_forecast$mean[1:9], times = 4*22)
k1
# Create a new vector 'k2' by squaring each element in 'k1'
<- k1^2
k2
# Initialize an empty list 'kcList' to store individual elements of kc
<- list()
kcList
# Iterate through pairs of matrices in M,
# combining and calculating row means for specific columns
for (i in c(1, 3)) {
<- M[[i]]
matrix1 <- M[[i + 1]] # Adjust the index to access the second matrix
matrix2
# Extract row means for the first 10 columns and append to 'kcList'
<- c(kcList, rowMeans(cbind(matrix1[, 1:10], matrix2[, 1:10])))
kcList
# Extract row means for the last 14 columns and append to 'kcList'
<- c(kcList, rowMeans(cbind(matrix1[, 11:22], matrix2[, 11:22])))
kcList
}
# Combine all elements in kcList into a single vector 'kc0'
<- unlist(kcList)
kc0
# Initialize an empty vector 'ar' for storing forecasted values
<- c()
ar
# Iterate through 12 subsets of kc0 to forecast future values
# using random walk with drift
for (i in 1:4) {
# Extract a subset of kc0 for the current iteration
<- kc0[((i - 1) * 50 + 1):(i * 50)]
subset_kc0
# Forecast the next 9 values using random walk with drift
<- rwf(subset_kc0, h = 9, drift = TRUE, level = c(80, 95))
kc_forecast
# Extract the mean values from the forecast and append to 'ar'
<- append(ar, kc_forecast$mean[1:9])
ar
}
# Initialize an empty list 'kcList' for storing individual elements of kc
<- list()
kcList
# Define the sequence of indices for iterating through 'ar'
<- seq(1, length(ar), by = 18)
indices
# Iterate through the indices, combining and replicating blocks of 'ar'
for (i in indices) {
# Extract two consecutive blocks of 'ar'
<- ar[i:(i + 8)]
ar1 <- ar[(i + 9):(i + 17)]
ar2
# Replicate and combine the blocks according to the specified pattern
<- rep(c(rep(ar1, 10), rep(ar2, 12)), 2)
result1
# Append the result to 'kcList'
<- c(kcList, result1)
kcList
}
# Combine all elements in kcList into a single vector 'kc1'
<- unlist(kcList)
kc1
# Create a new vector 'kc2' by squaring each element in 'kc1'
<- kc1^2
kc2
# Initialize vectors for test set
<- rep(2011:2019, times = 88)
year <- factor(c(0, 1, seq(5, 100, by = 5)))
age_levels <- rep(c(0, 1, seq(5, 100, by = 5)), each = 9, times = 4)
age <- year - age
cohort
# Initialize vectors for test set
<- c("Female", "Male")
gender_levels <- rep(gender_levels, each = 22 * 9, times = 2)
gender <- c("AUT", "CZE")
Country_levels <- rep(Country_levels, each = 44 * 9)
Country
# Combine results into data frame for test set
<- data.frame(k1, k2, kc1, kc2, cohort, age,
newASDRs stringsAsFactors = FALSE)
gender, Country,
# Convert factors to specified levels
$age <- factor(newASDRs$age, levels = age_levels)
newASDRs$gender <- factor(newASDRs$gender, levels = gender_levels)
newASDRs$Country <- factor(newASDRs$Country, levels = Country_levels)
newASDRs$Country_gender_age <- interaction(newASDRs$Country,
newASDRs$gender, newASDRs$age)
newASDRs$gender_age <- interaction(newASDRs$gender, newASDRs$age)
newASDRs
# Add predictions to the 'newASDRs' data frame using the GAMM 'm2'
$pred <- predict(m2, newdata = newASDRs) newASDRs
Evaluating Forecast Accuracy: Mean Squared Error for GAMM Predictions (ASDRs)
# Compute Mean Squared Error (MSE) for GAMM Forecasting
# Initialize an empty vector to store MSE values
<- numeric()
MSE_test_gamm
# Iterate through the 4 data sets of predictions
for (n in 1:4) {
# Extract predicted values for the specific
# set (9 years, 22 observations each)
<- exp(
gamm_predictions matrix(newASDRs$pred[(((n - 1) * (9 * 22)) + 1):(n * (9 * 22))],
9, 22, byrow = FALSE)
)
# Extract actual values for the corresponding set
<- M0[[n]][51:59,]
actual_values
# Calculate the residuals (prediction errors)
<- actual_values - gamm_predictions
gamm_errors
# Compute MSE for the set and append to the vector
<- sum(gamm_errors[, 1:22]^2) / (22 * 9)
MSE_test_gamm_n <- c(MSE_test_gamm, MSE_test_gamm_n)
MSE_test_gamm
}
# Display the vector of MSE values for GAMM forecasting
MSE_test_gamm
[1] 5.500247e-05 7.385705e-04 7.923831e-05 7.815464e-04