library(dplyr) # Data manipulation and transformation
library(ggplot2) # Data visualization
library(tidyverse) # Collection of tidyverse packages
library(forecast) # Time series forecasting
library(geepack) # Generalized Estimating Equation PackageForecasting Mortality Rates with a Stochastic PCA–GEE Model: An Application to Solvency Capital Requirements
Load packages
First, load the packages to be used
Dataset
Importing the dataset containing log(q_{cgxt}) values for males and females from AUT (Austria) and CZE (Czech Republic)
List of Dataset Filenames
# log(q_{cgxt}) values for the years 1991 to 2021
load("dat.RData")
# log(q_{cgxt}) values for the years 1991 to 2010
load("dattrn.RData")M0 <- dat
MB0 <- do.call(cbind, M0)
t <- 20
M <- dattrn
MB <- do.call(cbind, M)Construction of ASDRs DataFrame for Training Set
# Initialize vectors for training set
t <- 20
# Initialize an empty list 'kcList' to store the individual elements of kc
kcList <- list()
# Iterate through specific pairs of matrices in the list 'M'
# This loop runs for i = 1 and i = 3, thus iterating through the 1st-2nd and 3rd-4th matrices in M
for (i in c(1, 3)) {
# Extract the matrices corresponding to the current pair
matrix1 <- M[[i]] # Get the i-th matrix from M
matrix2 <- M[[i + 1]] # Get the (i+1)-th matrix from M
# Perform principal component analysis (PCA) on the first 15 columns of the combined matrices
# Then repeat the first principal component (PC1) 15 times
result1 <- rep(
prcomp(cbind(matrix1[, 1:15], matrix2[, 1:15]), center = FALSE, scale. = FALSE)$x[, 1],
times = 15
)
# Perform PCA on the columns 16 to 41 of the combined matrices
# Then repeat the first principal component (PC1) 26 times
result2 <- rep(
prcomp(cbind(matrix1[, 16:41], matrix2[, 16:41]), center = FALSE, scale. = FALSE)$x[, 1],
times = 26
)
# Perform PCA on the columns 42 to 91 of the combined matrices
# Then repeat the first principal component (PC1) 40 times
result3 <- rep(
prcomp(cbind(matrix1[, 42:91], matrix2[, 42:91]), center = FALSE, scale. = FALSE)$x[, 1],
times = 50
)
# Combine the repeated principal component results into a single vector
combined_results <- c(result1, result2, result3)
# Repeat the combined results 2 times and append them to 'kcList'
kcList <- c(kcList, rep(combined_results, 2))
}
# Flatten 'kcList' into a single vector 'kc1'
kc1 <- unlist(kcList)
# Create a new vector 'kc2' by squaring each element in 'kc1'
kc2 <- kc1^2
# Define age groups for the 'yngold0' vector
yngold0 <- c("Group[0,14]", "Group[15,40]", "Group[41,90]")
# Create a repeated sequence for 'yngold' with specified age groups
# The repetition pattern is based on the number of observations (t) in each age group
yngold <- rep(
c(
rep("Group[0,14]", t * 15), # Repeat "Group[0,14]" t * 15 times
rep("Group[15,40]", t * 26), # Repeat "Group[15,40]" t * 26 times
rep("Group[41,90]", t * 50) # Repeat "Group[41,90]" t * 40 times
),
4 # Repeat the entire pattern 4 times
)
# Create a vector for 'gender' with alternating repetitions of "Female" and "Male"
gender <- rep(c("Female", "Male"), each = t * 91, times = 2)
# Create a vector for 'Country' with repetitions of "AUT" and "CZE"
Country <- rep(c("AUT", "CZE"), each = t * 91 * 2)
# Initialize vectors for the training set
# 'year' is repeated for each observation in the dataset
year <- rep(1991:2010, times = 4 * 91)
# Define levels for 'age' factor from 0 to 90
age_levels <- factor(0:90)
# Create 'age' vector by repeating age values across all observations
age <- rep(0:90, each = t, times = 4)
# Calculate 'cohort' by subtracting age from year
cohort <- year - age
# Combine all vectors into a data frame called 'ASDRs' for the training set
ASDRs <- data.frame(
kc1, kc2, # Variables 'kc1' and 'kc2' from previous calculations
cohort, # Cohort calculated above
y = as.vector(MB), # Response variable 'y', converted to a vector
age, # Age vector
gender, # Gender vector
Country, # Country vector
year, # Year vector
yngold, # Age group vector
stringsAsFactors = FALSE # Do not automatically convert strings to factors
)
# Convert 'age' to a factor with specified levels
ASDRs$age <- factor(ASDRs$age, levels = age_levels)
# Convert 'age' to numeric for a separate variable 'agenum'
ASDRs$agenum <- as.numeric(ASDRs$age)
# Convert 'gender' and 'Country' to factors with specified levels
ASDRs$gender <- factor(ASDRs$gender, levels = c("Female", "Male"))
ASDRs$Country <- factor(ASDRs$Country, levels = c("AUT", "CZE"))
# Create a 'subject' variable by interacting 'Country', 'gender', and 'age'
ASDRs$subject <- interaction(ASDRs$Country, ASDRs$gender, ASDRs$age)# Display the structure of the resulting data frame to check the data types and structure
str(ASDRs)'data.frame': 7280 obs. of 11 variables:
$ kc1 : num 46 46.3 45.3 46 46.7 ...
$ kc2 : num 2118 2145 2051 2117 2184 ...
$ cohort : int 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 ...
$ y : num -5.01 -5.03 -5.18 -5.2 -5.33 ...
$ age : Factor w/ 91 levels "0","1","2","3",..: 1 1 1 1 1 1 1 1 1 1 ...
$ gender : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 1 ...
$ Country: Factor w/ 2 levels "AUT","CZE": 1 1 1 1 1 1 1 1 1 1 ...
$ year : int 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 ...
$ yngold : chr "Group[0,14]" "Group[0,14]" "Group[0,14]" "Group[0,14]" ...
$ agenum : num 1 1 1 1 1 1 1 1 1 1 ...
$ subject: Factor w/ 364 levels "AUT.Female.0",..: 1 1 1 1 1 1 1 1 1 1 ...
# Display the first few rows of the resulting data frame to inspect the data
head(ASDRs) kc1 kc2 cohort y age gender Country year yngold agenum
1 46.02698 2118.483 1991 -5.007169 0 Female AUT 1991 Group[0,14] 1
2 46.31294 2144.888 1992 -5.033727 0 Female AUT 1992 Group[0,14] 1
3 45.28995 2051.179 1993 -5.184065 0 Female AUT 1993 Group[0,14] 1
4 46.00825 2116.759 1994 -5.196172 0 Female AUT 1994 Group[0,14] 1
5 46.73129 2183.814 1995 -5.332980 0 Female AUT 1995 Group[0,14] 1
6 46.91257 2200.789 1996 -5.332367 0 Female AUT 1996 Group[0,14] 1
subject
1 AUT.Female.0
2 AUT.Female.0
3 AUT.Female.0
4 AUT.Female.0
5 AUT.Female.0
6 AUT.Female.0
Plot of \(k_{ct}\)
# Create a new data frame 'ASDRsnw' by converting the 'year' column to numeric
ASDRsnw <- ASDRs %>%
mutate(year = as.numeric(as.character(year)))
# Rename the 9th column to "PC1"
colnames(ASDRsnw)[9] <- "PC1"
# Generate a ggplot with points and lines
ggplot(ASDRsnw) +
# Add points to the plot with year on the x-axis, kc1 on the y-axis, and color based on 'PC1'
geom_point(aes(x = year, y = kc1, color = PC1), size = 1.6) +
# Add lines to the plot connecting the points with the same x and y aesthetics
geom_line(aes(x = year, y = kc1, color = PC1), linewidth = .8) +
# Manually set the colors for the 'PC1' groups
scale_color_manual(values = c("Group[0,14]" = "red", "Group[15,40]" = "green", "Group[41,90]" = "blue")) +
# Set the labels for the x and y axes
xlab("Year") +
ylab("Value") +
# Create a facet grid for the plot by 'Country'
facet_grid(~Country) +
# Apply a clean, white background theme to the plot
theme_bw() +
# Customize the legend to have larger points
guides(color = guide_legend(override.aes = list(size = 3))) +
# Rotate the x-axis text labels to be vertical and adjust their alignment
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
# Set the font size of the x-axis text labels
theme(axis.text.x = element_text(size = 20)) +
# Set the font size of the y-axis text labels
theme(axis.text.y = element_text(size = 25)) +
# Set the font size of the legend text
theme(legend.text = element_text(size = 35)) +
# Set the font size of the axis titles
theme(axis.title = element_text(size = 25)) +
# Set the font size of the strip text (facet labels)
theme(strip.text = element_text(size = 35)) +
# Set the font size of the legend title
theme(legend.title = element_text(size = 20)) +
# Position the legend at the bottom of the plot
theme(legend.position = "bottom")
Fitting Four GEE Models using the geeglm Function
# Fit GEE model with exchangeable correlation structure
geeEx <- geeglm(
y ~ -1 + gender + age +
gender:age:kc1 +
gender:age:kc2 +
cohort,
data = ASDRs, # Data frame containing the variables
id = subject, # Grouping factor for repeated measures
waves = year, # Time variable for repeated measures
corstr = "exchangeable", # Exchangeable correlation structure
weights = agenum / mean(agenum) # Weights based on 'agenum' normalized by its mean
)
# Perform an ANOVA (Analysis of Variance) on the fitted GEE model
anova(geeEx)Analysis of 'Wald statistic' Table
Model: gaussian, link: identity
Response: y
Terms added sequentially (first to last)
Df X2 P(>|Chi|)
gender 2 1599 < 2.2e-16 ***
age 90 44422 < 2.2e-16 ***
cohort 1 4134 < 2.2e-16 ***
gender:age:kc1 182 41956 < 2.2e-16 ***
gender:age:kc2 182 19573 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
geeExl <- geeglm(
y ~ -1 + gender + age +
gender:age:kc1 +
cohort,
data = ASDRs, # Data frame containing the variables
id = subject, # Grouping factor for repeated measures
waves = year, # Time variable for repeated measures
corstr = "exchangeable", # Exchangeable correlation structure
weights = agenum / mean(agenum) # Weights based on 'agenum' normalized by its mean
)
# Perform an ANOVA (Analysis of Variance) on the fitted GEE model
anova(geeExl)Analysis of 'Wald statistic' Table
Model: gaussian, link: identity
Response: y
Terms added sequentially (first to last)
Df X2 P(>|Chi|)
gender 2 1599 < 2.2e-16 ***
age 90 44422 < 2.2e-16 ***
cohort 1 4134 < 2.2e-16 ***
gender:age:kc1 182 41956 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fit GEE model with AR(1) correlation structure
geeAr1 <- geeglm(
y ~ -1 + gender + age +
gender:age:kc1 +
gender:age:kc2 +
cohort,
data = ASDRs, # Data frame containing the variables
id = subject, # Grouping factor for repeated measures
waves = year, # Time variable for repeated measures
corstr = "ar1", # AR1 (autoregressive) correlation structure
weights = agenum / mean(agenum) # Weights based on 'agenum' normalized by its mean
)
# Perform an ANOVA (Analysis of Variance) on the fitted GEE model
anova(geeAr1) Analysis of 'Wald statistic' Table
Model: gaussian, link: identity
Response: y
Terms added sequentially (first to last)
Df X2 P(>|Chi|)
gender 2 1594 < 2.2e-16 ***
age 90 44332 < 2.2e-16 ***
cohort 1 2955 < 2.2e-16 ***
gender:age:kc1 182 49660 < 2.2e-16 ***
gender:age:kc2 182 90733 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
geeAr1l <- geeglm(
y ~ -1+gender + age +
gender:age:kc1 +
cohort,
data = ASDRs, # Data frame containing the variables
id = subject, # Grouping factor for repeated measures
waves = year, # Time variable for repeated measures
corstr = "ar1", # AR1 (autoregressive) correlation structure
weights = agenum / mean(agenum) # Weights based on 'agenum' normalized by its mean
)
# geeAr1
# coef(geeAr1)
# vcov(geeAr1)
# summary(geeAr1)
# coef(summary(geeAr1))
# Perform an ANOVA (Analysis of Variance) on the fitted GEE model
anova(geeAr1l)Analysis of 'Wald statistic' Table
Model: gaussian, link: identity
Response: y
Terms added sequentially (first to last)
Df X2 P(>|Chi|)
gender 2 1594 < 2.2e-16 ***
age 90 44332 < 2.2e-16 ***
cohort 1 2955 < 2.2e-16 ***
gender:age:kc1 182 49660 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
QIC
# Calculate and display the QIC value for the model with an exchangeable correlation structure
QIC_Ex <- QIC(geeEx)
QIC_Ex QIC QICu Quasi Lik CIC params QICC
803.2096 1169.9697 -127.9849 273.6199 457.0000 -3622.5167
QIC_Exl <- QIC(geeExl)
QIC_Exl QIC QICu Quasi Lik CIC params QICC
1650.4986 816.4186 -133.2093 692.0400 275.0000 3408.0158
# Calculate and display the QIC value for the model with an AR1 correlation structure
QIC_Ar1 <- QIC(geeAr1)
QIC_Ar1 QIC QICu Quasi Lik CIC params QICC
791.1382 1169.2705 -127.6353 267.9338 457.0000 -3634.5881
QIC_Ar1l <- QIC(geeAr1l)
QIC_Ar1l QIC QICu Quasi Lik CIC params QICC
1491.1470 815.1953 -132.5977 612.9758 275.0000 3248.6642
ASDRs$fit <- predict(geeAr1,ASDRs)Creation of New Dataset for the GEE Model Prediction
# Initialize vectors for test set
t <- 11
# Initialize an empty list 'kcList' to store the individual elements of kc
kcList <- list()
# Iterate through specific pairs of matrices in the list 'M'
# This loop runs for i = 1 and i = 3, thus iterating through the 1st-2nd and 3rd-4th matrices in M
for (i in c(1, 3)) {
matrix1 <- M[[i]] # Get the i-th matrix from M
matrix2 <- M[[i + 1]] # Get the (i+1)-th matrix from M to pair with matrix1
# Perform principal component analysis (PCA) on the first 15 columns of the combined matrices
# Append the first principal component (PC1) to 'kcList1'
kcList1 <- c(kcList, prcomp(cbind(matrix1[, 1:15], matrix2[, 1:15]), center = FALSE, scale. = FALSE)$x[,1])
# Perform PCA on columns 16 to 41 of the combined matrices
# Append the first principal component (PC1) to 'kcList2'
kcList2 <- c(kcList1, prcomp(cbind(matrix1[, 16:41], matrix2[, 16:41]), center = FALSE, scale. = FALSE)$x[,1])
# Perform PCA on columns 42 to 91 of the combined matrices
# Append the first principal component (PC1) to 'kcList'
kcList <- c(kcList2, prcomp(cbind(matrix1[, 42:91], matrix2[, 42:91]), center = FALSE, scale. = FALSE)$x[,1])
}
# Combine all elements in kcList into a single vector 'kc0'
kc0 <- unlist(kcList)
# length(kc0) # Display the length of 'kc0' to verify the size of the combined vector
# Initialize an empty vector 'ar' for storing forecasted values
ar <- c()
# Iterate through 6 subsets of kc0 to forecast future values using random walk with drift
for (i in 1:6) {
# Extract a subset of kc0 for the current iteration
subset_kc0 <- kc0[((i - 1) * 20 + 1):(i * 20)]
# Forecast the next 9 values using random walk with drift
kc_forecast <- rwf(subset_kc0, h = 11, drift = TRUE, level = c(80, 95))
# Extract the mean values from the forecast and append to 'ar'
ar <- append(ar, kc_forecast$mean[1:11])
}
# Reinitialize 'kcList' to store new combinations of 'ar'
kcList <- list()
indices <- seq(1, length(ar), by = 33)
# Iterate through the indices, combining and replicating blocks of 'ar'
for (i in indices) {
# Extract three consecutive blocks of 'ar', each of length 9
ar1 <- ar[i:(i + 10)]
ar2 <- ar[(i + 11):(i + 21)]
ar3 <- ar[(i + 22):(i + 32)]
# Replicate and combine the blocks according to the specified pattern
result1 <- rep(c(rep(ar1, 15), rep(ar2, 26), rep(ar3, 50)), 2)
# Append the result to 'kcList'
kcList <- c(kcList, result1)
}
# Combine all elements in kcList into a single vector 'kc1'
kc1 <- unlist(kcList)
# Create a new vector 'kc2' by squaring each element in 'kc1'
kc2 <- kc1^2# Set the value of 't' to 9, representing the number of years from 2011 to 2021
t<- 11
# Create a 'gender' vector with alternating repetitions of "Female" and "Male"
# Each gender is repeated 91 times per year, across 't' years, repeated 2 times (once for each country)
gender <- rep(c("Female", "Male"), each = 91 * t, times = 2)
# Create a 'Country' vector with repetitions of "AUT" and "CZE"
# Each country is repeated for 2 genders * 91 ages * 't' years
Country <- rep(c("AUT", "CZE"), each = 2 * 91 * t)
# Create a 'year' vector representing the years 2011 to 2021
# This sequence is repeated for 4 (2 countries * 2 genders) * 91 ages
year <- rep(2011:2021, times = 4 * 91)
# Define levels for the 'age' factor from 0 to 90
age_levels <- factor(0:90)
# Create an 'age' vector by repeating ages 0 to 90 for each year, repeated 4 times (2 countries * 2 genders)
age <- rep(0:90, each = 11, times = 4)
# Calculate 'cohort' by subtracting 'age' from 'year'
cohort <- year - age
# Combine all variables into a data frame called 'newASDRs' for the test set
newASDRs <- data.frame(
kc1, kc2, # Variables 'kc1' and 'kc2' from previous calculations
cohort, # Cohort calculated above
y = as.vector(MB0[21:31,]), # Response variable 'y', taken from rows 21 to 29 of 'MB0'
age, # Age vector
gender, # Gender vector
Country, # Country vector
year, # Year vector
stringsAsFactors = FALSE # Do not automatically convert strings to factors
)
# Convert 'age' to a factor with specified levels (0 to 90)
newASDRs$age <- factor(newASDRs$age, levels = age_levels)
# Convert 'gender' to a factor with specified levels ("Female", "Male")
newASDRs$gender <- factor(newASDRs$gender, levels = c("Female", "Male"))
# Convert 'Country' to a factor with specified levels ("AUT", "CZE")
newASDRs$Country <- factor(newASDRs$Country, levels = c("AUT", "CZE"))
# Convert 'year' to a factor with specified levels (2011 to 2021)
newASDRs$year <- factor(newASDRs$year, levels = 2011:2021)
# Create a 'subject' variable by interacting 'Country', 'gender', and 'age'
newASDRs$subject <- interaction(newASDRs$Country, newASDRs$gender, newASDRs$age)head(newASDRs) kc1 kc2 cohort y age gender Country year subject
1 50.11586 2511.600 2011 -5.891180 0 Female AUT 2011 AUT.Female.0
2 50.32031 2532.133 2012 -5.719324 0 Female AUT 2012 AUT.Female.0
3 50.52475 2552.750 2013 -5.922384 0 Female AUT 2013 AUT.Female.0
4 50.72919 2573.451 2014 -5.778350 0 Female AUT 2014 AUT.Female.0
5 50.93364 2594.235 2015 -5.843578 0 Female AUT 2015 AUT.Female.0
6 51.13808 2615.103 2016 -5.860367 0 Female AUT 2016 AUT.Female.0
# Define constants for the observed and forecasted periods
observed_period <- 20
forecast_period <- 11
total_period <- 31
# Create labels for observed and forecasted data
observed_label <- rep("Observed", observed_period)
forecasted_label <- rep("Forecasted", forecast_period)
# Combine observed and forecasted labels for all groups
observation_type <- rep(factor(c(observed_label, forecasted_label)), 6)
# Generate a sequence for years (1991-2021) repeated for each group
year_sequence <- rep(1991:2021, 6)
# Define countries and repeat for each group and time period
countries <- rep(c("AUT", "CZE"), each = 3 * total_period)
# Combine observed data (kc0) and forecasted data (ar) into a single vector
trend_data <- c(
kc0[1:20], ar[1:11],
kc0[21:40], ar[12:22],
kc0[41:60], ar[23:33],
kc0[61:80], ar[34:44],
kc0[81:100], ar[45:55],
kc0[101:120], ar[56:66]
)
# Define age groups and repeat for the appropriate time periods
age_groups <- c("Group[0,14]", "Group[15,40]", "Group[41,90]")
age_group_labels <- rep(rep(age_groups, each = total_period), 2)
# Combine all data into a data frame
trend_df <- data.frame(
Trend = trend_data,
Year = year_sequence,
Type = observation_type,
Country = countries,
AgeGroup = age_group_labels
)
# Rename the age group column for clarity
colnames(trend_df)[5] <- "PC1"
# Create the plot using ggplot2
ggplot(trend_df) +
geom_point(aes(x = Year, y = Trend, color = PC1), size = 1.6) +
geom_line(aes(x = Year, y = Trend, color = PC1), linewidth = 0.8) +
scale_color_manual(values = c("Group[0,14]" = "red", "Group[15,40]" = "green", "Group[41,90]" = "blue")) +
xlab("Year") +
ylab("Value") +
facet_grid(~Country) +
theme_bw() +
guides(color = guide_legend(override.aes = list(size = 1))) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(axis.text.x = element_text(size = 10)) +
theme(axis.text.y = element_text(size = 15)) +
theme(legend.text = element_text(size = 10)) +
theme(axis.title = element_text(size = 15)) +
theme(strip.text = element_text(size = 10)) +
theme(legend.title = element_text(size = 10)) +
theme(legend.position = "bottom")
# Add predictions using the GEE model
newASDRs$pred <- predict(geeAr1, newdata = newASDRs)
library(dplyr)
library(akima)
library(ggplot2)
max(exp(newASDRs$pred - newASDRs$y))[1] 7.281146
# 1) Prep & interpolate
df <- newASDRs %>%
filter(Country=="AUT", gender=="Female") %>%
mutate(
year = as.numeric(as.character(year)),
age = as.numeric(as.character(age)),
ratio = exp(pred - y)
)
xi <- seq(min(df$year), max(df$year), length.out = 400)
yi <- seq(min(df$age), max(df$age), length.out = 400)
interp_res <- with(df, interp(year, age, ratio,
xo = xi, yo = yi,
duplicate = "mean"))Warning in interp(year, age, ratio, xo = xi, yo = yi, duplicate = "mean"):
collinear points, trying to add some jitter to avoid colinearities!
Warning in interp(year, age, ratio, xo = xi, yo = yi, duplicate = "mean"):
success: collinearities reduced through jitter
grid_df <- expand.grid(year = interp_res$x,
age = interp_res$y)
grid_df$ratio <- as.vector(interp_res$z)
# 2) Define jet palette & breaks
brks <- seq(min(exp(newASDRs$pred - newASDRs$y)), max(exp(newASDRs$pred - newASDRs$y)), by = 0.05)
jet.cols <- colorRampPalette(c("blue","cyan","green","yellow","red"))(length(brks)+1)
# 3) Plot with integer years on x‐axis
ggplot(grid_df, aes(x = year, y = age)) +
geom_tile(aes(fill = ratio)) +
scale_fill_gradientn(
name = "Pred / Actual",
colours = jet.cols,
limits = c(min(exp(newASDRs$pred - newASDRs$y)), max(exp(newASDRs$pred - newASDRs$y))),
oob = scales::squish
) +
geom_contour(aes(z = ratio),
breaks = brks,
colour = "black",
linewidth = 0.3) +
scale_x_continuous(
breaks = 2011:2021,
expand = c(0, 0)
) +
labs(
x = "Forecast Year",
y = "Age",
title = "Forecast / Actual mortality, AUT ♀, 2011–2021"
) +
theme_minimal(base_size = 14) +
theme(
panel.grid = element_blank(),
legend.key.height = unit(1.2, "cm")
)Warning: Removed 436 rows containing non-finite outside the scale range
(`stat_contour()`).

ggsave("Projectedactual.pdf", width = 10, height = 6)Warning: Removed 436 rows containing non-finite outside the scale range
(`stat_contour()`).
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
conversion failure on 'Forecast / Actual mortality, AUT ♀, 2011–2021' in
'mbcsToSbcs': for ♀ (U+2640)
dev.off()null device
1
Calculate 95% prediction intervals using the geeAr1 model on newASDRs
# Generate the model matrix for the GEE model terms on the newASDRs data
model_matrix_gee <- model.matrix(terms(geeAr1), newASDRs)
# Calculate the predicted mortality rates based on the GEE model coefficients
newASDRs$predicted_rate_gee <- model_matrix_gee %*% coef(geeAr1)
# Calculate the standard errors of the predicted rates
predicted_rate_variance_gee <- diag(model_matrix_gee %*% tcrossprod(geeAr1$geese$vbeta, model_matrix_gee))
# Create a data frame to store the results, including lower and upper bounds of the prediction interval
newASDRs <- data.frame(
newASDRs,
lower_bound_gee = newASDRs$predicted_rate_gee - 2 * sqrt(predicted_rate_variance_gee),
upper_bound_gee = newASDRs$predicted_rate_gee + 2 * sqrt(predicted_rate_variance_gee)
)
# Display the head of the new data frame with predicted rates and prediction intervals
head(newASDRs,20) kc1 kc2 cohort y age gender Country year subject
1 50.11586 2511.600 2011 -5.891180 0 Female AUT 2011 AUT.Female.0
2 50.32031 2532.133 2012 -5.719324 0 Female AUT 2012 AUT.Female.0
3 50.52475 2552.750 2013 -5.922384 0 Female AUT 2013 AUT.Female.0
4 50.72919 2573.451 2014 -5.778350 0 Female AUT 2014 AUT.Female.0
5 50.93364 2594.235 2015 -5.843578 0 Female AUT 2015 AUT.Female.0
6 51.13808 2615.103 2016 -5.860367 0 Female AUT 2016 AUT.Female.0
7 51.34253 2636.055 2017 -5.863674 0 Female AUT 2017 AUT.Female.0
8 51.54697 2657.090 2018 -6.040600 0 Female AUT 2018 AUT.Female.0
9 51.75141 2678.209 2019 -5.960229 0 Female AUT 2019 AUT.Female.0
10 51.95586 2699.411 2020 -5.783741 0 Female AUT 2020 AUT.Female.0
11 52.16030 2720.697 2021 -5.852679 0 Female AUT 2021 AUT.Female.0
12 50.11586 2511.600 2010 -8.609330 1 Female AUT 2011 AUT.Female.1
13 50.32031 2532.133 2011 -8.623564 1 Female AUT 2012 AUT.Female.1
14 50.52475 2552.750 2012 -8.768652 1 Female AUT 2013 AUT.Female.1
15 50.72919 2573.451 2013 -8.779867 1 Female AUT 2014 AUT.Female.1
16 50.93364 2594.235 2014 -8.653196 1 Female AUT 2015 AUT.Female.1
17 51.13808 2615.103 2015 -8.229947 1 Female AUT 2016 AUT.Female.1
18 51.34253 2636.055 2016 -8.705034 1 Female AUT 2017 AUT.Female.1
19 51.54697 2657.090 2017 -8.478877 1 Female AUT 2018 AUT.Female.1
20 51.75141 2678.209 2018 -8.263856 1 Female AUT 2019 AUT.Female.1
pred predicted_rate_gee lower_bound_gee upper_bound_gee
1 -5.794480 -5.794480 -5.987103 -5.601857
2 -5.794831 -5.794831 -5.983903 -5.605760
3 -5.793071 -5.793071 -5.979230 -5.606911
4 -5.789198 -5.789198 -5.973368 -5.605028
5 -5.783213 -5.783213 -5.966620 -5.599805
6 -5.775116 -5.775116 -5.959293 -5.590939
7 -5.764907 -5.764907 -5.951670 -5.578144
8 -5.752586 -5.752586 -5.943991 -5.561180
9 -5.738153 -5.738153 -5.936433 -5.539872
10 -5.721607 -5.721607 -5.929094 -5.514120
11 -5.702950 -5.702950 -5.922001 -5.483899
12 -8.039216 -8.039216 -8.262040 -7.816393
13 -8.034442 -8.034442 -8.268451 -7.800434
14 -8.027900 -8.027900 -8.274181 -7.781619
15 -8.019590 -8.019590 -8.279268 -7.759911
16 -8.009511 -8.009511 -8.283737 -7.735285
17 -7.997664 -7.997664 -8.287606 -7.707723
18 -7.984050 -7.984050 -8.290885 -7.677214
19 -7.968667 -7.968667 -8.293579 -7.643754
20 -7.951515 -7.951515 -8.295688 -7.607343
Evaluating Forecast Accuracy: Mean Squared Error for GEE Model Predictions
# Initialize an empty vector to store the Mean Squared Error (MSE) values
MSE_test_gee_pca <- c()
# Iterate through the 4 datasets of predictions
for (n in 1:4) {
# Extract predicted values for the specific set (9 years, 91 observations each)
start_index <- (n - 1) * (11 * 91) + 1
end_index <- n * (11 * 91)
gee_predictions <- exp(matrix(newASDRs$pred[start_index:end_index], 11, 91, byrow = FALSE))
# Extract actual values for the corresponding set from the M0 list
actual_values <- exp(M0[[n]][21:31,])
# Calculate the residuals (errors between actual and predicted values)
gee_errors <- actual_values - gee_predictions
# Compute the Mean Squared Error (MSE) for the current set
MSE_test_gee_n <- sum(gee_errors^2) / (91 * 11)
# Append the computed MSE to the MSE vector
MSE_test_gee_pca <- append(MSE_test_gee_pca, MSE_test_gee_n)
}
# Display the vector of MSE values for all datasets
MSE_test_gee_pca[1] 5.289035e-06 8.993636e-06 1.179988e-05 3.396036e-05