source("R/data_helpers.R")
-source("R/model_helpers.R")
-source("R/plotting_helpers.R")source("R/data_helpers.R")
+source("R/model_helpers.R")
+source("R/plotting_helpers.R")diff --git a/static/files/sardal-notebook.html b/static/files/sardal-notebook.html index 4d32f0e..0ac7343 100644 --- a/static/files/sardal-notebook.html +++ b/static/files/sardal-notebook.html @@ -2340,7 +2340,7 @@ window.Quarto = { -
+The data was provided as a set of Excel .xlsx files with sheets for the various isotopes. The data was manually put in CSV format, which is an open standard which is suitable for later import into R. The choice was made to separate the data by type (discharge or measurement at Särdal) and isotope. For measurements, a further distinction was made between species of Fucus spp.. The structure of the pre-processed data is shown below:
.
-├── discharges
-│ ├── discharges_C14.csv
-│ ├── discharges_Co60.csv
-│ ├── ...
-└── sardal
- ├── C14
- │ └── sardal_activity_C14_F_Various_Mattsson_2025.csv
- ├── Co60
- │ ├── sardal_activity_Co60_F_Serratus_Mattsson_2025.csv
- │ └── sardal_activity_Co60_F_Vesiculosus_Mattsson_2025.csv
- ├── Cs134
- │ ├── ...
- ├── ...
- └── ....
+├── discharges
+│ ├── discharges_C14.csv
+│ ├── discharges_Co60.csv
+│ ├── ...
+└── sardal
+ ├── C14
+ │ └── sardal_activity_C14_F_Various_Mattsson_2025.csv
+ ├── Co60
+ │ ├── sardal_activity_Co60_F_Serratus_Mattsson_2025.csv
+ │ └── sardal_activity_Co60_F_Vesiculosus_Mattsson_2025.csv
+ ├── Cs134
+ │ ├── ...
+ ├── ...
+ └── ...obs_date column, giving one sardal dataframe df_sardalsardal_path <- "data/sardal"
-sardal_files <- list.files(sardal_path, full.names = TRUE, pattern = "*.csv", recursive = TRUE) %>% set_names()
-sardal_df_list <- purrr::map(sardal_files, read_csv,
- col_types = cols(date = col_date(format = "%Y-%m-%d")))
-
-sardal_df_list <- map2(sardal_df_list, names(sardal_df_list),
- function(.x, .y){dplyr::select(.x, 1:2) %>%
- set_names(nm=c("obs_date", get_species_name(.y)))}) %>%
- map(~drop_na(.x))
-
-df_sardal <- data.frame(obs_date = get_full_time_range_by_day(sardal_df_list))
-df_sardal <- map(sardal_df_list, ~ full_join(df_sardal, .x, by = "obs_date"))
-df_sardal <- reduce(df_sardal, full_join, by = "obs_date")
-
-df_long_sardal <- df_sardal %>%
- pivot_longer(
- cols = -obs_date,
- names_to = "variable",
- values_to = "value"
- )sardal_path <- "data/sardal"
+sardal_files <- list.files(sardal_path, full.names = TRUE, pattern = "*.csv", recursive = TRUE) %>% set_names()
+sardal_df_list <- purrr::map(sardal_files, read_csv,
+ col_types = cols(date = col_date(format = "%Y-%m-%d")))
+
+sardal_df_list <- map2(sardal_df_list, names(sardal_df_list),
+ function(.x, .y){dplyr::select(.x, 1:2) %>%
+ set_names(nm=c("obs_date", get_species_name(.y)))}) %>%
+ map(~drop_na(.x))
+
+df_sardal <- data.frame(obs_date = get_full_time_range_by_day(sardal_df_list))
+df_sardal <- map(sardal_df_list, ~ full_join(df_sardal, .x, by = "obs_date"))
+df_sardal <- reduce(df_sardal, full_join, by = "obs_date")
+
+df_long_sardal <- df_sardal %>%
+ pivot_longer(
+ cols = -obs_date,
+ names_to = "variable",
+ values_to = "value"
+ )The same for steps are applied for the discharges, except we have no species and have a year time column instead of obs_date.
discharges_path <- "data/discharges"
-discharges_files <- list.files(discharges_path, full.names = TRUE, pattern = "*.csv", recursive = TRUE) %>% set_names()
-discharges_df_list <- purrr::map(discharges_files, read_csv)Rows: 54 Columns: 3
-── Column specification ────────────────────────────────────────────────────────
-Delimiter: ","
-dbl (3): year, sellafield_TBq, la_hague_TBq
-
-ℹ Use `spec()` to retrieve the full column specification for this data.
-ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
-Rows: 52 Columns: 5
-── Column specification ────────────────────────────────────────────────────────
-Delimiter: ","
-dbl (5): year, sellafield_TBq, la_hague_TBq, barseback_TBq, winfrith_TBq
-
-ℹ Use `spec()` to retrieve the full column specification for this data.
-ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
-Rows: 53 Columns: 3
-── Column specification ────────────────────────────────────────────────────────
-Delimiter: ","
-dbl (3): year, sellafield_TBq, la_hague_TBq
-
-ℹ Use `spec()` to retrieve the full column specification for this data.
-ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
-Rows: 53 Columns: 3
-── Column specification ────────────────────────────────────────────────────────
-Delimiter: ","
-dbl (3): year, sellafield_TBq, la_hague_TBq
-
-ℹ Use `spec()` to retrieve the full column specification for this data.
-ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
-Rows: 70 Columns: 3
-── Column specification ────────────────────────────────────────────────────────
-Delimiter: ","
-dbl (3): year, sellafield_TBq, la_hague_TBq
-
-ℹ Use `spec()` to retrieve the full column specification for this data.
-ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
-Rows: 40 Columns: 2
-── Column specification ────────────────────────────────────────────────────────
-Delimiter: ","
-dbl (2): year, sellafield_TBq
-
-ℹ Use `spec()` to retrieve the full column specification for this data.
-ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
-Rows: 71 Columns: 3
-── Column specification ────────────────────────────────────────────────────────
-Delimiter: ","
-dbl (3): year, sellafield_TBq, la_hague_TBq
-
-ℹ Use `spec()` to retrieve the full column specification for this data.
-ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
-discharges_df_list <- map2(discharges_df_list, names(discharges_df_list),
- function(.x, .y) {
- isotope_name <- get_isotope_name(.y)
- column_names <- colnames(.x)[-1] # exclude year col.
- new_column_names <- c(colnames(.x)[1], paste(isotope_name, column_names, sep = "_")) # concatenate isotope name to cols.
-
- .x %>%
- set_names(nm = new_column_names) # update col. names
- }) %>%
- map(~drop_na(.x))
-
-df_discharges <- data.frame(year = get_full_time_range_by_year(discharges_df_list))
-df_discharges <- map(discharges_df_list, ~ full_join(df_discharges, .x, by = "year"))
-df_discharges <- reduce(df_discharges, full_join, by = "year")
-
-df_long_discharges <- df_discharges %>%
- pivot_longer(
- cols = -year,
- names_to = "variable",
- values_to = "value"
- )discharges_path <- "data/discharges"
+discharges_files <- list.files(discharges_path, full.names = TRUE, pattern = "*.csv", recursive = TRUE) %>% set_names()
+discharges_df_list <- purrr::map(discharges_files, read_csv)
+
+discharges_df_list <- map2(discharges_df_list, names(discharges_df_list),
+ function(.x, .y) {
+ isotope_name <- get_isotope_name(.y)
+ column_names <- colnames(.x)[-1] # exclude year col.
+ new_column_names <- c(colnames(.x)[1], paste(isotope_name, column_names, sep = "_")) # concatenate isotope name to cols.
+
+ .x %>%
+ set_names(nm = new_column_names) # update col. names
+ }) %>%
+ map(~drop_na(.x))
+
+df_discharges <- data.frame(year = get_full_time_range_by_year(discharges_df_list))
+df_discharges <- map(discharges_df_list, ~ full_join(df_discharges, .x, by = "year"))
+df_discharges <- reduce(df_discharges, full_join, by = "year")
+
+df_long_discharges <- df_discharges %>%
+ pivot_longer(
+ cols = -year,
+ names_to = "variable",
+ values_to = "value"
+ )data_Cs137 <- combine_data("Cs137", df_long_sardal, df_long_discharges)
-p <- plot_isotope(data_Cs137)
-pdata_Cs137 <- combine_data("Cs137", df_long_sardal, df_long_discharges)
+p <- plot_isotope(data_Cs137)
+por Co60:
data_Co60 <- combine_data("Co60", df_long_sardal, df_long_discharges)
-p <- plot_isotope(data_Co60)
-pdata_Co60 <- combine_data("Co60", df_long_sardal, df_long_discharges)
+p <- plot_isotope(data_Co60)
+pFor statistical analysis it is very important to know how much data one has, and how it is structured, and whether there are any missing values or irregularities. A first step could be to see how many unique daily measurements and unique yearly measurements we have:
sum_daily <- df_sardal %>%
- group_by(obs_date) %>%
- summarize(across(where(is.numeric), ~ any(!is.na(.x)))) %>%
- summarize(across(-obs_date, sum)) %>%
- pivot_longer(everything())
-
-sum_yearly <- df_sardal %>%
- mutate(year = year(obs_date)) %>%
- group_by(year) %>%
- summarize(across(where(is.numeric), ~ any(!is.na(.x)))) %>%
- summarize(across(-year, sum)) %>%
- pivot_longer(everything())
-
-sums_table <- merge(sum_daily, sum_yearly, by = 'name') %>%
- rename(days = value.x, years = value.y)
-
-sums_tablesum_daily <- df_sardal %>%
+ group_by(obs_date) %>%
+ summarize(across(where(is.numeric), ~ any(!is.na(.x)))) %>%
+ summarize(across(-obs_date, sum)) %>%
+ pivot_longer(everything())
+
+sum_yearly <- df_sardal %>%
+ mutate(year = year(obs_date)) %>%
+ group_by(year) %>%
+ summarize(across(where(is.numeric), ~ any(!is.na(.x)))) %>%
+ summarize(across(-year, sum)) %>%
+ pivot_longer(everything())
+
+sums_table <- merge(sum_daily, sum_yearly, by = 'name') %>%
+ rename(days = value.x, years = value.y)
+
+sums_table name days years
1 C14_F_Various 72 54
@@ -2707,9 +2645,9 @@ dbl (3): year, sellafield_TBq, la_hague_TBq
Here is a visualization of the above information, showing the gaps in measured activity concentration (on a yearly scale):
df_long_sardal %>%
- filter(!(variable %in% c("I129_F_Serratus_Winter", "I129_F_Serratus_Summer"))) %>%
- plot_na_values()df_long_sardal %>%
+ filter(!(variable %in% c("I129_F_Serratus_Winter", "I129_F_Serratus_Summer"))) %>%
+ plot_na_values()Autocorrelation is a measure of correlation of a time series with a lagged (delayed) version of itself. This means it can quantify if, and how much, past values affect the present value.
-This is very important to be aware of
-lapply(names(df_discharges)[-1], function(col) {
- col_data <- na.omit(df_discharges[[col]])
- acf(col_data, plot = TRUE, main = paste("ACF Plot for", format_site_label(col)))
-})[[1]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.670 0.462 0.254 0.107 0.190 0.163 0.304 0.293 0.125 -0.017
- 11 12 13 14 15 16
--0.149 -0.152 -0.136 -0.153 -0.155 -0.269
-
-[[2]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.948 0.877 0.801 0.727 0.647 0.566 0.487 0.413 0.339 0.260
- 11 12 13 14 15 16
- 0.183 0.119 0.059 0.010 -0.043 -0.096
-
-[[3]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.517 0.385 0.223 -0.230 -0.408 -0.547 -0.496 -0.453 -0.289 0.003
- 11 12 13 14
- 0.059 0.256 0.316 0.209
-
-[[4]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.780 0.507 0.328 0.168 0.074 0.025 -0.044 -0.098 -0.124 -0.156
- 11 12 13 14
--0.178 -0.186 -0.192 -0.205
-
-[[5]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.414 0.156 -0.012 0.291 0.195 0.108 -0.247 -0.216 0.003 0.246
- 11 12 13 14
- 0.074 -0.127 -0.120 0.039
-
-[[6]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.744 0.404 0.303 0.259 0.178 0.123 0.048 -0.012 -0.045 -0.074
- 11 12 13 14
--0.111 -0.134 -0.146 -0.158
-
-[[7]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.838 0.636 0.510 0.393 0.279 0.200 0.131 0.074 0.026 -0.005
- 11 12 13 14 15 16
--0.027 -0.044 -0.052 -0.059 -0.064 -0.069
-
-[[8]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.383 0.200 0.255 0.179 0.182 0.225 0.204 0.122 0.088 0.113
- 11 12 13 14 15 16
- 0.162 0.099 0.065 0.109 0.003 -0.035
-
-[[9]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.906 0.796 0.705 0.580 0.451 0.348 0.235 0.134 0.047 0.002
- 11 12 13 14 15 16 17
--0.030 -0.054 -0.066 -0.075 -0.080 -0.087 -0.092
-
-[[10]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.465 0.344 0.333 0.255 0.214 0.240 0.219 0.158 0.139 0.154
- 11 12 13 14 15 16 17
- 0.192 0.109 0.083 0.073 0.006 -0.021 -0.028
-
-[[11]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.749 0.558 0.249 0.070 -0.044 -0.114 -0.173 -0.309 -0.361 -0.332
- 11 12 13 14 15
--0.202 -0.045 0.017 0.056 -0.024
-
-[[12]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.796 0.575 0.385 0.180 -0.037 -0.202 -0.276 -0.237 -0.206 -0.208
- 11 12 13 14 15
--0.138 -0.049 -0.045 -0.019 0.026
-
-[[13]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.904 0.809 0.709 0.586 0.485 0.335 0.171 0.007 -0.138 -0.250
- 11 12 13 14 15 16
--0.360 -0.477 -0.560 -0.593 -0.584 -0.572
-
-[[14]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.783 0.465 0.286 0.217 0.180 0.177 0.086 -0.097 -0.227 -0.286
- 11 12 13 14 15
--0.309 -0.302 -0.261 -0.233 -0.219
-
-[[15]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.812 0.742 0.506 0.361 0.220 0.092 0.022 -0.024 -0.036 -0.046
- 11 12 13 14 15
--0.054 -0.059 -0.063 -0.066 -0.073
+Output is suppressed for the autocorrelation plots because there are many. To see them, consult the written report or clone the repo and run this notebook.
lapply(names(df_sardal)[-1], function(col) {
- col_data <- na.omit(df_sardal[[col]])
- acf(col_data, plot = TRUE, main = paste("ACF Plot for", format_species_label(col)))
-})lapply(names(df_discharges)[-1], function(col) {
+ col_data <- na.omit(df_discharges[[col]])
+ acf(col_data, plot = TRUE, main = paste("ACF Plot for", format_site_label(col)))
+})lapply(names(df_sardal)[-1], function(col) {
+ col_data <- na.omit(df_sardal[[col]])
+ acf(col_data, plot = TRUE, main = paste("ACF Plot for", format_species_label(col)))
+})[[1]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10 11 12
-1.000 0.979 0.958 0.935 0.912 0.889 0.865 0.842 0.819 0.795 0.770 0.744 0.718
- 13 14 15 16 17 18 19 20
-0.695 0.672 0.648 0.625 0.601 0.577 0.552 0.528
-
-[[2]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10 11 12
-1.000 0.696 0.654 0.526 0.499 0.432 0.425 0.373 0.331 0.198 0.226 0.154 0.195
- 13 14 15 16 17 18 19 20 21
-0.161 0.136 0.114 0.168 0.158 0.116 0.091 0.141 0.098
-
-[[3]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10 11 12
-1.000 0.673 0.614 0.628 0.649 0.698 0.590 0.521 0.551 0.513 0.521 0.487 0.413
- 13 14 15 16 17 18 19 20
-0.401 0.415 0.388 0.361 0.322 0.316 0.274 0.288
-
-[[4]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.719 0.617 0.466 0.419 0.302 0.280 0.224 0.201 0.146 0.148
- 11 12 13 14 15 16 17 18 19
- 0.143 0.146 0.138 0.089 0.115 0.079 0.050 0.018 -0.001
-
-[[5]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.485 0.280 0.274 0.285 0.170 -0.033 -0.140 -0.203 -0.139 -0.242
- 11 12 13 14
--0.459 -0.318 -0.285 -0.234
-
-[[6]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10 11 12
-1.000 0.846 0.787 0.731 0.714 0.694 0.689 0.679 0.669 0.678 0.669 0.712 0.708
- 13 14 15 16 17 18 19 20 21 22 23 24
-0.709 0.693 0.684 0.660 0.655 0.630 0.588 0.588 0.580 0.586 0.580 0.588
-
-[[7]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10 11 12
-1.000 0.845 0.786 0.763 0.753 0.738 0.742 0.720 0.689 0.701 0.649 0.602 0.607
- 13 14 15 16 17 18 19 20 21 22 23 24
-0.629 0.628 0.663 0.658 0.602 0.575 0.583 0.588 0.579 0.584 0.556 0.540
-
-[[8]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10 11 12
-1.000 0.614 0.367 0.336 0.294 0.319 0.370 0.382 0.460 0.340 0.188 0.231 0.237
- 13 14 15 16 17 18 19 20 21
-0.157 0.075 0.077 0.107 0.078 0.101 0.157 0.086 0.041
-
-[[9]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.474 0.537 0.325 0.137 0.300 0.232 0.315 0.256 0.210 0.127
- 11 12 13 14 15 16 17 18
- 0.096 0.035 0.099 0.049 0.056 0.037 -0.036 -0.042
-
-[[10]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.463 0.398 0.456 0.248 0.368 0.342 0.212 0.180 0.057 0.098
- 11 12 13 14 15 16 17 18 19
--0.044 -0.135 -0.073 -0.072 -0.141 -0.194 -0.208 -0.279 -0.254
-
-[[11]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.544 0.381 0.380 0.213 0.009 0.071 0.027 -0.099 -0.118 -0.207
- 11 12
--0.276 -0.240
-
-[[12]]
-
-Autocorrelations of series 'col_data', by lag
-
- 0 1 2 3 4 5 6 7 8 9 10
- 1.000 0.768 0.677 0.465 0.399 0.305 0.207 0.159 0.073 0.110 0.037
- 11 12 13 14
- 0.009 -0.067 -0.113 -0.177
-We can see that there is significant autocorrelation for almost all data. A good reason for the Fucus is that radionuclides stay stored for a while, only exiting the compartment by radioactive decay or biological processes. This means we have to somehow capture this “memory” of past values in the model. One way is to add an autoregressive component
-\[ -y_t = \rho y_{t-1} + \dots -\]
-This will be done in the model specification.
+We can see that there is some autocorrelation for almost all data. However, explicitly adding an AR component is not always useful. The best case to apply it is Carbon-14, since there is a known external effect (the bomb pulse) This means we have to somehow capture this “memory” of past values in the model. One way is to add an autoregressive component. This will be done in the model specification.
Now, we will combine the discharge and activity concentration measurements for a given isotope, so that we can use that dataframe in the model.
-Due to limitations in the mvgam package, predictors cannot have NA values. Therefore, where missing years are present in the discharges we must address this. Let’s see if we have this issue:
Due to limitations in the mvgam package, predictors cannot have NaN values. Therefore, where missing years are present in the discharges we must address this. Let’s see if we have this issue:
plot_na_values(df_long_discharges)plot_na_values(df_long_discharges)We can see that for the caesium isotopes, there is some years without any discharge data available. We will therefore interpolate the data in these gaps in order to make fitting a model possible for caesium as well. The interpolate_data function will take care of this.
Now, let’s plot an example and see if the interpolation looks reasonable:
vars <- df_long_discharges %>%
- filter(str_detect(variable, "Cs137")) %>%
- distinct(variable)
-
-# Uninterpolated data
-uninterp <- df_long_discharges %>%
- filter(str_detect(variable, "Cs137"),
- year > 2010) %>%
- mutate(type = "Uninterpolated")
-
-# Interpolated ONCE using fuzzy isotope string
-interp <- interpolate_data("Cs137", df_long_discharges) %>%
- filter(year > 2010) %>%
- mutate(type = "Interpolated")
-
-
-# Combine
-plot_df <- bind_rows(interp, uninterp)
-
-# Plot
-ggplot(plot_df, aes(x = year, y = value, color = type)) +
- geom_point() +
- geom_line() +
- facet_wrap(~ variable, scales = "free_y") +
- labs(
- x = "Year",
- y = "Discharge [TBq/y]",
- color = "Data type"
- ) +
- theme_minimal()vars <- df_long_discharges %>%
+ filter(str_detect(variable, "Cs137")) %>%
+ distinct(variable)
+
+# Uninterpolated data
+uninterp <- df_long_discharges %>%
+ filter(str_detect(variable, "Cs137"),
+ year > 2010) %>%
+ mutate(type = "Uninterpolated")
+
+# Interpolated ONCE using fuzzy isotope string
+interp <- interpolate_data("Cs137", df_long_discharges) %>%
+ filter(year > 2010) %>%
+ mutate(type = "Interpolated")
+
+
+# Combine
+plot_df <- bind_rows(interp, uninterp)
+
+# Plot
+ggplot(plot_df, aes(x = year, y = value, color = type)) +
+ geom_point() +
+ geom_line() +
+ facet_wrap(~ variable, scales = "free_y") +
+ labs(
+ x = "Year",
+ y = "Discharge [TBq/y]",
+ color = "Data type"
+ ) +
+ theme_minimal()Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_point()`).
@@ -3240,14 +2752,14 @@ y_t = \rho y_{t-1} + \dots
-Summary: The chapter goes through creating an isotope dataset with the hand of carbon-14. Discharge and Särdal measurements are combined into one isotope-specific dataset. Tensor smooths are introduced as the foundational component for the distributed lag model. Hierarchical structure is explained and the model is also fitted.
+Summary: The chapter goes through creating an isotope dataset with the hand of carbon-14. Discharge and Särdal measurements are combined into one isotope-specific dataset. Tensor smooths are introduced as the foundational component for the distributed lag model.
Now it is time to combine the data. The combine_data function will, for a given isotope string (e.g. “C14”), combine the Särdal measurement data as well as the discharge data for said isotope, returning one comprehensive dataframe.
data_C14 <- combine_data("C14", df_long_sardal, df_long_discharges, scale_data = FALSE)
-plot_isotope(data_C14)data_C14 <- combine_data("C14", df_long_sardal, df_long_discharges, scale_data = FALSE)
+plot_isotope(data_C14)For Carbon-14 in particular, there is a strong and relatively well-defined underlying trend, which comes from the atmospheric C-14, known as the ‘bomb pulse’, which is the result of atmospheric nuclear weapons testing. To fit a useful model, it is important to either include the atmospheric C-14 as a predictor, or to subtract it from the time series in order to get a more representative marine level of F14C that does not include the bomb pulse. The latter approach is taken here. Atmospheric C-14 time series is taken from
---Hua, Quan, Jocelyn C. Turnbull, Guaciara M. Santos, et al. “ATMOSPHERIC RADIOCARBON FOR THE PERIOD 1950–2019.” Radiocarbon 64, no. 4 (2022): 723–45. https://doi.org/10.1017/RDC.2021.95.
-
and linearly extrapolated to 2020 to fit with the Särdal data. We subtract it by a constant so that the which was visually identified around 0.68 for this particular data set. This gives an estimated predictor
-\[ -F^{14}C^* = F^{14}C_{särdal} - 0.68\cdot F^{14}C_{bomb} -\]
+One thing that may be relevant is the correlation between Sellafield and La Hague here. This is to avoid co-linearity issues in the model. Basically, if the time series are highly correlated, adding them as separate predictors in a regression model can lead to inaccurate results. To do this, we must first make the series quasi-stationary by taking the first difference, before checking the correlation.
bomb_pulse <- read_excel("~/RadiologyProject/sardal-radioactivity-analysis/S0033822221000953sup002.xls", col_names = FALSE, skip = 314)New names:
-• `` -> `...1`
-• `` -> `...2`
-• `` -> `...3`
-• `` -> `...4`
-• `` -> `...5`
-bomb_pulse <- bomb_pulse %>%
- mutate(year = floor(.data[[names(bomb_pulse)[1]]])) %>%
- group_by(year) %>%
- summarise(F14C = mean(.data[[names(bomb_pulse)[4]]], na.rm = TRUE))
-
-bomb_pulse <- bomb_pulse %>%
- bind_rows(
- bomb_pulse %>%
- slice_tail(n = 2) %>%
- mutate(slope = (F14C - lag(F14C)) / (year - lag(year))) %>%
- slice_tail(n = 1) %>%
- transmute(
- year = 2020,
- F14C = F14C + slope
- )
- )
-
-bomb_pulse <- bomb_pulse %>%
- filter(year %in% data_C14$year)
-
-plot(bomb_pulse)data_C14$sardal <- data_C14$sardal - 0.68*bomb_pulse$F14C
-plot(data_C14$sardal)One thing that may be relevant is the correlation between Sellafield and La Hague here. This is to avoid colinearity issues in the model. Basically, if the time series are highly correlated, adding them as separate predictors in a regression model can lead to inaccurate results. To do this, we must first make the series quasi-stationary by taking the first difference, before checking the correlation.
-cor(diff(data_C14$sellafield), diff(data_C14$la_hague))cor(diff(data_C14$sellafield), diff(data_C14$la_hague))[1] 0.1693971
This is not a significant value to worry about, so we do not take any special steps to avoid co-linearity in the predictors.
-See Wood (2017) - Generalized Additive Models: An Introduction with R, section 5.6 for a detailed explanation on tensor smooths.
The lag matrix is is a matrix of the form
+The lag matrix is a matrix of the form
0 1 2 3 4 ...
0 1 2 3 4 ...
0 1 2 3 4 ...
@@ -3342,8 +2799,8 @@ f(x_{t-l}, l) = \sum_{k=0}^K \sum_{m=0}^M \beta_{km} b_k(x_{t-l}) c_m(l)
and \(K\) and \(M\) specify the allowed complexity (number of terms) that are allowed to represent those basis functions. For more details, see Wood (2017) and the written report.
-data_C14 <- data_C14 %>%
- add_lagged_matrices(n_max = MAX_LAG, n_min = MIN_LAG)
+data_C14 <- data_C14 %>%
+ add_lagged_matrices(n_max = MAX_LAG, n_min = MIN_LAG)
\[ \text{Activity} \sim (1 \vert \text{species}) + \text{Discharge} \]
-We essentially add a dummy variable to allow variable intercept / magnit
+We essentially add a dummy variable to allow variable intercept that depends on the discharge.
measured activity concentrations, or doing something arbitrary like taking the average, a hierarchical modelling structure combines the information by taking both datasets in the regression, but allowing some statistical variation between them.
-Example
-Suppose a model which predicts temperature from \(\text{CO}_2\) levels
-\[ -\text{Temperature} \sim \text{CO}_2 -\]
-Instead of fitting a model for each country, you could do something like
-\[ -\text{mean(Temperature)} \sim \text{CO}_2 -\]
To achieve this, we apply weight or ‘mask’ matrices that are the size of the lag matrices, but consist of zeroes and ones. Simply, weights_s matrix is 1 when the species is F. Serratus, 0 otherwise, and the same is true for the F. Vesiculosus weight matrix weights_v.
Note: We are not actually going to use these for the Carbon-14 model, but the procedure is just explained by the hand of C-14.
data_C14 <- data_C14 %>%
- apply_weights()data_C14 <- data_C14 %>%
+ apply_weights()With the functions defined and explained, we now specify the model:
\[ -g(E[y^{\text{sp}}_t]) = \underbrace{\alpha}_{\text{intercept}} + \underbrace{\alpha^{sp}}_{\text{species effect}} + \underbrace{\rho y^{\text{sp}}_{t-1}}_{\text{AR(1)}} + \underbrace{\sum_{l=0}^L f(\text{SF}_{t-l},l)}_{\text{Sellafield contribution}} + \underbrace{\sum_{l=0}^L f(\text{LH}_{t-l},l)}_{\text{La Hague contribution}} + \underbrace{\varepsilon_t}_{\text{error term}} +g(y_t^*) = \underbrace{\alpha}_{\text{intercept}} + \underbrace{\sum_{l=0}^L f(\text{SF}_{t-l},l)}_{\text{Sellafield contribution}} + \underbrace{\sum_{l=0}^L f(\text{LH}_{t-l},l)}_{\text{La Hague contribution}} + \underbrace{u_t}_{\text{AR(1)}} + \underbrace{\varepsilon_t}_{\text{error term}} \]
-where
+where \(u_t = \rho u_{t-1}\) and
\(\text{sp}\) represents the species of Fucus spp.
\(y_t^{sp}\) is the measured activity concentration in \(\text{Bq / kg d.w.}\)
\(l \in \{0, L\}\) is the lag in years.
\(g(\cdot)\) is the link function, that links the linear combination of predictors to the expected value of \(y_t^{sp}\).
\(f(x_{t-l},l)\) is a tensor product smooth as defined before.
We can now fit the model. Note log-link relation between the predictors and response (for more details on why, see report).
-The fitting can now be done by first formulating the model in mgcv/mvgam convention, then calling the functions. Note that the family of response is now assumed to be gaussian since we took the log of the response. The line #| include: false simply suppresses the rather verbose logging of the model fit, in order to keep this notebook clean.
And that’s it! The model_C14_* are now complete fitted model objects containing estimated parameters, original data, predicted (fitted) data, STAN code, posterior information and more. We save the models for future use.
The fitting can now be done by first formulating the model in mgcv/mvgam convention, then calling the functions. Note that the family of response is now assumed to be \(y_t \sim \text{LogNormal}(\mu, \sigma^2)\), which in terms of distribution of \(y_t\) is equivalent to \(log(y_t) \sim \text{Normal}(\mu, \sigma^2)\), with the small benefit that we do not have to transform the data back to the proper scale after fitting.
+if(FULL_RUN) {
+ formula <- sardal ~
+ te(sellafield, lag, k=4) +
+ te(la_hague, lag, k=4)
+
+ priors <- get_mvgam_priors(
+ formula = formula,
+ family = lognormal(),
+ data = data_C14,
+ trend_model = AR(1)
+ )
+
+ priors$prior[3] <- 'ar1 ~ beta(2, 2);'
+
+ model_C14_AR_mvgam <- mvgam(formula = formula,
+ family = lognormal(),
+ data = data_C14,
+ trend_model = AR(1),
+ burnin = 2000,
+ samples = 4000,
+ parallel = TRUE,
+ threads = 6,
+ noncentered = TRUE,
+ priors = priors
+ )
+
+ save(model_C14_AR_mvgam, file = "results/models/C14_AR.mvgam.Rdata")
+}And that’s it! The C14.mvgam.Rdata* is now a complete fitted model object containing estimated parameters, original data, predicted (fitted) data, STAN code, posterior information and more. We save the model.
Before we fit the other models we will now look at the summary of objects created above
summary(model_C14_mvgam)summary(model_C14_AR_mvgam)Loading required namespace: rstan
mcmc_plot(model_C14_AR_mvgam, variable='ar1', regex=TRUE, type="areas")We see that the AR(1) component is very strong, indicating a strong temporal trend. This is the residual of the carbon-14 bomb pulse that is being captured by the AR(1) trend component. The tensor product smooth posteriors are a bit difficult to interpret directly. Instead, we can look at the final estimated interaction effect surface from the fitted tensor product smooths:
+plot_mvgam_smooth(model_C14_AR_mvgam, series = 1, smooth = 1)plot_mvgam_smooth(model_C14_AR_mvgam, series = 1, smooth = 2)Again, these surfaces represent \(f(x_{t-l}, l)\), essentially the ‘joint effect’ of lag and discharge on the measured activity concentration in Fucus at Särdal. Sellafield is showing a pattern in line with expectations: a higher discharge from Sellafield results in a higher (more positive) effect on Särdal activity concentrations. Regarding lag, we see that the highest values are centered around 3-5 years, with a very broad delayed effect up to 8 years as well, with the effect becoming closer to 0 near 9-10 years. This seems in line with estimates of Sellafield transport time being approximately 3-4 years, corresponding to the initial ‘hit’, and the prolonged effect across many lags could be due to the delayed uptake as other environmental processes (which? ask Kristina). La Hague does not really match expectation.
+We can also inspect what the predictions are from this fitted model (note the log scale!):
+plot(hindcast(model_C14_AR_mvgam))No non-missing values in test_observations; cannot calculate forecast score
+Some model diagnostics that will inform us about the fit are the residual plots and the prior-posterior check:
+plot_mvgam_resids(model_C14_AR_mvgam)ppc(model_C14_AR_mvgam)The residual plots, such as the Q-Q plot, indicate a good model fit, suggesting that the lognormal family is appropriately describing the response. There is some slight residual autocorrelation which is not captured by the AR(1) process, overall the AR(1) has proved effective at capturing both the effect of the bomb pulse and the ‘memory’ effect of radiocarbon being stored in the Fucus over time.
+The posterior predictive check (PPC) shows how well the estimated posterior \(\hat{y}\) represents the distribution of the observed data \(y\). We can see a rather good fit. However, because the actual surfaces look rather complicated it’s possible that the model is overfitting to noise here.
+ +Above we tried to compensate for the atmospheric Carbon-14 from the nuclear weapons testing by applying an AR(1) component. Another approach could be to modify the time series of measurements instead, detrending it. We load some atmospheric C-14 measurements that are in the same unit, and subtract a scaled version to attempt to make an approximately flat time series (save for any peaks due to e.g. marine discharges).
+data_C14 <- combine_data("C14", df_long_sardal, df_long_discharges, scale_data = FALSE)
+
+bomb_pulse <- read_excel("~/RadiologyProject/sardal-radioactivity-analysis/S0033822221000953sup002.xls", col_names = FALSE, skip = 314)New names:
+• `` -> `...1`
+• `` -> `...2`
+• `` -> `...3`
+• `` -> `...4`
+• `` -> `...5`
+bomb_pulse <- bomb_pulse %>%
+ mutate(year = floor(.data[[names(bomb_pulse)[1]]])) %>%
+ group_by(year) %>%
+ summarise(F14C = mean(.data[[names(bomb_pulse)[4]]], na.rm = TRUE))
+
+bomb_pulse <- bomb_pulse %>%
+ bind_rows(
+ bomb_pulse %>%
+ slice_tail(n = 2) %>%
+ mutate(slope = (F14C - lag(F14C)) / (year - lag(year))) %>%
+ slice_tail(n = 1) %>%
+ transmute(
+ year = 2020,
+ F14C = F14C + slope
+ )
+ )
+
+bomb_pulse <- bomb_pulse %>%
+ filter(year %in% data_C14$year)
+
+data_C14$sardal <- data_C14$sardal - 0.68*bomb_pulse$F14C
+plot(data_C14$sardal)data_C14 <- data_C14 %>%
+ add_lagged_matrices(n_max = MAX_LAG, n_min = MIN_LAG) %>%
+ apply_weights()We can now make a model again
+if(FULL_RUN) {
+ formula <- sardal ~
+ te(sellafield, lag, k=4) +
+ te(la_hague, lag, k=4)
+
+ model_C14_Corrected_mvgam <- mvgam(formula = formula,
+ family = lognormal(),
+ data = data_C14,
+ trend_model = 'None',
+ burnin = 2000,
+ samples = 4000,
+ parallel = TRUE,
+ threads = 6,
+ noncentered = TRUE
+ )
+
+ save(model_C14_Corrected_mvgam, file = "results/models/C14_Corrected.mvgam.Rdata")
+}summary(model_C14_Corrected_mvgam)GAM formula:
+sardal ~ te(sellafield, lag, k = 4) + te(la_hague, lag, k = 4)
+
+Family:
+lognormal
+
+Link function:
+identity
+
Trend model:
None
@@ -3435,47 +3143,47 @@ Fitted using Stan
Total post-warmup draws = 16000
log(observation error) parameter estimates:
- 2.5% 50% 97.5% Rhat n_eff
-sigma_obs[1] 0.02 0.028 0.042 1 5006
+ 2.5% 50% 97.5% Rhat n_eff
+sigma_obs[1] 0.019 0.027 0.041 1 4697
GAM coefficient (beta) estimates:
- 2.5% 50% 97.5% Rhat n_eff
-(Intercept) -0.990 -0.98000 -0.980 1 16941
-te(sellafield,lag).1 -0.075 0.00170 0.081 1 5741
-te(sellafield,lag).2 -0.100 -0.00078 0.095 1 4617
-te(sellafield,lag).3 -0.150 0.00890 0.170 1 4122
-te(sellafield,lag).4 -0.150 -0.01400 0.130 1 3558
-te(sellafield,lag).5 -0.061 0.00890 0.080 1 4785
-te(sellafield,lag).6 -0.079 0.00600 0.089 1 3974
-te(sellafield,lag).7 -0.160 -0.00220 0.150 1 3920
-te(sellafield,lag).8 -0.110 -0.00590 0.100 1 3773
-te(sellafield,lag).9 -0.093 0.00660 0.100 1 4529
-te(sellafield,lag).10 -0.130 0.01400 0.160 1 3644
-te(sellafield,lag).11 -0.190 -0.00470 0.170 1 3801
-te(sellafield,lag).12 -0.100 0.05600 0.210 1 4334
-te(sellafield,lag).13 -0.070 0.01000 0.087 1 5822
-te(sellafield,lag).14 -0.078 0.01200 0.099 1 4345
-te(sellafield,lag).15 -0.140 0.02900 0.190 1 4382
-te(la_hague,lag).1 -0.230 -0.02500 0.170 1 7199
-te(la_hague,lag).2 -0.220 -0.00860 0.200 1 6453
-te(la_hague,lag).3 -0.260 0.00580 0.270 1 7536
-te(la_hague,lag).4 -0.220 -0.02400 0.180 1 5108
-te(la_hague,lag).5 -0.160 0.00840 0.180 1 4973
-te(la_hague,lag).6 -0.220 -0.03000 0.150 1 4879
-te(la_hague,lag).7 -0.200 0.01500 0.240 1 5656
-te(la_hague,lag).8 -0.210 -0.00850 0.200 1 6944
-te(la_hague,lag).9 -0.250 -0.03100 0.200 1 5368
-te(la_hague,lag).10 -0.220 0.00190 0.230 1 4898
-te(la_hague,lag).11 -0.230 -0.00570 0.220 1 6001
-te(la_hague,lag).12 -0.230 -0.00310 0.230 1 5663
-te(la_hague,lag).13 -0.140 0.01600 0.180 1 5253
-te(la_hague,lag).14 -0.160 0.00560 0.170 1 5136
-te(la_hague,lag).15 -0.260 -0.03300 0.200 1 5648
+ 2.5% 50% 97.5% Rhat n_eff
+(Intercept) -0.990 -0.9800 -0.980 1 18117
+te(sellafield,lag).1 -0.077 0.0024 0.081 1 5764
+te(sellafield,lag).2 -0.100 -0.0014 0.097 1 3518
+te(sellafield,lag).3 -0.160 0.0075 0.170 1 2768
+te(sellafield,lag).4 -0.160 -0.0120 0.130 1 2629
+te(sellafield,lag).5 -0.063 0.0097 0.080 1 4958
+te(sellafield,lag).6 -0.082 0.0054 0.090 1 3251
+te(sellafield,lag).7 -0.160 -0.0052 0.150 1 2654
+te(sellafield,lag).8 -0.110 -0.0044 0.110 1 2719
+te(sellafield,lag).9 -0.094 0.0060 0.110 1 5107
+te(sellafield,lag).10 -0.140 0.0120 0.160 1 2800
+te(sellafield,lag).11 -0.190 -0.0076 0.180 1 2483
+te(sellafield,lag).12 -0.100 0.0580 0.220 1 3079
+te(sellafield,lag).13 -0.072 0.0100 0.087 1 5795
+te(sellafield,lag).14 -0.078 0.0110 0.100 1 3600
+te(sellafield,lag).15 -0.140 0.0260 0.190 1 2941
+te(la_hague,lag).1 -0.220 -0.0230 0.180 1 5650
+te(la_hague,lag).2 -0.210 -0.0073 0.200 1 5700
+te(la_hague,lag).3 -0.260 0.0047 0.270 1 5698
+te(la_hague,lag).4 -0.220 -0.0250 0.160 1 4198
+te(la_hague,lag).5 -0.160 0.0099 0.190 1 3860
+te(la_hague,lag).6 -0.210 -0.0280 0.160 1 4111
+te(la_hague,lag).7 -0.210 0.0170 0.240 1 4092
+te(la_hague,lag).8 -0.210 -0.0110 0.190 1 5755
+te(la_hague,lag).9 -0.250 -0.0290 0.210 1 4139
+te(la_hague,lag).10 -0.210 0.0051 0.230 1 4094
+te(la_hague,lag).11 -0.240 -0.0048 0.220 1 4447
+te(la_hague,lag).12 -0.230 -0.0048 0.220 1 4640
+te(la_hague,lag).13 -0.140 0.0170 0.180 1 3937
+te(la_hague,lag).14 -0.150 0.0075 0.170 1 4232
+te(la_hague,lag).15 -0.260 -0.0320 0.190 1 4024
Approximate significance of GAM smooths:
edf Ref.df Chi.sq p-value
-te(sellafield,lag) 1.797 15 0.376 1.000
-te(la_hague,lag) 1.647 15 0.662 0.999
+te(sellafield,lag) 1.423 15 0.525 1.000
+te(la_hague,lag) 1.793 15 0.867 0.999
Stan MCMC diagnostics:
✔ No issues with effective samples per iteration
@@ -3489,119 +3197,97 @@ Samples were drawn using sampling(hmc). For each parameter, n_eff is a
Use how_to_cite() to get started describing this model
mcmc_plot(model_C14_mvgam)plot_mvgam_smooth(model_C14_Corrected_mvgam, series = 1, smooth = 1)We see that the AR(1) component is very strong, indicating a strong temporal trend. This is the residual of the carbon-14 bomb pulse that is being captured by the AR(1) trend component. The tensor product smooth posteriors are a bit difficult to interpret directly. Instead, we can look at the final estimated interaction effect surface from the fitted tensor product smooths:
-plot_mvgam_smooth(model_C14_mvgam, series = 1, smooth = 1)plot_mvgam_smooth(model_C14_Corrected_mvgam, series = 1, smooth = 2)plot_mvgam_smooth(model_C14_mvgam, series = 1, smooth = 2)Again, these surfaces represent \(f(x_{t-l}, l)\), essentially the ‘joint effect’ of lag and discharge on the measured activity concentration in Fucus at Särdal. Sellafield is showing a pattern in line with expectations: a higher discharge from Sellafield results in a higher (more positive) effect on Särdal activity concentrations. Regarding lag, we see that the highest values are centered around 3-5 years, with a very broad delayed effect up to 8 years as well, with the effect becoming closer to 0 near 9-10 years. This seems in line with estimates of Sellafield transport time being approximately 3-4 years, corresponding to the initial ‘hit’, and the prolonged effect across many lags could be due to the delayed uptake as other environmental processes (which? ask Kristina). La Hague does not really match expectation.
-We can also inspect what the predictions are from this fitted model (note the log scale!):
-plot(hindcast(model_C14_mvgam))plot(hindcast(model_C14_Corrected_mvgam))No non-missing values in test_observations; cannot calculate forecast score
Some model diagnostics that will inform us about the fit are the residual plots and the prior-posterior check:
-plot_mvgam_resids(model_C14_mvgam)plot_mvgam_resids(model_C14_Corrected_mvgam)ppc(model_C14_mvgam)ppc(model_C14_Corrected_mvgam)The residual plots, such as the Q-Q plot, indicate a good model fit, suggesting that the log-link function is appropriately describing the relation between the predictors and response. There is some slight residual autocorrelation which is not captured by the AR(1) process, overall the AR(1) has proved very effective at capturing both the effect of the bomb pulse and the ‘memory’ effect of radiocarbon being stored in the Fucus over several years.
-The posterior predictive check (PPC) shows how well the estimated posterior \(\hat{y}\) prepresents the distribution of the observed data \(y\). We can see a rather good fit, indicating that \(y\) is normally distributed under log-link relation with the predictors, with all observed data falling within approximately \(2\sigma\) of the mean of the posterior distribution.
Now, caesium is a unique case, because the influence from the 1986 Chernobyl accident on Ceasium concentrations in the Nordics has been well studied. In particular, outflow from the Baltic sea is predicted to be the primary contributor of ceasium activity concentrations on the Swedish west coast. Thus, we must include it. We include here a Baltic sea caesium-137 simulation based on information from HELCOM (2009) Radioactivity in the Baltic Sea, 1999-2006 HELCOM thematic assessment. Balt. Sea Environ. Proc. No. 117, pages 21 and 22.
simulate_baltic_cs137 <- function(data_Cs137) {
-
- # take a vector of years from the original dataset
- years <- sort(unique(data_Cs137$year))
- total_years <- length(years)
- # number of data points after the incident
- n_after <- length(years[years >= 1986])
-
- t <- seq_len(n_after) - 1
-
- # effective half lives used from HELCOM 2009 pp. 21-22
- k_1 <- log(2)/2.5 # 1986-1988
- k_2 <- log(2)/9 # 1990 - end
- y_0 <- 650 # approximate peak value Bq / m^-3
-
- # prefill empty array
- x <- array(5, total_years)
-
- # expoenential decay with piecewise half lives
- data <- y_0 * exp(- (k_1 * pmin(t, 3) + k_2 * pmax(0, t - 3)))
-
- # replace the tail (from 1986 to end) by sim data
- x[(total_years - n_after + 1):total_years] <- data
-
- # add noise
- set.seed(123)
- x <- x + rnorm(total_years, sd = 0.1 * mean(x))
- return(as.vector(x))
-}simulate_baltic_cs137 <- function(data_Cs137) {
+
+ # take a vector of years from the original dataset
+ years <- sort(unique(data_Cs137$year))
+ total_years <- length(years)
+ # number of data points after the incident
+ n_after <- length(years[years >= 1986])
+
+ t <- seq_len(n_after) - 1
+
+ # effective half lives used from HELCOM 2009 pp. 21-22
+ k_1 <- log(2)/2.5 # 1986-1988
+ k_2 <- log(2)/9 # 1990 - end
+ y_0 <- 650 # approximate peak value Bq / m^-3
+
+ # prefill empty array
+ x <- array(5, total_years)
+
+ # expoenential decay with piecewise half lives
+ data <- y_0 * exp(- (k_1 * pmin(t, 3) + k_2 * pmax(0, t - 3)))
+
+ # replace the tail (from 1986 to end) by sim data
+ x[(total_years - n_after + 1):total_years] <- data
+
+ # add noise
+ set.seed(123)
+ x <- x + rnorm(total_years, sd = 0.1 * mean(x))
+ return(as.vector(x))
+}Adding baltic sea simulation to the data:
data_Cs137 <- combine_data("Cs137", df_long_sardal, df_long_discharges, scale_data = FALSE)
-
-baltic_Cs137 <- simulate_baltic_cs137(data_Cs137)
-
-data_Cs137 <- data_Cs137 %>%
- group_by(series) %>%
- mutate(baltic = baltic_Cs137) %>%
- ungroup()
-
-plot_isotope(data_Cs137)data_Cs137 <- combine_data("Cs137", df_long_sardal, df_long_discharges, scale_data = FALSE)
+
+baltic_Cs137 <- simulate_baltic_cs137(data_Cs137)
+
+data_Cs137 <- data_Cs137 %>%
+ group_by(series) %>%
+ mutate(baltic = baltic_Cs137) %>%
+ ungroup()
+
+plot_isotope(data_Cs137)Let’s check correlation once more
cor(diff(data_Cs137$sellafield), diff(data_Cs137$baltic))cor(diff(data_Cs137$sellafield), diff(data_Cs137$baltic))[1] -0.06012205
We prepare the data as before
data_Cs137 <- data_Cs137 %>%
- add_lagged_matrices(n_max = MAX_LAG, n_min = MIN_LAG) %>%
- apply_weights()data_Cs137 <- data_Cs137 %>%
+ add_lagged_matrices(n_max = MAX_LAG, n_min = MIN_LAG) %>%
+ apply_weights()Note the extra terms below in the formula. It looks like we have a much bigger model, but this is the hierarchical model at play through applying the terms only when the weights matrix contains a 1. Anyways, let’s fit
if(FULL_RUN) {
+ formula <- sardal ~
+ te(sellafield, lag,
+ k = 4) +
+ te(baltic, lag,
+ k = 4)
+
+ model_Cs137_mvgam <- mvgam(formula = formula,
+ family = lognormal(),
+ data = data_Cs137,
+ burnin = 2000,
+ samples = 4000,
+ parallel = TRUE,
+ threads = 6,
+ noncentered = TRUE
+ )
+
+ save(model_Cs137_mvgam, file = "results/models/Cs137.mvgam.Rdata")
+}We can make the same plots and summaries as before:
summary(model_Cs137_mvgam)summary(model_Cs137_mvgam)GAM formula:
sardal ~ te(sellafield, lag, k = 4) + te(baltic, lag, k = 4)
@@ -3716,7 +3423,7 @@ Samples were drawn using sampling(hmc). For each parameter, n_eff is a
Use how_to_cite() to get started describing this model
mcmc_plot(model_Cs137_mvgam)mcmc_plot(model_Cs137_mvgam)There appears to be no significant contribution from La Hague, but, the model finds predictive power for both Sellafield discharges and Baltic sea activity concentrations.
-Unlike for Carbon-14 we have separate time series available for both Fucus Serratus and Fucus Vesiculosus, so we also have two different possible responses.
+By p-values, it appears the model is finding predictive power for both Sellafield discharges and Baltic sea activity concentrations.
plot_mvgam_smooth(model_Cs137_mvgam, smooth = 1)plot_mvgam_smooth(model_Cs137_mvgam, smooth = 1)plot_mvgam_smooth(model_Cs137_mvgam, smooth = 2)plot_mvgam_smooth(model_Cs137_mvgam, smooth = 2)In both species we see a reasonable pattern for Baltic outflow, with increasing concentrations in the Baltic sea predicting higher activity concentrations in the Fucus. The effect seems to be mainly around 1-2 years delay with a long tail of influence towards higher lags. The hypothesis that the majority of Caesium-137 after 1986 originates from Baltic sea outflow into the Kattegat has been described and supported in Mattsson et al. (2025) by effective half life calculations. Our simulated series of activity concentrations based on reported effective half lives seems to corroborate this hypothesis.
-That Sellafield has predictive power in the model, could indicate that the peak in activity concentrations around the 1980s before the Chernobyl disaster is caused by the significant discharges from Sellafield in the late 1970s. However, the smooth does not seem to be quite the expected shape.
+The effect seems to be mainly around 1-2 years delay with a long tail of influence towards higher lags. The hypothesis that the majority of Caesium-137 after 1986 originates from Baltic sea outflow into the Kattegat has been described and supported in Mattsson et al. (2025) by effective half life calculations. Our simulated series of activity concentrations based on reported effective half lives seems to corroborate this hypothesis.
+That Sellafield has predictive power in the model, could indicate that the peak in activity concentrations around the 1980s before the Chernobyl disaster is caused by the significant discharges from Sellafield in the late 1970s. The smooth shows a peak effect much later than initial arrival time, closer to 6-7 years.
plot(hindcast(model_Cs137_mvgam))plot(hindcast(model_Cs137_mvgam))No non-missing values in test_observations; cannot calculate forecast score
plot(hindcast(model_Cs137_mvgam), series = 2)plot(hindcast(model_Cs137_mvgam), series = 2)No non-missing values in test_observations; cannot calculate forecast score
The trends look slightly different. We can see what the difference is:
plot(model_Cs137_mvgam, type = "re")plot(model_Cs137_mvgam, type = "re")No random effect smooths (bs = "re") in model formula
It seems like the uncertainties are rather large and we cannot say anything conclusive from this model whether F. Serratus and F. Vesiculosus have different uptake of Caesium-137 in the Swedish west coast marine environment.
And the diagnostics:
plot_mvgam_resids(model_Cs137_mvgam)plot_mvgam_resids(model_Cs137_mvgam)ppc(model_Cs137_mvgam)ppc(model_Cs137_mvgam)data_Co60 <- combine_data("Co60", df_long_sardal, df_long_discharges, scale_data = FALSE)
-plot_isotope(data_Co60)data_Co60 <- combine_data("Co60", df_long_sardal, df_long_discharges, scale_data = FALSE)
+plot_isotope(data_Co60)cor(diff(data_Co60$barseback), diff(data_Co60$la_hague))cor(diff(data_Co60$barseback), diff(data_Co60$la_hague))[1] -0.04478878
cor(diff(data_Co60$barseback), diff(data_Co60$winfrith))cor(diff(data_Co60$barseback), diff(data_Co60$winfrith))[1] -0.1521441
cor(diff(data_Co60$winfrith), diff(data_Co60$la_hague))cor(diff(data_Co60$winfrith), diff(data_Co60$la_hague))[1] 0.2544252
Now, for Cobalt-60 we have rather limited data available, and in order to get a useful model, one cannot have too many parameters compared to number of datapoints. Thus, since the signal for Sellafield is rather weak compared to the rest we ignore it. We prepare the data as before
data_Co60 <- data_Co60 %>%
- add_lagged_matrices(n_max = MAX_LAG, n_min = MIN_LAG) %>%
- apply_weights()data_Co60 <- data_Co60 %>%
+ add_lagged_matrices(n_max = MAX_LAG, n_min = MIN_LAG) %>%
+ apply_weights()Let’s fit as before
if(FULL_RUN) {
+ formula <- sardal ~
+ te(la_hague, lag,
+ k = 4) +
+ te(barseback, lag,
+ k = 4)
+
+ model_Co60_mvgam <- mvgam(formula = formula,
+ family = lognormal(),
+ trend_model = 'None',
+ data = data_Co60,
+ burnin = 2000,
+ samples = 4000,
+ parallel = TRUE,
+ threads = 6,
+ noncentered = TRUE
+ )
+
+ save(model_Co60_mvgam, file = "results/models/Co60.mvgam.Rdata")
+}We can make the same plots and summaries as before:
summary(model_Co60_mvgam)summary(model_Co60_mvgam)GAM formula:
sardal ~ te(la_hague, lag, k = 4) + te(barseback, lag, k = 4)
@@ -3873,47 +3601,47 @@ Total post-warmup draws = 16000
log(observation error) parameter estimates:
2.5% 50% 97.5% Rhat n_eff
-sigma_obs[1] 0.16 0.25 0.40 1 10067
-sigma_obs[2] 0.25 0.35 0.51 1 13298
+sigma_obs[1] 0.16 0.25 0.40 1 10761
+sigma_obs[2] 0.25 0.35 0.52 1 13834
GAM coefficient (beta) estimates:
- 2.5% 50% 97.5% Rhat n_eff
-(Intercept) 0.130 0.2600 0.360 1 11282
-te(la_hague,lag).1 -0.110 -0.0490 0.026 1 4381
-te(la_hague,lag).2 -0.230 -0.0700 0.094 1 4156
-te(la_hague,lag).3 -0.340 -0.0970 0.150 1 3928
-te(la_hague,lag).4 -0.260 -0.0370 0.180 1 3814
-te(la_hague,lag).5 -0.180 -0.0560 0.067 1 3858
-te(la_hague,lag).6 -0.130 -0.0810 -0.035 1 7558
-te(la_hague,lag).7 -0.260 -0.0940 0.073 1 3902
-te(la_hague,lag).8 -0.350 -0.0610 0.230 1 5966
-te(la_hague,lag).9 -0.120 0.0140 0.160 1 7105
-te(la_hague,lag).10 -0.042 0.0840 0.230 1 3266
-te(la_hague,lag).11 -0.140 0.1200 0.370 1 4926
-te(la_hague,lag).12 -0.360 0.0140 0.390 1 7555
-te(la_hague,lag).13 -0.290 -0.0230 0.240 1 7800
-te(la_hague,lag).14 -0.330 -0.0510 0.200 1 7283
-te(la_hague,lag).15 -0.410 -0.0520 0.280 1 9968
-te(barseback,lag).1 -0.490 -0.2200 0.049 1 9395
-te(barseback,lag).2 -0.590 -0.2600 0.061 1 8288
-te(barseback,lag).3 -0.740 -0.3200 0.083 1 9527
-te(barseback,lag).4 -0.250 -0.0075 0.220 1 7405
-te(barseback,lag).5 -0.150 0.0500 0.270 1 7280
-te(barseback,lag).6 -0.210 0.0037 0.220 1 6959
-te(barseback,lag).7 -0.300 -0.0260 0.270 1 7888
-te(barseback,lag).8 -0.200 0.1300 0.440 1 7633
-te(barseback,lag).9 0.011 0.2400 0.480 1 7468
-te(barseback,lag).10 0.020 0.3100 0.600 1 6301
-te(barseback,lag).11 0.024 0.3400 0.660 1 7447
-te(barseback,lag).12 -0.410 -0.0120 0.480 1 9162
-te(barseback,lag).13 -0.520 -0.1400 0.250 1 7847
-te(barseback,lag).14 -0.480 -0.1300 0.270 1 8535
-te(barseback,lag).15 -0.550 -0.1700 0.210 1 9907
+ 2.5% 50% 97.5% Rhat n_eff
+(Intercept) 0.1300 0.2600 0.360 1 11248
+te(la_hague,lag).1 -0.1100 -0.0490 0.015 1 7373
+te(la_hague,lag).2 -0.2300 -0.0680 0.089 1 4419
+te(la_hague,lag).3 -0.3500 -0.0960 0.150 1 4333
+te(la_hague,lag).4 -0.2600 -0.0380 0.180 1 4102
+te(la_hague,lag).5 -0.1700 -0.0560 0.067 1 4412
+te(la_hague,lag).6 -0.1200 -0.0810 -0.036 1 8025
+te(la_hague,lag).7 -0.2600 -0.0930 0.075 1 4489
+te(la_hague,lag).8 -0.3600 -0.0600 0.240 1 6331
+te(la_hague,lag).9 -0.1200 0.0160 0.150 1 7462
+te(la_hague,lag).10 -0.0450 0.0830 0.230 1 4140
+te(la_hague,lag).11 -0.1400 0.1200 0.370 1 5220
+te(la_hague,lag).12 -0.3600 0.0070 0.400 1 7996
+te(la_hague,lag).13 -0.2900 -0.0240 0.240 1 7559
+te(la_hague,lag).14 -0.3100 -0.0480 0.190 1 6713
+te(la_hague,lag).15 -0.4100 -0.0520 0.290 1 6727
+te(barseback,lag).1 -0.4800 -0.2200 0.055 1 10097
+te(barseback,lag).2 -0.5900 -0.2500 0.065 1 9667
+te(barseback,lag).3 -0.7400 -0.3200 0.086 1 9045
+te(barseback,lag).4 -0.2400 -0.0039 0.220 1 7101
+te(barseback,lag).5 -0.1500 0.0520 0.270 1 5968
+te(barseback,lag).6 -0.2100 0.0038 0.220 1 6818
+te(barseback,lag).7 -0.3000 -0.0270 0.250 1 7720
+te(barseback,lag).8 -0.1900 0.1300 0.450 1 7362
+te(barseback,lag).9 0.0064 0.2400 0.490 1 6144
+te(barseback,lag).10 0.0052 0.3000 0.600 1 6200
+te(barseback,lag).11 0.0084 0.3300 0.650 1 7904
+te(barseback,lag).12 -0.4100 -0.0052 0.500 1 7929
+te(barseback,lag).13 -0.5000 -0.1400 0.280 1 7552
+te(barseback,lag).14 -0.4700 -0.1200 0.270 1 9181
+te(barseback,lag).15 -0.5600 -0.1700 0.220 1 10409
Approximate significance of GAM smooths:
edf Ref.df Chi.sq p-value
-te(la_hague,lag) 2.083 15 4.991 0.795
-te(barseback,lag) 2.616 15 5.722 0.809
+te(la_hague,lag) 2.163 15 5.318 0.626
+te(barseback,lag) 2.189 15 6.593 0.953
Stan MCMC diagnostics:
✔ No issues with effective samples per iteration
@@ -3927,78 +3655,78 @@ Samples were drawn using sampling(hmc). For each parameter, n_eff is a
Use how_to_cite() to get started describing this model
mcmc_plot(model_Co60_mvgam)mcmc_plot(model_Co60_mvgam)plot_mvgam_smooth(model_Co60_mvgam, smooth = 1)plot_mvgam_smooth(model_Co60_mvgam, smooth = 1)plot_mvgam_smooth(model_Co60_mvgam, smooth = 2)plot_mvgam_smooth(model_Co60_mvgam, smooth = 2)plot(hindcast(model_Co60_mvgam), series = 1)plot(hindcast(model_Co60_mvgam), series = 1)No non-missing values in test_observations; cannot calculate forecast score
plot(hindcast(model_Co60_mvgam), series = 2)plot(hindcast(model_Co60_mvgam), series = 2)No non-missing values in test_observations; cannot calculate forecast score
plot(model_Co60_mvgam, type = "re")plot(model_Co60_mvgam, type = "re")No random effect smooths (bs = "re") in model formula
And the diagnostics:
plot_mvgam_resids(model_Co60_mvgam)plot_mvgam_resids(model_Co60_mvgam)ppc(model_Co60_mvgam)ppc(model_Co60_mvgam)For Iodine-129, Mattsson et al. (2025) report significant seasonal differences between summer and winter data, up to 3 times higher concentrations in summer. Because this is a signifcant effect, and we have the collection season available as data, we chose here to treat F. Serratus in summer and winter as two separate ‘species’. We drop the mean measurement which is denoted as just F. Serratus:
+For Iodine-129, Mattsson et al. (2025) report significant seasonal differences between summer and winter data. Because this is a significant effect, and we have the collection season available as data, we chose here to treat F. Serratus in summer and winter as two separate ‘species’. We drop the mean measurement which is denoted as just F. Serratus:
data_I129 <- combine_data("I129", df_long_sardal, df_long_discharges, scale_data = FALSE)
-
-plot_isotope(data_I129)data_I129 <- combine_data("I129", df_long_sardal, df_long_discharges, scale_data = FALSE)
+
+plot_isotope(data_I129)data_I129 <- droplevels(data_I129[!data_I129$series == 'I129_F_Serratus',])
-
-data_I129 <- data_I129 %>%
- add_lagged_matrices(n_max = MAX_LAG, n_min = MIN_LAG) %>%
- apply_weights()cor(diff(data_I129$sellafield), diff(data_I129$la_hague))cor(diff(data_I129$sellafield), diff(data_I129$la_hague)) [,1] [,2] [,3] [,4] [,5] [,6]
-[1,] -0.07828751 0.060352359 -6.357254e-02 0.12275478 -0.10940554 -0.01345432
-[2,] 0.27178874 -0.083966716 3.257249e-02 0.01134332 0.16576190 -0.07617355
-[3,] -0.03416310 0.215037036 -1.417841e-01 0.07517303 0.02267189 0.13696328
-[4,] 0.31062333 -0.003021157 2.294285e-01 0.13793005 0.24347159 0.16170400
-[5,] -0.26254457 0.294653614 -5.486252e-05 0.14958757 0.08257034 0.18290961
-[6,] -0.15065752 -0.186315045 3.459510e-01 0.31796790 0.33216314 0.24695259
-[7,] -0.11756144 -0.075103717 -2.747584e-02 0.13453915 0.21887870 0.28641298
- [,7]
-[1,] 0.04351449
-[2,] 0.01720057
-[3,] -0.08331044
-[4,] 0.28591400
-[5,] 0.10137770
-[6,] 0.35550436
-[7,] 0.18193231
+[1] -0.04429647
data_I129 <- droplevels(data_I129[!data_I129$series == 'I129_F_Serratus',])
+
+data_I129 <- data_I129 %>%
+ add_lagged_matrices(n_max = MAX_LAG, n_min = MIN_LAG) %>%
+ apply_weights()Let’s fit as before
+if(FULL_RUN) {
+ formula <- sardal ~
+ s(series, bs = "re") +
+ te(sellafield, lag, k = 4, by=weights_ss) +
+ te(sellafield, lag, k = 4, by=weights_sw) +
+ te(la_hague, lag, k = 4, by=weights_ss) +
+ te(la_hague, lag, k = 4, by=weights_sw)
+
+ model_I129_mvgam <- mvgam(formula = formula,
+ family = lognormal(),
+ data = data_I129,
+ trend_model = 'None',
+ burnin = 2000,
+ samples = 4000,
+ parallel = TRUE,
+ threads = 6,
+ noncentered = TRUE,
+ priors = priors
+ )
+
+ save(model_I129_mvgam, file = "results/models/I129.mvgam.Rdata")
+}We can make the same plots and summaries as before:
summary(model_I129_mvgam)summary(model_I129_mvgam)GAM formula:
sardal ~ te(sellafield, lag, k = 4, by = weights_ss) + te(sellafield,
@@ -4065,7 +3800,7 @@ Link function:
identity
Trend model:
-AR(p = 1)
+None
N series:
2
@@ -4080,112 +3815,96 @@ Total post-warmup draws = 16000
log(observation error) parameter estimates:
2.5% 50% 97.5% Rhat n_eff
-sigma_obs[1] 0.16 0.52 0.95 1.02 313
-sigma_obs[2] 0.12 0.47 0.83 1.01 294
+sigma_obs[1] 0.50 0.71 1.00 1 10876
+sigma_obs[2] 0.45 0.62 0.92 1 13362
GAM coefficient (beta) estimates:
2.5% 50% 97.5% Rhat n_eff
-(Intercept) -2.60 -0.0490 2.40 1.00 467
-te(sellafield,lag):weights_ss.1 -0.56 -0.0640 0.41 1.00 1830
-te(sellafield,lag):weights_ss.2 -0.58 -0.0640 0.35 1.03 169
-te(sellafield,lag):weights_ss.3 -0.65 -0.1300 0.30 1.02 252
-te(sellafield,lag):weights_ss.4 -0.81 -0.2200 0.51 1.03 183
-te(sellafield,lag):weights_ss.5 -0.50 -0.0580 0.64 1.04 102
-te(sellafield,lag):weights_ss.6 -0.57 0.0600 0.37 1.04 102
-te(sellafield,lag):weights_ss.7 -0.40 0.1200 0.45 1.04 120
-te(sellafield,lag):weights_ss.8 -0.35 0.0900 0.61 1.03 134
-te(sellafield,lag):weights_ss.9 -0.45 0.0420 0.71 1.02 170
-te(sellafield,lag):weights_ss.10 -0.36 0.0910 0.46 1.02 234
-te(sellafield,lag):weights_ss.11 -0.30 0.1800 0.59 1.02 194
-te(sellafield,lag):weights_ss.12 -0.26 0.2400 0.84 1.01 377
-te(sellafield,lag):weights_ss.13 -0.37 0.1200 0.66 1.00 1801
-te(sellafield,lag):weights_ss.14 -0.74 -0.1500 0.27 1.03 178
-te(sellafield,lag):weights_ss.15 -1.00 -0.2500 0.15 1.03 126
-te(sellafield,lag):weights_ss.16 -0.64 -0.1200 0.68 1.03 159
-te(sellafield,lag):weights_sw.1 -0.51 -0.0310 0.40 1.00 2533
-te(sellafield,lag):weights_sw.2 -0.43 -0.0490 0.35 1.01 1066
-te(sellafield,lag):weights_sw.3 -0.46 -0.0720 0.35 1.00 2229
-te(sellafield,lag):weights_sw.4 -0.68 -0.1500 0.33 1.01 1129
-te(sellafield,lag):weights_sw.5 -0.43 -0.0420 0.33 1.00 2624
-te(sellafield,lag):weights_sw.6 -0.21 0.0650 0.39 1.01 556
-te(sellafield,lag):weights_sw.7 -0.17 0.1000 0.42 1.00 2008
-te(sellafield,lag):weights_sw.8 -0.28 0.1000 0.47 1.00 2500
-te(sellafield,lag):weights_sw.9 -0.40 0.0610 0.49 1.01 1092
-te(sellafield,lag):weights_sw.10 -0.24 0.0930 0.45 1.01 713
-te(sellafield,lag):weights_sw.11 -0.20 0.1400 0.51 1.01 1597
-te(sellafield,lag):weights_sw.12 -0.26 0.2000 0.72 1.00 1941
-te(sellafield,lag):weights_sw.13 -0.43 0.0560 0.56 1.00 2152
-te(sellafield,lag):weights_sw.14 -0.51 -0.0860 0.30 1.00 1519
-te(sellafield,lag):weights_sw.15 -0.54 -0.1400 0.24 1.01 885
-te(sellafield,lag):weights_sw.16 -0.60 -0.1200 0.36 1.00 3155
-te(la_hague,lag):weights_ss.1 -0.54 0.0110 0.56 1.00 1298
-te(la_hague,lag):weights_ss.2 -0.46 -0.0200 0.40 1.01 1193
-te(la_hague,lag):weights_ss.3 -0.56 -0.0670 0.31 1.01 977
-te(la_hague,lag):weights_ss.4 -0.83 -0.1300 0.31 1.00 1358
-te(la_hague,lag):weights_ss.5 -0.58 -0.0520 0.33 1.02 258
-te(la_hague,lag):weights_ss.6 -0.38 -0.0160 0.25 1.00 343
-te(la_hague,lag):weights_ss.7 -0.27 0.0210 0.30 1.00 2512
-te(la_hague,lag):weights_ss.8 -0.34 0.0530 0.47 1.00 1205
-te(la_hague,lag):weights_ss.9 -0.49 -0.0260 0.31 1.02 277
-te(la_hague,lag):weights_ss.10 -0.34 -0.0170 0.23 1.00 309
-te(la_hague,lag):weights_ss.11 -0.27 0.0140 0.25 1.00 896
-te(la_hague,lag):weights_ss.12 -0.27 0.0590 0.44 1.01 685
-te(la_hague,lag):weights_ss.13 -0.46 0.0180 0.51 1.00 2421
-te(la_hague,lag):weights_ss.14 -0.35 0.0052 0.38 1.00 941
-te(la_hague,lag):weights_ss.15 -0.35 0.0029 0.35 1.01 665
-te(la_hague,lag):weights_ss.16 -0.48 -0.0170 0.59 1.01 533
-te(la_hague,lag):weights_sw.1 -0.50 -0.0057 0.62 1.01 480
-te(la_hague,lag):weights_sw.2 -0.43 -0.0320 0.41 1.01 550
-te(la_hague,lag):weights_sw.3 -0.44 -0.0840 0.25 1.01 1128
-te(la_hague,lag):weights_sw.4 -0.61 -0.1300 0.31 1.01 790
-te(la_hague,lag):weights_sw.5 -0.36 0.0170 0.45 1.00 747
-te(la_hague,lag):weights_sw.6 -0.20 0.0740 0.36 1.00 1792
-te(la_hague,lag):weights_sw.7 -0.15 0.1200 0.43 1.00 1062
-te(la_hague,lag):weights_sw.8 -0.26 0.1400 0.56 1.01 352
-te(la_hague,lag):weights_sw.9 -0.33 0.0180 0.38 1.00 788
-te(la_hague,lag):weights_sw.10 -0.22 0.0280 0.28 1.01 800
-te(la_hague,lag):weights_sw.11 -0.18 0.0470 0.30 1.00 1404
-te(la_hague,lag):weights_sw.12 -0.21 0.1000 0.45 1.01 621
-te(la_hague,lag):weights_sw.13 -0.46 -0.0260 0.49 1.01 327
-te(la_hague,lag):weights_sw.14 -0.35 -0.0380 0.29 1.01 876
-te(la_hague,lag):weights_sw.15 -0.36 -0.0560 0.30 1.00 1965
-te(la_hague,lag):weights_sw.16 -0.54 -0.0920 0.38 1.01 685
-s(series).1 -2.30 -0.1500 2.10 1.01 550
-s(series).2 -2.10 -0.0410 2.30 1.00 387
+(Intercept) -2.30 -0.2000 2.20 1.01 654
+te(sellafield,lag):weights_ss.1 -0.59 -0.0830 0.39 1.00 9934
+te(sellafield,lag):weights_ss.2 -0.52 -0.0880 0.33 1.00 7692
+te(sellafield,lag):weights_ss.3 -0.63 -0.1600 0.29 1.00 4931
+te(sellafield,lag):weights_ss.4 -0.84 -0.2500 0.30 1.00 7858
+te(sellafield,lag):weights_ss.5 -0.50 -0.0650 0.33 1.00 6516
+te(sellafield,lag):weights_ss.6 -0.25 0.0730 0.40 1.00 6123
+te(sellafield,lag):weights_ss.7 -0.17 0.1500 0.48 1.00 5630
+te(sellafield,lag):weights_ss.8 -0.30 0.1200 0.52 1.00 7406
+te(sellafield,lag):weights_ss.9 -0.42 0.0480 0.54 1.00 7035
+te(sellafield,lag):weights_ss.10 -0.25 0.1200 0.49 1.00 6137
+te(sellafield,lag):weights_ss.11 -0.15 0.2300 0.64 1.00 5503
+te(sellafield,lag):weights_ss.12 -0.23 0.2700 0.85 1.00 6259
+te(sellafield,lag):weights_ss.13 -0.40 0.0900 0.67 1.00 6490
+te(sellafield,lag):weights_ss.14 -0.62 -0.1700 0.21 1.00 5636
+te(sellafield,lag):weights_ss.15 -0.76 -0.2800 0.10 1.00 2658
+te(sellafield,lag):weights_ss.16 -0.68 -0.1600 0.40 1.00 6406
+te(sellafield,lag):weights_sw.1 -0.52 -0.0580 0.38 1.00 9113
+te(sellafield,lag):weights_sw.2 -0.41 -0.0420 0.35 1.00 7492
+te(sellafield,lag):weights_sw.3 -0.48 -0.0700 0.34 1.00 7402
+te(sellafield,lag):weights_sw.4 -0.68 -0.1500 0.35 1.00 7436
+te(sellafield,lag):weights_sw.5 -0.44 -0.0500 0.32 1.00 6211
+te(sellafield,lag):weights_sw.6 -0.20 0.0840 0.38 1.00 4840
+te(sellafield,lag):weights_sw.7 -0.15 0.1200 0.42 1.00 5306
+te(sellafield,lag):weights_sw.8 -0.27 0.0980 0.48 1.00 6529
+te(sellafield,lag):weights_sw.9 -0.36 0.0610 0.51 1.00 6741
+te(sellafield,lag):weights_sw.10 -0.21 0.1200 0.46 1.00 6030
+te(sellafield,lag):weights_sw.11 -0.17 0.1600 0.51 1.00 5742
+te(sellafield,lag):weights_sw.12 -0.26 0.1900 0.67 1.00 6809
+te(sellafield,lag):weights_sw.13 -0.42 0.0570 0.58 1.00 8426
+te(sellafield,lag):weights_sw.14 -0.48 -0.0870 0.28 1.00 7450
+te(sellafield,lag):weights_sw.15 -0.52 -0.1500 0.21 1.00 7014
+te(sellafield,lag):weights_sw.16 -0.60 -0.1300 0.34 1.00 3830
+te(la_hague,lag):weights_ss.1 -0.57 0.0082 0.55 1.00 8723
+te(la_hague,lag):weights_ss.2 -0.47 -0.0210 0.41 1.00 6200
+te(la_hague,lag):weights_ss.3 -0.57 -0.0830 0.29 1.00 4778
+te(la_hague,lag):weights_ss.4 -0.85 -0.1600 0.30 1.00 4600
+te(la_hague,lag):weights_ss.5 -0.48 -0.0390 0.37 1.00 5619
+te(la_hague,lag):weights_ss.6 -0.31 -0.0160 0.27 1.00 4805
+te(la_hague,lag):weights_ss.7 -0.27 0.0250 0.31 1.00 4561
+te(la_hague,lag):weights_ss.8 -0.33 0.0720 0.50 1.00 4392
+te(la_hague,lag):weights_ss.9 -0.41 -0.0180 0.34 1.00 4810
+te(la_hague,lag):weights_ss.10 -0.30 -0.0190 0.24 1.00 4659
+te(la_hague,lag):weights_ss.11 -0.27 0.0110 0.27 1.00 4168
+te(la_hague,lag):weights_ss.12 -0.27 0.0740 0.47 1.00 4603
+te(la_hague,lag):weights_ss.13 -0.47 0.0099 0.49 1.00 6458
+te(la_hague,lag):weights_ss.14 -0.36 -0.0180 0.32 1.00 5808
+te(la_hague,lag):weights_ss.15 -0.37 -0.0180 0.33 1.00 5829
+te(la_hague,lag):weights_ss.16 -0.49 -0.0120 0.50 1.00 6616
+te(la_hague,lag):weights_sw.1 -0.61 -0.0180 0.54 1.00 1188
+te(la_hague,lag):weights_sw.2 -0.47 -0.0430 0.38 1.00 2017
+te(la_hague,lag):weights_sw.3 -0.45 -0.0910 0.25 1.00 4502
+te(la_hague,lag):weights_sw.4 -0.65 -0.1400 0.29 1.00 9352
+te(la_hague,lag):weights_sw.5 -0.34 0.0460 0.45 1.00 7144
+te(la_hague,lag):weights_sw.6 -0.18 0.0960 0.39 1.00 5600
+te(la_hague,lag):weights_sw.7 -0.14 0.1300 0.41 1.00 4613
+te(la_hague,lag):weights_sw.8 -0.22 0.1500 0.55 1.00 6994
+te(la_hague,lag):weights_sw.9 -0.32 0.0320 0.39 1.00 6737
+te(la_hague,lag):weights_sw.10 -0.21 0.0430 0.30 1.00 4987
+te(la_hague,lag):weights_sw.11 -0.18 0.0620 0.31 1.00 4041
+te(la_hague,lag):weights_sw.12 -0.22 0.1100 0.47 1.00 6193
+te(la_hague,lag):weights_sw.13 -0.47 -0.0410 0.43 1.00 3398
+te(la_hague,lag):weights_sw.14 -0.37 -0.0470 0.30 1.00 5712
+te(la_hague,lag):weights_sw.15 -0.37 -0.0560 0.30 1.00 5963
+te(la_hague,lag):weights_sw.16 -0.56 -0.0850 0.39 1.00 8213
+s(series).1 -2.50 -0.1400 1.80 1.01 561
+s(series).2 -2.20 0.0140 2.00 1.01 497
GAM group-level estimates:
- 2.5% 50% 97.5% Rhat n_eff
-mean(s(series)) -2.000 -0.10 2.0 1.01 418
-sd(s(series)) 0.097 0.33 1.7 1.00 1417
+ 2.5% 50% 97.5% Rhat n_eff
+mean(s(series)) -1.800 -0.058 1.7 1.00 9811
+sd(s(series)) 0.095 0.340 2.2 1.02 233
Approximate significance of GAM smooths:
- edf Ref.df Chi.sq p-value
-te(sellafield,lag):weights_ss 11.4965 16 2.920 0.995
-te(sellafield,lag):weights_sw 4.9732 16 2.439 0.783
-te(la_hague,lag):weights_ss 8.6088 16 0.374 1.000
-te(la_hague,lag):weights_sw 3.4468 16 1.268 0.864
-s(series) 0.4608 2 0.008 0.928
-
-standard deviation:
- 2.5% 50% 97.5% Rhat n_eff
-sigma[1] 0.11 0.43 0.92 1.03 232
-sigma[2] 0.10 0.39 0.79 1.01 445
-
-precision parameter:
- 2.5% 50% 97.5% Rhat n_eff
-tau[1] 1.2 5.4 82 1.03 218
-tau[2] 1.6 6.6 92 1.01 675
-
-autoregressive coef 1:
- 2.5% 50% 97.5% Rhat n_eff
-ar1[1] -0.71 0.28 0.93 1.01 664
-ar1[2] -0.70 0.23 0.91 1.00 1279
+ edf Ref.df Chi.sq p-value
+te(sellafield,lag):weights_ss 3.5137 16 2.376 0.438
+te(sellafield,lag):weights_sw 3.1936 16 1.540 0.599
+te(la_hague,lag):weights_ss 1.5923 16 0.099 0.930
+te(la_hague,lag):weights_sw 1.5375 16 0.926 0.616
+s(series) 0.4746 2 0.003 0.955
Stan MCMC diagnostics:
✔ No issues with effective samples per iteration
✔ Rhat looks good for all parameters
-✖ 2322 of 16000 iterations ended with a divergence (14.5125%)
- Try a larger adapt_delta to remove divergences
+✔ No issues with divergences
✔ No issues with maximum tree depth
Samples were drawn using sampling(hmc). For each parameter, n_eff is a
@@ -4194,51 +3913,51 @@ Samples were drawn using sampling(hmc). For each parameter, n_eff is a
Use how_to_cite() to get started describing this model
mcmc_plot(model_I129_mvgam)mcmc_plot(model_I129_mvgam)plot_mvgam_smooth(model_I129_mvgam, smooth = 1)plot_mvgam_smooth(model_I129_mvgam, smooth = 1)plot_mvgam_smooth(model_I129_mvgam, smooth = 2)plot_mvgam_smooth(model_I129_mvgam, smooth = 2)plot_mvgam_smooth(model_I129_mvgam, smooth = 3)plot_mvgam_smooth(model_I129_mvgam, smooth = 3)plot_mvgam_smooth(model_I129_mvgam, smooth = 4)plot_mvgam_smooth(model_I129_mvgam, smooth = 4)plot(hindcast(model_I129_mvgam), series = 1)plot(hindcast(model_I129_mvgam), series = 1)No non-missing values in test_observations; cannot calculate forecast score
plot(hindcast(model_I129_mvgam), series = 2)plot(hindcast(model_I129_mvgam), series = 2)No non-missing values in test_observations; cannot calculate forecast score
plot_mvgam_randomeffects(model_I129_mvgam)plot_mvgam_randomeffects(model_I129_mvgam)And the diagnostics:
plot_mvgam_resids(model_I129_mvgam)plot_mvgam_resids(model_I129_mvgam)ppc(model_I129_mvgam)ppc(model_I129_mvgam)