Check model goodness of fit via AUC, TSS and confusion matrices

I will plot the AUC curves and variable contribution graphs by combining all 4 models into one plot. This will allow for a direct comparison of the AUC (goodness of fit) and var contribution between models.

Setup

# general tools
library(tidyverse)  #data manipulation
library(here) #making directory pathways easier on different instances
# here::here() starts at the root folder of this package.
library(devtools)

# SDMtune and dependencies
library(SDMtune) # main package used to run SDMs
library(dismo) # package underneath SDMtune
library(rJava) # for running MaxEnt
library(plotROC) # plots ROCs

# html tools
library(kableExtra)
library(webshot)
library(webshot2)

These plots depict the activity of the variables put into the MaxEnt models. I will create a combined figure for the 3 models in the regional_ensemble

mypath <- file.path(here::here() %>% 
                       dirname(),
                     "maxent/models")

ensemble_colors <- c(
  "Rn (native)" = "#4daf4a",
  "Ri.NAmerica" =  "#e41a1c",
  "Ri.Asia" = "#377eb8"
)

regional_native_model <- read_rds(file = file.path(mypath, "slf_regional_native_v3", "regional_native_model.rds"))

regional_invaded_model <- read_rds(file = file.path(mypath, "slf_regional_invaded_v7", "regional_invaded_model.rds"))

regional_invaded_asian_model <- read_rds(file = file.path(mypath, "slf_regional_invaded_asian_v2", "regional_invaded_asian_model.rds"))

regional_native_test <- read_rds(file = file.path(mypath, "slf_regional_native_v3", "regional_native_test.rds"))

regional_invaded_test <- read_rds(file = file.path(mypath, "slf_regional_invaded_v7", "regional_invaded_test.rds"))

regional_invaded_asian_test <- read_rds(file = file.path(mypath, "slf_regional_invaded_asian_v2", "regional_invaded_asian_test.rds"))

ROC regional ensemble

regional_native_ROC <- SDMtune::plotROC(
  model = regional_native_model,
  test = regional_native_test
) %>%
  ggplot_build()


regional_invaded_ROC <- SDMtune::plotROC(
  model = regional_invaded_model,
  test = regional_invaded_test
) %>%
  ggplot_build()

regional_invaded_asian_ROC <- SDMtune::plotROC(
  model = regional_invaded_asian_model,
  test = regional_invaded_asian_test
) %>%
  ggplot_build()

ROC_ensemble <- ggplot() +
  # native model data
  geom_line(data = regional_native_ROC$data[[1]], aes(x = x, y = y, color = "Rn (native)", group = group, linetype = as.factor(group)), linewidth = 0.8) +
  # invaded model data
  geom_line(data = regional_invaded_ROC$data[[1]], aes(x = x, y = y, color = "Ri.NAmerica", group = group, linetype = as.factor(group)), linewidth = 0.8) +
  # invaded_asian model data
  geom_line(data = regional_invaded_asian_ROC$data[[1]], aes(x = x, y = y, color = "Ri.Asia", group = group, linetype = as.factor(group)), linewidth = 0.8) +
  # midpoint line
  geom_abline(slope = 1, linetype = "dashed") +
  # scales
  scale_x_continuous(name = "False positive rate (1 - specificity)", breaks = c(0, 0.25, 0.5, 0.75, 1), limits = c(0, 1)) +
  scale_y_continuous(name = "True positive rate (sensitivity)", breaks = c(0, 0.25, 0.5, 0.75, 1), limits = c(0, 1)) +
  labs(
    title = "ROC curve (true vs false positive rate) for 'regional_ensemble' models"
    ) +
  theme_bw() +
  # aes
  scale_color_manual(
    name = "model",
    values = ensemble_colors,
    aesthetics = "color"
  ) +
  scale_linetype_manual(
    name = "Data segment",
    values = c("solid", "dotted"),
    labels = c("Test", "Train"),
    guide = guide_legend(reverse = TRUE)
  ) +
  theme(legend.position = "bottom") +
  coord_fixed(ratio = 1)

ggsave(
  ROC_ensemble, 
  filename = file.path(
    here::here(), "vignette-outputs", "figures", "ROC_plot_regional_ensemble.jpg"
    ),
  height = 8, 
  width = 8,
  device = jpeg,
  dpi = "retina"
  )

Add global model to AUC plot

ROC

global_model <- read_rds(file = file.path(mypath, "slf_global_v3", "global_model.rds"))
global_test <- read_rds(file = file.path(mypath, "slf_global_v3", "global_test.rds"))

ensemble_colors <- c(
  "Rn (native)" = "#4daf4a",
  "Ri.NAmerica" =  "#e41a1c",
  "Ri.Asia" = "#377eb8",
  "global" = "black"
)

global_ROC <- SDMtune::plotROC(
  model = global_model@models[[5]], #pick one because they did not vary too much
  test = global_test
) %>%
  ggplot_build()

ROC_ensemble_global <- ggplot() +
  # global
  geom_line(data = global_ROC$data[[1]], aes(x = x, y = y, color = "global", group = group, linetype = as.factor(group)), linewidth = 0.8) +
  # native model data
  geom_line(data = regional_native_ROC$data[[1]], aes(x = x, y = y, color = "Rn (native)", group = group, linetype = as.factor(group)), linewidth = 0.8) +
  # invaded model data
  geom_line(data = regional_invaded_ROC$data[[1]], aes(x = x, y = y, color = "Ri.NAmerica", group = group, linetype = as.factor(group)), linewidth = 0.8) +
  # invaded_asian model data
  geom_line(data = regional_invaded_asian_ROC$data[[1]], aes(x = x, y = y, color = "Ri.Asia", group = group, linetype = as.factor(group)), linewidth = 0.8) +
  # midpoint line
  geom_abline(slope = 1, linetype = "dashed") +
  # scales
  scale_x_continuous(name = "False positive rate (1 - specificity)", breaks = c(0, 0.25, 0.5, 0.75, 1), limits = c(0, 1)) +
  scale_y_continuous(name = "True positive rate (sensitivity)", breaks = c(0, 0.25, 0.5, 0.75, 1), limits = c(0, 1)) +
  labs(
    title = "ROC curve (true vs false positive rate) for 'regional_ensemble' and 'global' models"
    ) +
  theme_bw() +
  # aes
  scale_color_manual(
    name = "model",
    values = ensemble_colors,
    aesthetics = "color"
  ) +
  scale_linetype_manual(
    name = "Data segment",
    values = c("solid", "dotted"),
    labels = c("Test", "Train"),
    guide = guide_legend(reverse = TRUE)
  ) +
  theme(legend.position = "bottom") +
  coord_fixed(ratio = 1)

ROC_ensemble_global

ggsave(
  ROC_ensemble_global, 
  filename = file.path(
    here::here(), "vignette-outputs", "figures", "ROC_plot_regional_ensemble_global.jpg"
    ),
  height = 8, 
  width = 8,
  device = jpeg,
  dpi = "retina"
  )

I will create a table for some important summary statistics for the models. These include sensitivity, specificity, commission error, and omission error. These are calculated according to the cloglog MTSS threshold per model.

Sensitivity and Specificity of Models

mypath <- file.path(here::here() %>% 
                       dirname(),
                     "maxent/models")

We calculate these according to the cloglog MTSS threshold per model

# global
global_conf_matr <- read_csv(file = file.path(mypath, "slf_global_v3", "global_thresh_confusion_matrix_all_iterations.csv")) %>%
  # only MTSS threshold
  slice(7)

## Rows: 9 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): threshold_type
## dbl (5): threshold_value, tp_mean, fp_mean, fn_mean, tn_mean
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# regional models
# native
regional_native_conf_matr <- read_csv(file = file.path(mypath, "slf_regional_native_v3", "regional_native_thresh_confusion_matrix.csv")) %>%
  slice(7)

## Rows: 9 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): threshold_type
## dbl (5): threshold_value, tp, fp, fn, tn
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# invaded_asia
regional_invaded_asian_conf_matr <- read_csv(file = file.path(mypath, "slf_regional_invaded_asian_v2", "regional_invaded_asian_thresh_confusion_matrix.csv")) %>%
  slice(7)

## Rows: 9 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): threshold_type
## dbl (5): threshold_value, tp, fp, fn, tn
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# invaded_NAmerica
regional_invaded_conf_matr <- read_csv(file = file.path(mypath, "slf_regional_invaded_v7", "regional_invaded_thresh_confusion_matrix.csv")) %>%
  slice(7)

## Rows: 9 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): threshold_type
## dbl (5): threshold_value, tp, fp, fn, tn
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Be sure to change the number of test positives and background negatives for each model if this changes.

model_sens_spec <- tibble(
  metric = rep(c("sensitivity", "specificity", "commission error", "omission error"), 4),
  description = rep(c("true_positive rate", "true_negative rate", "1 - true_negative rate", "false_negative rate"), 4),
    model = c(
    rep("global", 4),
    rep("regional\nnative", 4),
    rep("regional\ninvaded NAmerica", 4),
    rep("regional\ninvaded Asia", 4)
  ),
  thresh = "MTSS.cloglog",
  thresh_value = c(
    rep(global_conf_matr$threshold_value, 4), 
    rep(regional_native_conf_matr$threshold_value, 4), 
    rep(regional_invaded_conf_matr$threshold_value, 4),
    rep(regional_invaded_asian_conf_matr$threshold_value, 4)
    ),
  test_pos = c(
    rep(155, 4), # global
    rep(54, 4),  # native
    rep(68, 4), # invaded_NAmerica
    rep(40, 4) # invaded_Asia
  ),
  bg_neg = c(
    rep(20000, 4), # global
    rep(10000, 12) # regional
  ),
  value = c(
    # global
    round(global_conf_matr$tp_mean / 155, 4), round(global_conf_matr$tn_mean / 20000, 4), round(1 - (global_conf_matr$tn_mean / 20000), 4), round((global_conf_matr$fn_mean / 20000), 4),
    # native
    round(regional_native_conf_matr$tp / 54, 4), round(regional_native_conf_matr$tn / 10000, 4), round(1 - (regional_native_conf_matr$tn / 10000), 4), round((regional_native_conf_matr$fn / 10000), 4),
    # invaded_NAmerica
    round(regional_invaded_conf_matr$tp / 68, 4), round(regional_invaded_conf_matr$tn / 10000, 4), round(1 - (regional_invaded_conf_matr$tn / 10000), 4), round((regional_invaded_conf_matr$fn / 10000), 4),
    # invaded_asia
    round(regional_invaded_asian_conf_matr$tp / 40, 4), round(regional_invaded_asian_conf_matr$tn / 10000, 4), round(1 - (regional_invaded_asian_conf_matr$tn / 10000), 4), round((regional_invaded_asian_conf_matr$fn / 10000), 4)
  )
)

write_csv(model_sens_spec, file.path(here::here(), "vignette-outputs", "data-tables", "model_sensitivity_specificity.csv"))

I will also write it to a kable so it can be printed

# add column formatting
model_sens_spec <- mutate(model_sens_spec, bg_neg = scales::label_comma() (bg_neg))

# convert to kable
model_sens_spec_kable <- knitr::kable(x = model_sens_spec, format = "html", escape = FALSE) %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = TRUE)

# save as .html
kableExtra::save_kable(
  model_sens_spec_kable, 
  file = file.path(here::here(), "vignette-outputs", "figures", "model_sensitivity_specificity.html"),
  self_contained = TRUE,
  bs_theme = "simplex"
  )

# initialize webshot by 
# webshot::install_phantomjs()
# convert to pdf
webshot::webshot(
  url = file.path(here::here(), "vignette-outputs", "figures", "model_sensitivity_specificity.html"),
  file = file.path(here::here(), "vignette-outputs", "figures", "model_sensitivity_specificity.jpg"),
  zoom = 2
)

# model extents

model_sens_spec_kable

References

Jiménez‐Valverde, A. (2012). Insights into the area under the receiver operating characteristic curve (AUC) as a discrimination measure in species distribution modelling. Global Ecology and Biogeography, 21(4), 498–507. https://doi.org/10.1111/j.1466-8238.2011.00683.x
Zou, K. H., O’Malley, A. J., & Mauri, L. (2007). Receiver-Operating Characteristic Analysis for Evaluating Diagnostic Tests and Predictive Models. Circulation, 115(5), 654–657. https://doi.org/10.1161/CIRCULATIONAHA.105.594929

Samuel M. Owens¹

2024-08-16

Setup

ROC regional ensemble

Add global model to AUC plot

ROC

Sensitivity and Specificity of Models

References

Check model goodness of fit via AUC, TSS and confusion matrices

Samuel M. Owens1

2024-08-16

Setup

ROC regional ensemble

Add global model to AUC plot

ROC

Sensitivity and Specificity of Models

References

Samuel M. Owens¹