Skip to contents

dissmapr

A Novel Framework for Automated Compositional Dissimilarity and Biodiversity Turnover Analysis


1. Predict current Zeta Diversity (zeta2) using predict_dissim()

In this step we use our fitted order-2 GDM (zeta2) to generate a spatial map of pairwise compositional turnover (ζ₂) under current conditions. By calling predict_dissim() we

  1. compute each site’s sampling‐effort‐adjusted species richness and mean distance to all other sites;
  2. apply the same environmental I-spline transformations used in the model;
  3. predict the Jaccard-scaled turnover (ζ₂) on the 0–1 scale;
  4. optionally plot the resulting heatmap with your study boundary overlaid.

We set a random seed for reproducibility (so Monte Carlo sampling inside predict_dissim() yields the same results each time), pull out just the species columns once, then inspect the returned predictors_df to confirm its dimensions, column names, and a quick peek at the key model outputs.

# Predict current zeta diversity using `predict_dissim` with sampling effort, geographic distance and environmental variables
# Only non-colinear environmental variables used in `zeta2` model
set.seed(123) # set.seed to generate exactly the same random results i.e. sam=100
spp_cols = names(grid_spp_pa[,-(1:7)])
predictors_df   = predict_dissim(
  grid_spp      = grid_spp_pa,
  species_cols  = spp_cols,
  env_vars      = env_vars_reduced,# env_vars_reduced[,-8]
  zeta_model    = zeta2, # From simple order 2 run above
  # zeta_model = ispline_gdm_tab$zeta_gdm_list[[1]], # From list of Zeta.msgdm models
  grid_xy       = grid_env,
  x_col         = "centroid_lon",
  y_col         = "centroid_lat",
  bndy_fc       = rsa, # Optional feature collection to plot as boundary
  show_plot     = TRUE
)


# Check results
dim(predictors_df)
#> [1] 415  14
names(predictors_df)
#>  [1] "temp_mean"        "iso"              "temp_wetQ"        "temp_dryQ"       
#>  [5] "rain_dry"         "rain_warmQ"       "obs_sum"          "distance"        
#>  [9] "richness"         "pred_zeta"        "pred_zetaExp"     "log_pred_zetaExp"
#> [13] "centroid_lon"     "centroid_lat"
head(predictors_df[,5:11])
#>    rain_dry  rain_warmQ    obs_sum distance richness  pred_zeta pred_zetaExp
#> 1 -1.227648 -0.17604221 -0.2926985 903.0437        2 0.01930100    0.5048251
#> 2 -1.223151 -0.11642238 -0.2340532 915.4655       31 0.02632131    0.5065799
#> 3 -1.154108  0.06618959 -0.2818954 931.1100       10 0.01570100    0.5039252
#> 4 -1.149209  0.38933728 -0.2865253 949.9390        7 0.01506016    0.5037650
#> 5 -1.080356  0.62285852 -0.2880686 971.8672        6 0.01472191    0.5036804
#> 6 -1.048567  0.57126837 -0.1321955 996.7561       76 0.01438702    0.5035967

2. Predict future Zeta Diversity using predict_dissim()

Below we expand our workflow to forecast how ζ₂ respond to three extreme climate futures (2030, 2040, 2050) alongside the current scenario.

First, we define and center-scale each future by adding large temperature increments and rainfall multipliers, then bundle them into a named list of four environmental data frames:

# 1. Identify species & env columns
# spp_cols  = names(grid_spp_pa)[-(1:7)]
all_vars  = names(env_vars_reduced)
temp_vars = grep("^temp", all_vars, value = TRUE)
rain_vars = grep("^rain", all_vars, value = TRUE)
iso_vars = grep("^iso", all_vars, value = TRUE)
obs_var = "obs_sum"

# 2. Extreme future shifts
horizons     = c("2030","2040","2050")
mean_delta   = list(
  temp  = c("2030"= +2,  "2040"= +4,  "2050"= +6),
  iso   = c("2030"= +0.5,"2040"= +1.0,"2050"= +1.5),
  rain  = c("2030"= 0.9, "2040"= 0.8, "2050"= 0.7),
  effort= c("2030"= 1.3, "2040"= 1.6, "2050"= 2.0)
)
exagg_factor = c("2030"=1.5, "2040"=2.0, "2050"=2.5)

# 3) helper to amplify deviation from the mean
amplify = function(x, factor) {
  m = mean(x, na.rm=TRUE)
  m + (x - m) * factor
}

# # 4. Save original scaling parameters
# sc_params = scale(env_vars_reduced)
# mu    = attr(sc_params, "scaled:center")
# sigma = attr(sc_params, "scaled:scale")

# 4. Build list of future env tibbles
env_scenarios = c(
  list(current = env_vars_reduced),
  map(horizons, function(yr) {
    df = env_vars_reduced
    # mean shifts
    df[temp_vars] = df[temp_vars] + mean_delta$temp[yr]
    df[iso_vars]  = df[iso_vars]  + mean_delta$iso[yr]
    df[rain_vars] = df[rain_vars] * mean_delta$rain[yr]
    df[[obs_var]] = df[[obs_var]] * mean_delta$effort[yr]
    # amplify spatial variation
    df[temp_vars] = map_dfc(df[temp_vars], amplify, factor = exagg_factor[yr])
    df[iso_vars]  = map_dfc(df[iso_vars],  amplify, factor = exagg_factor[yr])
    # clamp obs_sum
    df[[obs_var]] = pmin(pmax(df[[obs_var]], 50), 8000)
    df
  }) |> set_names(horizons)
)
# names(env_scenarios) = names(horizons)

# 5. Prepend current conditions
# env_scenarios = c(list(current = env_vars_reduced), env_futures)
str(env_scenarios, max.level = 1)
#> List of 4
#>  $ current: tibble [415 × 7] (S3: tbl_df/tbl/data.frame)
#>  $ 2030   : tibble [415 × 7] (S3: tbl_df/tbl/data.frame)
#>  $ 2040   : tibble [415 × 7] (S3: tbl_df/tbl/data.frame)
#>  $ 2050   : tibble [415 × 7] (S3: tbl_df/tbl/data.frame)

Next, we loop through each scenario, re-apply the original centering and scaling, and call our predict_dissim() helper to compute ζ₂. We tag each result with its scenario name and combine them into one tidy data frame:

set.seed(123)

scenario_dfs = purrr::imap(env_scenarios, ~ {
  df = predict_dissim(
    grid_spp     = grid_spp,
    species_cols = spp_cols,
    env_vars     = .x,
    zeta_model   = zeta2,
    grid_xy      = grid_env,
    x_col        = "centroid_lon",
    y_col        = "centroid_lat",
    skip_scale   = FALSE,
    show_plot    = FALSE
  )
  df$scenario = .y
  df
})
str(scenario_dfs, max.level=1)
#> List of 4
#>  $ current:'data.frame': 415 obs. of  15 variables:
#>  $ 2030   :'data.frame': 415 obs. of  15 variables:
#>  $ 2040   :'data.frame': 415 obs. of  15 variables:
#>  $ 2050   :'data.frame': 415 obs. of  15 variables:

# all_preds = bind_rows(scenario_dfs) %>%
#   mutate(scenario = factor(scenario, levels = c("current", names(temp_shifts))))
all_preds = bind_rows(scenario_dfs) 
head(all_preds)
#>   temp_mean         iso temp_wetQ temp_dryQ  rain_dry  rain_warmQ    obs_sum
#> 1  1.841044  0.05124962  1.426512 0.5759178 -1.227648 -0.17604221 -0.2926985
#> 2  1.770287 -0.16040417  1.434249 0.5279913 -1.223151 -0.11642238 -0.2340532
#> 3  1.888447  0.23118933  1.406438 0.6828768 -1.154108  0.06618959 -0.2818954
#> 4  2.222843  0.75512585  1.510132 0.9797353 -1.149209  0.38933728 -0.2865253
#> 5  2.424011  1.38701985  1.551582 1.1832058 -1.080356  0.62285852 -0.2880686
#> 6  2.809129  1.80976342  1.766238 1.4127484 -1.048567  0.57126837 -0.1321955
#>   distance richness  pred_zeta pred_zetaExp log_pred_zetaExp centroid_lon
#> 1 903.0437        3 0.01930100    0.5048251       -0.6835432        28.75
#> 2 915.4655       41 0.02632131    0.5065799       -0.6800731        29.25
#> 3 931.1100       10 0.01570100    0.5039252       -0.6853275        29.75
#> 4 949.9390        7 0.01506016    0.5037650       -0.6856454        30.25
#> 5 971.8672        6 0.01472191    0.5036804       -0.6858133        30.75
#> 6 996.7561      107 0.01438702    0.5035967       -0.6859795        31.25
#>   centroid_lat scenario
#> 1    -22.25004  current
#> 2    -22.25004  current
#> 3    -22.25004  current
#> 4    -22.25004  current
#> 5    -22.25004  current
#> 6    -22.25004  current

We can then quickly compare the spatial ζ₂ surfaces under each future:

ggplot(all_preds,
       aes(centroid_lon, centroid_lat, fill = pred_zetaExp)) +
  geom_tile() +
  facet_wrap(~ scenario, ncol = 2) +
  scale_fill_viridis_c(direction = -1, name = expression(zeta[2])) +
  geom_sf(data = rsa, fill = NA, color = "black", inherit.aes = FALSE) +
  coord_sf() +
  labs(x = "Longitude", y = "Latitude",
       title = expression("Predicted ζ"[2] * " under current & future scenarios")) +
  theme_minimal() +
  theme(strip.text = element_text(face = "bold"),
        panel.grid = element_blank())