1 Introduction

This script calculates genomic offset (GO) predictions for populations planted in the common garden. The aim is to evaluate whether there is a relationship between population fitness and GO predictions, which will allow us to assess the accuracy of the model. Genomic offset is defined as the distance between the genomic composition predicted by the GEA model under the climate of origin and under the climate of the common garden. Populations with higher genomic offset (i.e., those predicted to be less well adapted to the common garden environment) are expected to exhibit lower fitness in the common garden.

For this validation, we use an independent set of populations grown in a clonal bank located in central Spain.

2 Data

meta_data <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Populations/Populations_common_garden.csv",h=T,sep=";",dec=",")

2.1 Climatic data

list_climate <- c("clim_df_scale_CG_2012","clim_df_scale_CG_2021","clim_df_natural_pop")

for(i in 1:length(list_climate)){
  
  climate <- list_climate[i]
  load(file=paste0("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/validation_GO/climatic_data/",climate,".Rdata"))
}

We need to add the climatic data for each populations for the CG df:

clim_data_replicated_2012 <- clim_df_scale_CG_2012[rep(1, 26), ] 
clim_df_tot_scale_CG_2012 <- data.frame(Population=meta_data$Population,clim_data_replicated_2012)

clim_data_replicated_2021 <- clim_df_scale_CG_2021[rep(1, 26), ] 
clim_df_tot_scale_CG_2021 <- data.frame(Population=meta_data$Population,clim_data_replicated_2021)

3 Genomic offset using Redundancy analysis

3.1 GEA models

We predicted the genomic composition for the populations both in their natural habitat and in the common garden.

Load GEA models

list_models <- c("model_GEA_RDA_all","model_GEA_RDA_random","model_GEA_RDA_random_same_AF","model_GEA_RDA_CG","model_GEA_RDA_outliers_LC","model_GEA_RDA_outliers_MC","model_GEA_RDA_random_AF_V2","model_GEA_RDA_all_outliers")

for(x in 1:length(list_models)){
  
  name_model<- list_models[x]
  load(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/RDA/data/models/",name_model,".Rdata"))
}

3.2 Initial genomic data

We also need to load the original genomic dataset used to build the model, along with the set of outlier SNPs. This step is necessary because the predict function does not store the input data within the model object. Instead, it requires the initial dataset and outlier set in order to generate predictions with new data.

#genomic data
load("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/genetic_new/data_allelic_frequencies_29pop_adapcon_gentree_475_8616.Rdata")

genomic_matrix <- data_allelic_frequencies_29pop_adapcon_gentree_475_8616  


list_models <- c("random_neutral_set_SNPs_T_adapcon_gentree_bis","random_set_taxus","outliers_set_final_overlapping_no_LD_LC_new_var","outliers_set_final_overlapping_no_LD_new_var","random_set_taxus_not_overlap_both_dataset_V2","unique_outliers")
for(i in 1:length(list_models)){
  model <- list_models[i]
  load(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/GEA_new_var/outliers/",model,".Rdata"))
}

list_random_SNPs <- colnames(random_neutral_set_SNPs_T_adapcon_gentree_bis)
list_random_SNPs_same_AF <- random_set_taxus$name_snps
list_random_SNPs_same_AF_V2 <- random_set_taxus_not_overlap_both_dataset_V2$name_snps
#CG common
load("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/genetic_new/gen_matrix_imp_CG.Rdata")
CG_common_set <- intersect(outliers_set_final_overlapping_no_LD_LC_new_var, colnames(gen_matrix_imp_CG))
save(CG_common_set,file="C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/GEA_new_var/outliers/CG_common_set.Rdata")

Now, based on these models, we calculated the two genomic compositions. We can use the predict function to do this.

list_rda <- c("model_GEA_RDA_all","model_GEA_RDA_random","model_GEA_RDA_random_same_AF","model_GEA_RDA_outliers_LC","model_GEA_RDA_outliers_MC","model_GEA_RDA_CG","model_GEA_RDA_random_AF_V2","model_GEA_RDA_all_outliers")
list_set <- c("all","random","random_same_AF","LC","MC","CG","random_AF_V2","all_outliers")

K = 2 # number of rda axes retained

for(i in 1:length(list_rda)){
  rda_model <- get(list_rda[i])
  set <- list_set[i]
  # Natural populations
  clim_natural_pop <- get("clim_df_natural_pop")
  predictions_natural_pop <- data.frame(Population = meta_data$Population, predict(rda_model, clim_natural_pop, type = "lc", rank = K, scaling = "none"))
  assign(paste0("adaptive_values_", set, "_natural_pop"), predictions_natural_pop)
  
  # Common garden 2012
  clim_common_garden_2012 <- get("clim_data_replicated_2012")
  predictions_common_garden_2012 <- data.frame(Population = meta_data$Population, predict(rda_model, clim_common_garden_2012, type = "lc", rank = K, scaling = "none"))
  assign(paste0("adaptive_values_", set, "_common_garden_2012"), predictions_common_garden_2012)
  
  # Common garden 2021
  clim_common_garden_2021 <- get("clim_data_replicated_2021")
  predictions_common_garden_2021 <- data.frame(Population = meta_data$Population, predict(rda_model, clim_common_garden_2021, type = "lc", rank = K, scaling = "none"))
  assign(paste0("adaptive_values_", set, "_common_garden_2021"), predictions_common_garden_2021)
}

3.2.1 Genomic offset

Now we calculated the genomic offset with these genomic compositions.

genomic_offset_pop <- function(RDA, K, Past_score, Future_score,meta_data){
# Weights based on axis eigen values
  weights <- RDA$CCA$eig/sum(RDA$CCA$eig)
  
  # Weighing the current and future adaptive indices based on the eigen values of the associated axes
  Past_score_df <- Past_score[,-c(1)]
  Proj_offset_past <- as.data.frame(do.call(cbind, lapply(1:K, function(x) Past_score_df[,x]*weights[x])))
  Future_score_df <- Future_score[,-c(1)]
  Proj_offset_fut <- as.data.frame(do.call(cbind, lapply(1:K, function(x) Future_score_df[,x]*weights[x])))
  
  
  #Now we want to calculate the distance between present and future for each RDA axis before doing it for both axis simultaneously
  Proj_offset <- list() 
  for(i in 1:K){
  Proj_offset[[i]] <- abs(Proj_offset_past[[i]] - Proj_offset_fut[[i]])
      names(Proj_offset)[i] <- paste0("RDA", as.character(i))
  }
  
  
  # Predict a global genetic offset, incorporating the K first axes weighted by their eigen values
  ras <- Proj_offset[[1]] #we reused the format of the previous distance per RDA axis
  ras[!is.na(ras)] <- unlist(lapply(1:nrow(Proj_offset_past), function(x) dist(rbind(Proj_offset_past[x,], Proj_offset_fut[x,]), method = "euclidean"))) #calculation of the euclidean distance on the non Na values of the previous distance -> that why we used the format of the previous distance, to be sure to only select the rows without Nas because they are not deal by euclidean distance,
  #the euclidean distance is still calculated on the weighted data (not the previous distance but on the genomic composition weighted)
  names(ras) <- "Global_offset"
  Proj_offset_global <- ras
  
  # Return prediction of genetic offset for each RDA axis and a global genetic offset for each population
  return(list(Population=meta_data$Population,Proj_offset = Proj_offset, Proj_offset_global = Proj_offset_global, weights = weights[1:K]))
}

We ran the function on the predicted genomic composition:

list_rda <- c("model_GEA_RDA_all","model_GEA_RDA_random","model_GEA_RDA_random_same_AF","model_GEA_RDA_outliers_LC","model_GEA_RDA_outliers_MC","model_GEA_RDA_CG","model_GEA_RDA_random_AF_V2","model_GEA_RDA_all_outliers")
list_name <- c("all","random", "random_same_AF", "LC", "MC", "CG","random_AF_V2","all_outliers")

K = 2  # Number of RDA axes retained

# Loop over the RDA models
for (x in 1:length(list_rda)) {
  RDA <- get(list_rda[x])
  name <- list_name[x]
  
  # Load adaptive values for natural populations
  adaptive_natural_score <- get(paste0("adaptive_values_", name, "_natural_pop"))
  
  # Load adaptive values for common garden 2012
  CG_2012_score <- get(paste0("adaptive_values_", name, "_common_garden_2012"))
  
  # Load adaptive values for common garden 2021
  CG_2021_score <- get(paste0("adaptive_values_", name, "_common_garden_2021"))
  
  # Run the genomic offset calculation for Natural Populations with CG_2012
  Run_genomic_offset_pop_CG2012 <- genomic_offset_pop(RDA, K, adaptive_natural_score, CG_2012_score, meta_data)
  
  # Create a dataframe for CG_2012 results
  genomic_offset_df_CG2012 <- data.frame(
    Population = unlist(Run_genomic_offset_pop_CG2012$Population),
    Genomic_offset_CG2012 = unlist(Run_genomic_offset_pop_CG2012$Proj_offset_global)
  )
  
  # Order the dataframe by population
  genomic_offset_df_CG2012 <- genomic_offset_df_CG2012[order(genomic_offset_df_CG2012$Population),]
  colnames(genomic_offset_df_CG2012) <- c("Population", paste0("GO_", name, "_CG2012"))
  
  # Save the results for CG_2012
  assign(paste0("GO_RDA_", name, "_CG2012_Taxus"), genomic_offset_df_CG2012)
  save(list = paste0("GO_RDA_", name, "_CG2012_Taxus"), file = paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/Validation/GO/Standard_GO/RDA/data/CG_period/GO_RDA_", name, "_CG2012_Taxus.Rdata"))
  
  # Run the genomic offset calculation for Natural Populations with CG_2021
  Run_genomic_offset_pop_CG2021 <- genomic_offset_pop(RDA, K, adaptive_natural_score, CG_2021_score, meta_data)
  
  # Create a dataframe for CG_2021 results
  genomic_offset_df_CG2021 <- data.frame(
    Population = unlist(Run_genomic_offset_pop_CG2021$Population),
    Genomic_offset_CG2021 = unlist(Run_genomic_offset_pop_CG2021$Proj_offset_global)
  )
  
  # Order the dataframe by population
  genomic_offset_df_CG2021 <- genomic_offset_df_CG2021[order(genomic_offset_df_CG2021$Population),]
  colnames(genomic_offset_df_CG2021) <- c("Population", paste0("GO_", name, "_CG2021"))
  
  # Save the results for CG_2021
  assign(paste0("GO_RDA_", name, "_CG2021_Taxus"), genomic_offset_df_CG2021)
  save(list = paste0("GO_RDA_", name, "_CG2021_Taxus"), file = paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/Validation/GO/Standard_GO/RDA/data/CG_period/GO_RDA_", name, "_CG2021_Taxus.Rdata"))
}

4 Genomic offset Gradient forest

4.1 GEA models

Load the GEA models

list_models <- c("all","random_bis","random_same_AF_bis","LC","CG","random_same_AF_V2","all_outliers")

for(i in 1:length(list_models)){
  model <- list_models[i]
  load(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/RUNs/Run_GEA_GF_",model,".Rdata"))
}

4.2 Interpolate/extrapolate the GEA relationship and calculate genomic offset

########## calculate adaptive offset for populations in space or time
genomic_offset_function <- function(gfMod, vars, env1, env2, combined=F,
                       pops = envPop$pop_code, weighted=FALSE){
  
  #gfMod = gf model for prediction
  #vars = names of env variables
  #env2 = new environment (new place / time)
    transEnv2 <- predict(gfMod, env2[,vars]) #new env
    transEnv1 <- predict(gfMod, env1[,vars]) #current env
  
  #calculate Euclidean distance in transformed env space
  num <- nrow(transEnv1)
  dOut <- lapply(1:num, function(x, tEnv1, tEnv2){
    as.numeric(pdist(tEnv1[x,],  tEnv2[x,])@dist)}, tEnv2=transEnv2, tEnv1=transEnv1)
  return(dOut)
}

We can apply this function to our dataset:

list_set <- c("Run_GEA_GF_all","Run_GEA_GF_random_bis","Run_GEA_GF_random_same_AF_bis","Run_GEA_GF_LC","Run_GEA_GF_CG","Run_GEA_GF_random_same_AF_V2","Run_GEA_GF_all_outliers")
list_CG <- c("2012","2021")
vars <- colnames(clim_df_natural_pop)
list_name <- c("all","random","random_same_AF","LC","CG","random_AF_V2","all_outliers")

#vars = past climatic data used in GEA

for(i in 1: length(list_set)){
  
  GF_run <- get(list_set[i])
  
  name <- list_name[i]

for(x in 1:length(list_CG)){
  
  
  name_clim <- list_CG[x]
  climate_CG <- get(paste0("clim_df_tot_scale_CG_",name_clim))

  climate_pop <- clim_df_natural_pop
  
Genomic_offset <- genomic_offset_function(gfMod=GF_run, vars=vars, env1=climate_pop , env2=climate_CG, combined=F,
                       pops = clim_df_tot_scale_CG_2012$Population, weighted=FALSE)

#extraction GO values
Genomic_offset$values <- unlist(Genomic_offset)
genomic_offset_GF <- data.frame(Population=clim_df_tot_scale_CG_2012$Population,GO=Genomic_offset$values)
names(genomic_offset_GF)[2] <- paste0("genomic_offset_GF_",name)
assign(paste0("GO_standard_GF_",name,"_",name_clim),genomic_offset_GF)
#save
 save(list = paste0("GO_standard_GF_",name,"_",name_clim),file=paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/Validation/GO/standard_GO/GF/data/CG_period/GO_standard_GF_",name,"_",name_clim,".Rdata"))
  }
}

devtools::session_info()

## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.2 (2023-10-31 ucrt)
##  os       Windows 11 x64 (build 26100)
##  system   x86_64, mingw32
##  ui       RTerm
##  language (EN)
##  collate  French_France.utf8
##  ctype    French_France.utf8
##  tz       Europe/Paris
##  date     2025-09-22
##  pandoc   3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package        * version    date (UTC) lib source
##  bslib            0.6.1      2023-11-28 [1] CRAN (R 4.3.2)
##  cachem           1.0.8      2023-05-01 [1] CRAN (R 4.3.1)
##  class            7.3-22     2023-05-03 [2] CRAN (R 4.3.2)
##  classInt         0.4-10     2023-09-05 [1] CRAN (R 4.3.2)
##  cli              3.6.1      2023-03-23 [1] CRAN (R 4.3.1)
##  cluster          2.1.6      2023-12-01 [2] CRAN (R 4.3.2)
##  codetools        0.2-19     2023-02-01 [2] CRAN (R 4.3.2)
##  colorspace       2.1-0      2023-01-23 [1] CRAN (R 4.3.1)
##  corrplot       * 0.92       2021-11-18 [1] CRAN (R 4.3.2)
##  DBI              1.2.2      2024-02-16 [1] CRAN (R 4.3.2)
##  devtools         2.4.5      2022-10-11 [1] CRAN (R 4.3.3)
##  digest           0.6.33     2023-07-07 [1] CRAN (R 4.3.1)
##  dplyr          * 1.1.4      2023-11-17 [1] CRAN (R 4.3.2)
##  e1071            1.7-14     2023-12-06 [1] CRAN (R 4.3.2)
##  ellipsis         0.3.2      2021-04-29 [1] CRAN (R 4.3.1)
##  evaluate         0.23       2023-11-01 [1] CRAN (R 4.3.2)
##  extendedForest   1.6.2      2023-12-12 [1] R-Forge (R 4.3.2)
##  fastmap          1.1.1      2023-02-24 [1] CRAN (R 4.3.1)
##  fs               1.6.3      2023-07-20 [1] CRAN (R 4.3.1)
##  generics         0.1.4      2025-05-09 [1] CRAN (R 4.3.2)
##  ggcorrplot     * 0.1.4.1    2023-09-05 [1] CRAN (R 4.3.3)
##  ggplot2        * 3.5.2      2025-04-09 [1] CRAN (R 4.3.2)
##  glue             1.7.0      2024-01-09 [1] CRAN (R 4.3.2)
##  gradientForest * 0.1-37     2023-08-19 [1] R-Forge (R 4.3.1)
##  gtable           0.3.6      2024-10-25 [1] CRAN (R 4.3.3)
##  htmltools        0.5.7      2023-11-03 [1] CRAN (R 4.3.2)
##  htmlwidgets      1.6.4      2023-12-06 [1] CRAN (R 4.3.2)
##  httpuv           1.6.13     2023-12-06 [1] CRAN (R 4.3.2)
##  httr             1.4.7      2023-08-15 [1] CRAN (R 4.3.2)
##  jquerylib        0.1.4      2021-04-26 [1] CRAN (R 4.3.1)
##  jsonlite         1.8.8      2023-12-04 [1] CRAN (R 4.3.2)
##  KernSmooth       2.23-22    2023-07-10 [2] CRAN (R 4.3.2)
##  knitr            1.45       2023-10-30 [1] CRAN (R 4.3.2)
##  later            1.3.2      2023-12-06 [1] CRAN (R 4.3.2)
##  lattice        * 0.22-5     2023-10-24 [2] CRAN (R 4.3.2)
##  lifecycle        1.0.4      2023-11-07 [1] CRAN (R 4.3.2)
##  magrittr         2.0.3      2022-03-30 [1] CRAN (R 4.3.1)
##  MASS             7.3-60.0.1 2024-01-13 [2] CRAN (R 4.3.2)
##  Matrix           1.6-5      2024-01-11 [1] CRAN (R 4.3.2)
##  memoise          2.0.1      2021-11-26 [1] CRAN (R 4.3.1)
##  mgcv             1.9-1      2023-12-21 [2] CRAN (R 4.3.2)
##  mime             0.12       2021-09-28 [1] CRAN (R 4.3.1)
##  miniUI           0.1.1.1    2018-05-18 [1] CRAN (R 4.3.2)
##  munsell          0.5.1      2024-04-01 [1] CRAN (R 4.3.3)
##  nlme             3.1-164    2023-11-27 [2] CRAN (R 4.3.2)
##  pdist          * 1.2.1      2022-05-02 [1] CRAN (R 4.3.1)
##  permute        * 0.9-7      2022-01-27 [1] CRAN (R 4.3.2)
##  pillar           1.10.2     2025-04-05 [1] CRAN (R 4.3.3)
##  pkgbuild         1.4.3      2023-12-10 [1] CRAN (R 4.3.2)
##  pkgconfig        2.0.3      2019-09-22 [1] CRAN (R 4.3.2)
##  pkgload          1.3.4      2024-01-16 [1] CRAN (R 4.3.2)
##  profvis          0.3.8      2023-05-02 [1] CRAN (R 4.3.2)
##  promises         1.2.1      2023-08-10 [1] CRAN (R 4.3.2)
##  proxy            0.4-27     2022-06-09 [1] CRAN (R 4.3.2)
##  purrr            1.0.2      2023-08-10 [1] CRAN (R 4.3.2)
##  R6               2.6.1      2025-02-15 [1] CRAN (R 4.3.3)
##  Rcpp             1.0.11     2023-07-06 [1] CRAN (R 4.3.1)
##  remotes          2.5.0      2024-03-17 [1] CRAN (R 4.3.3)
##  rlang            1.1.3      2024-01-10 [1] CRAN (R 4.3.2)
##  rmarkdown        2.25       2023-09-18 [1] CRAN (R 4.3.1)
##  rnaturalearth  * 1.0.1      2023-12-15 [1] CRAN (R 4.3.2)
##  rstudioapi       0.15.0     2023-07-07 [1] CRAN (R 4.3.2)
##  sass             0.4.8      2023-12-06 [1] CRAN (R 4.3.2)
##  scales           1.3.0      2023-11-28 [1] CRAN (R 4.3.2)
##  sessioninfo      1.2.2      2021-12-06 [1] CRAN (R 4.3.2)
##  sf               1.0-15     2023-12-18 [1] CRAN (R 4.3.2)
##  shiny            1.8.0      2023-11-17 [1] CRAN (R 4.3.2)
##  stringi          1.8.3      2023-12-11 [1] CRAN (R 4.3.2)
##  stringr          1.5.1      2023-11-14 [1] CRAN (R 4.3.2)
##  terra            1.7-71     2024-01-31 [1] CRAN (R 4.3.2)
##  tibble           3.2.1      2023-03-20 [1] CRAN (R 4.3.2)
##  tidyselect       1.2.1      2024-03-11 [1] CRAN (R 4.3.3)
##  units            0.8-5      2023-11-28 [1] CRAN (R 4.3.2)
##  urlchecker       1.0.1      2021-11-30 [1] CRAN (R 4.3.2)
##  usethis          2.2.3      2024-02-19 [1] CRAN (R 4.3.2)
##  vctrs            0.6.5      2023-12-01 [1] CRAN (R 4.3.2)
##  vegan          * 2.6-4      2022-10-11 [1] CRAN (R 4.3.2)
##  withr            3.0.2      2024-10-28 [1] CRAN (R 4.3.3)
##  xfun             0.41       2023-11-01 [1] CRAN (R 4.3.2)
##  xtable           1.8-4      2019-04-21 [1] CRAN (R 4.3.2)
##  yaml             2.3.8      2023-12-11 [1] CRAN (R 4.3.2)
## 
##  [1] C:/Users/tfrancisco/AppData/Local/R/win-library/4.3
##  [2] C:/Users/tfrancisco/AppData/Local/Programs/R/R-4.3.2/library
## 
## ──────────────────────────────────────────────────────────────────────────────

Genomic_offset_Clonal_bank

Thomas Francisco

2024-09-10