This script calculates genomic offset (GO) predictions for populations planted in the common garden. The aim is to evaluate whether there is a relationship between population fitness and GO predictions, which will allow us to assess the accuracy of the model. Genomic offset is defined as the distance between the genomic composition predicted by the GEA model under the climate of origin and under the climate of the common garden. Populations with higher genomic offset (i.e., those predicted to be less well adapted to the common garden environment) are expected to exhibit lower fitness in the common garden.
For this validation, we use an independent set of populations grown in a clonal bank located in central Spain.
meta_data <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Populations/Populations_common_garden.csv",h=T,sep=";",dec=",")
list_climate <- c("clim_df_scale_CG_2012","clim_df_scale_CG_2021","clim_df_natural_pop")
for(i in 1:length(list_climate)){
climate <- list_climate[i]
load(file=paste0("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/validation_GO/climatic_data/",climate,".Rdata"))
}
We need to add the climatic data for each populations for the CG df:
clim_data_replicated_2012 <- clim_df_scale_CG_2012[rep(1, 26), ]
clim_df_tot_scale_CG_2012 <- data.frame(Population=meta_data$Population,clim_data_replicated_2012)
clim_data_replicated_2021 <- clim_df_scale_CG_2021[rep(1, 26), ]
clim_df_tot_scale_CG_2021 <- data.frame(Population=meta_data$Population,clim_data_replicated_2021)
We predicted the genomic composition for the populations both in their natural habitat and in the common garden.
Load GEA models
list_models <- c("model_GEA_RDA_all","model_GEA_RDA_random","model_GEA_RDA_random_same_AF","model_GEA_RDA_CG","model_GEA_RDA_outliers_LC","model_GEA_RDA_outliers_MC","model_GEA_RDA_random_AF_V2","model_GEA_RDA_all_outliers")
for(x in 1:length(list_models)){
name_model<- list_models[x]
load(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/RDA/data/models/",name_model,".Rdata"))
}
We also need to load the original genomic dataset used to build the model, along with the set of outlier SNPs. This step is necessary because the predict function does not store the input data within the model object. Instead, it requires the initial dataset and outlier set in order to generate predictions with new data.
#genomic data
load("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/genetic_new/data_allelic_frequencies_29pop_adapcon_gentree_475_8616.Rdata")
genomic_matrix <- data_allelic_frequencies_29pop_adapcon_gentree_475_8616
list_models <- c("random_neutral_set_SNPs_T_adapcon_gentree_bis","random_set_taxus","outliers_set_final_overlapping_no_LD_LC_new_var","outliers_set_final_overlapping_no_LD_new_var","random_set_taxus_not_overlap_both_dataset_V2","unique_outliers")
for(i in 1:length(list_models)){
model <- list_models[i]
load(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/GEA_new_var/outliers/",model,".Rdata"))
}
list_random_SNPs <- colnames(random_neutral_set_SNPs_T_adapcon_gentree_bis)
list_random_SNPs_same_AF <- random_set_taxus$name_snps
list_random_SNPs_same_AF_V2 <- random_set_taxus_not_overlap_both_dataset_V2$name_snps
#CG common
load("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/genetic_new/gen_matrix_imp_CG.Rdata")
CG_common_set <- intersect(outliers_set_final_overlapping_no_LD_LC_new_var, colnames(gen_matrix_imp_CG))
save(CG_common_set,file="C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/GEA_new_var/outliers/CG_common_set.Rdata")
Now, based on these models, we calculated the two genomic compositions. We can use the predict function to do this.
list_rda <- c("model_GEA_RDA_all","model_GEA_RDA_random","model_GEA_RDA_random_same_AF","model_GEA_RDA_outliers_LC","model_GEA_RDA_outliers_MC","model_GEA_RDA_CG","model_GEA_RDA_random_AF_V2","model_GEA_RDA_all_outliers")
list_set <- c("all","random","random_same_AF","LC","MC","CG","random_AF_V2","all_outliers")
K = 2 # number of rda axes retained
for(i in 1:length(list_rda)){
rda_model <- get(list_rda[i])
set <- list_set[i]
# Natural populations
clim_natural_pop <- get("clim_df_natural_pop")
predictions_natural_pop <- data.frame(Population = meta_data$Population, predict(rda_model, clim_natural_pop, type = "lc", rank = K, scaling = "none"))
assign(paste0("adaptive_values_", set, "_natural_pop"), predictions_natural_pop)
# Common garden 2012
clim_common_garden_2012 <- get("clim_data_replicated_2012")
predictions_common_garden_2012 <- data.frame(Population = meta_data$Population, predict(rda_model, clim_common_garden_2012, type = "lc", rank = K, scaling = "none"))
assign(paste0("adaptive_values_", set, "_common_garden_2012"), predictions_common_garden_2012)
# Common garden 2021
clim_common_garden_2021 <- get("clim_data_replicated_2021")
predictions_common_garden_2021 <- data.frame(Population = meta_data$Population, predict(rda_model, clim_common_garden_2021, type = "lc", rank = K, scaling = "none"))
assign(paste0("adaptive_values_", set, "_common_garden_2021"), predictions_common_garden_2021)
}
Now we calculated the genomic offset with these genomic compositions.
genomic_offset_pop <- function(RDA, K, Past_score, Future_score,meta_data){
# Weights based on axis eigen values
weights <- RDA$CCA$eig/sum(RDA$CCA$eig)
# Weighing the current and future adaptive indices based on the eigen values of the associated axes
Past_score_df <- Past_score[,-c(1)]
Proj_offset_past <- as.data.frame(do.call(cbind, lapply(1:K, function(x) Past_score_df[,x]*weights[x])))
Future_score_df <- Future_score[,-c(1)]
Proj_offset_fut <- as.data.frame(do.call(cbind, lapply(1:K, function(x) Future_score_df[,x]*weights[x])))
#Now we want to calculate the distance between present and future for each RDA axis before doing it for both axis simultaneously
Proj_offset <- list()
for(i in 1:K){
Proj_offset[[i]] <- abs(Proj_offset_past[[i]] - Proj_offset_fut[[i]])
names(Proj_offset)[i] <- paste0("RDA", as.character(i))
}
# Predict a global genetic offset, incorporating the K first axes weighted by their eigen values
ras <- Proj_offset[[1]] #we reused the format of the previous distance per RDA axis
ras[!is.na(ras)] <- unlist(lapply(1:nrow(Proj_offset_past), function(x) dist(rbind(Proj_offset_past[x,], Proj_offset_fut[x,]), method = "euclidean"))) #calculation of the euclidean distance on the non Na values of the previous distance -> that why we used the format of the previous distance, to be sure to only select the rows without Nas because they are not deal by euclidean distance,
#the euclidean distance is still calculated on the weighted data (not the previous distance but on the genomic composition weighted)
names(ras) <- "Global_offset"
Proj_offset_global <- ras
# Return prediction of genetic offset for each RDA axis and a global genetic offset for each population
return(list(Population=meta_data$Population,Proj_offset = Proj_offset, Proj_offset_global = Proj_offset_global, weights = weights[1:K]))
}
We ran the function on the predicted genomic composition:
list_rda <- c("model_GEA_RDA_all","model_GEA_RDA_random","model_GEA_RDA_random_same_AF","model_GEA_RDA_outliers_LC","model_GEA_RDA_outliers_MC","model_GEA_RDA_CG","model_GEA_RDA_random_AF_V2","model_GEA_RDA_all_outliers")
list_name <- c("all","random", "random_same_AF", "LC", "MC", "CG","random_AF_V2","all_outliers")
K = 2 # Number of RDA axes retained
# Loop over the RDA models
for (x in 1:length(list_rda)) {
RDA <- get(list_rda[x])
name <- list_name[x]
# Load adaptive values for natural populations
adaptive_natural_score <- get(paste0("adaptive_values_", name, "_natural_pop"))
# Load adaptive values for common garden 2012
CG_2012_score <- get(paste0("adaptive_values_", name, "_common_garden_2012"))
# Load adaptive values for common garden 2021
CG_2021_score <- get(paste0("adaptive_values_", name, "_common_garden_2021"))
# Run the genomic offset calculation for Natural Populations with CG_2012
Run_genomic_offset_pop_CG2012 <- genomic_offset_pop(RDA, K, adaptive_natural_score, CG_2012_score, meta_data)
# Create a dataframe for CG_2012 results
genomic_offset_df_CG2012 <- data.frame(
Population = unlist(Run_genomic_offset_pop_CG2012$Population),
Genomic_offset_CG2012 = unlist(Run_genomic_offset_pop_CG2012$Proj_offset_global)
)
# Order the dataframe by population
genomic_offset_df_CG2012 <- genomic_offset_df_CG2012[order(genomic_offset_df_CG2012$Population),]
colnames(genomic_offset_df_CG2012) <- c("Population", paste0("GO_", name, "_CG2012"))
# Save the results for CG_2012
assign(paste0("GO_RDA_", name, "_CG2012_Taxus"), genomic_offset_df_CG2012)
save(list = paste0("GO_RDA_", name, "_CG2012_Taxus"), file = paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/Validation/GO/Standard_GO/RDA/data/CG_period/GO_RDA_", name, "_CG2012_Taxus.Rdata"))
# Run the genomic offset calculation for Natural Populations with CG_2021
Run_genomic_offset_pop_CG2021 <- genomic_offset_pop(RDA, K, adaptive_natural_score, CG_2021_score, meta_data)
# Create a dataframe for CG_2021 results
genomic_offset_df_CG2021 <- data.frame(
Population = unlist(Run_genomic_offset_pop_CG2021$Population),
Genomic_offset_CG2021 = unlist(Run_genomic_offset_pop_CG2021$Proj_offset_global)
)
# Order the dataframe by population
genomic_offset_df_CG2021 <- genomic_offset_df_CG2021[order(genomic_offset_df_CG2021$Population),]
colnames(genomic_offset_df_CG2021) <- c("Population", paste0("GO_", name, "_CG2021"))
# Save the results for CG_2021
assign(paste0("GO_RDA_", name, "_CG2021_Taxus"), genomic_offset_df_CG2021)
save(list = paste0("GO_RDA_", name, "_CG2021_Taxus"), file = paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/Validation/GO/Standard_GO/RDA/data/CG_period/GO_RDA_", name, "_CG2021_Taxus.Rdata"))
}
Load the GEA models
list_models <- c("all","random_bis","random_same_AF_bis","LC","CG","random_same_AF_V2","all_outliers")
for(i in 1:length(list_models)){
model <- list_models[i]
load(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/RUNs/Run_GEA_GF_",model,".Rdata"))
}
########## calculate adaptive offset for populations in space or time
genomic_offset_function <- function(gfMod, vars, env1, env2, combined=F,
pops = envPop$pop_code, weighted=FALSE){
#gfMod = gf model for prediction
#vars = names of env variables
#env2 = new environment (new place / time)
transEnv2 <- predict(gfMod, env2[,vars]) #new env
transEnv1 <- predict(gfMod, env1[,vars]) #current env
#calculate Euclidean distance in transformed env space
num <- nrow(transEnv1)
dOut <- lapply(1:num, function(x, tEnv1, tEnv2){
as.numeric(pdist(tEnv1[x,], tEnv2[x,])@dist)}, tEnv2=transEnv2, tEnv1=transEnv1)
return(dOut)
}
We can apply this function to our dataset:
list_set <- c("Run_GEA_GF_all","Run_GEA_GF_random_bis","Run_GEA_GF_random_same_AF_bis","Run_GEA_GF_LC","Run_GEA_GF_CG","Run_GEA_GF_random_same_AF_V2","Run_GEA_GF_all_outliers")
list_CG <- c("2012","2021")
vars <- colnames(clim_df_natural_pop)
list_name <- c("all","random","random_same_AF","LC","CG","random_AF_V2","all_outliers")
#vars = past climatic data used in GEA
for(i in 1: length(list_set)){
GF_run <- get(list_set[i])
name <- list_name[i]
for(x in 1:length(list_CG)){
name_clim <- list_CG[x]
climate_CG <- get(paste0("clim_df_tot_scale_CG_",name_clim))
climate_pop <- clim_df_natural_pop
Genomic_offset <- genomic_offset_function(gfMod=GF_run, vars=vars, env1=climate_pop , env2=climate_CG, combined=F,
pops = clim_df_tot_scale_CG_2012$Population, weighted=FALSE)
#extraction GO values
Genomic_offset$values <- unlist(Genomic_offset)
genomic_offset_GF <- data.frame(Population=clim_df_tot_scale_CG_2012$Population,GO=Genomic_offset$values)
names(genomic_offset_GF)[2] <- paste0("genomic_offset_GF_",name)
assign(paste0("GO_standard_GF_",name,"_",name_clim),genomic_offset_GF)
#save
save(list = paste0("GO_standard_GF_",name,"_",name_clim),file=paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/Validation/GO/standard_GO/GF/data/CG_period/GO_standard_GF_",name,"_",name_clim,".Rdata"))
}
}
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.3.2 (2023-10-31 ucrt)
## os Windows 11 x64 (build 26100)
## system x86_64, mingw32
## ui RTerm
## language (EN)
## collate French_France.utf8
## ctype French_France.utf8
## tz Europe/Paris
## date 2025-09-22
## pandoc 3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## bslib 0.6.1 2023-11-28 [1] CRAN (R 4.3.2)
## cachem 1.0.8 2023-05-01 [1] CRAN (R 4.3.1)
## class 7.3-22 2023-05-03 [2] CRAN (R 4.3.2)
## classInt 0.4-10 2023-09-05 [1] CRAN (R 4.3.2)
## cli 3.6.1 2023-03-23 [1] CRAN (R 4.3.1)
## cluster 2.1.6 2023-12-01 [2] CRAN (R 4.3.2)
## codetools 0.2-19 2023-02-01 [2] CRAN (R 4.3.2)
## colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.3.1)
## corrplot * 0.92 2021-11-18 [1] CRAN (R 4.3.2)
## DBI 1.2.2 2024-02-16 [1] CRAN (R 4.3.2)
## devtools 2.4.5 2022-10-11 [1] CRAN (R 4.3.3)
## digest 0.6.33 2023-07-07 [1] CRAN (R 4.3.1)
## dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.3.2)
## e1071 1.7-14 2023-12-06 [1] CRAN (R 4.3.2)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.3.1)
## evaluate 0.23 2023-11-01 [1] CRAN (R 4.3.2)
## extendedForest 1.6.2 2023-12-12 [1] R-Forge (R 4.3.2)
## fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.1)
## fs 1.6.3 2023-07-20 [1] CRAN (R 4.3.1)
## generics 0.1.4 2025-05-09 [1] CRAN (R 4.3.2)
## ggcorrplot * 0.1.4.1 2023-09-05 [1] CRAN (R 4.3.3)
## ggplot2 * 3.5.2 2025-04-09 [1] CRAN (R 4.3.2)
## glue 1.7.0 2024-01-09 [1] CRAN (R 4.3.2)
## gradientForest * 0.1-37 2023-08-19 [1] R-Forge (R 4.3.1)
## gtable 0.3.6 2024-10-25 [1] CRAN (R 4.3.3)
## htmltools 0.5.7 2023-11-03 [1] CRAN (R 4.3.2)
## htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.3.2)
## httpuv 1.6.13 2023-12-06 [1] CRAN (R 4.3.2)
## httr 1.4.7 2023-08-15 [1] CRAN (R 4.3.2)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.3.1)
## jsonlite 1.8.8 2023-12-04 [1] CRAN (R 4.3.2)
## KernSmooth 2.23-22 2023-07-10 [2] CRAN (R 4.3.2)
## knitr 1.45 2023-10-30 [1] CRAN (R 4.3.2)
## later 1.3.2 2023-12-06 [1] CRAN (R 4.3.2)
## lattice * 0.22-5 2023-10-24 [2] CRAN (R 4.3.2)
## lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.3.2)
## magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.1)
## MASS 7.3-60.0.1 2024-01-13 [2] CRAN (R 4.3.2)
## Matrix 1.6-5 2024-01-11 [1] CRAN (R 4.3.2)
## memoise 2.0.1 2021-11-26 [1] CRAN (R 4.3.1)
## mgcv 1.9-1 2023-12-21 [2] CRAN (R 4.3.2)
## mime 0.12 2021-09-28 [1] CRAN (R 4.3.1)
## miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.3.2)
## munsell 0.5.1 2024-04-01 [1] CRAN (R 4.3.3)
## nlme 3.1-164 2023-11-27 [2] CRAN (R 4.3.2)
## pdist * 1.2.1 2022-05-02 [1] CRAN (R 4.3.1)
## permute * 0.9-7 2022-01-27 [1] CRAN (R 4.3.2)
## pillar 1.10.2 2025-04-05 [1] CRAN (R 4.3.3)
## pkgbuild 1.4.3 2023-12-10 [1] CRAN (R 4.3.2)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.2)
## pkgload 1.3.4 2024-01-16 [1] CRAN (R 4.3.2)
## profvis 0.3.8 2023-05-02 [1] CRAN (R 4.3.2)
## promises 1.2.1 2023-08-10 [1] CRAN (R 4.3.2)
## proxy 0.4-27 2022-06-09 [1] CRAN (R 4.3.2)
## purrr 1.0.2 2023-08-10 [1] CRAN (R 4.3.2)
## R6 2.6.1 2025-02-15 [1] CRAN (R 4.3.3)
## Rcpp 1.0.11 2023-07-06 [1] CRAN (R 4.3.1)
## remotes 2.5.0 2024-03-17 [1] CRAN (R 4.3.3)
## rlang 1.1.3 2024-01-10 [1] CRAN (R 4.3.2)
## rmarkdown 2.25 2023-09-18 [1] CRAN (R 4.3.1)
## rnaturalearth * 1.0.1 2023-12-15 [1] CRAN (R 4.3.2)
## rstudioapi 0.15.0 2023-07-07 [1] CRAN (R 4.3.2)
## sass 0.4.8 2023-12-06 [1] CRAN (R 4.3.2)
## scales 1.3.0 2023-11-28 [1] CRAN (R 4.3.2)
## sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.3.2)
## sf 1.0-15 2023-12-18 [1] CRAN (R 4.3.2)
## shiny 1.8.0 2023-11-17 [1] CRAN (R 4.3.2)
## stringi 1.8.3 2023-12-11 [1] CRAN (R 4.3.2)
## stringr 1.5.1 2023-11-14 [1] CRAN (R 4.3.2)
## terra 1.7-71 2024-01-31 [1] CRAN (R 4.3.2)
## tibble 3.2.1 2023-03-20 [1] CRAN (R 4.3.2)
## tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.3.3)
## units 0.8-5 2023-11-28 [1] CRAN (R 4.3.2)
## urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.3.2)
## usethis 2.2.3 2024-02-19 [1] CRAN (R 4.3.2)
## vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.3.2)
## vegan * 2.6-4 2022-10-11 [1] CRAN (R 4.3.2)
## withr 3.0.2 2024-10-28 [1] CRAN (R 4.3.3)
## xfun 0.41 2023-11-01 [1] CRAN (R 4.3.2)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 4.3.2)
## yaml 2.3.8 2023-12-11 [1] CRAN (R 4.3.2)
##
## [1] C:/Users/tfrancisco/AppData/Local/R/win-library/4.3
## [2] C:/Users/tfrancisco/AppData/Local/Programs/R/R-4.3.2/library
##
## ──────────────────────────────────────────────────────────────────────────────