rm(list = ls())
knitr::opts_chunk$set(
    message = FALSE,
    warning = FALSE,
    cache = FALSE
)

meta_data_pop <- read.csv("Data/Species/Taxus_baccata/Populations/taxus_sample_29pop.csv",h=T,sep=";",dec=",")

#alphabetic order
meta_data_pop_order <- meta_data_pop[order(meta_data_pop$Population),]

1 Introduction

This script calculates the Euclidean distances between observed and predicted population scores for the 1901-1950 time period along the retained RDA axes and normalises these values using min–max scaling to a 0-1 range (hereafter referred to as the genomic discrepancy index). It also computes the genomic offset index using the RDA framework.

The genomic discrepancy index is a metric introduced in Francisco et al. 2025 to visualise how far the observed genomic composition of populations deviates from that predicted by the GEA model under the same climatic conditions. Populations are unlikely to exhibit exactly the genomic composition predicted by the model’s linear relationships of local adaptation. Such deviations may indicate that the degree of local adaptation is lower than expected or that adaptation relies on a different genetic basis than that captured by the model. Possible explanations include neutral processes such as genetic drift (leading to fixation of potentially adaptive alleles), restricted gene flow (resulting in the absence of adaptive variants in some populations), or populations experiencing adaptation lag or pre-adaptation to future climates. However, alternative explanations, such as conditional neutrality of alleles or model misspecification, should also be considered.

The genomic offset index, first proposed by Fitzpatrick and Keller (2015), quantifies the amount of genomic change required for populations (or individuals) to maintain fitness under future climatic conditions. Specifically, genomic offset measures the distance between the estimated genomic composition under current (reference) climates and that predicted under future climates, focusing on SNPs potentially associated with adaptation.

To compute genomic offset, climate-associated SNPs can first be identified (alternatively, all SNPs may be used under the assumption that adaptation is polygenic or even omnigenic). GEA methods are then applied to establish relationships between allelic variation and climatic variables. These relationships are used to estimate genomic composition under past or present and future climatic conditions, effectively interpolating or extrapolating the GEA models in space and time. Genomic offset is then quantified as the Euclidean distance between genomic compositions under reference and future climates.

2 Data

2.1 Past/present and future climatic data

#climatic/structure/IBD data
load("Data/Species/Taxus_baccata/GEA_new_var/variance_partitioning/Climatic_data_RDA_pRDA.Rdata")

Here we will also load the raster data not scaled but with the mean and sd values of the past climatic data to scaled the data together to follow the function provided in Capblancq and Forester (2021).

list_models_clim<- c("GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL") 

#load past and future raster data
climatic_variables_to_keep <-c("bio1", "bio12", "bio15","bio2","bio4","bio9") 
list_raster <- c("past_climatic_data_raster","present_climatic_data_raster","future_climatic_data_raster")

for(i in 1:length(list_models_clim)){
  
  model <- list_models_clim[i]

for(x in 1:length(list_raster)){
  name <- c("past","present","future")
  name_clim <- name[x]#name final raster
  var <- list_raster[x]#name of the raster
  
  if(var == "future_climatic_data_raster"){
    var= paste0("future_climatic_data_raster_",model)
    
    load(paste0("Data/Species/Taxus_baccata/Genomic_offset/RDA/",var,".Rdata")) #load data
  raster_clim <- get(var)[[climatic_variables_to_keep]]#keep only var in clim_variables to keep
  names(raster_clim)=c("Annual_Tc","Annual_P","P_Seasonality","Diurnal_range_Tc","Tc_Seasonality","Tc_driest_quarter") #change the name to match the names of the scaled and center values
  
  assign(paste0("raster_",name_clim,"_clim_",model),raster_clim) 
  
  } else {
    
    load(paste0("Data/Species/Taxus_baccata/Genomic_offset/RDA/",var,".Rdata"))
  raster_clim <- get(var)[[climatic_variables_to_keep]]
  names(raster_clim)=c("Annual_Tc","Annual_P","P_Seasonality","Diurnal_range_Tc","Tc_Seasonality","Tc_driest_quarter")
  assign(paste0("raster_",name_clim,"_clim"),raster_clim) 
    }
  }
}

We also need the scale and center values:

load("Data/Species/Taxus_baccata/Climatic_data/new_selection/scale_env_value_new_cli.Rdata")
load("Data/Species/Taxus_baccata/Climatic_data/new_selection/center_env_value_new_cli.Rdata")

list_name <- c("past","present","future")
coords <- data.frame(apply(meta_data_pop_order[,c(5:4)], 2, as.numeric))#we need to have longitude then latitude

for(i in 1:length(list_models_clim)){
  model <- list_models_clim[i]
for(x in 1: length(list_name)){
  name <- list_name[x]
  if(name == "future"){
  raster_data <- get(paste0("raster_",name,"_clim_",model))
  
  clim_data <- raster::extract(raster_data, coords)
    clim_data_order <- data.frame(clim_data[,c(1,4,5,6,2,3)])#put it in the same order as center_env and scale object
  # Standardisation of the environmental variables
  clim_data_scale <- as.data.frame(scale(clim_data_order, center=center_env_value_new_cli, scale=scale_env_value_new_cli))
  
  assign(paste0("clim_df_",name,"_scale_",model),clim_data_scale)
  
  } else {
    
   raster_data <- get(paste0("raster_",name,"_clim"))
  
  clim_data <- raster::extract(raster_data, coords)
  clim_data_order <- data.frame(clim_data[,c(1,4,5,6,2,3)])
  clim_data_scale <- as.data.frame(scale(clim_data_order, center=center_env_value_new_cli, scale=scale_env_value_new_cli))
  
  assign(paste0("clim_df_",name,"_scale"),clim_data_scale) 
    }
  }
}

2.2 Genomic data

The genomic data set includes allele frequency corrected for MAC in the 29 populations.

#genomic data
load("Data/Species/Taxus_baccata/genetic_new/data_allelic_frequencies_29pop_adapcon_gentree_475_8616.Rdata")

genomic_matrix <- data_allelic_frequencies_29pop_adapcon_gentree_475_8616

2.3 Outliers sets

We created 3 set of outliers:

2.3.1 Random SNPs

One neutral set with loci selected at random, while maintaining the same number of SNPs for each class of allelic frequencies as in the outlier set (LC).

load("Results/species/taxus/GEA_new_var/outliers/random_set_taxus.Rdata")
list_random_SNPs_same_AF <- random_set_taxus$name_snps

#write.csv(list_random_SNPs_same_AF,"Results/species/taxus/GEA_new_var/outliers/list_random_SNPs_same_AF.csv")

2.3.2 Overlapping SNPs between at least two GEA methods

We used two sets of thresholds depending on the species: a less conservative threshold (LC; FDR < 10%, BF > 8, and the top 5% of SNPs in GF) and a more conservative threshold (MC; FDR < 5%, BF > 10, and the top 1% of SNPs in GF).

#set of less conservative thresholds
load("Results/species/taxus/GEA_new_var/outliers/outliers_set_final_overlapping_no_LD_LC_new_var.Rdata")

load("Results/species/taxus/GEA_new_var/outliers/outliers_set_final_overlapping_no_LD_new_var.Rdata")

2.3.3 all the outliers

load("Results/species/taxus/GEA_new_var/outliers/unique_outliers.Rdata")

3 Calculate GEA relationship

Our GDI and genomic offset analyses are based on the relationships between SNPs and the climatic variables identified by GEA methods. In this script, we use redundancy analysis (RDA).
First, we perform a GEA analysis on the SNP dataset to identify the relationships that can subsequently be interpolated or extrapolated. To achieve this, we ran an RDA using the SNP data and the selected climatic variables.

3.1 Models

Random set keeping the same AF

RDA_random_same_AF <- rda(formula = genomic_matrix[list_random_SNPs_same_AF] ~  Annual_Tc+Diurnal_range_Tc+Tc_Seasonality+Tc_driest_quarter+Annual_P+P_Seasonality, data = Climatic_data_RDA_pRDA, scale=F)

RsquareAdj(RDA_random_same_AF)

## $r.squared
## [1] 0.2871292
## 
## $adj.r.squared
## [1] 0.09270992

Less conservative outlier set (LC)

RDA_outliers_LC <- rda(formula = genomic_matrix[outliers_set_final_overlapping_no_LD_LC_new_var] ~  Annual_Tc+Diurnal_range_Tc+Tc_Seasonality+Tc_driest_quarter+Annual_P+P_Seasonality, data = Climatic_data_RDA_pRDA, scale=F)

RsquareAdj(RDA_outliers_LC)

## $r.squared
## [1] 0.5173666
## 
## $adj.r.squared
## [1] 0.3857393

More conservative outlier set (MC)

RDA_outliers_MC <- rda(formula = genomic_matrix[outliers_set_final_overlapping_no_LD_new_var] ~  Annual_Tc+Diurnal_range_Tc+Tc_Seasonality+Tc_driest_quarter+Annual_P+P_Seasonality, data = Climatic_data_RDA_pRDA, scale=F)

RsquareAdj(RDA_outliers_MC)

## $r.squared
## [1] 0.5947706
## 
## $adj.r.squared
## [1] 0.4842534

All outliers (all_outliers)

RDA_all_outliers <- rda(formula = genomic_matrix[unique_outliers] ~  Annual_Tc+Diurnal_range_Tc+Tc_Seasonality+Tc_driest_quarter+Annual_P+P_Seasonality, data = Climatic_data_RDA_pRDA, scale=F)

RsquareAdj(RDA_all_outliers)

## $r.squared
## [1] 0.4189863
## 
## $adj.r.squared
## [1] 0.260528

We can save the models to use them later to evaluate the models:

list_models <- c("RDA_random_same_AF","RDA_outliers_LC","RDA_outliers_MC","RDA_all_outliers")

for(x in 1:length(list_models)){
  
  name_model<- list_models[x]
  file_save<- get(name_model)
  name <- paste0("model_GEA_",name_model)
  assign(name,file_save)
  save(list = paste0("model_GEA_",name_model),file=paste0("Results/species/taxus/Genomic_offset/RDA/data/models/",name,".Rdata"))
}

To conduct further analysis, we will examine the different sets to see if the same patterns emerge.

list_outliers_set_V2 <- c("list_random_SNPs_same_AF","outliers_set_final_overlapping_no_LD_LC_new_var","outliers_set_final_overlapping_no_LD_new_var","unique_outliers")
list_RDA_V2 <- c("RDA_random_same_AF","RDA_outliers_LC","RDA_outliers_MC","RDA_all_outliers")

3.2 Graphical visualization

We can plot how the outliers are distributed along the RDA axes and the climatic variables.

for(x in 1: length(list_RDA_V2)){
  RDA <- get(list_RDA_V2[x])
  name <- c("RDA_random_same_AF","outliers_Lc","Outliers_Mc","all_outliers")
  #screeplot
plot(RDA$CCA$eig, option="screeplot")

#in hist
screeplot_RDA <- screeplot(RDA,main = paste0("Hist screeplot ",name[x]))

#explained variance along each RDA axis
explain_variance <-RDA$CCA$eig*100/sum(RDA$CCA$eig)

assign(paste0("explain_variance",name[x]),explain_variance)

  print(get(paste0("explain_variance",name[x])))
  #save
#pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/screeplot/Screeplot_RDA_",name[x],".pdf"));screeplot(RDA,main = paste0("Hist screeplot ",name[x]));dev.off()
}

##      RDA1      RDA2      RDA3      RDA4      RDA5      RDA6 
## 59.299725 24.467288  5.571946  4.712949  3.804574  2.143519

As can be seen, the first two RDA axes appear to explain most of the genetic variation of outliers across all sets. We will use these two axes in the further analysis of each set.

We can also plot the loci and climatic variables in the RDA space to visualise their relationships:

for(x in 1:length(list_RDA_V2)){
  
  RDA <- get(list_RDA_V2[x])
  name <- c("RDA_random_same_AF","outliers_Lc","Outliers_Mc","all_outliers")
  
  score_loci <- as.data.frame(scores(RDA, choices=c(1:2), display="species", scaling="none"))
score_climatic_var <- as.data.frame(scores(RDA, choices=c(1:2), display="bp"))
explained_variance_round <- round(RDA$CCA$eig*100/sum(RDA$CCA$eig),digits=1)

#Biplot with SNPs and climatic variables along the two first RDA axis. 
 biplot_outliers_RDA<- ggplot() +
  geom_hline(yintercept=0, linetype="dashed", color = gray(.80), size=0.6) +
  geom_vline(xintercept=0, linetype="dashed", color = gray(.80), size=0.6) +
  geom_point(data = score_loci, aes(x=RDA1*15, y=RDA2*15,color="locus"), size = 1.4) +
  geom_segment(data = score_climatic_var, aes(xend=RDA1, yend=RDA2, x=0, y=0), colour="black", size=0.15, linetype=1, arrow=arrow(length = unit(0.02, "npc"))) +
  geom_text(data = score_climatic_var, aes(x=1.1*RDA1, y=1.1*RDA2, label = row.names(score_climatic_var)), size = 2.5) +
  xlab(paste0("RDA 1 (",explained_variance_round[1],"%)")) + 
  ylab(paste0("RDA 2 (",explained_variance_round[2],"%)")) +
   ggtitle(paste0("Biplot RDA ",name[x])) +
  guides(color=guide_legend(title="Locus type")) +
  scale_color_manual(values ="#F9A242FF") +
  theme_bw(base_size = 11) +
  theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))
 
 print(biplot_outliers_RDA)
 
 #save plots
 
#pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/biplot/biplot_outliers_RDA_",name[x],".pdf"));print(biplot_outliers_RDA);dev.off()
}

We can plot populations in this space to assess whether their distribution follows the expected climatic pattern.

for(x in 1:length(list_RDA_V2)){
  RDA <- get(list_RDA_V2[x])
  name <- c("RDA_random_same_AF","outliers_Lc","Outliers_Mc","all_outliers")
  
  #score along the 2 first RDA axis
score_climatic_var <- as.data.frame(scores(RDA, choices=c(1:2), display="bp"))
#Score_population <- data.frame(RDA$CCA$u[,c(1,2)]) #predicted population scores
observed_scores_populations <- as.data.frame(scores(RDA,choices=c(1,2), display = "sites",scaling="none"))

#explained variance
explained_variance_round <- round(RDA$CCA$eig*100/sum(RDA$CCA$eig),digits=1)

#merge for country info
Score_population_bis <- rownames_to_column(observed_scores_populations,"Population")
score_with_country_info <- merge(Score_population_bis,meta_data_pop_order[,c(1,2)],"Population")
score_with_country_info$Country <- as.factor(score_with_country_info$Country)

group_palette <- c("Bosnia"="orangered3", "France"="gold2","Germany"= "darkorchid3", "Greece"="navyblue", "Italy"="turquoise2", "Norway"="green3", "Slovakia"="blue", "Slovenia"="red", "Spain"="black", "Sweden"="gray", "Switzerland"="orange", "UK"="darkgreen")

##Biplot with populations and climatic variables along the 2 first RDA axis
biplot_populations <- ggplot() +
  geom_hline(yintercept = 0, linetype = "dashed", color = gray(0.80), size = 0.6) +
  geom_vline(xintercept = 0, linetype = "dashed", color = gray(0.80), size = 0.6) +
  geom_point(data = score_with_country_info, aes(x = RDA1 * 3, y = RDA2 * 3, colour = Country), size = 2, alpha = 0.8) +
  geom_segment(data = score_climatic_var, aes(xend = RDA1, yend = RDA2, x = 0, y = 0), colour = "black", size = 0.15, linetype = 1, arrow = arrow(length = unit(0.02, "npc"))) +
  geom_text(data = score_climatic_var, aes(x=1.1*RDA1, y=1.1*RDA2, label = row.names(score_climatic_var)), size = 2.5)+
  xlab(paste0("RDA 1 (",explained_variance_round[1],"%)")) + 
  ylab(paste0("RDA 2 (",explained_variance_round[2],"%)")) +
  ggtitle(paste0("Biplot RDA Populations ",name[x])) +
  scale_color_manual(name = "Countries", values = group_palette, labels = levels(score_with_country_info$Country)) +
  theme_bw(base_size = 11) +
  theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))+
  labs(color = "Country")

 print(biplot_populations)
 
#save plots
 
#pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/biplot/biplot_population_RDA_",name[x],".pdf"));print(biplot_populations);dev.off()
}

4 Interpolate/extrapolate the GEA relationship to space and time to calculate the genomic composition

We can now calculate the predicted genomic composition under past, present, and future climatic conditions. Note that, unlike in the adaptive_index function, climatic variables must be scaled prior to applying this function. These predictions are then used to compute the genomic discrepancy index (GDI) and genomic offset.

list_rda <- c("RDA_random_same_AF","RDA_outliers_LC","RDA_outliers_MC","RDA_all_outliers")
list_period <- c("past","present","future")
list_set <- c("random_same_AF","LC","MC","all_outliers")

K=2 #number of rda axes retained

for(i in 1: length(list_rda)){
  
  rda_model <- get(list_rda[i])
  set <- list_set[i]
  
  for(x in 1: length(list_period)){
    
  name <- list_period[x]
  
  if(name == "future"){
  
  for(a in 1:length(list_models_clim)){
  
  models_clim <- list_models_clim[a]
  clim <- get(paste0("clim_df_",name,"_scale_",models_clim))
  predictions <- data.frame(Population=meta_data_pop_order$Population,predict(rda_model,clim, type = "lc", rank = K, scaling = "none"))
  assign(paste0("adaptive_values_",set,"_",name,"_",models_clim),predictions)
  }
  
  }else{
    clim <- get(paste0("clim_df_",name,"_scale"))
  #model
  predictions <- data.frame(Population=meta_data_pop_order$Population,predict(rda_model,clim, type = "lc", rank = K, scaling = "none"))
  
  assign(paste0("adaptive_values_",set,"_",name),predictions)
  }
  }
}

5 Genomic discrepancy index (GDI)

The GDI is an index that quantifies the difference between predicted and observed genomic compositions. This allows the identification of populations that deviate from expectations based on local adaptation relationships. Such deviations may indicate a lower degree of local adaptation or suggest that the relationship is not strictly linear (e.g. conditional neutrality), or that the SNPs used do not fully capture the genetic basis of local adaptation.

We calculated the GDI using the same approach as for genomic offset, by measuring the distance between two genomic compositions: here, the observed and predicted values at the time of establishment (1901–1950, hereafter referred to as “past”, except for F. excelsior, see Main Text).

5.1 Calculation

genomic_discrepancy_index <- function(RDA, K, observed_score, predicted_score,meta_data){
# Weights based on axis eigen values
  weights <- RDA$CCA$eig/sum(RDA$CCA$eig)
  
  # Weighing the current adaptive indices based on the eigen values of the associated axes
  Proj_offset_observed <- as.data.frame(do.call(cbind, lapply(1:K, function(x) observed_score[,x]*weights[x])))
  
  predicted_score_df <- predicted_score[,-1]
  Proj_offset_predicted <- as.data.frame(do.call(cbind, lapply(1:K, function(x) predicted_score_df[,x]*weights[x])))
  
  #Now we want to calculate the distance between present predicted and observed for each RDA axis before doing it for both axis simultaneously
  Proj_offset <- list() 
  for(i in 1:K){
  Proj_offset[[i]] <- abs(Proj_offset_observed[[i]] - Proj_offset_predicted[[i]])
      names(Proj_offset)[i] <- paste0("RDA", as.character(i))
  }
  
  # Predict GDI, incorporating the K first axes weighted by their eigen values
  ras <- Proj_offset[[1]] #we reused the format of the previous distance per RDA axis
  ras[!is.na(ras)] <- unlist(lapply(1:nrow(Proj_offset_observed), function(x) dist(rbind(Proj_offset_observed[x,], Proj_offset_predicted[x,]), method = "euclidean"))) #calculation of the euclidean distance on the non Na values of the previous distance -> that why we used the format of the previous distance, to be sure to only select the rows without Nas because they are not deal by euclidean distance,
  #the euclidean distance is still calculated on the weighted data (not the previous distance but on the genomic composition weighted)
  names(ras) <- "GDI"
  Proj_offset_global <- ras
  
  # Return prediction of GDI for each RDA axis and a global GDI for each population
  return(list(Population=meta_data$Population,Proj_offset = Proj_offset, Proj_offset_global = Proj_offset_global, weights = weights[1:K]))
}

list_rda <- c("RDA_random_same_AF","RDA_outliers_LC","RDA_outliers_MC","RDA_all_outliers")
list_predicted_adaptive_values <- c("adaptive_values_random_same_AF_past","adaptive_values_LC_past","adaptive_values_MC_past","adaptive_values_all_outliers_past")

for(x in 1: length(list_rda)){

  rda <- get(list_rda[x])
  observed_values <- scores(rda, display = "sites",scaling="none")
  predicted_values <- data.frame(get(list_predicted_adaptive_values[x]))
  set <- list_set[x]
  
  calculated_GDI <- genomic_discrepancy_index(rda,K,observed_values,predicted_values,meta_data_pop_order)
  
  genomic_offset_df<- data.frame(Population=unlist(calculated_GDI$Population),Genomic_offset_random=unlist(calculated_GDI$Proj_offset_global))

 colnames(genomic_offset_df) <- c("Population",paste0("values_",set)) 
  
  assign(paste0("GDI_index_",set),genomic_offset_df)
}

We can save the values of GDI.

5.2 Plot the GDI values

We can plot the GDI values for each population. As before, we can use the ‘cut_interval’ function (never use ‘cut_number’!) to rescale the values so that they match the colour scale used for other genomic offsets, such as GF.

shapefile <- st_read("Data/Species/Taxus_baccata/shapefiles/Taxus_baccata_plg_clip.shp")

list_period <- c("present","future")
list_set <- c("random_same_AF","LC","MC","all_outliers")

  for(x in 1: length(list_set)){
    
    set <- list_set[x]
    GDI_df <- get(paste0("GDI_index_",set))
    
     #first, we need to add the coordinates
GDI_coord <- merge(GDI_df,meta_data_pop_order[,c(2,4,5)],"Population")

#transform longitude and latitude to numeric variables
GDI_coord <- GDI_coord %>% mutate(Longitude=as.numeric(Longitude),Latitude=as.numeric(Latitude))

colors <- c("#005000","#85B22C","#FFFFBF","#F76D5E","#E51932")

#background map
admin <- ne_countries(scale = "medium", returnclass = "sf")
      
plot <- ggplot(data = GDI_coord) + 
  geom_sf(data = admin, fill = gray(0.92), size = 0) +
  geom_sf(data = shapefile, fill = "lightgrey", color = "black", size = 0.2) +  # Adding shapefile
  geom_point(aes(x = Longitude, y = Latitude, fill = cut_interval(GDI_coord[,2], n = 5)), shape = 21,size=3, color = "black") +
  #geom_point(aes(x = Longitude, y = Latitude, fill = cut(GDI_coord[,2], breaks=seq(global_min, global_max, length.out = 6), include.lowest = T)), shape = 21,size=3, color = "black") + #if we want to class with the same values the 4 models to compare color
  scale_fill_manual(
    values = colors,
    labels = c("0","","","","1"),
    drop = FALSE,na.translate = FALSE)+  # Ensure all levels are shown in the legend
  geom_sf(data = admin, fill = NA, size = 0.1) +
  coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +
  xlab("Longitude") + ylab("Latitude") +
  guides(fill = guide_legend(title = NULL)) +
  ggtitle(paste0("Adaptive genomic disruption across populations ",set)) +
  annotation_scale(location = "br", width_hint = 0.3) + # Scale bar at bottom left
  annotation_north_arrow(location = "tl", which_north = "true", 
                         pad_x = unit(-0.2, "cm"), pad_y = unit(0.08, "cm"),
                         style = north_arrow_fancy_orienteering) + # Compass rose
  theme_bw(base_size = 11) +
  theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))
      
print(plot) 

#save
     pdf(paste0("Results/species/taxus/GDI/RDA/figures/GDI_across_populations ",set,"_newcolors.pdf"));print(plot);dev.off()
}

list_set <- c("random_same_AF","LC","MC","all_outliers")

# Combine all GDI values into one dataframe
combined_data <- data.frame()

for (set in list_set) {
  GDI_df <- get(paste0("GDI_index_", set))
  colnames(GDI_df)[2] <- "GDI"  # Ensure second column is named GDI
  GDI_df$Set <- set
  combined_data <- rbind(combined_data, GDI_df[, c("GDI", "Set")])
}

# Density plot
density_plot <- ggplot(combined_data, aes(x = GDI, fill = Set)) +
  geom_density(alpha = 0.7, color = "black") +
  scale_fill_manual(values = c("darkred", "#E69F00", "#56B4E9", "darkgreen", "#009E73", "#CC79A7")) +
  #geom_vline(xintercept = 1, color = "red", linetype = "dashed", size = 1) +  # Optional threshold
  facet_wrap(~ Set, scales = "free_y", ncol = 2) +
  labs(
    title = "Density of Genomic Discrepancy Index (GDI) across datasets",
    x = "Genomic Discrepancy Index (GDI)",
    y = "Density"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "italic"),
    strip.text = element_text(size = 10),
    legend.position = "none"
  )

print(density_plot)

# Save to PDF
pdf("Results/species/taxus/GDI/RDA/figures/GDI_density_faceted.pdf")
print(density_plot)
dev.off()

## png 
##   2

list_set <- c("random_same_AF","LC","MC","all_outliers")

# Combine all GDI values into one dataframe
combined_data <- data.frame()

for (set in list_set) {
  GDI_df <- get(paste0("GDI_index_", set))
  colnames(GDI_df)[2] <- "GDI"  # Ensure second column is named GDI
  GDI_df$Set <- set
  combined_data <- rbind(combined_data, GDI_df[, c("GDI", "Set")])
}

# Plot: histogram of GDI values, faceted by set
plot <- ggplot(combined_data, aes(x = GDI)) +
  geom_histogram(aes(fill = Set), binwidth = 0.005, color = "black", alpha = 0.8) +
  scale_fill_manual(values = c("darkred", "#E69F00", "#56B4E9", "darkgreen", "#009E73", "#CC79A7")) +
  #geom_vline(xintercept = 1, color = "red", linetype = "dashed", size = 1) +  # Adjust or remove if not relevant
  facet_wrap(~ Set, scales = "free_y", ncol = 2) +
  labs(
    title = "Distribution of Genomic Discrepancy Index (GDI) across datasets",
    x = "Genomic Discrepancy Index (GDI)",
    y = "Number of Populations"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "italic"),
    strip.text = element_text(size = 10),
    legend.position = "none"
  )

print(plot)

# #Save
pdf("Results/species/taxus/GDI/RDA/figures/GDI_distribution_histogram_faceted.pdf")
print(plot)
dev.off()

## png 
##   2

# Prepare an empty data frame to collect means
mean_GDI_df <- data.frame(Set = character(), Mean_GDI = numeric(), stringsAsFactors = FALSE)

for (set in list_set) {
  GDI_df <- get(paste0("GDI_index_", set))
  colnames(GDI_df)[2] <- "GDI"
  
  mean_val <- mean(GDI_df$GDI, na.rm = TRUE)
  mean_GDI_df <- rbind(mean_GDI_df, data.frame(Set = set, Mean_GDI = mean_val))
}

# Plot
barplot <- ggplot(mean_GDI_df, aes(x = Set, y = Mean_GDI, fill = Set)) +
  geom_bar(stat = "identity", color = "black") +
  ylab("Mean Genomic Discrepancy Index") +
  xlab("Set") +
  ggtitle("Mean GDI per Dataset") +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "italic"),
    legend.position = "none"
  )

print(barplot)

# Save plot
ggsave(
  filename = "Results/species/taxus/GDI/RDA/figures/Mean_GDI_barplot_across_sets.pdf",
  plot = barplot, width = 8, height = 6
)

#df GDI and elevation
df_GDI_cor <- data.frame(GDI_random_same_AF=GDI_index_random_same_AF$values_random_same_AF,GDI_LC=GDI_index_LC$values_LC,GDI_MC=GDI_index_MC$values_MC,GDI_all_outliers=GDI_index_all_outliers$values_all_outliers,elevation=as.numeric(meta_data_pop_order$Elevation.DEM_90m.),Latitude=as.numeric(meta_data_pop_order$Latitude))

#correlation
correlation_GDI <- cor(df_GDI_cor)

corrplot(correlation_GDI, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)

pdf("Results/species/taxus/GDI/RDA/figures/comparison/correlation_GDI_values_RDA_taxus.pdf");corrplot(correlation_GDI, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()

## png 
##   2

6 Genomic offset

Genomic offset was not used in the study entitled Demographic history shapes forest tree vulnerability to climate change but it was used in the study Genetic signatures of population marginality in four European conifers.

In the RDA framework, genomic offset is defined as the Euclidean distance between future and reference genomic compositions (or other periods). To calculate this distance, the genomic composition is estimated along the first two RDA axes of the model, and the values for each population are weighted according to the proportion of genetic variation explained by these axes.

We adapted the function from Capblancq and Forester (2021) so that our analysis focuses solely on calculating genomic offset for sampled populations, without interpolating or extrapolating across unsampled areas.

6.1 Model

We modified the function used in Capblancq and Forester (2021) to calculate the genomic offset exclusively for our sampled points.

genomic_offset_pop <- function(RDA, K, Past_score, Future_score,meta_data){
# Weights based on axis eigen values
  weights <- RDA$CCA$eig/sum(RDA$CCA$eig)
  
  # Weighing the current and future adaptive indices based on the eigen values of the associated axes
  Past_score_df <- Past_score[,-c(1)]
  Proj_offset_past <- as.data.frame(do.call(cbind, lapply(1:K, function(x) Past_score_df[,x]*weights[x])))
  Future_score_df <- Future_score[,-c(1)]
  Proj_offset_fut <- as.data.frame(do.call(cbind, lapply(1:K, function(x) Future_score_df[,x]*weights[x])))
  
  #Now we want to calculate the distance between present and future for each RDA axis before doing it for both axis simultaneously
  Proj_offset <- list() 
  for(i in 1:K){
  Proj_offset[[i]] <- abs(Proj_offset_past[[i]] - Proj_offset_fut[[i]])
      names(Proj_offset)[i] <- paste0("RDA", as.character(i))
  }
  
  # Predict a global genetic offset, incorporating the K first axes weighted by their eigen values
  ras <- Proj_offset[[1]] #we reused the format of the previous distance per RDA axis
  ras[!is.na(ras)] <- unlist(lapply(1:nrow(Proj_offset_past), function(x) dist(rbind(Proj_offset_past[x,], Proj_offset_fut[x,]), method = "euclidean"))) #calculation of the euclidean distance on the non Na values of the previous distance -> that why we used the format of the previous distance, to be sure to only select the rows without Nas because they are not deal by euclidean distance,
  #the euclidean distance is still calculated on the weighted data (not the previous distance but on the genomic composition weighted)
  names(ras) <- "Global_offset"
  Proj_offset_global <- ras
  
  # Return prediction of genetic offset for each RDA axis and a global genetic offset for each population
  return(list(Population=meta_data$Population,Proj_offset = Proj_offset, Proj_offset_global = Proj_offset_global, weights = weights[1:K]))
}

We can run the GO function for all the set of genetic markers for present and future climate:

list_RDA <- c("RDA_random_same_AF", "RDA_outliers_LC", "RDA_outliers_MC","RDA_all_outliers")
list_past_score <- c("adaptive_values_random_same_AF_past", "adaptive_values_LC_past", "adaptive_values_MC_past","adaptive_values_all_outliers_past")
list_present_score <- c("adaptive_values_random_same_AF_present", "adaptive_values_LC_present", "adaptive_values_MC_present","adaptive_values_all_outliers_present")
list_name_present <- c("random_same_AF_present", "LC_present", "MC_present","all_outliers_present")
list_name_future <- c("random_same_AF_future", "LC_future", "MC_future","all_outliers_future")

# Loop through each RDA model
for (i in 1:length(list_RDA)) {
  # Get the RDA model, past score, present score, and future score
  RDA <- get(list_RDA[i])
  adaptive_past_score <- get(list_past_score[i])
  present_score <- get(list_present_score[i])
  
  # Calculate genomic offset for the present scenario
  name_present <- list_name_present[i]
  Run_genomic_offset_pop_present <- genomic_offset_pop(RDA, 2, adaptive_past_score, present_score, meta_data_pop_order)
  
  # Create a data frame for the present scenario genomic offset
  genomic_offset_df_present <- data.frame(Population = unlist(Run_genomic_offset_pop_present$Population),
                                          Genomic_offset_random = unlist(Run_genomic_offset_pop_present$Proj_offset_global))
  
  # Order the data frame by population and rename columns
  Genomic_offset_Taxus_Adapcon_Gentree_RDA_present <- genomic_offset_df_present[order(genomic_offset_df_present$Population), ]
  colnames(Genomic_offset_Taxus_Adapcon_Gentree_RDA_present) <- c("Population", paste0("GO_", name_present))
  
  # Assign the present result to a new variable
  assign(paste0("Genomic_offset_Taxus_Adapcon_Gentree_RDA_", name_present), Genomic_offset_Taxus_Adapcon_Gentree_RDA_present)
  
  # Calculate genomic offset for the future scenario
  for (j in 1:length(list_models_clim)) {  # Loop through 5 future models
    
    name_clim <- list_models_clim[j]
    name_future <- paste0(list_name_future[i],"_", name_clim)
    future_score <- get(paste0("adaptive_values_",list_name_future[i],"_", name_clim))
  Run_genomic_offset_pop_future <- genomic_offset_pop(RDA, 2, adaptive_past_score, future_score, meta_data_pop_order)
  
  # Create a data frame for the future scenario genomic offset
  genomic_offset_df_future <- data.frame(Population = unlist(Run_genomic_offset_pop_future$Population),
                                         Genomic_offset_random = unlist(Run_genomic_offset_pop_future$Proj_offset_global))
  
  # Order the data frame by population and rename columns
  Genomic_offset_Taxus_Adapcon_Gentree_RDA_future <- genomic_offset_df_future[order(genomic_offset_df_future$Population), ]
  colnames(Genomic_offset_Taxus_Adapcon_Gentree_RDA_future) <- c("Population", paste0("GO_", name_future))
  
  # Assign the future result to a new variable
  assign(paste0("Genomic_offset_Taxus_Adapcon_Gentree_RDA_",name_future), Genomic_offset_Taxus_Adapcon_Gentree_RDA_future)
  }
}

We can save the genomic offset values for downstream analysis.

6.2 Comparison genomic offset predictions across climatic models

list_set <- c("random_same_AF","LC","MC","all_outliers")

for(x in 1:length(list_set)){
  
  set <- list_set[x]
 df_tot_GO_predictions_clim_models_future <- data.frame(Population=GO_T_Adapcon_Gentree_RDA_random_same_AF_future_GFDL_ESM4[,1],GFDL_ESM4=get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_future_GFDL_ESM4"))[,2],IPSL_CM6A_LR=get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_future_IPSL_CM6A_LR"))[,2],MPI_ESM1_2_HR=get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_future_MPI_ESM1_2_HR"))[,2],MRI_ESM2_0=get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_future_MRI_ESM2_0"))[,2],UKESM1_0_LL=get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_future_UKESM1_0_LL"))[,2])#elevation=as.numeric(meta_data_pop_order$Elevation.DEM_90m.

correlation_future <- cor(df_tot_GO_predictions_clim_models_future[,-1])
corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)
  
pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/comparison/GCMs/correlation_future_predictions_GCMs_",set,".pdf"));corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()
}

list_models_clim<- c("GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL") 

for(x in 1:length(list_set)){
  
  set <- list_set[x]
  all_GO <- data.frame()
  
 for(i in 1:length(list_models_clim)){
   
   model <- list_models_clim[i]
   GO_data <- get(paste0("Genomic_offset_Taxus_Adapcon_Gentree_RDA_", set, "_future_", model))
   colnames(GO_data) <- c("Population","GO")
   all_GO <- rbind(all_GO,GO_data)
 }
    
  mean_GO <- aggregate(GO ~ Population, data = all_GO, FUN = mean, na.rm = TRUE)

colnames(mean_GO) <- c("Population", "Mean_GO")

assign(paste0("GO_T_Adapcon_Gentree_RDA_", set, "_future_mean"),mean_GO)

#save
file_save<- get(paste0("GO_T_Adapcon_Gentree_RDA_", set, "_future_mean"))
  name <- paste0("GO_T_Adapcon_Gentree_RDA_", set, "_future_mean")
save(list = name,file=paste0("Results/species/taxus/Genomic_offset/RDA/data/",name,".Rdata"))
}

list_models_clim<- c("mean","GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL") 

# List of SNP sets
list_sets <- c("random_same_AF","LC","MC","all_outliers")

# Initialize an empty list to store data for all SNP sets
plot_data <- list()

# Load the RColorBrewer package for dynamic color palettes
library(RColorBrewer)


color_palette <- c("#FF69B4","red","blue","orange","black","green")

# Initialize a vector to keep track of top populations across all GCMs

meta_data_pop$pop_abbrev <- c("VIS","BAU","SDF","PAT","CHO","VOU","OLY","ANC","PDT","UMB","VAL","BRA","HAR","GOR","BOM","UNS","BUJA","BUJG","CAR","RAS","SUE","SAV","BLI","HHR","JUR","WAL","CED","RWM","YEW")

meta_data_f <- meta_data_pop[order(meta_data_pop$Population),]
# Loop over each SNP set
for (x in 1:length(list_sets)) {
  
  set <- list_sets[x]
  # Initialize an empty data frame for all models in the current SNP set
  ranks_df <- data.frame()
  
  all_top_populations <- character(0)

  # Loop over each climatic model
  for (i in 1:length(list_models_clim)) {
    
    model <- list_models_clim[i]
    # Get the genomic offset data for the current SNP set and model
    GO_data <- get(paste0("GO_T_Adapcon_Gentree_RDA_", set, "_future_", model))
    colnames(GO_data) <- c("Population", "GO")
    
    GO_data_V2 <- merge(GO_data,meta_data_f[,c(1,2,7)],"Population")
    
    # Rank the populations by genomic offset (highest GO gets rank 1)
    GO_data_V2$Rank <- rank(-GO_data_V2$GO, ties.method = "first")
    
    # Add columns for the climatic model and SNP set
    GO_data_V2$Model <- model
    GO_data_V2$Set <- set
    
    # Append to ranks dataframe
    ranks_df <- rbind(ranks_df, GO_data_V2)
    
    # Find the top 1 population for the current model (highest GO = rank 1)
    top_1_population <- GO_data_V2 %>% top_n(1, GO) %>% pull(Population)
    
    # Add this top 1 population to the list of all top populations (avoid duplicates)
    all_top_populations <- unique(c(all_top_populations, top_1_population))
  }
  
  # Convert 'Model' and 'Set' to factors for plotting
  ranks_df$Model <- factor(ranks_df$Model, levels = list_models_clim)
  ranks_df$Set <- factor(ranks_df$Set, levels = list_sets)

  num_top_populations <- length(all_top_populations)
  if (num_top_populations > length(color_palette)) {
    color_palette <- colorRampPalette(brewer.pal(9, "Set1"))(num_top_populations)
  }
  
  # Assign a unique color to each of the top populations (those ranked 1 in any model)
  population_colors <- setNames(color_palette[1:num_top_populations], all_top_populations)
  
  # Create a new column to store color assignments (highlight only rank 1 populations for each GCM)
  ranks_df$Highlight <- ifelse(ranks_df$Population %in% all_top_populations, ranks_df$Population, "Other")
  
  # Map colors for top populations and grey for the rest
  ranks_df$Color <- ifelse(ranks_df$Highlight == "Other", "grey", population_colors[ranks_df$Highlight])
  
  # Store the data frame for this SNP set in the plot_data list
  plot_data[[set]] <- ranks_df
  
# Modify the plot generation code
p <- ggplot(ranks_df, aes(x = Model, y = Rank, group = Population)) +
  geom_line(aes(color = Highlight), alpha = 0.7, size = 0.8) +  # Color only top populations
  geom_point(aes(color = Highlight), size = 2) + 
  geom_text(
    data = ranks_df %>% filter(Model == list_models_clim[1]),
    aes(label = meta_data_f$pop_abbrev[match(Population, meta_data_f$Population)]),
    hjust = 1,
    vjust = 0,  
    size = 3.8, 
    nudge_x = -0.25,
    fontface= "italic"
  ) +
  scale_y_reverse() +  # Reverse rank (1 = top)
  scale_color_manual(values = c(population_colors, "Other" = "grey")) +  # Keep top populations colored, others grey
  labs(
    title = paste("RDA Rank Variability for SNP Set:", set),
    x = "Climatic Models", y = "Population Rank"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",  
    panel.grid.minor = element_blank(),
    strip.text = element_text(size = 10, face = "bold"),
    legend.title = element_text(size = 10),
              axis.title.x = element_text(size = 14),
              axis.title.y = element_text(size = 14),
              axis.text.x = element_text(size = 10),
              axis.text.y = element_text(size = 12),
              legend.text = element_text(size = 12)
  )

print(p)  # Display plot for this SNP set
  #save
     pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/comparison/GCMs/GO_rank_comparison_",set,".pdf"));print(p);dev.off()
  
}

We are going to use the mean of the GO predictions from the five GCMs as the future GO.

6.2.1 Comparison GO predictions across SNPs sets

6.2.2 Graphical visualization

We can plot the genomic offset values for each population. As before, we can use the ‘cut_interval’ function to rescale the values so that they align with the colour scale used for other genomic offsets, such as GF.

list_period <- c("present","future")
list_set <- c("random_same_AF","LC","MC","all_outliers")

for(i in 1:length(list_period)){
  
  period <- list_period[i]
  
  for(x in 1: length(list_set)){
    
    set <- list_set[x]
    
    if(period == "future"){
      
      for (j in 1:length(list_models_clim)) {
    
    model_clim <- list_models_clim[j]
    
    name_file <- paste0(set,"_",period,"_",model_clim)
    GO_df <- get(paste0("GO_T_Adapcon_Gentree_RDA_",name_file))
    
     #first, we need to add the coordinates
Genomic_offset_coord <- merge(GO_df,meta_data_pop_order[,c(2,4,5)],"Population")

#transform longitude and latitude to numeric variables
Genomic_offset_coord <- Genomic_offset_coord %>% mutate(Longitude=as.numeric(Longitude),Latitude=as.numeric(Latitude))

colors <- c("#005000","#85B22C","#FFFFBF","#F76D5E","#E51932")
#background map
admin <- ne_countries(scale = "medium", returnclass = "sf")
      
plot <- ggplot(data = Genomic_offset_coord) + 
  geom_sf(data = admin, fill = gray(0.92), size = 0) +
  geom_point(aes(x = Longitude, y = Latitude, fill = cut_interval(Genomic_offset_coord[,2], n = 5)), shape = 21,size=3, color = "black") +
  scale_fill_manual(
    values = colors,
    labels = c("low values","","","","high values"),
    drop = FALSE,na.translate = FALSE)+  # Ensure all levels are shown in the legend
  geom_sf(data = admin, fill = NA, size = 0.1) +
  coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +
  xlab("Longitude") + ylab("Latitude") +
  guides(fill = guide_legend(title = "Genomic offset")) +
  ggtitle(paste0("Genomic offset across populations ",set," ",period,"_",model_clim)) +
  theme_bw(base_size = 11) +
  theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))
      
print(plot) 

#save
     pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/GO/GCMs_separate_and_mean/future/GO_RDA_standard_across_populations_",set,"_",period,"_",model_clim,".pdf"));print(plot);dev.off()
      }
      
} else {
        
         name_file <- paste0(set,"_",period)
    GO_df <- get(paste0("GO_T_Adapcon_Gentree_RDA_",name_file))
    
     #first, we need to add the coordinates
Genomic_offset_coord <- merge(GO_df,meta_data_pop_order[,c(2,4,5)],"Population")

#transform longitude and latitude to numeric variables
Genomic_offset_coord <- Genomic_offset_coord %>% mutate(Longitude=as.numeric(Longitude),Latitude=as.numeric(Latitude))


colors <- c("#005000","#85B22C","#FFFFBF","#F76D5E","#E51932")
#background map
admin <- ne_countries(scale = "medium", returnclass = "sf")
      
plot <- ggplot(data = Genomic_offset_coord) + 
  geom_sf(data = admin, fill = gray(0.92), size = 0) +
  geom_point(aes(x = Longitude, y = Latitude, fill = cut_interval(Genomic_offset_coord[,2], n = 5)), shape = 21,size=3, color = "black") +
  scale_fill_manual(
    values = colors,
    labels = c("low values","","","","high values"),
    drop = FALSE,na.translate = FALSE)+  # Ensure all levels are shown in the legend
  geom_sf(data = admin, fill = NA, size = 0.1) +
  coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +
  xlab("Longitude") + ylab("Latitude") +
  guides(fill = guide_legend(title = "Genomic offset")) +
  ggtitle(paste0("Genomic offset across populations ",set," ",period)) +
  theme_bw(base_size = 11) +
  theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))
      
print(plot) 

#save
     pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/GO/GCMs_separate_and_mean/present/GO_RDA_standard_across_populations_",set,"_",period,".pdf"));print(plot);dev.off()
    } 
  }
}

6.2.3 Correlation GO values across models

#df
df_tot_GO_predictions_present <- data.frame(Population=GO_T_Adapcon_Gentree_RDA_random_same_AF_present[,1],GO_rand_same_AF_pres=GO_T_Adapcon_Gentree_RDA_random_same_AF_present[,2],GO_LC_pres=GO_T_Adapcon_Gentree_RDA_LC_present[,2],GO_MC_pres=GO_T_Adapcon_Gentree_RDA_MC_present[,2],GO_all_outliers_pres=GO_T_Adapcon_Gentree_RDA_all_outliers_present[,2],elevation=as.numeric(meta_data_pop_order$Elevation.DEM_90m.))

correlation_present <- cor(df_tot_GO_predictions_present[,-1])
corrplot(correlation_present, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)

df_tot_GO_predictions_future <- data.frame(Population=GO_T_Adapcon_Gentree_RDA_random_same_AF_present[,1],GO_rand_same_AF_Fut=GO_T_Adapcon_Gentree_RDA_random_same_AF_future_mean[,2],GO_LC_Fut=GO_T_Adapcon_Gentree_RDA_LC_future_mean[,2],GO_MC_Fut=GO_T_Adapcon_Gentree_RDA_MC_future_mean[,2],GO_all_outliers_Fut=GO_T_Adapcon_Gentree_RDA_all_outliers_future_mean[,2],elevation=as.numeric(meta_data_pop_order$Elevation.DEM_90m.))

correlation_future <- cor(df_tot_GO_predictions_future[,-1])
corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)

pdf("Results/species/taxus/Genomic_offset/RDA/figures/comparison/correlation_GO_present_values_RDA_T_set_period.pdf");corrplot(correlation_present, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()

## png 
##   2

pdf("Results/species/taxus/Genomic_offset/RDA/figures/comparison/correlation_GO_future_values_RDA_T_set_period.pdf");corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()

## png 
##   2

6.2.4 Temporal trend GO

We can also examine the GO trend over time to see if it is consistent.

list_set <- c("random_same_AF","LC","MC","all_outliers")

for(i in 1:length(list_set)){
  
  set <- list_set[i]
  
  period_1 <- c("present")
  period_2 <- c("future_mean")
  
  df_pres <- get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_",period_1))
  df_fut <- get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_",period_2))
  
  #df
df_f_pres <- data.frame(Population = df_pres[,1], Values = df_pres[,2])
df_f_fut <- data.frame(Population = df_fut[,1], Values = df_fut[,2])

#add period
df_f_pres$Period <- "Present"
df_f_fut$Period <- "Future"


#combine them
df_GO <- rbind(df_f_fut,df_f_pres)
df_GO$Period <- factor(df_GO$Period, levels = c("Present", "Future"))

#plot

trend_plot <- ggplot(df_GO, aes(x = Period, y = Values, group = Population)) +
  geom_line(aes(color = Population)) +   # Line plot, connecting the points for each population
  geom_point(aes(color = Population)) +  # Adding points for each value
  theme_minimal() +
  labs(title = paste0("Genomic offset trend across present and future ",set),
       x = "Period",
       y = "Genomic offset prediction") +
  theme(
    legend.position = "none",
    strip.text = element_text(size = 11),
    plot.title = element_text(hjust = 0.5, color = "Black", face = "italic")
    )

    plot(trend_plot)
#save
    pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/comparison/GO_pres_future/Scatterplot_comparison_trend_GO_present_future_",set,"_Taxus.pdf"));print(trend_plot);dev.off()
}

While the values may be similar, we should consider that the ranking of populations could differ. We will investigate this further using graphical visualisation of the ranks.

list_period <- c("present","future_mean")
list_set <- c("random_same_AF","LC","MC","all_outliers")

for(i in 1:length(list_period)){
  
  period <- list_period[i]
  
  # Loop through each set and compare it with all subsequent sets
  for(x in 1:(length(list_set)-1)){
    for(y in (x+1):length(list_set)){
      
      set1 <- list_set[x]
      set2 <- list_set[y]
      
      # Merge the two datasets
      GO_RDA_merge <- merge(get(paste0("GO_T_Adapcon_Gentree_RDA_",set1,"_",period)), 
                            get(paste0("GO_T_Adapcon_Gentree_RDA_",set2,"_",period)), 
                            by = "Population")

      # Create a dataframe for comparison
      GO_RDA_set_df <- data.frame(Population = GO_RDA_merge$Population, 
                                  GO_RDA_1 = GO_RDA_merge[,2], 
                                  GO_RDA_2 = GO_RDA_merge[,3])

      # Rank the values
      GO_RDA_set_df$rank_1 <- rank(GO_RDA_set_df$GO_RDA_1)
      GO_RDA_set_df$rank_2 <- rank(GO_RDA_set_df$GO_RDA_2)

      # Merge with metadata to include country information
      GO_RDA_set_df_meta <- merge(GO_RDA_set_df, meta_data_pop_order[,c(1,2)], by = "Population")
      GO_RDA_set_df_meta$Country <- as.factor(GO_RDA_set_df_meta$Country)

      # Create the scatter plot
      Scatterplot <- ggplot(GO_RDA_set_df_meta, aes(x = rank_1, y = rank_2)) +
        geom_point(aes(color = Country), size = 3) +
        scale_colour_manual(name = "Countries",
                            values = c("orangered3","gold2","darkorchid3","navyblue","turquoise2","green3","blue","red","black","gray","orange","darkgreen")) +
        geom_abline(intercept = 0, slope = 1, color = "gray60") +
        ggtitle(paste0("Comparison GO rank of populations RDA ", set1, "/", set2, " ", period)) + 
        theme_bw()
      
      # Print the plot
      print(Scatterplot)

      # Save the plot as a PDF
      pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/comparison/Scatterplot_comparison_rank_pop_",set1,"_",set2,"_",period,"_Taxus.pdf"));print(Scatterplot);dev.off()
    }
  }
}

Session info

packages <- c(
  "vegan", "dplyr", "robust", "qvalue",
  "rgdal", "sf","terra","ggplot2","radiant.data",
  "textshape","rnaturalearth","scales","raster","corrplot",
  "writexl","ggspatial"
)
info <- sessionInfo()
packages_used <- info$otherPkgs[packages]
data.frame(
  Package = names(packages_used),
  Version = sapply(packages_used, function(x) x$Version)
)

##                     Package Version
## vegan                 vegan   2.6-4
## dplyr                 dplyr   1.1.4
## robust               robust   0.7-4
## qvalue               qvalue  2.34.0
## rgdal                 rgdal   1.6-7
## sf                       sf  1.0-15
## terra                 terra  1.7-71
## ggplot2             ggplot2   3.5.2
## radiant.data   radiant.data   1.6.3
## textshape         textshape   1.7.5
## rnaturalearth rnaturalearth   1.0.1
## scales               scales   1.3.0
## raster               raster  3.6-26
## corrplot           corrplot    0.92
## writexl             writexl   1.5.0
## ggspatial         ggspatial   1.1.9

RDA_Genomic_offset

Thomas Francisco

2024-07-18