1 Introduction

This script uses gradient forest to calculate the genomic offset. The concept of genomic offset has already been explained in the RDA_genomic_offset script.

Here, the GEA relationship has been estimated using the Gradient Forest machine learning method (see the explanations in the GF_candidate_selection script).

2 Data

2.1 Climatic data

meta_data_pop <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Populations/taxus_sample_29pop.csv",h=T,sep=";",dec=",")
#alphabetic order
meta_data_pop_order <- meta_data_pop[order(meta_data_pop$Population),]

Climatic data need to be in a dataframe:

#Past climatic data
past_climatic <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Climatic_data/new_selection/Past_new_6_Climatic_data_scale_df.csv",sep=";",dec=",")
vars <- colnames(past_climatic[,-c(1:2)])

Present climatic data:

#Present climatic data
present_climatic <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/validation_GO/climatic_data/Present_climatic_data_T_adapcon_gentree_scaled.csv",sep=";",dec=",")

Future climatic data:

list_models_clim<- c("GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL") 

for(x in 1:length(list_models_clim)){
  
  clim_model <- list_models_clim[x]
  
  future_climatic <- read.csv(paste0("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Climatic_data/new_selection/future_climatic_data_scaled_",clim_model,".csv"),sep=";",dec=",")

  colnames(future_climatic) <- colnames(past_climatic)
  
  assign(paste0("future_climatic_",clim_model),future_climatic)
}

2.2 Genomic data

#genomic data
load("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/genetic_new/data_allelic_frequencies_29pop_adapcon_gentree_475_8616.Rdata")
genomic_matrix <- data_allelic_frequencies_29pop_adapcon_gentree_475_8616

2.3 Set of SNPs

We can also load the sets of retained SNPs.
We have several sets:
- One with random SNPs
- One with random SNPs with the same AF as the set of outliers (LC)
- One with less conservative thresholds
- One with more conservative thresholds
- One with the LC SNPs in common with the genomic data of the clonal bank populations
- One with random V2
- one with all outliers

list_set <- c("random_neutral_set_SNPs_T_adapcon_gentree_bis","random_set_taxus","outliers_set_final_overlapping_no_LD_LC_new_var","outliers_set_final_overlapping_no_LD_new_var","CG_common_set","random_set_taxus_not_overlap_both_dataset_V2","unique_outliers")

for(i in 1:length(list_set)){
  
  set <- list_set[i]
  
  load(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/GEA_new_var/outliers/",set,".Rdata"))
}
list_random_SNPs <- colnames(random_neutral_set_SNPs_T_adapcon_gentree_bis)
list_random_SNPs_same_AF <- random_set_taxus$name_snps

list_random_SNPs_same_AF_V2 <- random_set_taxus_not_overlap_both_dataset_V2$name_snps

3 GEA relationship

The next step is to calculate the relationship between the outliers and the climatic variables using the Gradient Forest (GF) nonlinear model. This follows the same principle as candidate selection with GF. However, here we summarise the information in the turnover function for each SNP, using all predictors to create a single turnover function for all response variables.

Model with all the SNPs:

#Run_GEA_GF_all <- gradientForest(data.frame(past_climatic[,vars],genomic_matrix), 
#                             predictor.vars=vars, 
#                             response.vars=colnames(genomic_matrix),
#                            corr.threshold=0.5, ntree=500, trace=T)

#length(Run_GEA_GF_all$result)

Other models:

#list_set <- c("list_random_SNPs","list_random_SNPs_same_AF","outliers_set_final_overlapping_no_LD_LC_new_var","outliers_set_final_overlapping_no_LD_new_var","CG_common_set")
#list_name <- c("random","random_same_AF","LC","MC","CG")

#for(i in 1:length(list_set)){
  
#  set <- get(list_set[i])
#  name <- list_name[i]
  
  #Run_GEA_GF <- gradientForest(data.frame(past_climatic[,vars],genomic_matrix[set]), 
#                             predictor.vars=vars, 
#                             response.vars=colnames(genomic_matrix[set]),
#                            corr.threshold=0.5, ntree=500, trace=T)

#length(Run_GEA_GF$result)
  
  #assign(paste0("Run_GEA_GF_",name),Run_GEA_GF)
#}

Model with random_same_AF V2 SNPs:

#Run_GEA_GF_random_same_AF_V2 <- gradientForest(data.frame(past_climatic[,vars],genomic_matrix[list_random_SNPs_same_AF_V2]), 
#                             predictor.vars=vars, 
#                             response.vars=colnames(genomic_matrix[list_random_SNPs_same_AF_V2]),
#                            corr.threshold=0.5, ntree=500, trace=T)

#length(Run_GEA_GF_random_same_AF_V2$result)

Model with all outliers SNPs:

#Run_GEA_GF_all_outliers <- gradientForest(data.frame(past_climatic[,vars],genomic_matrix[unique_outliers]), 
#                             predictor.vars=vars, 
#                             response.vars=colnames(genomic_matrix[unique_outliers]),
#                            corr.threshold=0.5, ntree=500, trace=T)

#length(Run_GEA_GF_all_outliers$result)

#list_model <- c("all","random","random_same_AF","LC","MC","CG")

#for(i in 1:length(list_model)){
  
#  model <- list_model[i]
  
#  save(list = paste0("Run_GEA_GF_", model), file = paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/RUNs/Run_GEA_GF_",model,".Rdata"))
#}

list_model <- c("all","random","random_same_AF","LC","MC","CG","random_same_AF_V2","all_outliers")

for(i in 1:length(list_model)){
  
  model <- list_model[i]
  
load(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/RUNs/Run_GEA_GF_",model,".Rdata"))
}

Example LC set: We can see that 83 of the 98 outliers are associated with some of the predictors. The next step is to use these 83 SNPs in Gradient Forest (GF) to estimate the genomic offset

4 Interpolate/extrapolate the GEA relationship and calculate genomic offset

Now that we have our GEA models, we can use interpolation and extrapolation to estimate the past and future genomic composition and calculate the genomic offset metric by relating the models to space and time. To achieve this, we adapted the function created by Matthew C. Fitzpatrick (GitHub: https://github.com/fitzLab-AL/geneticOffsetR/blob/main/poplarGBS.gf.supportFunctions.R).

4.1 Function

########## calculate adaptive offset for populations in space or time
genomic_offset_function <- function(gfMod, vars, env2, combined=F,
                       pops = envPop$pop_code, weighted=FALSE){
  
  #gfMod = gf model for prediction
  #vars = names of env variables
  #env2 = new environment (new place / time)
    transEnv2 <- predict(gfMod, env2[,vars]) #new env
    transEnv1 <- predict(gfMod) #current env
  
  #calculate Euclidean distance in transformed env space
  num <- nrow(transEnv1)
  dOut <- lapply(1:num, function(x, tEnv1, tEnv2){
    as.numeric(pdist(tEnv1[x,],  tEnv2[x,])@dist)}, tEnv2=transEnv2, tEnv1=transEnv1)
  return(dOut)
}

4.2 Models

We can apply this function to our dataset:

list_set <- c("Run_GEA_GF_all","Run_GEA_GF_random","Run_GEA_GF_random_same_AF","Run_GEA_GF_LC","Run_GEA_GF_MC","Run_GEA_GF_CG","Run_GEA_GF_random_same_AF_V2","Run_GEA_GF_all_outliers")
list_period <- c("present", "future")

#vars = past climatic data used in GEA

for(i in 1: length(list_period)){
  
  period <- list_period[i]

for(x in 1:length(list_set)){
  GF_run <- get(list_set[x])
  name <- c("all","random","random_same_AF","LC","MC","CG","random_AF_V2","all_outliers")
  
  if(period== "future"){
    
    list_models_clim<- c("GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL") 

for(j in 1:length(list_models_clim)){
  
  clim_model <- list_models_clim[j]
    
    climate_to_calculate_GO <- get(paste0(period,"_climatic_",clim_model))
  
Genomic_offset <- genomic_offset_function(gfMod=GF_run, vars=vars, env2=climate_to_calculate_GO[,vars], combined=F,
                       pops = row.names(genomic_matrix), weighted=FALSE)

#extraction GO values
Genomic_offset$values <- unlist(Genomic_offset)

genomic_offset_GF <- data.frame(Population=row.names(genomic_matrix),GO=Genomic_offset$values)
names(genomic_offset_GF)[2] <- paste0("genomic_offset_GF_",name[x])

assign(paste0("genomic_offset_GF_",name[x],"_",period,"_",clim_model),genomic_offset_GF)
    
}

  } else {
    
    climate_to_calculate_GO <- get(paste0(period,"_climatic"))
  
Genomic_offset <- genomic_offset_function(gfMod=GF_run, vars=vars, env2=climate_to_calculate_GO[,vars], combined=F,
                       pops = row.names(genomic_matrix), weighted=FALSE)

#extraction GO values
Genomic_offset$values <- unlist(Genomic_offset)

genomic_offset_GF <- data.frame(Population=row.names(genomic_matrix),GO=Genomic_offset$values)
names(genomic_offset_GF)[2] <- paste0("genomic_offset_GF_",name[x])

assign(paste0("genomic_offset_GF_",name[x],"_",period),genomic_offset_GF)
    
    }
  }
}

4.2.1 Comparison GO predictions across climatic models

list_set <- c("all","random","random_same_AF","LC","MC","CG","random_AF_V2","all_outliers")

for(x in 1:length(list_set)){
  
  set <- list_set[x]
  
 df_tot_GO_predictions_clim_models_future <- data.frame(Population=GO_T_Adapcon_Gentree_GF_random_future_GFDL_ESM4[,1],GFDL_ESM4=get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_future_GFDL_ESM4"))[,2],IPSL_CM6A_LR=get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_future_IPSL_CM6A_LR"))[,2],MPI_ESM1_2_HR=get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_future_MPI_ESM1_2_HR"))[,2],MRI_ESM2_0=get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_future_MRI_ESM2_0"))[,2],UKESM1_0_LL=get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_future_UKESM1_0_LL"))[,2],elevation=as.numeric(meta_data_pop_order$Elevation.DEM_90m.))

correlation_future <- cor(df_tot_GO_predictions_clim_models_future[,-1])
corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)
  
pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/comparison/GCMs/correlation_future_predictions_GCMs_",set,".pdf"));corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()
}

list_models_clim<- c("GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL") 

for(x in 1:length(list_set)){
  
  set <- list_set[x]
  all_GO <- data.frame()
  
 for(i in 1:length(list_models_clim)){
   
   model <- list_models_clim[i]
   GO_data <- get(paste0("GO_T_Adapcon_Gentree_GF_", set, "_future_", model))
   colnames(GO_data) <- c("Population","GO")
   all_GO <- rbind(all_GO,GO_data)
 }
    
  mean_GO <- aggregate(GO ~ Population, data = all_GO, FUN = mean, na.rm = TRUE)

colnames(mean_GO) <- c("Population", "Mean_GO")  # Rename the columns for clarity

assign(paste0("GO_T_Adapcon_Gentree_GF_", set, "_future_mean"),mean_GO)

file_save<- get(paste0("GO_T_Adapcon_Gentree_GF_", set, "_future_mean"))
  name <- paste0("GO_T_Adapcon_Gentree_GF_", set, "_future_mean")
  assign(name,file_save)
  save(list = name,file=paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/data/GCMs_separate/",name,".Rdata"))
}

list_models_clim<- c("mean","GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL") 

# List of SNP sets
list_sets <- c("all","random_same_AF","LC")#"random","MC","CG","random_AF_V2","all_outliers"

# Initialize an empty list to store data for all SNP sets
plot_data <- list()

# Load the RColorBrewer package for dynamic color palettes
library(RColorBrewer)


color_palette <- c("#FF69B4","red","blue","orange","black","green")

# Initialize a vector to keep track of top populations across all GCMs

meta_data_pop$pop_abbrev <- c("VIS","BAU","SDF","PAT","CHO","VOU","OLY","ANC","PDT","UMB","VAL","BRA","HAR","GOR","BOM","UNS","BUJA","BUJG","CAR","RAS","SUE","SAV","BLI","HHR","JUR","WAL","CED","RWM","YEW")

meta_data_f <- meta_data_pop[order(meta_data_pop$Population),]
# Loop over each SNP set
for (x in 1:length(list_sets)) {
  
  set <- list_sets[x]
  # Initialize an empty data frame for all models in the current SNP set
  ranks_df <- data.frame()
  
  all_top_populations <- character(0)

  # Loop over each climatic model
  for (i in 1:length(list_models_clim)) {
    
    model <- list_models_clim[i]
    # Get the genomic offset data for the current SNP set and model
    GO_data <- get(paste0("GO_T_Adapcon_Gentree_GF_", set, "_future_", model))
    colnames(GO_data) <- c("Population", "GO")
    
    GO_data_V2 <- merge(GO_data,meta_data_f[,c(1,2,7)],"Population")
    
    # Rank the populations by genomic offset (highest GO gets rank 1)
    GO_data_V2$Rank <- rank(-GO_data_V2$GO, ties.method = "first")
    
    # Add columns for the climatic model and SNP set
    GO_data_V2$Model <- model
    GO_data_V2$Set <- set
    
    # Append to ranks dataframe
    ranks_df <- rbind(ranks_df, GO_data_V2)
    
    # Find the top 1 population for the current model (highest GO = rank 1)
    top_1_population <- GO_data_V2 %>% top_n(1, GO) %>% pull(Population)
    
    # Add this top 1 population to the list of all top populations (avoid duplicates)
    all_top_populations <- unique(c(all_top_populations, top_1_population))
  }
  
  # Convert 'Model' and 'Set' to factors for plotting
  ranks_df$Model <- factor(ranks_df$Model, levels = list_models_clim)
  ranks_df$Set <- factor(ranks_df$Set, levels = list_sets)
  
  # Adjust the number of colors if there are more top populations than the current color palette
  num_top_populations <- length(all_top_populations)
  if (num_top_populations > length(color_palette)) {
    color_palette <- colorRampPalette(brewer.pal(9, "Set1"))(num_top_populations)  # Use a different palette if needed
  }
  
  # Assign a unique color to each of the top populations (those ranked 1 in any model)
  population_colors <- setNames(color_palette[1:num_top_populations], all_top_populations)
  
  # Create a new column to store color assignments (highlight only rank 1 populations for each GCM)
  ranks_df$Highlight <- ifelse(ranks_df$Population %in% all_top_populations, ranks_df$Population, "Other")
  
  # Map colors for top populations and grey for the rest
  ranks_df$Color <- ifelse(ranks_df$Highlight == "Other", "grey", population_colors[ranks_df$Highlight])
  
  # Store the data frame for this SNP set in the plot_data list
  plot_data[[set]] <- ranks_df
  
# Modify the plot generation code
p <- ggplot(ranks_df, aes(x = Model, y = Rank, group = Population)) +
  geom_line(aes(color = Highlight), alpha = 0.7, size = 0.8) +  # Color only top populations
  geom_point(aes(color = Highlight), size = 2) +  # Color only top populations
  geom_text(
    data = ranks_df %>% filter(Model == list_models_clim[1]),  # Label only in first column
    aes(label = meta_data_f$pop_abbrev[match(Population, meta_data_f$Population)]),
    hjust = 1,  # Align to the left
    vjust = 0,  
    size = 3.8,   # Increase text size
    nudge_x = -0.25,
    fontface= "italic"
    # Move text further left
  ) +
  scale_y_reverse() +  # Reverse rank (1 = top)
  scale_color_manual(values = c(population_colors, "Other" = "grey")) +  # Keep top populations colored, others grey
  labs(
    title = paste("GF Rank Variability for SNP Set:", set),
    x = "Climatic Models", y = "Population Rank"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",  
    panel.grid.minor = element_blank(),
    strip.text = element_text(size = 10, face = "bold"),
    legend.title = element_text(size = 10),
              axis.title.x = element_text(size = 14),
              axis.title.y = element_text(size = 14),
              axis.text.x = element_text(size = 10),
              axis.text.y = element_text(size = 12),
              legend.text = element_text(size = 12)
  )

print(p)  # Display plot for this SNP set
  #save
     pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/comparison/GCMs/GO_rank_comparison_",set,".pdf"));print(p);dev.off()
}

We are going to use the mean of the GO predictions across the 5 GCMs as future GO

4.3 Graphical representations

list_models_clim<- c("mean","GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL") 

list_period <- c("present","future")
list_set <- c("all","random","random_same_AF","LC","MC","CG","random_AF_V2","all_outliers")

for(i in 1:length(list_period)){
  
  period <- list_period[i]
  
  for(x in 1: length(list_set)){
    
    set <- list_set[x]
    
    if(period == "future"){
      
      for (j in 1:length(list_models_clim)) {
    
    model_clim <- list_models_clim[j]
    
    name_file <- paste0(set,"_",period,"_",model_clim)
    GO_df <- get(paste0("GO_T_Adapcon_Gentree_GF_",name_file))
    
     #first, we need to add the coordinates
Genomic_offset_coord <- merge(GO_df,meta_data_pop_order[,c(2,4,5)],"Population")

#transform longitude and latitude to numeric variables
Genomic_offset_coord <- Genomic_offset_coord %>% mutate(Longitude=as.numeric(Longitude),Latitude=as.numeric(Latitude))


colors <- c( "darkgreen", "#FDF7B3","#FC4E2A","#BD0026","darkorchid4")
#background map
admin <- ne_countries(scale = "medium", returnclass = "sf")
      
plot <- ggplot(data = Genomic_offset_coord) + 
  geom_sf(data = admin, fill = gray(0.92), size = 0) +
  geom_point(aes(x = Longitude, y = Latitude, fill = cut_number(Genomic_offset_coord[,2], n = 5)), shape = 21,size=3, color = "black") +
  scale_fill_manual(
    values = colors,
    labels = c("low values","","","","high values"),
    drop = FALSE,na.translate = FALSE)+  # Ensure all levels are shown in the legend
  geom_sf(data = admin, fill = NA, size = 0.1) +
  coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +
  xlab("Longitude") + ylab("Latitude") +
  guides(fill = guide_legend(title = "Genomic offset")) +
  ggtitle(paste0("Genomic offset across populations ",set," ",period,"_",model_clim)) +
  theme_bw(base_size = 11) +
  theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))
      
print(plot) 

#save
     pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/GO_separate_GCM_and_mean/Genomic_offset_across_populations_",set,"_",period,"_",model_clim,".pdf"));print(plot);dev.off()
      }
      
} else {
        
         name_file <- paste0(set,"_",period)
    GO_df <- get(paste0("GO_T_Adapcon_Gentree_GF_",name_file))
    
     #first, we need to add the coordinates
Genomic_offset_coord <- merge(GO_df,meta_data_pop_order[,c(2,4,5)],"Population")

#transform longitude and latitude to numeric variables
Genomic_offset_coord <- Genomic_offset_coord %>% mutate(Longitude=as.numeric(Longitude),Latitude=as.numeric(Latitude))


colors <- c( "darkgreen", "#FDF7B3","#FC4E2A","#BD0026","darkorchid4")
#background map
admin <- ne_countries(scale = "medium", returnclass = "sf")
      
plot <- ggplot(data = Genomic_offset_coord) + 
  geom_sf(data = admin, fill = gray(0.92), size = 0) +
  geom_point(aes(x = Longitude, y = Latitude, fill = cut_number(Genomic_offset_coord[,2], n = 5)), shape = 21,size=3, color = "black") +
  scale_fill_manual(
    values = colors,
    labels = c("low values","","","","high values"),
    drop = FALSE,na.translate = FALSE)+  # Ensure all levels are shown in the legend
  geom_sf(data = admin, fill = NA, size = 0.1) +
  coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +
  xlab("Longitude") + ylab("Latitude") +
  guides(fill = guide_legend(title = "Genomic offset")) +
  ggtitle(paste0("Genomic offset across populations ",set," ",period)) +
  theme_bw(base_size = 11) +
  theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))
      
print(plot) 

#save
     pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/GO_separate_GCM_and_mean/Genomic_offset_across_populations_",set,"_",period,".pdf"));print(plot);dev.off()
        
    } 
  }
}

Based on these genomic offset values, we can create similar plots to the ones for RDA GO, displaying the values for each population.

4.4 Correlation across genomic offset prediction

We observed that there is no clear pattern in the genomic offset; it does not correlate with continentality, coastal proximity, or other discernible patterns. We hypothesized that elevation might influence genomic offset, with populations at higher altitudes potentially exhibiting higher genomic offsets.

#load elevation
elevation_data <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Climatic_data/ClimateDT_extraction/extraction_climateDT_29pop_T.csv",h=T,sep=";",dec=",")

#merge elevation data with GO
data_GO_altitude <- data.frame(merge(genomic_offset_GF_LC_present,elevation_data,"Population"))
data_GO_altitude_df <- data_GO_altitude %>% 
  mutate(Longitude=as.numeric(Longitude),Latitude=as.numeric(Latitude),elevation=as.numeric(Elevation.DEM_90m.))

#df
df_tot_GO_predictions_present <- data.frame(Population=GO_T_Adapcon_Gentree_GF_random_present[,1],GO_all_pres=GO_T_Adapcon_Gentree_GF_all_present[,2],GO_rand_pres=GO_T_Adapcon_Gentree_GF_random_present[,2],GO_rand_same_AF_pres=GO_T_Adapcon_Gentree_GF_random_same_AF_present[,2],GO_LC_pres=GO_T_Adapcon_Gentree_GF_LC_present[,2],GO_MC_pres=GO_T_Adapcon_Gentree_GF_MC_present[,2],GO_CG_pres=GO_T_Adapcon_Gentree_GF_CG_present[,2],elevation=data_GO_altitude_df$elevation)

correlation_present <- cor(df_tot_GO_predictions_present[,-1])
corrplot(correlation_present, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)

df_tot_GO_predictions_future <- data.frame(Population=GO_T_Adapcon_Gentree_GF_random_present[,1],GO_all_Fut=GO_T_Adapcon_Gentree_GF_all_future_mean[,2],GO_rand_Fut=GO_T_Adapcon_Gentree_GF_random_future_mean[,2],GO_rand_same_AF_Fut=GO_T_Adapcon_Gentree_GF_random_same_AF_future_mean[,2],GO_LC_Fut=GO_T_Adapcon_Gentree_GF_LC_future_mean[,2],GO_MC_Fut=GO_T_Adapcon_Gentree_GF_MC_future_mean[,2],GO_CG_Fut=GO_T_Adapcon_Gentree_GF_CG_future_mean[,2],GO_random_AF_V2_Fut=GO_T_Adapcon_Gentree_GF_random_AF_V2_future_mean[,2],GO_all_outliers_Fut=GO_T_Adapcon_Gentree_GF_all_outliers_future_mean[,2],elevation=data_GO_altitude_df$elevation)

correlation_future <- cor(df_tot_GO_predictions_future[,-1])
corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)

pdf("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/comparison/correlation_GO_present_values_GF_T_set_period.pdf");corrplot(correlation_present, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()

## png 
##   2

pdf("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/comparison/correlation_GO_future_values_GF_T_set_period.pdf");corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()

## png 
##   2

We observed a positive correlation between genomic offset and elevation, which vary depending on the set of SNPs used.

More generally, we can see the same results as for RDA for the correlation between models and periods. For the MC and LC sets, we have almost the same GO values regardless of the period. For the comparison neutral vs outliers set, we have also similar results with an overall correlation of 0.77 and 0.76.
We can investigate the rank of populations according to their genomic offset values to further investigate these findings.

4.5 Rank

list_period <- c("present","future_mean")
list_set <- c("all","random","random_same_AF","LC","MC","CG")

for(i in 1:length(list_period)){
  
  period <- list_period[i]
  
  # Loop through each set and compare it with all subsequent sets
  for(x in 1:(length(list_set)-1)){
    for(y in (x+1):length(list_set)){
      
      set1 <- list_set[x]
      set2 <- list_set[y]
      
      # Merge the two datasets
      GO_RDA_merge <- merge(get(paste0("GO_T_Adapcon_Gentree_GF_",set1,"_",period)), 
                            get(paste0("GO_T_Adapcon_Gentree_GF_",set2,"_",period)), 
                            by = "Population")

      # Create a dataframe for comparison
      GO_RDA_set_df <- data.frame(Population = GO_RDA_merge$Population, 
                                  GO_RDA_1 = GO_RDA_merge[,2], 
                                  GO_RDA_2 = GO_RDA_merge[,3])

      # Rank the values
      GO_RDA_set_df$rank_1 <- rank(GO_RDA_set_df$GO_RDA_1)
      GO_RDA_set_df$rank_2 <- rank(GO_RDA_set_df$GO_RDA_2)

      # Merge with metadata to include country information
      GO_RDA_set_df_meta <- merge(GO_RDA_set_df, meta_data_pop_order[,c(1,2)], by = "Population")
      GO_RDA_set_df_meta$Country <- as.factor(GO_RDA_set_df_meta$Country)

      # Create the scatter plot
      Scatterplot <- ggplot(GO_RDA_set_df_meta, aes(x = rank_1, y = rank_2)) +
        geom_point(aes(color = Country), size = 3) +
        scale_colour_manual(name = "Countries",
                            values = c("orangered3","gold2","darkorchid3","navyblue","turquoise2","green3","blue","red","black","gray","orange","darkgreen")) +
        geom_abline(intercept = 0, slope = 1, color = "gray60") +
        ggtitle(paste0("Comparison GO rank of populations GF ", set1, "/", set2, " ", period)) + 
        theme_bw()
      
      # Print the plot
      print(Scatterplot)

      # Save the plot as a PDF
  pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/comparison/Scatterplot_comparison_rank_pop_",set,"_",set2,"_",period,"_Taxus.pdf"));print(Scatterplot);dev.off()
    }
  }
}

4.6 Temporal trend

We can also examine the trend of maladaptation to determine whether populations currently exhibiting higher predicted maladaptation will continue to show the same pattern in the future.

list_set <- c("all","random","random_same_AF","LC","MC","CG")

for(i in 1:length(list_set)){
  
  set <- list_set[i]
  
  period_1 <- c("present")
  period_2 <- c("future_mean")
  
  df_pres <- get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_",period_1))
  df_fut <- get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_",period_2))
  
  #df
df_f_pres <- data.frame(Population = df_pres[,1], Values = df_pres[,2])
df_f_fut <- data.frame(Population = df_fut[,1], Values = df_fut[,2])

#add period
df_f_pres$Period <- "Present"
df_f_fut$Period <- "Future"

#combine them
df_GO <- rbind(df_f_fut,df_f_pres)
df_GO$Period <- factor(df_GO$Period, levels = c("Present", "Future"))

#plot

trend_plot <- ggplot(df_GO, aes(x = Period, y = Values, group = Population)) +
  geom_line(aes(color = Population)) +   # Line plot, connecting the points for each population
  geom_point(aes(color = Population)) +  # Adding points for each value
  theme_minimal() +
  labs(title = paste0("Genomic offset trend across present and future ",set),
       x = "Period",
       y = "Genomic offset prediction") +
  theme(
    legend.position = "none",
    strip.text = element_text(size = 11),
    plot.title = element_text(hjust = 0.5, color = "Black", face = "italic")
    )

    plot(trend_plot)
#save
    pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/comparison/GO_pres_future/Scatterplot_comparison_trend_GO_present_future_",set,"_Taxus.pdf"));print(trend_plot);dev.off()
}

5 Global conclusion

As the LC and MC sets are almost identical, we will only perform further analyses on the LC set.

devtools::session_info()

## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.2 (2023-10-31 ucrt)
##  os       Windows 11 x64 (build 26100)
##  system   x86_64, mingw32
##  ui       RTerm
##  language (EN)
##  collate  French_France.utf8
##  ctype    French_France.utf8
##  tz       Europe/Paris
##  date     2025-09-22
##  pandoc   3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package           * version date (UTC) lib source
##  bslib               0.6.1   2023-11-28 [1] CRAN (R 4.3.2)
##  cachem              1.0.8   2023-05-01 [1] CRAN (R 4.3.1)
##  class               7.3-22  2023-05-03 [2] CRAN (R 4.3.2)
##  classInt            0.4-10  2023-09-05 [1] CRAN (R 4.3.2)
##  cli                 3.6.1   2023-03-23 [1] CRAN (R 4.3.1)
##  codetools           0.2-19  2023-02-01 [2] CRAN (R 4.3.2)
##  colorspace          2.1-0   2023-01-23 [1] CRAN (R 4.3.1)
##  corrplot          * 0.92    2021-11-18 [1] CRAN (R 4.3.2)
##  DBI                 1.2.2   2024-02-16 [1] CRAN (R 4.3.2)
##  devtools            2.4.5   2022-10-11 [1] CRAN (R 4.3.3)
##  digest              0.6.33  2023-07-07 [1] CRAN (R 4.3.1)
##  dplyr             * 1.1.4   2023-11-17 [1] CRAN (R 4.3.2)
##  e1071               1.7-14  2023-12-06 [1] CRAN (R 4.3.2)
##  ellipsis            0.3.2   2021-04-29 [1] CRAN (R 4.3.1)
##  evaluate            0.23    2023-11-01 [1] CRAN (R 4.3.2)
##  extendedForest      1.6.2   2023-12-12 [1] R-Forge (R 4.3.2)
##  farver              2.1.2   2024-05-13 [1] CRAN (R 4.3.3)
##  fastmap             1.1.1   2023-02-24 [1] CRAN (R 4.3.1)
##  fs                  1.6.3   2023-07-20 [1] CRAN (R 4.3.1)
##  generics            0.1.4   2025-05-09 [1] CRAN (R 4.3.2)
##  ggplot2           * 3.5.2   2025-04-09 [1] CRAN (R 4.3.2)
##  glue                1.7.0   2024-01-09 [1] CRAN (R 4.3.2)
##  gradientForest    * 0.1-37  2023-08-19 [1] R-Forge (R 4.3.1)
##  gtable              0.3.6   2024-10-25 [1] CRAN (R 4.3.3)
##  highr               0.10    2022-12-22 [1] CRAN (R 4.3.1)
##  htmltools           0.5.7   2023-11-03 [1] CRAN (R 4.3.2)
##  htmlwidgets         1.6.4   2023-12-06 [1] CRAN (R 4.3.2)
##  httpuv              1.6.13  2023-12-06 [1] CRAN (R 4.3.2)
##  httr                1.4.7   2023-08-15 [1] CRAN (R 4.3.2)
##  jquerylib           0.1.4   2021-04-26 [1] CRAN (R 4.3.1)
##  jsonlite            1.8.8   2023-12-04 [1] CRAN (R 4.3.2)
##  KernSmooth          2.23-22 2023-07-10 [2] CRAN (R 4.3.2)
##  knitr               1.45    2023-10-30 [1] CRAN (R 4.3.2)
##  labeling            0.4.3   2023-08-29 [1] CRAN (R 4.3.1)
##  later               1.3.2   2023-12-06 [1] CRAN (R 4.3.2)
##  lattice             0.22-5  2023-10-24 [2] CRAN (R 4.3.2)
##  lifecycle           1.0.4   2023-11-07 [1] CRAN (R 4.3.2)
##  magrittr            2.0.3   2022-03-30 [1] CRAN (R 4.3.1)
##  memoise             2.0.1   2021-11-26 [1] CRAN (R 4.3.1)
##  mime                0.12    2021-09-28 [1] CRAN (R 4.3.1)
##  miniUI              0.1.1.1 2018-05-18 [1] CRAN (R 4.3.2)
##  munsell             0.5.1   2024-04-01 [1] CRAN (R 4.3.3)
##  pdist             * 1.2.1   2022-05-02 [1] CRAN (R 4.3.1)
##  pillar              1.10.2  2025-04-05 [1] CRAN (R 4.3.3)
##  pkgbuild            1.4.3   2023-12-10 [1] CRAN (R 4.3.2)
##  pkgconfig           2.0.3   2019-09-22 [1] CRAN (R 4.3.2)
##  pkgload             1.3.4   2024-01-16 [1] CRAN (R 4.3.2)
##  profvis             0.3.8   2023-05-02 [1] CRAN (R 4.3.2)
##  promises            1.2.1   2023-08-10 [1] CRAN (R 4.3.2)
##  proxy               0.4-27  2022-06-09 [1] CRAN (R 4.3.2)
##  purrr               1.0.2   2023-08-10 [1] CRAN (R 4.3.2)
##  R6                  2.6.1   2025-02-15 [1] CRAN (R 4.3.3)
##  RColorBrewer      * 1.1-3   2022-04-03 [1] CRAN (R 4.3.1)
##  Rcpp                1.0.11  2023-07-06 [1] CRAN (R 4.3.1)
##  remotes             2.5.0   2024-03-17 [1] CRAN (R 4.3.3)
##  rlang               1.1.3   2024-01-10 [1] CRAN (R 4.3.2)
##  rmarkdown           2.25    2023-09-18 [1] CRAN (R 4.3.1)
##  rnaturalearth     * 1.0.1   2023-12-15 [1] CRAN (R 4.3.2)
##  rnaturalearthdata   1.0.0   2024-02-09 [1] CRAN (R 4.3.2)
##  rstudioapi          0.15.0  2023-07-07 [1] CRAN (R 4.3.2)
##  sass                0.4.8   2023-12-06 [1] CRAN (R 4.3.2)
##  scales              1.3.0   2023-11-28 [1] CRAN (R 4.3.2)
##  sessioninfo         1.2.2   2021-12-06 [1] CRAN (R 4.3.2)
##  sf                  1.0-15  2023-12-18 [1] CRAN (R 4.3.2)
##  shiny               1.8.0   2023-11-17 [1] CRAN (R 4.3.2)
##  stringi             1.8.3   2023-12-11 [1] CRAN (R 4.3.2)
##  stringr             1.5.1   2023-11-14 [1] CRAN (R 4.3.2)
##  terra               1.7-71  2024-01-31 [1] CRAN (R 4.3.2)
##  tibble              3.2.1   2023-03-20 [1] CRAN (R 4.3.2)
##  tidyselect          1.2.1   2024-03-11 [1] CRAN (R 4.3.3)
##  units               0.8-5   2023-11-28 [1] CRAN (R 4.3.2)
##  urlchecker          1.0.1   2021-11-30 [1] CRAN (R 4.3.2)
##  usethis             2.2.3   2024-02-19 [1] CRAN (R 4.3.2)
##  vctrs               0.6.5   2023-12-01 [1] CRAN (R 4.3.2)
##  withr               3.0.2   2024-10-28 [1] CRAN (R 4.3.3)
##  xfun                0.41    2023-11-01 [1] CRAN (R 4.3.2)
##  xtable              1.8-4   2019-04-21 [1] CRAN (R 4.3.2)
##  yaml                2.3.8   2023-12-11 [1] CRAN (R 4.3.2)
## 
##  [1] C:/Users/tfrancisco/AppData/Local/R/win-library/4.3
##  [2] C:/Users/tfrancisco/AppData/Local/Programs/R/R-4.3.2/library
## 
## ──────────────────────────────────────────────────────────────────────────────

GF_genomic_offset

Thomas Francisco

2024-05-15