This script uses gradient forest to calculate the genomic offset. The concept of genomic offset has already been explained in the RDA_genomic_offset script.
Here, the GEA relationship has been estimated using the Gradient Forest machine learning method (see the explanations in the GF_candidate_selection script).
meta_data_pop <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Populations/taxus_sample_29pop.csv",h=T,sep=";",dec=",")
#alphabetic order
meta_data_pop_order <- meta_data_pop[order(meta_data_pop$Population),]
Climatic data need to be in a dataframe:
#Past climatic data
past_climatic <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Climatic_data/new_selection/Past_new_6_Climatic_data_scale_df.csv",sep=";",dec=",")
vars <- colnames(past_climatic[,-c(1:2)])
#Present climatic data
present_climatic <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/validation_GO/climatic_data/Present_climatic_data_T_adapcon_gentree_scaled.csv",sep=";",dec=",")
list_models_clim<- c("GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL")
for(x in 1:length(list_models_clim)){
clim_model <- list_models_clim[x]
future_climatic <- read.csv(paste0("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Climatic_data/new_selection/future_climatic_data_scaled_",clim_model,".csv"),sep=";",dec=",")
colnames(future_climatic) <- colnames(past_climatic)
assign(paste0("future_climatic_",clim_model),future_climatic)
}
#genomic data
load("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/genetic_new/data_allelic_frequencies_29pop_adapcon_gentree_475_8616.Rdata")
genomic_matrix <- data_allelic_frequencies_29pop_adapcon_gentree_475_8616
We can also load the sets of retained SNPs.
We have several sets:
- One with random SNPs
- One with random SNPs with the same AF as the set of outliers
(LC)
- One with less conservative thresholds
- One with more conservative thresholds
- One with the LC SNPs in common with the genomic data of the clonal
bank populations
- One with random V2
- one with all outliers
list_set <- c("random_neutral_set_SNPs_T_adapcon_gentree_bis","random_set_taxus","outliers_set_final_overlapping_no_LD_LC_new_var","outliers_set_final_overlapping_no_LD_new_var","CG_common_set","random_set_taxus_not_overlap_both_dataset_V2","unique_outliers")
for(i in 1:length(list_set)){
set <- list_set[i]
load(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/GEA_new_var/outliers/",set,".Rdata"))
}
list_random_SNPs <- colnames(random_neutral_set_SNPs_T_adapcon_gentree_bis)
list_random_SNPs_same_AF <- random_set_taxus$name_snps
list_random_SNPs_same_AF_V2 <- random_set_taxus_not_overlap_both_dataset_V2$name_snps
The next step is to calculate the relationship between the outliers and the climatic variables using the Gradient Forest (GF) nonlinear model. This follows the same principle as candidate selection with GF. However, here we summarise the information in the turnover function for each SNP, using all predictors to create a single turnover function for all response variables.
#Run_GEA_GF_all <- gradientForest(data.frame(past_climatic[,vars],genomic_matrix),
# predictor.vars=vars,
# response.vars=colnames(genomic_matrix),
# corr.threshold=0.5, ntree=500, trace=T)
#length(Run_GEA_GF_all$result)
#list_set <- c("list_random_SNPs","list_random_SNPs_same_AF","outliers_set_final_overlapping_no_LD_LC_new_var","outliers_set_final_overlapping_no_LD_new_var","CG_common_set")
#list_name <- c("random","random_same_AF","LC","MC","CG")
#for(i in 1:length(list_set)){
# set <- get(list_set[i])
# name <- list_name[i]
#Run_GEA_GF <- gradientForest(data.frame(past_climatic[,vars],genomic_matrix[set]),
# predictor.vars=vars,
# response.vars=colnames(genomic_matrix[set]),
# corr.threshold=0.5, ntree=500, trace=T)
#length(Run_GEA_GF$result)
#assign(paste0("Run_GEA_GF_",name),Run_GEA_GF)
#}
#Run_GEA_GF_random_same_AF_V2 <- gradientForest(data.frame(past_climatic[,vars],genomic_matrix[list_random_SNPs_same_AF_V2]),
# predictor.vars=vars,
# response.vars=colnames(genomic_matrix[list_random_SNPs_same_AF_V2]),
# corr.threshold=0.5, ntree=500, trace=T)
#length(Run_GEA_GF_random_same_AF_V2$result)
#Run_GEA_GF_all_outliers <- gradientForest(data.frame(past_climatic[,vars],genomic_matrix[unique_outliers]),
# predictor.vars=vars,
# response.vars=colnames(genomic_matrix[unique_outliers]),
# corr.threshold=0.5, ntree=500, trace=T)
#length(Run_GEA_GF_all_outliers$result)
#list_model <- c("all","random","random_same_AF","LC","MC","CG")
#for(i in 1:length(list_model)){
# model <- list_model[i]
# save(list = paste0("Run_GEA_GF_", model), file = paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/RUNs/Run_GEA_GF_",model,".Rdata"))
#}
list_model <- c("all","random","random_same_AF","LC","MC","CG","random_same_AF_V2","all_outliers")
for(i in 1:length(list_model)){
model <- list_model[i]
load(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/RUNs/Run_GEA_GF_",model,".Rdata"))
}
Example LC set: We can see that 83 of the 98 outliers are associated with some of the predictors. The next step is to use these 83 SNPs in Gradient Forest (GF) to estimate the genomic offset
Now that we have our GEA models, we can use interpolation and extrapolation to estimate the past and future genomic composition and calculate the genomic offset metric by relating the models to space and time. To achieve this, we adapted the function created by Matthew C. Fitzpatrick (GitHub: https://github.com/fitzLab-AL/geneticOffsetR/blob/main/poplarGBS.gf.supportFunctions.R).
########## calculate adaptive offset for populations in space or time
genomic_offset_function <- function(gfMod, vars, env2, combined=F,
pops = envPop$pop_code, weighted=FALSE){
#gfMod = gf model for prediction
#vars = names of env variables
#env2 = new environment (new place / time)
transEnv2 <- predict(gfMod, env2[,vars]) #new env
transEnv1 <- predict(gfMod) #current env
#calculate Euclidean distance in transformed env space
num <- nrow(transEnv1)
dOut <- lapply(1:num, function(x, tEnv1, tEnv2){
as.numeric(pdist(tEnv1[x,], tEnv2[x,])@dist)}, tEnv2=transEnv2, tEnv1=transEnv1)
return(dOut)
}
We can apply this function to our dataset:
list_set <- c("Run_GEA_GF_all","Run_GEA_GF_random","Run_GEA_GF_random_same_AF","Run_GEA_GF_LC","Run_GEA_GF_MC","Run_GEA_GF_CG","Run_GEA_GF_random_same_AF_V2","Run_GEA_GF_all_outliers")
list_period <- c("present", "future")
#vars = past climatic data used in GEA
for(i in 1: length(list_period)){
period <- list_period[i]
for(x in 1:length(list_set)){
GF_run <- get(list_set[x])
name <- c("all","random","random_same_AF","LC","MC","CG","random_AF_V2","all_outliers")
if(period== "future"){
list_models_clim<- c("GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL")
for(j in 1:length(list_models_clim)){
clim_model <- list_models_clim[j]
climate_to_calculate_GO <- get(paste0(period,"_climatic_",clim_model))
Genomic_offset <- genomic_offset_function(gfMod=GF_run, vars=vars, env2=climate_to_calculate_GO[,vars], combined=F,
pops = row.names(genomic_matrix), weighted=FALSE)
#extraction GO values
Genomic_offset$values <- unlist(Genomic_offset)
genomic_offset_GF <- data.frame(Population=row.names(genomic_matrix),GO=Genomic_offset$values)
names(genomic_offset_GF)[2] <- paste0("genomic_offset_GF_",name[x])
assign(paste0("genomic_offset_GF_",name[x],"_",period,"_",clim_model),genomic_offset_GF)
}
} else {
climate_to_calculate_GO <- get(paste0(period,"_climatic"))
Genomic_offset <- genomic_offset_function(gfMod=GF_run, vars=vars, env2=climate_to_calculate_GO[,vars], combined=F,
pops = row.names(genomic_matrix), weighted=FALSE)
#extraction GO values
Genomic_offset$values <- unlist(Genomic_offset)
genomic_offset_GF <- data.frame(Population=row.names(genomic_matrix),GO=Genomic_offset$values)
names(genomic_offset_GF)[2] <- paste0("genomic_offset_GF_",name[x])
assign(paste0("genomic_offset_GF_",name[x],"_",period),genomic_offset_GF)
}
}
}
list_set <- c("all","random","random_same_AF","LC","MC","CG","random_AF_V2","all_outliers")
for(x in 1:length(list_set)){
set <- list_set[x]
df_tot_GO_predictions_clim_models_future <- data.frame(Population=GO_T_Adapcon_Gentree_GF_random_future_GFDL_ESM4[,1],GFDL_ESM4=get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_future_GFDL_ESM4"))[,2],IPSL_CM6A_LR=get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_future_IPSL_CM6A_LR"))[,2],MPI_ESM1_2_HR=get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_future_MPI_ESM1_2_HR"))[,2],MRI_ESM2_0=get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_future_MRI_ESM2_0"))[,2],UKESM1_0_LL=get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_future_UKESM1_0_LL"))[,2],elevation=as.numeric(meta_data_pop_order$Elevation.DEM_90m.))
correlation_future <- cor(df_tot_GO_predictions_clim_models_future[,-1])
corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)
pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/comparison/GCMs/correlation_future_predictions_GCMs_",set,".pdf"));corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()
}
list_models_clim<- c("GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL")
for(x in 1:length(list_set)){
set <- list_set[x]
all_GO <- data.frame()
for(i in 1:length(list_models_clim)){
model <- list_models_clim[i]
GO_data <- get(paste0("GO_T_Adapcon_Gentree_GF_", set, "_future_", model))
colnames(GO_data) <- c("Population","GO")
all_GO <- rbind(all_GO,GO_data)
}
mean_GO <- aggregate(GO ~ Population, data = all_GO, FUN = mean, na.rm = TRUE)
colnames(mean_GO) <- c("Population", "Mean_GO") # Rename the columns for clarity
assign(paste0("GO_T_Adapcon_Gentree_GF_", set, "_future_mean"),mean_GO)
file_save<- get(paste0("GO_T_Adapcon_Gentree_GF_", set, "_future_mean"))
name <- paste0("GO_T_Adapcon_Gentree_GF_", set, "_future_mean")
assign(name,file_save)
save(list = name,file=paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/data/GCMs_separate/",name,".Rdata"))
}
list_models_clim<- c("mean","GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL")
# List of SNP sets
list_sets <- c("all","random_same_AF","LC")#"random","MC","CG","random_AF_V2","all_outliers"
# Initialize an empty list to store data for all SNP sets
plot_data <- list()
# Load the RColorBrewer package for dynamic color palettes
library(RColorBrewer)
color_palette <- c("#FF69B4","red","blue","orange","black","green")
# Initialize a vector to keep track of top populations across all GCMs
meta_data_pop$pop_abbrev <- c("VIS","BAU","SDF","PAT","CHO","VOU","OLY","ANC","PDT","UMB","VAL","BRA","HAR","GOR","BOM","UNS","BUJA","BUJG","CAR","RAS","SUE","SAV","BLI","HHR","JUR","WAL","CED","RWM","YEW")
meta_data_f <- meta_data_pop[order(meta_data_pop$Population),]
# Loop over each SNP set
for (x in 1:length(list_sets)) {
set <- list_sets[x]
# Initialize an empty data frame for all models in the current SNP set
ranks_df <- data.frame()
all_top_populations <- character(0)
# Loop over each climatic model
for (i in 1:length(list_models_clim)) {
model <- list_models_clim[i]
# Get the genomic offset data for the current SNP set and model
GO_data <- get(paste0("GO_T_Adapcon_Gentree_GF_", set, "_future_", model))
colnames(GO_data) <- c("Population", "GO")
GO_data_V2 <- merge(GO_data,meta_data_f[,c(1,2,7)],"Population")
# Rank the populations by genomic offset (highest GO gets rank 1)
GO_data_V2$Rank <- rank(-GO_data_V2$GO, ties.method = "first")
# Add columns for the climatic model and SNP set
GO_data_V2$Model <- model
GO_data_V2$Set <- set
# Append to ranks dataframe
ranks_df <- rbind(ranks_df, GO_data_V2)
# Find the top 1 population for the current model (highest GO = rank 1)
top_1_population <- GO_data_V2 %>% top_n(1, GO) %>% pull(Population)
# Add this top 1 population to the list of all top populations (avoid duplicates)
all_top_populations <- unique(c(all_top_populations, top_1_population))
}
# Convert 'Model' and 'Set' to factors for plotting
ranks_df$Model <- factor(ranks_df$Model, levels = list_models_clim)
ranks_df$Set <- factor(ranks_df$Set, levels = list_sets)
# Adjust the number of colors if there are more top populations than the current color palette
num_top_populations <- length(all_top_populations)
if (num_top_populations > length(color_palette)) {
color_palette <- colorRampPalette(brewer.pal(9, "Set1"))(num_top_populations) # Use a different palette if needed
}
# Assign a unique color to each of the top populations (those ranked 1 in any model)
population_colors <- setNames(color_palette[1:num_top_populations], all_top_populations)
# Create a new column to store color assignments (highlight only rank 1 populations for each GCM)
ranks_df$Highlight <- ifelse(ranks_df$Population %in% all_top_populations, ranks_df$Population, "Other")
# Map colors for top populations and grey for the rest
ranks_df$Color <- ifelse(ranks_df$Highlight == "Other", "grey", population_colors[ranks_df$Highlight])
# Store the data frame for this SNP set in the plot_data list
plot_data[[set]] <- ranks_df
# Modify the plot generation code
p <- ggplot(ranks_df, aes(x = Model, y = Rank, group = Population)) +
geom_line(aes(color = Highlight), alpha = 0.7, size = 0.8) + # Color only top populations
geom_point(aes(color = Highlight), size = 2) + # Color only top populations
geom_text(
data = ranks_df %>% filter(Model == list_models_clim[1]), # Label only in first column
aes(label = meta_data_f$pop_abbrev[match(Population, meta_data_f$Population)]),
hjust = 1, # Align to the left
vjust = 0,
size = 3.8, # Increase text size
nudge_x = -0.25,
fontface= "italic"
# Move text further left
) +
scale_y_reverse() + # Reverse rank (1 = top)
scale_color_manual(values = c(population_colors, "Other" = "grey")) + # Keep top populations colored, others grey
labs(
title = paste("GF Rank Variability for SNP Set:", set),
x = "Climatic Models", y = "Population Rank"
) +
theme_minimal() +
theme(
legend.position = "none",
panel.grid.minor = element_blank(),
strip.text = element_text(size = 10, face = "bold"),
legend.title = element_text(size = 10),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12)
)
print(p) # Display plot for this SNP set
#save
pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/comparison/GCMs/GO_rank_comparison_",set,".pdf"));print(p);dev.off()
}
We are going to use the mean of the GO predictions across the 5 GCMs as future GO
list_models_clim<- c("mean","GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL")
list_period <- c("present","future")
list_set <- c("all","random","random_same_AF","LC","MC","CG","random_AF_V2","all_outliers")
for(i in 1:length(list_period)){
period <- list_period[i]
for(x in 1: length(list_set)){
set <- list_set[x]
if(period == "future"){
for (j in 1:length(list_models_clim)) {
model_clim <- list_models_clim[j]
name_file <- paste0(set,"_",period,"_",model_clim)
GO_df <- get(paste0("GO_T_Adapcon_Gentree_GF_",name_file))
#first, we need to add the coordinates
Genomic_offset_coord <- merge(GO_df,meta_data_pop_order[,c(2,4,5)],"Population")
#transform longitude and latitude to numeric variables
Genomic_offset_coord <- Genomic_offset_coord %>% mutate(Longitude=as.numeric(Longitude),Latitude=as.numeric(Latitude))
colors <- c( "darkgreen", "#FDF7B3","#FC4E2A","#BD0026","darkorchid4")
#background map
admin <- ne_countries(scale = "medium", returnclass = "sf")
plot <- ggplot(data = Genomic_offset_coord) +
geom_sf(data = admin, fill = gray(0.92), size = 0) +
geom_point(aes(x = Longitude, y = Latitude, fill = cut_number(Genomic_offset_coord[,2], n = 5)), shape = 21,size=3, color = "black") +
scale_fill_manual(
values = colors,
labels = c("low values","","","","high values"),
drop = FALSE,na.translate = FALSE)+ # Ensure all levels are shown in the legend
geom_sf(data = admin, fill = NA, size = 0.1) +
coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +
xlab("Longitude") + ylab("Latitude") +
guides(fill = guide_legend(title = "Genomic offset")) +
ggtitle(paste0("Genomic offset across populations ",set," ",period,"_",model_clim)) +
theme_bw(base_size = 11) +
theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))
print(plot)
#save
pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/GO_separate_GCM_and_mean/Genomic_offset_across_populations_",set,"_",period,"_",model_clim,".pdf"));print(plot);dev.off()
}
} else {
name_file <- paste0(set,"_",period)
GO_df <- get(paste0("GO_T_Adapcon_Gentree_GF_",name_file))
#first, we need to add the coordinates
Genomic_offset_coord <- merge(GO_df,meta_data_pop_order[,c(2,4,5)],"Population")
#transform longitude and latitude to numeric variables
Genomic_offset_coord <- Genomic_offset_coord %>% mutate(Longitude=as.numeric(Longitude),Latitude=as.numeric(Latitude))
colors <- c( "darkgreen", "#FDF7B3","#FC4E2A","#BD0026","darkorchid4")
#background map
admin <- ne_countries(scale = "medium", returnclass = "sf")
plot <- ggplot(data = Genomic_offset_coord) +
geom_sf(data = admin, fill = gray(0.92), size = 0) +
geom_point(aes(x = Longitude, y = Latitude, fill = cut_number(Genomic_offset_coord[,2], n = 5)), shape = 21,size=3, color = "black") +
scale_fill_manual(
values = colors,
labels = c("low values","","","","high values"),
drop = FALSE,na.translate = FALSE)+ # Ensure all levels are shown in the legend
geom_sf(data = admin, fill = NA, size = 0.1) +
coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +
xlab("Longitude") + ylab("Latitude") +
guides(fill = guide_legend(title = "Genomic offset")) +
ggtitle(paste0("Genomic offset across populations ",set," ",period)) +
theme_bw(base_size = 11) +
theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))
print(plot)
#save
pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/GO_separate_GCM_and_mean/Genomic_offset_across_populations_",set,"_",period,".pdf"));print(plot);dev.off()
}
}
}
Based on these genomic offset values, we can create similar plots to the ones for RDA GO, displaying the values for each population.
We observed that there is no clear pattern in the genomic offset; it does not correlate with continentality, coastal proximity, or other discernible patterns. We hypothesized that elevation might influence genomic offset, with populations at higher altitudes potentially exhibiting higher genomic offsets.
#load elevation
elevation_data <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Climatic_data/ClimateDT_extraction/extraction_climateDT_29pop_T.csv",h=T,sep=";",dec=",")
#merge elevation data with GO
data_GO_altitude <- data.frame(merge(genomic_offset_GF_LC_present,elevation_data,"Population"))
data_GO_altitude_df <- data_GO_altitude %>%
mutate(Longitude=as.numeric(Longitude),Latitude=as.numeric(Latitude),elevation=as.numeric(Elevation.DEM_90m.))
#df
df_tot_GO_predictions_present <- data.frame(Population=GO_T_Adapcon_Gentree_GF_random_present[,1],GO_all_pres=GO_T_Adapcon_Gentree_GF_all_present[,2],GO_rand_pres=GO_T_Adapcon_Gentree_GF_random_present[,2],GO_rand_same_AF_pres=GO_T_Adapcon_Gentree_GF_random_same_AF_present[,2],GO_LC_pres=GO_T_Adapcon_Gentree_GF_LC_present[,2],GO_MC_pres=GO_T_Adapcon_Gentree_GF_MC_present[,2],GO_CG_pres=GO_T_Adapcon_Gentree_GF_CG_present[,2],elevation=data_GO_altitude_df$elevation)
correlation_present <- cor(df_tot_GO_predictions_present[,-1])
corrplot(correlation_present, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)
df_tot_GO_predictions_future <- data.frame(Population=GO_T_Adapcon_Gentree_GF_random_present[,1],GO_all_Fut=GO_T_Adapcon_Gentree_GF_all_future_mean[,2],GO_rand_Fut=GO_T_Adapcon_Gentree_GF_random_future_mean[,2],GO_rand_same_AF_Fut=GO_T_Adapcon_Gentree_GF_random_same_AF_future_mean[,2],GO_LC_Fut=GO_T_Adapcon_Gentree_GF_LC_future_mean[,2],GO_MC_Fut=GO_T_Adapcon_Gentree_GF_MC_future_mean[,2],GO_CG_Fut=GO_T_Adapcon_Gentree_GF_CG_future_mean[,2],GO_random_AF_V2_Fut=GO_T_Adapcon_Gentree_GF_random_AF_V2_future_mean[,2],GO_all_outliers_Fut=GO_T_Adapcon_Gentree_GF_all_outliers_future_mean[,2],elevation=data_GO_altitude_df$elevation)
correlation_future <- cor(df_tot_GO_predictions_future[,-1])
corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)
pdf("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/comparison/correlation_GO_present_values_GF_T_set_period.pdf");corrplot(correlation_present, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()
## png
## 2
pdf("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/comparison/correlation_GO_future_values_GF_T_set_period.pdf");corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()
## png
## 2
We observed a positive correlation between genomic offset and elevation, which vary depending on the set of SNPs used.
More generally, we can see the same results as for RDA for the
correlation between models and periods. For the MC and LC sets, we have
almost the same GO values regardless of the period. For the comparison
neutral vs outliers set, we have also similar results with an overall
correlation of 0.77 and 0.76.
We can investigate the rank of populations according to their genomic
offset values to further investigate these findings.
list_period <- c("present","future_mean")
list_set <- c("all","random","random_same_AF","LC","MC","CG")
for(i in 1:length(list_period)){
period <- list_period[i]
# Loop through each set and compare it with all subsequent sets
for(x in 1:(length(list_set)-1)){
for(y in (x+1):length(list_set)){
set1 <- list_set[x]
set2 <- list_set[y]
# Merge the two datasets
GO_RDA_merge <- merge(get(paste0("GO_T_Adapcon_Gentree_GF_",set1,"_",period)),
get(paste0("GO_T_Adapcon_Gentree_GF_",set2,"_",period)),
by = "Population")
# Create a dataframe for comparison
GO_RDA_set_df <- data.frame(Population = GO_RDA_merge$Population,
GO_RDA_1 = GO_RDA_merge[,2],
GO_RDA_2 = GO_RDA_merge[,3])
# Rank the values
GO_RDA_set_df$rank_1 <- rank(GO_RDA_set_df$GO_RDA_1)
GO_RDA_set_df$rank_2 <- rank(GO_RDA_set_df$GO_RDA_2)
# Merge with metadata to include country information
GO_RDA_set_df_meta <- merge(GO_RDA_set_df, meta_data_pop_order[,c(1,2)], by = "Population")
GO_RDA_set_df_meta$Country <- as.factor(GO_RDA_set_df_meta$Country)
# Create the scatter plot
Scatterplot <- ggplot(GO_RDA_set_df_meta, aes(x = rank_1, y = rank_2)) +
geom_point(aes(color = Country), size = 3) +
scale_colour_manual(name = "Countries",
values = c("orangered3","gold2","darkorchid3","navyblue","turquoise2","green3","blue","red","black","gray","orange","darkgreen")) +
geom_abline(intercept = 0, slope = 1, color = "gray60") +
ggtitle(paste0("Comparison GO rank of populations GF ", set1, "/", set2, " ", period)) +
theme_bw()
# Print the plot
print(Scatterplot)
# Save the plot as a PDF
pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/comparison/Scatterplot_comparison_rank_pop_",set,"_",set2,"_",period,"_Taxus.pdf"));print(Scatterplot);dev.off()
}
}
}
We can also examine the trend of maladaptation to determine whether populations currently exhibiting higher predicted maladaptation will continue to show the same pattern in the future.
list_set <- c("all","random","random_same_AF","LC","MC","CG")
for(i in 1:length(list_set)){
set <- list_set[i]
period_1 <- c("present")
period_2 <- c("future_mean")
df_pres <- get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_",period_1))
df_fut <- get(paste0("GO_T_Adapcon_Gentree_GF_",set,"_",period_2))
#df
df_f_pres <- data.frame(Population = df_pres[,1], Values = df_pres[,2])
df_f_fut <- data.frame(Population = df_fut[,1], Values = df_fut[,2])
#add period
df_f_pres$Period <- "Present"
df_f_fut$Period <- "Future"
#combine them
df_GO <- rbind(df_f_fut,df_f_pres)
df_GO$Period <- factor(df_GO$Period, levels = c("Present", "Future"))
#plot
trend_plot <- ggplot(df_GO, aes(x = Period, y = Values, group = Population)) +
geom_line(aes(color = Population)) + # Line plot, connecting the points for each population
geom_point(aes(color = Population)) + # Adding points for each value
theme_minimal() +
labs(title = paste0("Genomic offset trend across present and future ",set),
x = "Period",
y = "Genomic offset prediction") +
theme(
legend.position = "none",
strip.text = element_text(size = 11),
plot.title = element_text(hjust = 0.5, color = "Black", face = "italic")
)
plot(trend_plot)
#save
pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/Genomic_offset/GF/figures/comparison/GO_pres_future/Scatterplot_comparison_trend_GO_present_future_",set,"_Taxus.pdf"));print(trend_plot);dev.off()
}
As the LC and MC sets are almost identical, we will only perform further analyses on the LC set.
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.3.2 (2023-10-31 ucrt)
## os Windows 11 x64 (build 26100)
## system x86_64, mingw32
## ui RTerm
## language (EN)
## collate French_France.utf8
## ctype French_France.utf8
## tz Europe/Paris
## date 2025-09-22
## pandoc 3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## bslib 0.6.1 2023-11-28 [1] CRAN (R 4.3.2)
## cachem 1.0.8 2023-05-01 [1] CRAN (R 4.3.1)
## class 7.3-22 2023-05-03 [2] CRAN (R 4.3.2)
## classInt 0.4-10 2023-09-05 [1] CRAN (R 4.3.2)
## cli 3.6.1 2023-03-23 [1] CRAN (R 4.3.1)
## codetools 0.2-19 2023-02-01 [2] CRAN (R 4.3.2)
## colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.3.1)
## corrplot * 0.92 2021-11-18 [1] CRAN (R 4.3.2)
## DBI 1.2.2 2024-02-16 [1] CRAN (R 4.3.2)
## devtools 2.4.5 2022-10-11 [1] CRAN (R 4.3.3)
## digest 0.6.33 2023-07-07 [1] CRAN (R 4.3.1)
## dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.3.2)
## e1071 1.7-14 2023-12-06 [1] CRAN (R 4.3.2)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.3.1)
## evaluate 0.23 2023-11-01 [1] CRAN (R 4.3.2)
## extendedForest 1.6.2 2023-12-12 [1] R-Forge (R 4.3.2)
## farver 2.1.2 2024-05-13 [1] CRAN (R 4.3.3)
## fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.1)
## fs 1.6.3 2023-07-20 [1] CRAN (R 4.3.1)
## generics 0.1.4 2025-05-09 [1] CRAN (R 4.3.2)
## ggplot2 * 3.5.2 2025-04-09 [1] CRAN (R 4.3.2)
## glue 1.7.0 2024-01-09 [1] CRAN (R 4.3.2)
## gradientForest * 0.1-37 2023-08-19 [1] R-Forge (R 4.3.1)
## gtable 0.3.6 2024-10-25 [1] CRAN (R 4.3.3)
## highr 0.10 2022-12-22 [1] CRAN (R 4.3.1)
## htmltools 0.5.7 2023-11-03 [1] CRAN (R 4.3.2)
## htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.3.2)
## httpuv 1.6.13 2023-12-06 [1] CRAN (R 4.3.2)
## httr 1.4.7 2023-08-15 [1] CRAN (R 4.3.2)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.3.1)
## jsonlite 1.8.8 2023-12-04 [1] CRAN (R 4.3.2)
## KernSmooth 2.23-22 2023-07-10 [2] CRAN (R 4.3.2)
## knitr 1.45 2023-10-30 [1] CRAN (R 4.3.2)
## labeling 0.4.3 2023-08-29 [1] CRAN (R 4.3.1)
## later 1.3.2 2023-12-06 [1] CRAN (R 4.3.2)
## lattice 0.22-5 2023-10-24 [2] CRAN (R 4.3.2)
## lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.3.2)
## magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.1)
## memoise 2.0.1 2021-11-26 [1] CRAN (R 4.3.1)
## mime 0.12 2021-09-28 [1] CRAN (R 4.3.1)
## miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.3.2)
## munsell 0.5.1 2024-04-01 [1] CRAN (R 4.3.3)
## pdist * 1.2.1 2022-05-02 [1] CRAN (R 4.3.1)
## pillar 1.10.2 2025-04-05 [1] CRAN (R 4.3.3)
## pkgbuild 1.4.3 2023-12-10 [1] CRAN (R 4.3.2)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.2)
## pkgload 1.3.4 2024-01-16 [1] CRAN (R 4.3.2)
## profvis 0.3.8 2023-05-02 [1] CRAN (R 4.3.2)
## promises 1.2.1 2023-08-10 [1] CRAN (R 4.3.2)
## proxy 0.4-27 2022-06-09 [1] CRAN (R 4.3.2)
## purrr 1.0.2 2023-08-10 [1] CRAN (R 4.3.2)
## R6 2.6.1 2025-02-15 [1] CRAN (R 4.3.3)
## RColorBrewer * 1.1-3 2022-04-03 [1] CRAN (R 4.3.1)
## Rcpp 1.0.11 2023-07-06 [1] CRAN (R 4.3.1)
## remotes 2.5.0 2024-03-17 [1] CRAN (R 4.3.3)
## rlang 1.1.3 2024-01-10 [1] CRAN (R 4.3.2)
## rmarkdown 2.25 2023-09-18 [1] CRAN (R 4.3.1)
## rnaturalearth * 1.0.1 2023-12-15 [1] CRAN (R 4.3.2)
## rnaturalearthdata 1.0.0 2024-02-09 [1] CRAN (R 4.3.2)
## rstudioapi 0.15.0 2023-07-07 [1] CRAN (R 4.3.2)
## sass 0.4.8 2023-12-06 [1] CRAN (R 4.3.2)
## scales 1.3.0 2023-11-28 [1] CRAN (R 4.3.2)
## sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.3.2)
## sf 1.0-15 2023-12-18 [1] CRAN (R 4.3.2)
## shiny 1.8.0 2023-11-17 [1] CRAN (R 4.3.2)
## stringi 1.8.3 2023-12-11 [1] CRAN (R 4.3.2)
## stringr 1.5.1 2023-11-14 [1] CRAN (R 4.3.2)
## terra 1.7-71 2024-01-31 [1] CRAN (R 4.3.2)
## tibble 3.2.1 2023-03-20 [1] CRAN (R 4.3.2)
## tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.3.3)
## units 0.8-5 2023-11-28 [1] CRAN (R 4.3.2)
## urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.3.2)
## usethis 2.2.3 2024-02-19 [1] CRAN (R 4.3.2)
## vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.3.2)
## withr 3.0.2 2024-10-28 [1] CRAN (R 4.3.3)
## xfun 0.41 2023-11-01 [1] CRAN (R 4.3.2)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 4.3.2)
## yaml 2.3.8 2023-12-11 [1] CRAN (R 4.3.2)
##
## [1] C:/Users/tfrancisco/AppData/Local/R/win-library/4.3
## [2] C:/Users/tfrancisco/AppData/Local/Programs/R/R-4.3.2/library
##
## ──────────────────────────────────────────────────────────────────────────────