rm(list = ls())
knitr::opts_chunk$set(
message = FALSE,
warning = FALSE,
cache = FALSE
)
meta_data_pop <- read.csv("Data/Species/Taxus_baccata/Populations/taxus_sample_29pop.csv",h=T,sep=";",dec=",")
#alphabetic order
meta_data_pop_order <- meta_data_pop[order(meta_data_pop$Population),]
This script calculates the Euclidean distances between observed and predicted population scores for the 1901-1950 time period along the retained RDA axes and normalises these values using min–max scaling to a 0-1 range (hereafter referred to as the genomic discrepancy index). It also computes the genomic offset index using the RDA framework.
The genomic discrepancy index is a metric introduced in Francisco et al. 2025 to visualise how far the observed genomic composition of populations deviates from that predicted by the GEA model under the same climatic conditions. Populations are unlikely to exhibit exactly the genomic composition predicted by the model’s linear relationships of local adaptation. Such deviations may indicate that the degree of local adaptation is lower than expected or that adaptation relies on a different genetic basis than that captured by the model. Possible explanations include neutral processes such as genetic drift (leading to fixation of potentially adaptive alleles), restricted gene flow (resulting in the absence of adaptive variants in some populations), or populations experiencing adaptation lag or pre-adaptation to future climates. However, alternative explanations, such as conditional neutrality of alleles or model misspecification, should also be considered.
The genomic offset index, first proposed by Fitzpatrick and Keller (2015), quantifies the amount of genomic change required for populations (or individuals) to maintain fitness under future climatic conditions. Specifically, genomic offset measures the distance between the estimated genomic composition under current (reference) climates and that predicted under future climates, focusing on SNPs potentially associated with adaptation.
To compute genomic offset, climate-associated SNPs can first be identified (alternatively, all SNPs may be used under the assumption that adaptation is polygenic or even omnigenic). GEA methods are then applied to establish relationships between allelic variation and climatic variables. These relationships are used to estimate genomic composition under past or present and future climatic conditions, effectively interpolating or extrapolating the GEA models in space and time. Genomic offset is then quantified as the Euclidean distance between genomic compositions under reference and future climates.
#climatic/structure/IBD data
load("Data/Species/Taxus_baccata/GEA_new_var/variance_partitioning/Climatic_data_RDA_pRDA.Rdata")
Here we will also load the raster data not scaled but with the mean and sd values of the past climatic data to scaled the data together to follow the function provided in Capblancq and Forester (2021).
list_models_clim<- c("GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL")
#load past and future raster data
climatic_variables_to_keep <-c("bio1", "bio12", "bio15","bio2","bio4","bio9")
list_raster <- c("past_climatic_data_raster","present_climatic_data_raster","future_climatic_data_raster")
for(i in 1:length(list_models_clim)){
model <- list_models_clim[i]
for(x in 1:length(list_raster)){
name <- c("past","present","future")
name_clim <- name[x]#name final raster
var <- list_raster[x]#name of the raster
if(var == "future_climatic_data_raster"){
var= paste0("future_climatic_data_raster_",model)
load(paste0("Data/Species/Taxus_baccata/Genomic_offset/RDA/",var,".Rdata")) #load data
raster_clim <- get(var)[[climatic_variables_to_keep]]#keep only var in clim_variables to keep
names(raster_clim)=c("Annual_Tc","Annual_P","P_Seasonality","Diurnal_range_Tc","Tc_Seasonality","Tc_driest_quarter") #change the name to match the names of the scaled and center values
assign(paste0("raster_",name_clim,"_clim_",model),raster_clim)
} else {
load(paste0("Data/Species/Taxus_baccata/Genomic_offset/RDA/",var,".Rdata"))
raster_clim <- get(var)[[climatic_variables_to_keep]]
names(raster_clim)=c("Annual_Tc","Annual_P","P_Seasonality","Diurnal_range_Tc","Tc_Seasonality","Tc_driest_quarter")
assign(paste0("raster_",name_clim,"_clim"),raster_clim)
}
}
}
We also need the scale and center values:
load("Data/Species/Taxus_baccata/Climatic_data/new_selection/scale_env_value_new_cli.Rdata")
load("Data/Species/Taxus_baccata/Climatic_data/new_selection/center_env_value_new_cli.Rdata")
list_name <- c("past","present","future")
coords <- data.frame(apply(meta_data_pop_order[,c(5:4)], 2, as.numeric))#we need to have longitude then latitude
for(i in 1:length(list_models_clim)){
model <- list_models_clim[i]
for(x in 1: length(list_name)){
name <- list_name[x]
if(name == "future"){
raster_data <- get(paste0("raster_",name,"_clim_",model))
clim_data <- raster::extract(raster_data, coords)
clim_data_order <- data.frame(clim_data[,c(1,4,5,6,2,3)])#put it in the same order as center_env and scale object
# Standardisation of the environmental variables
clim_data_scale <- as.data.frame(scale(clim_data_order, center=center_env_value_new_cli, scale=scale_env_value_new_cli))
assign(paste0("clim_df_",name,"_scale_",model),clim_data_scale)
} else {
raster_data <- get(paste0("raster_",name,"_clim"))
clim_data <- raster::extract(raster_data, coords)
clim_data_order <- data.frame(clim_data[,c(1,4,5,6,2,3)])
clim_data_scale <- as.data.frame(scale(clim_data_order, center=center_env_value_new_cli, scale=scale_env_value_new_cli))
assign(paste0("clim_df_",name,"_scale"),clim_data_scale)
}
}
}
The genomic data set includes allele frequency corrected for MAC in the 29 populations.
#genomic data
load("Data/Species/Taxus_baccata/genetic_new/data_allelic_frequencies_29pop_adapcon_gentree_475_8616.Rdata")
genomic_matrix <- data_allelic_frequencies_29pop_adapcon_gentree_475_8616
We created 3 set of outliers:
One neutral set with loci selected at random, while maintaining the same number of SNPs for each class of allelic frequencies as in the outlier set (LC).
load("Results/species/taxus/GEA_new_var/outliers/random_set_taxus.Rdata")
list_random_SNPs_same_AF <- random_set_taxus$name_snps
#write.csv(list_random_SNPs_same_AF,"Results/species/taxus/GEA_new_var/outliers/list_random_SNPs_same_AF.csv")
We used two sets of thresholds depending on the species: a less conservative threshold (LC; FDR < 10%, BF > 8, and the top 5% of SNPs in GF) and a more conservative threshold (MC; FDR < 5%, BF > 10, and the top 1% of SNPs in GF).
#set of less conservative thresholds
load("Results/species/taxus/GEA_new_var/outliers/outliers_set_final_overlapping_no_LD_LC_new_var.Rdata")
load("Results/species/taxus/GEA_new_var/outliers/outliers_set_final_overlapping_no_LD_new_var.Rdata")
load("Results/species/taxus/GEA_new_var/outliers/unique_outliers.Rdata")
Our GDI and genomic offset analyses are based on the relationships
between SNPs and the climatic variables identified by GEA methods. In
this script, we use redundancy analysis (RDA).
First, we perform a GEA analysis on the SNP dataset to identify the
relationships that can subsequently be interpolated or extrapolated. To
achieve this, we ran an RDA using the SNP data and the selected climatic
variables.
Random set keeping the same AF
RDA_random_same_AF <- rda(formula = genomic_matrix[list_random_SNPs_same_AF] ~ Annual_Tc+Diurnal_range_Tc+Tc_Seasonality+Tc_driest_quarter+Annual_P+P_Seasonality, data = Climatic_data_RDA_pRDA, scale=F)
RsquareAdj(RDA_random_same_AF)
## $r.squared
## [1] 0.2871292
##
## $adj.r.squared
## [1] 0.09270992
Less conservative outlier set (LC)
RDA_outliers_LC <- rda(formula = genomic_matrix[outliers_set_final_overlapping_no_LD_LC_new_var] ~ Annual_Tc+Diurnal_range_Tc+Tc_Seasonality+Tc_driest_quarter+Annual_P+P_Seasonality, data = Climatic_data_RDA_pRDA, scale=F)
RsquareAdj(RDA_outliers_LC)
## $r.squared
## [1] 0.5173666
##
## $adj.r.squared
## [1] 0.3857393
More conservative outlier set (MC)
RDA_outliers_MC <- rda(formula = genomic_matrix[outliers_set_final_overlapping_no_LD_new_var] ~ Annual_Tc+Diurnal_range_Tc+Tc_Seasonality+Tc_driest_quarter+Annual_P+P_Seasonality, data = Climatic_data_RDA_pRDA, scale=F)
RsquareAdj(RDA_outliers_MC)
## $r.squared
## [1] 0.5947706
##
## $adj.r.squared
## [1] 0.4842534
All outliers (all_outliers)
RDA_all_outliers <- rda(formula = genomic_matrix[unique_outliers] ~ Annual_Tc+Diurnal_range_Tc+Tc_Seasonality+Tc_driest_quarter+Annual_P+P_Seasonality, data = Climatic_data_RDA_pRDA, scale=F)
RsquareAdj(RDA_all_outliers)
## $r.squared
## [1] 0.4189863
##
## $adj.r.squared
## [1] 0.260528
We can save the models to use them later to evaluate the models:
list_models <- c("RDA_random_same_AF","RDA_outliers_LC","RDA_outliers_MC","RDA_all_outliers")
for(x in 1:length(list_models)){
name_model<- list_models[x]
file_save<- get(name_model)
name <- paste0("model_GEA_",name_model)
assign(name,file_save)
save(list = paste0("model_GEA_",name_model),file=paste0("Results/species/taxus/Genomic_offset/RDA/data/models/",name,".Rdata"))
}
To conduct further analysis, we will examine the different sets to see if the same patterns emerge.
list_outliers_set_V2 <- c("list_random_SNPs_same_AF","outliers_set_final_overlapping_no_LD_LC_new_var","outliers_set_final_overlapping_no_LD_new_var","unique_outliers")
list_RDA_V2 <- c("RDA_random_same_AF","RDA_outliers_LC","RDA_outliers_MC","RDA_all_outliers")
We can plot how the outliers are distributed along the RDA axes and the climatic variables.
for(x in 1: length(list_RDA_V2)){
RDA <- get(list_RDA_V2[x])
name <- c("RDA_random_same_AF","outliers_Lc","Outliers_Mc","all_outliers")
#screeplot
plot(RDA$CCA$eig, option="screeplot")
#in hist
screeplot_RDA <- screeplot(RDA,main = paste0("Hist screeplot ",name[x]))
#explained variance along each RDA axis
explain_variance <-RDA$CCA$eig*100/sum(RDA$CCA$eig)
assign(paste0("explain_variance",name[x]),explain_variance)
print(get(paste0("explain_variance",name[x])))
#save
#pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/screeplot/Screeplot_RDA_",name[x],".pdf"));screeplot(RDA,main = paste0("Hist screeplot ",name[x]));dev.off()
}
## RDA1 RDA2 RDA3 RDA4 RDA5 RDA6
## 59.299725 24.467288 5.571946 4.712949 3.804574 2.143519
As can be seen, the first two RDA axes appear to explain most of the genetic variation of outliers across all sets. We will use these two axes in the further analysis of each set.
We can also plot the loci and climatic variables in the RDA space to visualise their relationships:
for(x in 1:length(list_RDA_V2)){
RDA <- get(list_RDA_V2[x])
name <- c("RDA_random_same_AF","outliers_Lc","Outliers_Mc","all_outliers")
score_loci <- as.data.frame(scores(RDA, choices=c(1:2), display="species", scaling="none"))
score_climatic_var <- as.data.frame(scores(RDA, choices=c(1:2), display="bp"))
explained_variance_round <- round(RDA$CCA$eig*100/sum(RDA$CCA$eig),digits=1)
#Biplot with SNPs and climatic variables along the two first RDA axis.
biplot_outliers_RDA<- ggplot() +
geom_hline(yintercept=0, linetype="dashed", color = gray(.80), size=0.6) +
geom_vline(xintercept=0, linetype="dashed", color = gray(.80), size=0.6) +
geom_point(data = score_loci, aes(x=RDA1*15, y=RDA2*15,color="locus"), size = 1.4) +
geom_segment(data = score_climatic_var, aes(xend=RDA1, yend=RDA2, x=0, y=0), colour="black", size=0.15, linetype=1, arrow=arrow(length = unit(0.02, "npc"))) +
geom_text(data = score_climatic_var, aes(x=1.1*RDA1, y=1.1*RDA2, label = row.names(score_climatic_var)), size = 2.5) +
xlab(paste0("RDA 1 (",explained_variance_round[1],"%)")) +
ylab(paste0("RDA 2 (",explained_variance_round[2],"%)")) +
ggtitle(paste0("Biplot RDA ",name[x])) +
guides(color=guide_legend(title="Locus type")) +
scale_color_manual(values ="#F9A242FF") +
theme_bw(base_size = 11) +
theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))
print(biplot_outliers_RDA)
#save plots
#pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/biplot/biplot_outliers_RDA_",name[x],".pdf"));print(biplot_outliers_RDA);dev.off()
}
We can plot populations in this space to assess whether their distribution follows the expected climatic pattern.
for(x in 1:length(list_RDA_V2)){
RDA <- get(list_RDA_V2[x])
name <- c("RDA_random_same_AF","outliers_Lc","Outliers_Mc","all_outliers")
#score along the 2 first RDA axis
score_climatic_var <- as.data.frame(scores(RDA, choices=c(1:2), display="bp"))
#Score_population <- data.frame(RDA$CCA$u[,c(1,2)]) #predicted population scores
observed_scores_populations <- as.data.frame(scores(RDA,choices=c(1,2), display = "sites",scaling="none"))
#explained variance
explained_variance_round <- round(RDA$CCA$eig*100/sum(RDA$CCA$eig),digits=1)
#merge for country info
Score_population_bis <- rownames_to_column(observed_scores_populations,"Population")
score_with_country_info <- merge(Score_population_bis,meta_data_pop_order[,c(1,2)],"Population")
score_with_country_info$Country <- as.factor(score_with_country_info$Country)
group_palette <- c("Bosnia"="orangered3", "France"="gold2","Germany"= "darkorchid3", "Greece"="navyblue", "Italy"="turquoise2", "Norway"="green3", "Slovakia"="blue", "Slovenia"="red", "Spain"="black", "Sweden"="gray", "Switzerland"="orange", "UK"="darkgreen")
##Biplot with populations and climatic variables along the 2 first RDA axis
biplot_populations <- ggplot() +
geom_hline(yintercept = 0, linetype = "dashed", color = gray(0.80), size = 0.6) +
geom_vline(xintercept = 0, linetype = "dashed", color = gray(0.80), size = 0.6) +
geom_point(data = score_with_country_info, aes(x = RDA1 * 3, y = RDA2 * 3, colour = Country), size = 2, alpha = 0.8) +
geom_segment(data = score_climatic_var, aes(xend = RDA1, yend = RDA2, x = 0, y = 0), colour = "black", size = 0.15, linetype = 1, arrow = arrow(length = unit(0.02, "npc"))) +
geom_text(data = score_climatic_var, aes(x=1.1*RDA1, y=1.1*RDA2, label = row.names(score_climatic_var)), size = 2.5)+
xlab(paste0("RDA 1 (",explained_variance_round[1],"%)")) +
ylab(paste0("RDA 2 (",explained_variance_round[2],"%)")) +
ggtitle(paste0("Biplot RDA Populations ",name[x])) +
scale_color_manual(name = "Countries", values = group_palette, labels = levels(score_with_country_info$Country)) +
theme_bw(base_size = 11) +
theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))+
labs(color = "Country")
print(biplot_populations)
#save plots
#pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/biplot/biplot_population_RDA_",name[x],".pdf"));print(biplot_populations);dev.off()
}
We can now calculate the predicted genomic composition under past, present, and future climatic conditions. Note that, unlike in the adaptive_index function, climatic variables must be scaled prior to applying this function. These predictions are then used to compute the genomic discrepancy index (GDI) and genomic offset.
list_rda <- c("RDA_random_same_AF","RDA_outliers_LC","RDA_outliers_MC","RDA_all_outliers")
list_period <- c("past","present","future")
list_set <- c("random_same_AF","LC","MC","all_outliers")
K=2 #number of rda axes retained
for(i in 1: length(list_rda)){
rda_model <- get(list_rda[i])
set <- list_set[i]
for(x in 1: length(list_period)){
name <- list_period[x]
if(name == "future"){
for(a in 1:length(list_models_clim)){
models_clim <- list_models_clim[a]
clim <- get(paste0("clim_df_",name,"_scale_",models_clim))
predictions <- data.frame(Population=meta_data_pop_order$Population,predict(rda_model,clim, type = "lc", rank = K, scaling = "none"))
assign(paste0("adaptive_values_",set,"_",name,"_",models_clim),predictions)
}
}else{
clim <- get(paste0("clim_df_",name,"_scale"))
#model
predictions <- data.frame(Population=meta_data_pop_order$Population,predict(rda_model,clim, type = "lc", rank = K, scaling = "none"))
assign(paste0("adaptive_values_",set,"_",name),predictions)
}
}
}
The GDI is an index that quantifies the difference between predicted and observed genomic compositions. This allows the identification of populations that deviate from expectations based on local adaptation relationships. Such deviations may indicate a lower degree of local adaptation or suggest that the relationship is not strictly linear (e.g. conditional neutrality), or that the SNPs used do not fully capture the genetic basis of local adaptation.
We calculated the GDI using the same approach as for genomic offset, by measuring the distance between two genomic compositions: here, the observed and predicted values at the time of establishment (1901–1950, hereafter referred to as “past”, except for F. excelsior, see Main Text).
genomic_discrepancy_index <- function(RDA, K, observed_score, predicted_score,meta_data){
# Weights based on axis eigen values
weights <- RDA$CCA$eig/sum(RDA$CCA$eig)
# Weighing the current adaptive indices based on the eigen values of the associated axes
Proj_offset_observed <- as.data.frame(do.call(cbind, lapply(1:K, function(x) observed_score[,x]*weights[x])))
predicted_score_df <- predicted_score[,-1]
Proj_offset_predicted <- as.data.frame(do.call(cbind, lapply(1:K, function(x) predicted_score_df[,x]*weights[x])))
#Now we want to calculate the distance between present predicted and observed for each RDA axis before doing it for both axis simultaneously
Proj_offset <- list()
for(i in 1:K){
Proj_offset[[i]] <- abs(Proj_offset_observed[[i]] - Proj_offset_predicted[[i]])
names(Proj_offset)[i] <- paste0("RDA", as.character(i))
}
# Predict GDI, incorporating the K first axes weighted by their eigen values
ras <- Proj_offset[[1]] #we reused the format of the previous distance per RDA axis
ras[!is.na(ras)] <- unlist(lapply(1:nrow(Proj_offset_observed), function(x) dist(rbind(Proj_offset_observed[x,], Proj_offset_predicted[x,]), method = "euclidean"))) #calculation of the euclidean distance on the non Na values of the previous distance -> that why we used the format of the previous distance, to be sure to only select the rows without Nas because they are not deal by euclidean distance,
#the euclidean distance is still calculated on the weighted data (not the previous distance but on the genomic composition weighted)
names(ras) <- "GDI"
Proj_offset_global <- ras
# Return prediction of GDI for each RDA axis and a global GDI for each population
return(list(Population=meta_data$Population,Proj_offset = Proj_offset, Proj_offset_global = Proj_offset_global, weights = weights[1:K]))
}
list_rda <- c("RDA_random_same_AF","RDA_outliers_LC","RDA_outliers_MC","RDA_all_outliers")
list_predicted_adaptive_values <- c("adaptive_values_random_same_AF_past","adaptive_values_LC_past","adaptive_values_MC_past","adaptive_values_all_outliers_past")
for(x in 1: length(list_rda)){
rda <- get(list_rda[x])
observed_values <- scores(rda, display = "sites",scaling="none")
predicted_values <- data.frame(get(list_predicted_adaptive_values[x]))
set <- list_set[x]
calculated_GDI <- genomic_discrepancy_index(rda,K,observed_values,predicted_values,meta_data_pop_order)
genomic_offset_df<- data.frame(Population=unlist(calculated_GDI$Population),Genomic_offset_random=unlist(calculated_GDI$Proj_offset_global))
colnames(genomic_offset_df) <- c("Population",paste0("values_",set))
assign(paste0("GDI_index_",set),genomic_offset_df)
}
We can save the values of GDI.
We can plot the GDI values for each population. As before, we can use the ‘cut_interval’ function (never use ‘cut_number’!) to rescale the values so that they match the colour scale used for other genomic offsets, such as GF.
shapefile <- st_read("Data/Species/Taxus_baccata/shapefiles/Taxus_baccata_plg_clip.shp")
list_period <- c("present","future")
list_set <- c("random_same_AF","LC","MC","all_outliers")
for(x in 1: length(list_set)){
set <- list_set[x]
GDI_df <- get(paste0("GDI_index_",set))
#first, we need to add the coordinates
GDI_coord <- merge(GDI_df,meta_data_pop_order[,c(2,4,5)],"Population")
#transform longitude and latitude to numeric variables
GDI_coord <- GDI_coord %>% mutate(Longitude=as.numeric(Longitude),Latitude=as.numeric(Latitude))
colors <- c("#005000","#85B22C","#FFFFBF","#F76D5E","#E51932")
#background map
admin <- ne_countries(scale = "medium", returnclass = "sf")
plot <- ggplot(data = GDI_coord) +
geom_sf(data = admin, fill = gray(0.92), size = 0) +
geom_sf(data = shapefile, fill = "lightgrey", color = "black", size = 0.2) + # Adding shapefile
geom_point(aes(x = Longitude, y = Latitude, fill = cut_interval(GDI_coord[,2], n = 5)), shape = 21,size=3, color = "black") +
#geom_point(aes(x = Longitude, y = Latitude, fill = cut(GDI_coord[,2], breaks=seq(global_min, global_max, length.out = 6), include.lowest = T)), shape = 21,size=3, color = "black") + #if we want to class with the same values the 4 models to compare color
scale_fill_manual(
values = colors,
labels = c("0","","","","1"),
drop = FALSE,na.translate = FALSE)+ # Ensure all levels are shown in the legend
geom_sf(data = admin, fill = NA, size = 0.1) +
coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +
xlab("Longitude") + ylab("Latitude") +
guides(fill = guide_legend(title = NULL)) +
ggtitle(paste0("Adaptive genomic disruption across populations ",set)) +
annotation_scale(location = "br", width_hint = 0.3) + # Scale bar at bottom left
annotation_north_arrow(location = "tl", which_north = "true",
pad_x = unit(-0.2, "cm"), pad_y = unit(0.08, "cm"),
style = north_arrow_fancy_orienteering) + # Compass rose
theme_bw(base_size = 11) +
theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))
print(plot)
#save
pdf(paste0("Results/species/taxus/GDI/RDA/figures/GDI_across_populations ",set,"_newcolors.pdf"));print(plot);dev.off()
}
list_set <- c("random_same_AF","LC","MC","all_outliers")
# Combine all GDI values into one dataframe
combined_data <- data.frame()
for (set in list_set) {
GDI_df <- get(paste0("GDI_index_", set))
colnames(GDI_df)[2] <- "GDI" # Ensure second column is named GDI
GDI_df$Set <- set
combined_data <- rbind(combined_data, GDI_df[, c("GDI", "Set")])
}
# Density plot
density_plot <- ggplot(combined_data, aes(x = GDI, fill = Set)) +
geom_density(alpha = 0.7, color = "black") +
scale_fill_manual(values = c("darkred", "#E69F00", "#56B4E9", "darkgreen", "#009E73", "#CC79A7")) +
#geom_vline(xintercept = 1, color = "red", linetype = "dashed", size = 1) + # Optional threshold
facet_wrap(~ Set, scales = "free_y", ncol = 2) +
labs(
title = "Density of Genomic Discrepancy Index (GDI) across datasets",
x = "Genomic Discrepancy Index (GDI)",
y = "Density"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5, face = "italic"),
strip.text = element_text(size = 10),
legend.position = "none"
)
print(density_plot)
# Save to PDF
pdf("Results/species/taxus/GDI/RDA/figures/GDI_density_faceted.pdf")
print(density_plot)
dev.off()
## png
## 2
list_set <- c("random_same_AF","LC","MC","all_outliers")
# Combine all GDI values into one dataframe
combined_data <- data.frame()
for (set in list_set) {
GDI_df <- get(paste0("GDI_index_", set))
colnames(GDI_df)[2] <- "GDI" # Ensure second column is named GDI
GDI_df$Set <- set
combined_data <- rbind(combined_data, GDI_df[, c("GDI", "Set")])
}
# Plot: histogram of GDI values, faceted by set
plot <- ggplot(combined_data, aes(x = GDI)) +
geom_histogram(aes(fill = Set), binwidth = 0.005, color = "black", alpha = 0.8) +
scale_fill_manual(values = c("darkred", "#E69F00", "#56B4E9", "darkgreen", "#009E73", "#CC79A7")) +
#geom_vline(xintercept = 1, color = "red", linetype = "dashed", size = 1) + # Adjust or remove if not relevant
facet_wrap(~ Set, scales = "free_y", ncol = 2) +
labs(
title = "Distribution of Genomic Discrepancy Index (GDI) across datasets",
x = "Genomic Discrepancy Index (GDI)",
y = "Number of Populations"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5, face = "italic"),
strip.text = element_text(size = 10),
legend.position = "none"
)
print(plot)
# #Save
pdf("Results/species/taxus/GDI/RDA/figures/GDI_distribution_histogram_faceted.pdf")
print(plot)
dev.off()
## png
## 2
# Prepare an empty data frame to collect means
mean_GDI_df <- data.frame(Set = character(), Mean_GDI = numeric(), stringsAsFactors = FALSE)
for (set in list_set) {
GDI_df <- get(paste0("GDI_index_", set))
colnames(GDI_df)[2] <- "GDI"
mean_val <- mean(GDI_df$GDI, na.rm = TRUE)
mean_GDI_df <- rbind(mean_GDI_df, data.frame(Set = set, Mean_GDI = mean_val))
}
# Plot
barplot <- ggplot(mean_GDI_df, aes(x = Set, y = Mean_GDI, fill = Set)) +
geom_bar(stat = "identity", color = "black") +
ylab("Mean Genomic Discrepancy Index") +
xlab("Set") +
ggtitle("Mean GDI per Dataset") +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5, face = "italic"),
legend.position = "none"
)
print(barplot)
# Save plot
ggsave(
filename = "Results/species/taxus/GDI/RDA/figures/Mean_GDI_barplot_across_sets.pdf",
plot = barplot, width = 8, height = 6
)
#df GDI and elevation
df_GDI_cor <- data.frame(GDI_random_same_AF=GDI_index_random_same_AF$values_random_same_AF,GDI_LC=GDI_index_LC$values_LC,GDI_MC=GDI_index_MC$values_MC,GDI_all_outliers=GDI_index_all_outliers$values_all_outliers,elevation=as.numeric(meta_data_pop_order$Elevation.DEM_90m.),Latitude=as.numeric(meta_data_pop_order$Latitude))
#correlation
correlation_GDI <- cor(df_GDI_cor)
corrplot(correlation_GDI, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)
pdf("Results/species/taxus/GDI/RDA/figures/comparison/correlation_GDI_values_RDA_taxus.pdf");corrplot(correlation_GDI, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()
## png
## 2
Genomic offset was not used in the study entitled Demographic history shapes forest tree vulnerability to climate change but it was used in the study Genetic signatures of population marginality in four European conifers.
In the RDA framework, genomic offset is defined as the Euclidean distance between future and reference genomic compositions (or other periods). To calculate this distance, the genomic composition is estimated along the first two RDA axes of the model, and the values for each population are weighted according to the proportion of genetic variation explained by these axes.
We adapted the function from Capblancq and Forester (2021) so that our analysis focuses solely on calculating genomic offset for sampled populations, without interpolating or extrapolating across unsampled areas.
We modified the function used in Capblancq and Forester (2021) to calculate the genomic offset exclusively for our sampled points.
genomic_offset_pop <- function(RDA, K, Past_score, Future_score,meta_data){
# Weights based on axis eigen values
weights <- RDA$CCA$eig/sum(RDA$CCA$eig)
# Weighing the current and future adaptive indices based on the eigen values of the associated axes
Past_score_df <- Past_score[,-c(1)]
Proj_offset_past <- as.data.frame(do.call(cbind, lapply(1:K, function(x) Past_score_df[,x]*weights[x])))
Future_score_df <- Future_score[,-c(1)]
Proj_offset_fut <- as.data.frame(do.call(cbind, lapply(1:K, function(x) Future_score_df[,x]*weights[x])))
#Now we want to calculate the distance between present and future for each RDA axis before doing it for both axis simultaneously
Proj_offset <- list()
for(i in 1:K){
Proj_offset[[i]] <- abs(Proj_offset_past[[i]] - Proj_offset_fut[[i]])
names(Proj_offset)[i] <- paste0("RDA", as.character(i))
}
# Predict a global genetic offset, incorporating the K first axes weighted by their eigen values
ras <- Proj_offset[[1]] #we reused the format of the previous distance per RDA axis
ras[!is.na(ras)] <- unlist(lapply(1:nrow(Proj_offset_past), function(x) dist(rbind(Proj_offset_past[x,], Proj_offset_fut[x,]), method = "euclidean"))) #calculation of the euclidean distance on the non Na values of the previous distance -> that why we used the format of the previous distance, to be sure to only select the rows without Nas because they are not deal by euclidean distance,
#the euclidean distance is still calculated on the weighted data (not the previous distance but on the genomic composition weighted)
names(ras) <- "Global_offset"
Proj_offset_global <- ras
# Return prediction of genetic offset for each RDA axis and a global genetic offset for each population
return(list(Population=meta_data$Population,Proj_offset = Proj_offset, Proj_offset_global = Proj_offset_global, weights = weights[1:K]))
}
We can run the GO function for all the set of genetic markers for present and future climate:
list_RDA <- c("RDA_random_same_AF", "RDA_outliers_LC", "RDA_outliers_MC","RDA_all_outliers")
list_past_score <- c("adaptive_values_random_same_AF_past", "adaptive_values_LC_past", "adaptive_values_MC_past","adaptive_values_all_outliers_past")
list_present_score <- c("adaptive_values_random_same_AF_present", "adaptive_values_LC_present", "adaptive_values_MC_present","adaptive_values_all_outliers_present")
list_name_present <- c("random_same_AF_present", "LC_present", "MC_present","all_outliers_present")
list_name_future <- c("random_same_AF_future", "LC_future", "MC_future","all_outliers_future")
# Loop through each RDA model
for (i in 1:length(list_RDA)) {
# Get the RDA model, past score, present score, and future score
RDA <- get(list_RDA[i])
adaptive_past_score <- get(list_past_score[i])
present_score <- get(list_present_score[i])
# Calculate genomic offset for the present scenario
name_present <- list_name_present[i]
Run_genomic_offset_pop_present <- genomic_offset_pop(RDA, 2, adaptive_past_score, present_score, meta_data_pop_order)
# Create a data frame for the present scenario genomic offset
genomic_offset_df_present <- data.frame(Population = unlist(Run_genomic_offset_pop_present$Population),
Genomic_offset_random = unlist(Run_genomic_offset_pop_present$Proj_offset_global))
# Order the data frame by population and rename columns
Genomic_offset_Taxus_Adapcon_Gentree_RDA_present <- genomic_offset_df_present[order(genomic_offset_df_present$Population), ]
colnames(Genomic_offset_Taxus_Adapcon_Gentree_RDA_present) <- c("Population", paste0("GO_", name_present))
# Assign the present result to a new variable
assign(paste0("Genomic_offset_Taxus_Adapcon_Gentree_RDA_", name_present), Genomic_offset_Taxus_Adapcon_Gentree_RDA_present)
# Calculate genomic offset for the future scenario
for (j in 1:length(list_models_clim)) { # Loop through 5 future models
name_clim <- list_models_clim[j]
name_future <- paste0(list_name_future[i],"_", name_clim)
future_score <- get(paste0("adaptive_values_",list_name_future[i],"_", name_clim))
Run_genomic_offset_pop_future <- genomic_offset_pop(RDA, 2, adaptive_past_score, future_score, meta_data_pop_order)
# Create a data frame for the future scenario genomic offset
genomic_offset_df_future <- data.frame(Population = unlist(Run_genomic_offset_pop_future$Population),
Genomic_offset_random = unlist(Run_genomic_offset_pop_future$Proj_offset_global))
# Order the data frame by population and rename columns
Genomic_offset_Taxus_Adapcon_Gentree_RDA_future <- genomic_offset_df_future[order(genomic_offset_df_future$Population), ]
colnames(Genomic_offset_Taxus_Adapcon_Gentree_RDA_future) <- c("Population", paste0("GO_", name_future))
# Assign the future result to a new variable
assign(paste0("Genomic_offset_Taxus_Adapcon_Gentree_RDA_",name_future), Genomic_offset_Taxus_Adapcon_Gentree_RDA_future)
}
}
We can save the genomic offset values for downstream analysis.
list_set <- c("random_same_AF","LC","MC","all_outliers")
for(x in 1:length(list_set)){
set <- list_set[x]
df_tot_GO_predictions_clim_models_future <- data.frame(Population=GO_T_Adapcon_Gentree_RDA_random_same_AF_future_GFDL_ESM4[,1],GFDL_ESM4=get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_future_GFDL_ESM4"))[,2],IPSL_CM6A_LR=get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_future_IPSL_CM6A_LR"))[,2],MPI_ESM1_2_HR=get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_future_MPI_ESM1_2_HR"))[,2],MRI_ESM2_0=get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_future_MRI_ESM2_0"))[,2],UKESM1_0_LL=get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_future_UKESM1_0_LL"))[,2])#elevation=as.numeric(meta_data_pop_order$Elevation.DEM_90m.
correlation_future <- cor(df_tot_GO_predictions_clim_models_future[,-1])
corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)
pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/comparison/GCMs/correlation_future_predictions_GCMs_",set,".pdf"));corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()
}
list_models_clim<- c("GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL")
for(x in 1:length(list_set)){
set <- list_set[x]
all_GO <- data.frame()
for(i in 1:length(list_models_clim)){
model <- list_models_clim[i]
GO_data <- get(paste0("Genomic_offset_Taxus_Adapcon_Gentree_RDA_", set, "_future_", model))
colnames(GO_data) <- c("Population","GO")
all_GO <- rbind(all_GO,GO_data)
}
mean_GO <- aggregate(GO ~ Population, data = all_GO, FUN = mean, na.rm = TRUE)
colnames(mean_GO) <- c("Population", "Mean_GO")
assign(paste0("GO_T_Adapcon_Gentree_RDA_", set, "_future_mean"),mean_GO)
#save
file_save<- get(paste0("GO_T_Adapcon_Gentree_RDA_", set, "_future_mean"))
name <- paste0("GO_T_Adapcon_Gentree_RDA_", set, "_future_mean")
save(list = name,file=paste0("Results/species/taxus/Genomic_offset/RDA/data/",name,".Rdata"))
}
list_models_clim<- c("mean","GFDL_ESM4","IPSL_CM6A_LR","MPI_ESM1_2_HR","MRI_ESM2_0","UKESM1_0_LL")
# List of SNP sets
list_sets <- c("random_same_AF","LC","MC","all_outliers")
# Initialize an empty list to store data for all SNP sets
plot_data <- list()
# Load the RColorBrewer package for dynamic color palettes
library(RColorBrewer)
color_palette <- c("#FF69B4","red","blue","orange","black","green")
# Initialize a vector to keep track of top populations across all GCMs
meta_data_pop$pop_abbrev <- c("VIS","BAU","SDF","PAT","CHO","VOU","OLY","ANC","PDT","UMB","VAL","BRA","HAR","GOR","BOM","UNS","BUJA","BUJG","CAR","RAS","SUE","SAV","BLI","HHR","JUR","WAL","CED","RWM","YEW")
meta_data_f <- meta_data_pop[order(meta_data_pop$Population),]
# Loop over each SNP set
for (x in 1:length(list_sets)) {
set <- list_sets[x]
# Initialize an empty data frame for all models in the current SNP set
ranks_df <- data.frame()
all_top_populations <- character(0)
# Loop over each climatic model
for (i in 1:length(list_models_clim)) {
model <- list_models_clim[i]
# Get the genomic offset data for the current SNP set and model
GO_data <- get(paste0("GO_T_Adapcon_Gentree_RDA_", set, "_future_", model))
colnames(GO_data) <- c("Population", "GO")
GO_data_V2 <- merge(GO_data,meta_data_f[,c(1,2,7)],"Population")
# Rank the populations by genomic offset (highest GO gets rank 1)
GO_data_V2$Rank <- rank(-GO_data_V2$GO, ties.method = "first")
# Add columns for the climatic model and SNP set
GO_data_V2$Model <- model
GO_data_V2$Set <- set
# Append to ranks dataframe
ranks_df <- rbind(ranks_df, GO_data_V2)
# Find the top 1 population for the current model (highest GO = rank 1)
top_1_population <- GO_data_V2 %>% top_n(1, GO) %>% pull(Population)
# Add this top 1 population to the list of all top populations (avoid duplicates)
all_top_populations <- unique(c(all_top_populations, top_1_population))
}
# Convert 'Model' and 'Set' to factors for plotting
ranks_df$Model <- factor(ranks_df$Model, levels = list_models_clim)
ranks_df$Set <- factor(ranks_df$Set, levels = list_sets)
num_top_populations <- length(all_top_populations)
if (num_top_populations > length(color_palette)) {
color_palette <- colorRampPalette(brewer.pal(9, "Set1"))(num_top_populations)
}
# Assign a unique color to each of the top populations (those ranked 1 in any model)
population_colors <- setNames(color_palette[1:num_top_populations], all_top_populations)
# Create a new column to store color assignments (highlight only rank 1 populations for each GCM)
ranks_df$Highlight <- ifelse(ranks_df$Population %in% all_top_populations, ranks_df$Population, "Other")
# Map colors for top populations and grey for the rest
ranks_df$Color <- ifelse(ranks_df$Highlight == "Other", "grey", population_colors[ranks_df$Highlight])
# Store the data frame for this SNP set in the plot_data list
plot_data[[set]] <- ranks_df
# Modify the plot generation code
p <- ggplot(ranks_df, aes(x = Model, y = Rank, group = Population)) +
geom_line(aes(color = Highlight), alpha = 0.7, size = 0.8) + # Color only top populations
geom_point(aes(color = Highlight), size = 2) +
geom_text(
data = ranks_df %>% filter(Model == list_models_clim[1]),
aes(label = meta_data_f$pop_abbrev[match(Population, meta_data_f$Population)]),
hjust = 1,
vjust = 0,
size = 3.8,
nudge_x = -0.25,
fontface= "italic"
) +
scale_y_reverse() + # Reverse rank (1 = top)
scale_color_manual(values = c(population_colors, "Other" = "grey")) + # Keep top populations colored, others grey
labs(
title = paste("RDA Rank Variability for SNP Set:", set),
x = "Climatic Models", y = "Population Rank"
) +
theme_minimal() +
theme(
legend.position = "none",
panel.grid.minor = element_blank(),
strip.text = element_text(size = 10, face = "bold"),
legend.title = element_text(size = 10),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 12)
)
print(p) # Display plot for this SNP set
#save
pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/comparison/GCMs/GO_rank_comparison_",set,".pdf"));print(p);dev.off()
}
We are going to use the mean of the GO predictions from the five GCMs as the future GO.
We can plot the genomic offset values for each population. As before, we can use the ‘cut_interval’ function to rescale the values so that they align with the colour scale used for other genomic offsets, such as GF.
list_period <- c("present","future")
list_set <- c("random_same_AF","LC","MC","all_outliers")
for(i in 1:length(list_period)){
period <- list_period[i]
for(x in 1: length(list_set)){
set <- list_set[x]
if(period == "future"){
for (j in 1:length(list_models_clim)) {
model_clim <- list_models_clim[j]
name_file <- paste0(set,"_",period,"_",model_clim)
GO_df <- get(paste0("GO_T_Adapcon_Gentree_RDA_",name_file))
#first, we need to add the coordinates
Genomic_offset_coord <- merge(GO_df,meta_data_pop_order[,c(2,4,5)],"Population")
#transform longitude and latitude to numeric variables
Genomic_offset_coord <- Genomic_offset_coord %>% mutate(Longitude=as.numeric(Longitude),Latitude=as.numeric(Latitude))
colors <- c("#005000","#85B22C","#FFFFBF","#F76D5E","#E51932")
#background map
admin <- ne_countries(scale = "medium", returnclass = "sf")
plot <- ggplot(data = Genomic_offset_coord) +
geom_sf(data = admin, fill = gray(0.92), size = 0) +
geom_point(aes(x = Longitude, y = Latitude, fill = cut_interval(Genomic_offset_coord[,2], n = 5)), shape = 21,size=3, color = "black") +
scale_fill_manual(
values = colors,
labels = c("low values","","","","high values"),
drop = FALSE,na.translate = FALSE)+ # Ensure all levels are shown in the legend
geom_sf(data = admin, fill = NA, size = 0.1) +
coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +
xlab("Longitude") + ylab("Latitude") +
guides(fill = guide_legend(title = "Genomic offset")) +
ggtitle(paste0("Genomic offset across populations ",set," ",period,"_",model_clim)) +
theme_bw(base_size = 11) +
theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))
print(plot)
#save
pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/GO/GCMs_separate_and_mean/future/GO_RDA_standard_across_populations_",set,"_",period,"_",model_clim,".pdf"));print(plot);dev.off()
}
} else {
name_file <- paste0(set,"_",period)
GO_df <- get(paste0("GO_T_Adapcon_Gentree_RDA_",name_file))
#first, we need to add the coordinates
Genomic_offset_coord <- merge(GO_df,meta_data_pop_order[,c(2,4,5)],"Population")
#transform longitude and latitude to numeric variables
Genomic_offset_coord <- Genomic_offset_coord %>% mutate(Longitude=as.numeric(Longitude),Latitude=as.numeric(Latitude))
colors <- c("#005000","#85B22C","#FFFFBF","#F76D5E","#E51932")
#background map
admin <- ne_countries(scale = "medium", returnclass = "sf")
plot <- ggplot(data = Genomic_offset_coord) +
geom_sf(data = admin, fill = gray(0.92), size = 0) +
geom_point(aes(x = Longitude, y = Latitude, fill = cut_interval(Genomic_offset_coord[,2], n = 5)), shape = 21,size=3, color = "black") +
scale_fill_manual(
values = colors,
labels = c("low values","","","","high values"),
drop = FALSE,na.translate = FALSE)+ # Ensure all levels are shown in the legend
geom_sf(data = admin, fill = NA, size = 0.1) +
coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +
xlab("Longitude") + ylab("Latitude") +
guides(fill = guide_legend(title = "Genomic offset")) +
ggtitle(paste0("Genomic offset across populations ",set," ",period)) +
theme_bw(base_size = 11) +
theme(legend.position="right", panel.grid = element_blank(), strip.text = element_text(size=11),plot.title = element_text(hjust = 0.5,color = "Black",face="italic"))
print(plot)
#save
pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/GO/GCMs_separate_and_mean/present/GO_RDA_standard_across_populations_",set,"_",period,".pdf"));print(plot);dev.off()
}
}
}
#df
df_tot_GO_predictions_present <- data.frame(Population=GO_T_Adapcon_Gentree_RDA_random_same_AF_present[,1],GO_rand_same_AF_pres=GO_T_Adapcon_Gentree_RDA_random_same_AF_present[,2],GO_LC_pres=GO_T_Adapcon_Gentree_RDA_LC_present[,2],GO_MC_pres=GO_T_Adapcon_Gentree_RDA_MC_present[,2],GO_all_outliers_pres=GO_T_Adapcon_Gentree_RDA_all_outliers_present[,2],elevation=as.numeric(meta_data_pop_order$Elevation.DEM_90m.))
correlation_present <- cor(df_tot_GO_predictions_present[,-1])
corrplot(correlation_present, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)
df_tot_GO_predictions_future <- data.frame(Population=GO_T_Adapcon_Gentree_RDA_random_same_AF_present[,1],GO_rand_same_AF_Fut=GO_T_Adapcon_Gentree_RDA_random_same_AF_future_mean[,2],GO_LC_Fut=GO_T_Adapcon_Gentree_RDA_LC_future_mean[,2],GO_MC_Fut=GO_T_Adapcon_Gentree_RDA_MC_future_mean[,2],GO_all_outliers_Fut=GO_T_Adapcon_Gentree_RDA_all_outliers_future_mean[,2],elevation=as.numeric(meta_data_pop_order$Elevation.DEM_90m.))
correlation_future <- cor(df_tot_GO_predictions_future[,-1])
corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6)
pdf("Results/species/taxus/Genomic_offset/RDA/figures/comparison/correlation_GO_present_values_RDA_T_set_period.pdf");corrplot(correlation_present, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()
## png
## 2
pdf("Results/species/taxus/Genomic_offset/RDA/figures/comparison/correlation_GO_future_values_RDA_T_set_period.pdf");corrplot(correlation_future, method = "number", addrect = 2, col = c("darkorange","darkred"), type = "lower", tl.col = "black", tl.cex = 0.6, number.cex = 0.6);dev.off()
## png
## 2
We can also examine the GO trend over time to see if it is consistent.
list_set <- c("random_same_AF","LC","MC","all_outliers")
for(i in 1:length(list_set)){
set <- list_set[i]
period_1 <- c("present")
period_2 <- c("future_mean")
df_pres <- get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_",period_1))
df_fut <- get(paste0("GO_T_Adapcon_Gentree_RDA_",set,"_",period_2))
#df
df_f_pres <- data.frame(Population = df_pres[,1], Values = df_pres[,2])
df_f_fut <- data.frame(Population = df_fut[,1], Values = df_fut[,2])
#add period
df_f_pres$Period <- "Present"
df_f_fut$Period <- "Future"
#combine them
df_GO <- rbind(df_f_fut,df_f_pres)
df_GO$Period <- factor(df_GO$Period, levels = c("Present", "Future"))
#plot
trend_plot <- ggplot(df_GO, aes(x = Period, y = Values, group = Population)) +
geom_line(aes(color = Population)) + # Line plot, connecting the points for each population
geom_point(aes(color = Population)) + # Adding points for each value
theme_minimal() +
labs(title = paste0("Genomic offset trend across present and future ",set),
x = "Period",
y = "Genomic offset prediction") +
theme(
legend.position = "none",
strip.text = element_text(size = 11),
plot.title = element_text(hjust = 0.5, color = "Black", face = "italic")
)
plot(trend_plot)
#save
pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/comparison/GO_pres_future/Scatterplot_comparison_trend_GO_present_future_",set,"_Taxus.pdf"));print(trend_plot);dev.off()
}
While the values may be similar, we should consider that the ranking of populations could differ. We will investigate this further using graphical visualisation of the ranks.
list_period <- c("present","future_mean")
list_set <- c("random_same_AF","LC","MC","all_outliers")
for(i in 1:length(list_period)){
period <- list_period[i]
# Loop through each set and compare it with all subsequent sets
for(x in 1:(length(list_set)-1)){
for(y in (x+1):length(list_set)){
set1 <- list_set[x]
set2 <- list_set[y]
# Merge the two datasets
GO_RDA_merge <- merge(get(paste0("GO_T_Adapcon_Gentree_RDA_",set1,"_",period)),
get(paste0("GO_T_Adapcon_Gentree_RDA_",set2,"_",period)),
by = "Population")
# Create a dataframe for comparison
GO_RDA_set_df <- data.frame(Population = GO_RDA_merge$Population,
GO_RDA_1 = GO_RDA_merge[,2],
GO_RDA_2 = GO_RDA_merge[,3])
# Rank the values
GO_RDA_set_df$rank_1 <- rank(GO_RDA_set_df$GO_RDA_1)
GO_RDA_set_df$rank_2 <- rank(GO_RDA_set_df$GO_RDA_2)
# Merge with metadata to include country information
GO_RDA_set_df_meta <- merge(GO_RDA_set_df, meta_data_pop_order[,c(1,2)], by = "Population")
GO_RDA_set_df_meta$Country <- as.factor(GO_RDA_set_df_meta$Country)
# Create the scatter plot
Scatterplot <- ggplot(GO_RDA_set_df_meta, aes(x = rank_1, y = rank_2)) +
geom_point(aes(color = Country), size = 3) +
scale_colour_manual(name = "Countries",
values = c("orangered3","gold2","darkorchid3","navyblue","turquoise2","green3","blue","red","black","gray","orange","darkgreen")) +
geom_abline(intercept = 0, slope = 1, color = "gray60") +
ggtitle(paste0("Comparison GO rank of populations RDA ", set1, "/", set2, " ", period)) +
theme_bw()
# Print the plot
print(Scatterplot)
# Save the plot as a PDF
pdf(paste0("Results/species/taxus/Genomic_offset/RDA/figures/comparison/Scatterplot_comparison_rank_pop_",set1,"_",set2,"_",period,"_Taxus.pdf"));print(Scatterplot);dev.off()
}
}
}
Session info
packages <- c(
"vegan", "dplyr", "robust", "qvalue",
"rgdal", "sf","terra","ggplot2","radiant.data",
"textshape","rnaturalearth","scales","raster","corrplot",
"writexl","ggspatial"
)
info <- sessionInfo()
packages_used <- info$otherPkgs[packages]
data.frame(
Package = names(packages_used),
Version = sapply(packages_used, function(x) x$Version)
)
## Package Version
## vegan vegan 2.6-4
## dplyr dplyr 1.1.4
## robust robust 0.7-4
## qvalue qvalue 2.34.0
## rgdal rgdal 1.6-7
## sf sf 1.0-15
## terra terra 1.7-71
## ggplot2 ggplot2 3.5.2
## radiant.data radiant.data 1.6.3
## textshape textshape 1.7.5
## rnaturalearth rnaturalearth 1.0.1
## scales scales 1.3.0
## raster raster 3.6-26
## corrplot corrplot 0.92
## writexl writexl 1.5.0
## ggspatial ggspatial 1.1.9