rm(list = ls())
knitr::opts_chunk$set(
message = FALSE,
warning = FALSE,
cache = FALSE
)
This script will analyse the relationship between historical gene flow and genetic metrics of vulnerability to climate change across all species. Historical gene flow was approximated using the population-specific divergence index from BayeScan, and the three genetic indicators are genetic diversity, genetic load, and the genomic discrepancy index.
#Taxus
meta_data_vcf_Taxus <- read.csv("Data/Species/Taxus_baccata/Samples/samples_taxus_baccata_adapcon_gentree.csv",h=T,sep=";",dec=",")
meta_data_pop_Taxus <- read.csv("Data/Species/Taxus_baccata/Populations/taxus_sample_29pop.csv",h=T,sep=";",dec=",")
names(meta_data_pop_Taxus)[names(meta_data_pop_Taxus) == "Elevation.DEM_90m."] <- "Altitude"
#Pinea
load("Data/Species/Pinus_pineaV4/Individuals/meta_data_vcf.Rdata")
meta_data_vcf_Pinea <- data.frame(meta_data_vcf[,c(1,3)],Population=meta_data_vcf$Population_ID)
load("Data/Species/Pinus_pineaV4/Population/meta_data_pop.Rdata")
meta_data_pop_Pinea <- data.frame(Country=meta_data_pop[,c(7)],Population=meta_data_pop$Population_ID,N=meta_data_pop[,c(2)],meta_data_pop[,c(4,5,6)])
#Pinaster
load("Data/Species/Pinus_pinaster_draft/Individual/meta_data_vcf_final.Rdata")
meta_data_vcf_Pinaster <- data.frame(VCF_ID=meta_data_vcf_final[,c(1)],Population=meta_data_vcf_final$Population_ID)
load("Data/Species/Pinus_pinaster_draft/Population/meta_data_pop_final.Rdata")
meta_data_pop_Pinaster <- data.frame(Country=meta_data_pop_final[,c(3)],Population=meta_data_pop_final$Population_ID,N="Na",meta_data_pop_final[,c(4,5,6)])
#Sylvestris
meta_data_vcf_Sylvestris <- read.csv("Data/Species/Pinus_sylvestris/individual/Psyl_individuals.csv",h=T,sep=";",dec=",")
meta_data_vcf_Sylvestris <- data.frame(meta_data_vcf_Sylvestris[,c(1,3,2)])
load("Data/Species/Pinus_sylvestris/population/meta_data_pop_retained_order.Rdata")
meta_data_pop_Sylvestris <- data.frame(meta_data_pop_retained_order[,c(2,1)],N="Na",meta_data_pop_retained_order[,c(3,4)],Altitude="Na")
#Ash
load("Data/Species/Fraxinus_excelsior/Sample/meta_data_vcf_final_418_structure.Rdata")
meta_data_vcf_final_418_structure_format <- meta_data_vcf_final_418_structure
meta_data_vcf_final_418_structure_format$VCF_ID <- gsub("\\.", "-", meta_data_vcf_final_418_structure$VCF_ID)
meta_data_vcf_Ash <- data.frame(meta_data_vcf_final_418_structure_format[,c(1,3,2)])
meta_data_vcf_Ash_format <- data.frame(meta_data_vcf_final_418_structure[,c(1,3,2)])
load("Data/Species/Fraxinus_excelsior/Population/meta_data_pop_final_31_structure.Rdata")
meta_data_pop_Ash <- data.frame(meta_data_pop_final_31_structure[,c(3,2)],N="Na",meta_data_pop_final_31_structure[,c(4,5)],Altitude="Na")
#fagus forgenius
load("Data/Species/Fagus_sylvatica_forgenius/Sample/meta_data_vcf_final.Rdata")
meta_data_vcf_final$Country <- "NA"
meta_data_vcf_Fagus_forg <- data.frame(meta_data_vcf_final[,c(2,3,1)])
load("Data/Species/Fagus_sylvatica_forgenius/Population/meta_data_pop_ordered.Rdata")
meta_data_pop_Fagus_forg <- data.frame(meta_data_pop_ordered[,c(5,1)],N="Na",meta_data_pop_ordered[,c(3,2)],Altitude=meta_data_pop_ordered$ALTI)
list_species <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")
list_species_name <- c("taxus","pinea","pinaster","sylvestris","Ash","Fagus_forg")
for( x in 1:length(list_species)){
species <- list_species[x]
species_name <- list_species_name[x]
data <- read.table(paste0("Results/all_species/FST_index/Data/Population_Structure/data/",species_name,"/pop_order_bayescan_",species_name,".txt"),h=T)
assign(paste0("popname_",species),data)
}
list_species <- c("Taxus_bis","PineaV4_bis","Pinaster_bis","Sylvestris","Ash","Fagus_forg")#,
list_species_name <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")#,
for(x in 1:length(list_species)){
species <- list_species[x]
species_name <- list_species_name[x]
pop_name <- get(paste0("popname_",species_name))
pop_name$BAYESCAN_POP <- paste0("Fst", pop_name$BAYESCAN_POP)
df_bayecan <- read.table(paste0("Results/all_species/FST_index/Data/Population_Structure/results/Bayescan/FST_bayescan_",species,".txt"),h=T)
df_bayesan_FST_mean <- data.frame(colMeans(df_bayecan[,-1]))
df_bayesan_FST_mean$BAYESCAN_POP <- rownames(df_bayesan_FST_mean)
df_bayesan_FST_mean <- data.frame(BAYESCAN_POP=df_bayesan_FST_mean$BAYESCAN_POP,FST=df_bayesan_FST_mean$colMeans.df_bayecan....1..)
df_bayesan_FST_mean_merge <- merge(pop_name,df_bayesan_FST_mean,"BAYESCAN_POP")
#we can save to compare to hierfstat at the population level
Bayescan_FST_pop <- data.frame(Population=df_bayesan_FST_mean_merge$STRATA,Fst=df_bayesan_FST_mean_merge$FST)
Bayescan_FST_pop_order <- Bayescan_FST_pop[order(Bayescan_FST_pop$Population), ]
varname <- paste0("Bayescan_FST_pop_order_", species_name)
# Assign the object
assign(varname, Bayescan_FST_pop_order)
if(species =="Sylvestris"){
Bayescan_FST_pop_order_Sylvestris <- Bayescan_FST_pop_order_Sylvestris %>%
filter(Population != "CATA")
} else {
# Save the dynamically named object to an .Rdata file
save(list = varname, file = paste0("Results/all_species/FST_index/", varname, ".Rdata"))
}
}
combined_data <- data.frame()
# Loop to process each species and combine data
for (x in 1:length(list_species_name)) {
species <- list_species_name[x]
data <- get(paste0("Bayescan_FST_pop_order_", species))
data$Species <- species # Add species name column
combined_data <- rbind(combined_data, data)
# Plot per species (optional, keeps your original individual plots)
plot <- ggplot() +
geom_density(data = data, aes(x = Fst), fill = "lightblue", alpha = 0.8) +
labs(
title = paste0("Distribution of Population Specific Divergence Values\nacross populations of ", species),
x = "Population Specific Divergence index",
y = "Density (number of populations)"
) +
theme_bw() +
theme(
panel.grid = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
strip.text = element_text(size = 11),
plot.title = element_text(hjust = 0.5)
)
print(plot)
# Save to PDF
pdf(paste0("Results/all_species/FST_index/figures/Bayescan_PSD_", species, ".pdf"))
print(plot)
dev.off()
}
# Combined density plot
combined_plot <- ggplot(combined_data, aes(x = Fst, fill = Species)) +
geom_density(alpha = 0.6) +
labs(
title = "Distribution of Population Specific Divergence Values\nacross species",
x = "Population Specific Divergence index",
y = "Density (number of populations)"
) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_brewer(palette = "Set1") # Or use manual colors with scale_fill_manual
# Display combined plot
print(combined_plot)
# Save combined plot
pdf("Results/all_species/FST_index/figures/Bayescan_PSD_combined_all_species.pdf")
print(combined_plot)
dev.off()
## png
## 2
for(x in 1:length(list_species_name)){
species <- list_species_name[x]
data <- get(paste0("Bayescan_FST_pop_order_", species))
write.csv(data,paste0("Results/all_species/data_",species,".csv"))
}
x_breaks <- seq(0, 1, by = 0.3)
x_limits <- c(0, 1)
combined_data$Species <- factor(
combined_data$Species,
levels = c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")
)
combined_plot <- ggplot(combined_data, aes(x = Fst)) +
geom_histogram(binwidth = 0.01, color = "black", alpha = 0.8, aes(fill = Species)) +
scale_fill_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D","#9467BD","orange","yellow")) +
scale_x_continuous(limits = x_limits, breaks = x_breaks) +
facet_wrap(~ Species, scales = "free_y", ncol = 2) +
labs(
title = "Distribution of Population Specific Divergence Values across Species",
x = "Population Specific Divergence index",
y = "Count of populations"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
strip.text = element_text(size = 10)
)
# Show facetted plot
print(combined_plot)
ggsave(
filename = "Results/all_species/FST_index/figures/Bayescan_PSD_barplot_all_species_withfagus.pdf",
plot = combined_plot, width = 10, height = 7
)
Paper size figures
x_breaks <- seq(0, 1, by = 0.3)
x_limits <- c(0, 1)
species_colors <- c(
"Taxus" = "darkred",
"Pinea" = "darkgreen",
"Pinaster" = "#56B4E9",
"Sylvestris" = "#A0522D",
"Ash" = "orange",
"Fagus_forg" = "#9467BD")
# Loop through species and create individual plots
for (sp in levels(combined_data$Species)) {
species_data <- subset(combined_data, Species == sp)
individual_plot <- ggplot(species_data, aes(x = Fst)) +
geom_histogram(binwidth = 0.01, color = "black", fill = species_colors[[sp]], alpha = 0.8) +
scale_x_continuous(limits = x_limits, breaks = x_breaks) +
labs(
x = expression(italic(F)[ST]),
y = "Populations"
) +
theme_bw() +
theme(
axis.title.x = element_text(size = 25),
axis.title.y = element_text(size = 25),
axis.text.x = element_text(size = 24),
axis.text.y = element_text(size = 24)
)
print(individual_plot)
# Save each plot
ggsave(
filename = paste0(
"Results/all_species/FST_index/figures/",
"Bayescan_PSD_barplot_", sp, ".pdf"
),
plot = individual_plot, width = 6, height = 5
)
}
x_breaks <- seq(0, 1, by = 1)
x_limits <- c(0, 1)
species_colors <- c(
"Taxus" = "darkred",
"Pinea" = "darkgreen",
"Pinaster" = "#56B4E9",
"Sylvestris" = "#A0522D",
"Ash" = "orange",
"Fagus_forg" = "#9467BD")
# Loop through species and create individual plots
for (sp in levels(combined_data$Species)) {
species_data <- subset(combined_data, Species == sp)
individual_plot <- ggplot(species_data, aes(x = Fst)) +
geom_density(data = species_data, aes(x = Fst), fill = species_colors[[sp]], alpha = 0.8) +
scale_x_continuous(limits = x_limits, breaks = x_breaks) +
labs(
x = expression(italic(F)[ST])
) +
theme_bw() +
theme(
axis.title.x = element_text(size = 28),
axis.text.x = element_text(size = 28),
axis.text.y=element_blank(),axis.ticks=element_blank(),
axis.title.y=element_blank()
)
print(individual_plot)
# Save each plot
ggsave(
filename = paste0(
"Results/all_species/FST_index/figures/",
"Bayescan_PSD_boxplot_new_", sp, ".pdf"
),
plot = individual_plot, width = 6, height = 5
)
}
Good figure for Taxus, Pinea, Sylvestris and Fagus
list_species_all <- c("Taxus","Pinea","Pinaster","Sylvestris","Fagus_forg","Ash")
list_color <- c("#FFCCCC","#b6ddb6","#ADD8E6","#DEB887","#e8defa","#FFE5B4")
list_point_size <- c(1.5,0.85,1.9,0.6,1.4,1.15)
for(x in seq_along(list_species_all)) {
species <- list_species_all[x]
shapefile_color <- list_color[x]
meta_data <- get(paste0("meta_data_pop_", species))
fst_data <- subset(combined_data, Species == species)
fst_data_coord <- merge(fst_data, meta_data[, c(2,5,4)], by = "Population")
fst_data_coord$Longitude <- as.numeric(fst_data_coord$Longitude)
fst_data_coord$Latitude <- as.numeric(fst_data_coord$Latitude)
fst_sf <- st_as_sf(fst_data_coord, coords = c("Longitude", "Latitude"), crs = 4326)
fst_proj <- st_transform(fst_sf, crs = 3035)
coords <- st_coordinates(fst_proj)
fst_data_coord$x <- coords[, 1]
fst_data_coord$y <- coords[, 2]
shapefile <- get(paste0("shapefile_", species))
admin <- ne_countries(scale = "medium", returnclass = "sf")
admin_proj <- st_transform(admin, crs = 3035)
lab_no_trailing_zeros <- function(x) {
sub("\\.?0+$", "", sprintf("%.2f", x))
}
# Plot
map_pop <- ggplot() +
geom_sf(data = admin_proj, fill = gray(0.92), size = 0.2, color = "white") +
geom_sf(data = shapefile, fill = shapefile_color, color = "gray60", size = 0.2) +
geom_point( data = fst_data_coord, aes(x = x, y = y, fill = Fst),
shape = 21,
color = "black",
size = 3.3,
alpha = 1 ) +
scale_fill_gradient(
low = "#f7f5f5",
high = "#E51932",
labels = lab_no_trailing_zeros,
breaks = scales::pretty_breaks(5)
)+ # Gradient based on Fst
scale_size_continuous(range = c(1, 5)) +
coord_sf(
crs = 3035,
xlim = c(2379349, 7002838),
ylim = c(1029721, 4519987),
expand = FALSE ) +
labs( title = paste("Genetic Structure for", species),
x = "Longitude", y = "Latitude",
fill = expression(italic(F)[ST]) ) +
theme_minimal() +
theme(
panel.background = element_rect(fill = "azure", color = NA),
plot.title = element_text(face = "bold", hjust = 0.5) )
print(map_pop)
pdf(paste0("Results/all_species/FST_index/maps/Map_Fst_",species,".pdf"))
print(map_pop)
dev.off()
}
Good figure for Ash and Pinaster
list_species_all <- c("Taxus","Pinea","Pinaster","Sylvestris","Fagus_forg","Ash")
list_color <- c("#FFCCCC","#d9f2d9","#ADD8E6","#DEB887","#e8defa","#FFE5B4")
list_point_size <- c(1.5,0.85,1.9,0.6,1.4,1.15)
for(x in seq_along(list_species_all)) {
species <- list_species_all[x]
shapefile_color <- list_color[x]
# Load metadata and FST data
meta_data <- get(paste0("meta_data_pop_", species))
fst_data <- subset(combined_data, Species == species)
# Merge FST with coordinates
fst_data_coord <- merge(fst_data, meta_data[, c(2,5,4)], by = "Population")
fst_data_coord$Longitude <- as.numeric(fst_data_coord$Longitude)
fst_data_coord$Latitude <- as.numeric(fst_data_coord$Latitude)
# Convert to sf object and project
fst_sf <- st_as_sf(fst_data_coord, coords = c("Longitude", "Latitude"), crs = 4326)
fst_proj <- st_transform(fst_sf, crs = 3035)
coords <- st_coordinates(fst_proj)
fst_data_coord$x <- coords[,1]
fst_data_coord$y <- coords[,2]
min_val <- min(fst_data_coord$Fst, na.rm = TRUE)
max_val <- max(fst_data_coord$Fst, na.rm = TRUE)
# Compute raw step
raw_step <- (max_val - min_val)/4
# Choose a "nice" step from predefined options
nice_steps <- c(0.01, 0.02, 0.05, 0.1, 0.2, 0.5)
step <- nice_steps[which.min(abs(nice_steps - raw_step))]
# Compute breaks
break_start <- floor(min_val/step)*step
break_end <- ceiling(max_val/step)*step
breaks_nice <- seq(break_start, break_end, by = step)
# Force exactly 5 breaks (4 classes)
if(length(breaks_nice) < 5){
breaks_nice <- seq(break_start, break_end, length.out = 5)
}
if(length(breaks_nice) > 5){
breaks_nice <- breaks_nice[seq(1, length(breaks_nice), length.out = 5)]
}
# Create factor for reference
fst_data_coord$Fst_class <- cut(fst_data_coord$Fst,
breaks = breaks_nice,
include.lowest = TRUE,
right = TRUE)
# Load shapefile and admin boundaries
shapefile <- get(paste0("shapefile_", species))
admin <- ne_countries(scale = "medium", returnclass = "sf")
admin_proj <- st_transform(admin, crs = 3035)
map_pop <- ggplot() +
geom_sf(data = admin_proj, fill = gray(0.92), size = 0.2, color = "white") +
geom_sf(data = shapefile, fill = shapefile_color, color = "gray60", size = 0.2) +
geom_point(
data = fst_data_coord,
aes(x = x, y = y, fill = Fst), # Use actual FST for gradient
shape = 21,
color = "black",
size = 3.3,
alpha = 1
) +
scale_fill_gradient(
low = "#f7f5f5",
high = "#E51932",
breaks = breaks_nice # Show exactly 4 intervals in legend
) +
scale_size_continuous(range = c(1, 5)) +
coord_sf(
crs = 3035,
xlim = c(2379349, 7002838),
ylim = c(1029721, 4519987),
expand = FALSE
) +
labs(
title = paste("Genetic Structure for", species),
x = "Longitude",
y = "Latitude",
fill = expression(italic(F)[ST])
) +
theme_minimal() +
theme(
panel.background = element_rect(fill = "azure", color = NA),
plot.title = element_text(face = "bold", hjust = 0.5)
)
print(map_pop)
pdf(paste0("Results/all_species/FST_index/maps/Map_Fst_good_Ash_Pinaster_",species,".pdf"))
print(map_pop)
dev.off()
}
Plot for Sylvestris with all the populations
list_species_all <- c("Sylvestris")
list_color <- c("#DEB887")
for(x in seq_along(list_species_all)) {
species <- list_species_all[x]
shapefile_color <- list_color[x]
meta_data <- get(paste0("meta_data_pop_", species))
fst_data <- subset(combined_data, Species == species)
fst_data_coord <- merge(fst_data, meta_data[, c(2,5,4)], by = "Population")
fst_data_coord$Longitude <- as.numeric(fst_data_coord$Longitude)
fst_data_coord$Latitude <- as.numeric(fst_data_coord$Latitude)
fst_sf <- st_as_sf(fst_data_coord, coords = c("Longitude", "Latitude"), crs = 4326)
fst_proj <- st_transform(fst_sf, crs = 3035)
coords <- st_coordinates(fst_proj)
fst_data_coord$x <- coords[, 1]
fst_data_coord$y <- coords[, 2]
shapefile <- get(paste0("shapefile_", species))
admin <- ne_countries(scale = "medium", returnclass = "sf")
admin_proj <- st_transform(admin, crs = 3035)
# Plot
map_pop <- ggplot() +
geom_sf(data = admin_proj, fill = gray(0.92), size = 0.2, color = "white") +
geom_sf(data = shapefile, fill = shapefile_color, color = "gray60", size = 0.2) +
geom_point(
data = fst_data_coord,
aes(x = x, y = y, fill = Fst),
shape = 21,
color = "black",
size = 3.3,
alpha = 1
) +
scale_fill_gradient(low = "#f7f5f5", high = "#E51932") +
scale_size_continuous(range = c(1, 5)) +
coord_sf(
crs = 3035,
xlim = c(2379349, 9000000),
ylim = c(1029721, 9000000),
expand = FALSE
) +
labs(
title = paste("Genetic Structure for", species),
x = "Longitude", y = "Latitude",
fill = expression(italic(F)[ST])
) +
theme_minimal() +
theme(
panel.background = element_rect(fill = "azure", color = NA),
plot.title = element_text(face = "bold", hjust = 0.5)
)
print(map_pop)
pdf(paste0("Results/all_species/FST_index/maps/Map_Fst_",species,"_supplementary_russian_pop.pdf"))
print(map_pop)
dev.off()
}
list_species <- c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")
for(x in 1:length(list_species)){
name_species <- list_species[x]
meta_data <- get(paste0("meta_data_pop_",name_species))
data <- read.csv(paste0("Results/all_species/Genetic_diversity/",name_species,"/Hs_",name_species,".csv"),sep=",",dec=",",h=T)
data <- data[, -1]
data$Hs <- as.numeric(data$Hs)
data_full <- merge(data,meta_data[,c(2,4,5)],"Population")
assign(paste0("Genetic_diversity_",name_species),data_full)
}
for(x in 1:length(list_species_name)){
species <- list_species_name[x]
data <- get(paste0("Genetic_diversity_", species))
write.csv(data,paste0("Results/all_species/data_",species,".csv"))
}
combined_data <- data.frame()
for (x in 1:length(list_species)) {
species <- list_species[x]
data <- get(paste0("Genetic_diversity_", species))
data$Species <- species
combined_data <- rbind(combined_data, data)
}
x_breaks <- seq(0, 0.5, by = 0.05)
x_limits <- c(0, 0.5)
for (species in list_species) {
data <- subset(combined_data, Species == species)
plot <- ggplot(data, aes(x = Hs)) +
geom_histogram(binwidth = 0.05, fill = "lightblue", color = "black", alpha = 0.8) +
scale_x_continuous(limits = x_limits, breaks = x_breaks) + # fixed x-axis
# No y limit — y axis is free
labs(
title = paste0("Distribution of Hs values \nacross populations of ", species),
x = "Hs genetic diversity index",
y = "Count of populations"
) +
theme_bw() +
theme(
panel.grid = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
strip.text = element_text(size = 11),
plot.title = element_text(hjust = 0.5)
)
print(plot)
# Save to PDF
pdf(paste0("Results/all_species/Genetic_diversity/figures/Hs_hierfstat_genetic_diversity_", species, ".pdf"))
print(plot)
dev.off()
}
combined_data$Species <- factor(
combined_data$Species,
levels = c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")
)
# Facetted histogram with custom order
combined_plot <- ggplot(combined_data, aes(x = Hs)) +
geom_histogram(binwidth = 0.05, aes(fill = Species), color = "black", alpha = 0.8) +
scale_fill_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D","#9467BD","orange","yellow")) +#,"darkviolet"
scale_x_continuous(limits = c(0, 0.35), breaks = seq(0, 0.35, by = 0.05),labels = scales::label_number(accuracy = 0.01)) +
facet_wrap(~ Species, scales = "free_y", ncol = 2) + # Two columns: top = taxus & pinea; bottom = pinaster & sylvestris
labs(
title = "Distribution of Hs values \nacross populations all species ",
x = "Hs genetic diversity index",
y = "Count of populations"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
strip.text = element_text(size = 10)
)
# Show the plot
print(combined_plot)
pdf(paste0("Results/all_species/Genetic_diversity/figures/Hs_hierfstat_genetic_diversity_all_species_", species, ".pdf"))
print(combined_plot)
dev.off()
## png
## 2
combined_data_v1 <- combined_data
combined_data_v1$Species <- factor(
combined_data_v1$Species,
levels = c("Taxus", "Pinea", "Pinaster", "Ash", "Fagus_forg","Sylvestris"),
labels = c("T. baccata", "P. pinea", "P. pinaster", "F. excelsior", "F. sylvatica", "P. sylvestris")
)
# Boxplot with jittered points overlaid
boxplot_jitter_plot <- ggplot(combined_data_v1, aes(x = Species, y = Hs, fill = Species)) +
geom_jitter(aes(color = Species), width = 0.2, alpha = 0.6, size = 1.5) +
geom_boxplot(outlier.shape = NA, alpha = 0.6) +
scale_fill_manual(values = c("darkred", "darkgreen", "#56B4E9","orange","#9467BD","#A0522D")) +
scale_color_manual(values = c("darkred", "darkgreen", "#56B4E9","orange","#9467BD","#A0522D")) +
scale_x_discrete(labels = c(
"T. baccata" = expression(italic("T. baccata")),
"P. pinea" = expression(italic("P. pinea")),
"P. pinaster" = expression(italic("P. pinaster")),
"F. excelsior" = expression(italic("F. excelsior")),
"F. sylvatica" = expression(italic("F. sylvatica")),
"P. sylvestris" = expression(italic("P. sylvestris"))
))+
labs(
title = "Boxplot of Hs values with jittered points by species",
x = "Species",
y = "Hs genetic diversity index"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1)
)
# Display plot
print(boxplot_jitter_plot)
# Save to PDF
pdf("Results/all_species/Genetic_diversity/figures/Hs_boxplot_jitter_all_species.pdf", width = 8, height = 6)
print(boxplot_jitter_plot)
dev.off()
## png
## 2
Cumulative distribution functions genetic diversity
combined_data_bis <- combined_data
combined_data_bis$Species <- factor(
combined_data_bis$Species,
levels = c("Taxus", "Pinea", "Pinaster", "Ash", "Fagus_forg","Sylvestris"),
labels = c("T. baccata", "P. pinea", "P. pinaster", "F. excelsior", "F. sylvatica", "P. sylvestris")
)
species_labels <- c(
"T. baccata" = "italic(T.~baccata)",
"P. pinea" = "italic(P.~pinea)",
"P. pinaster" = "italic(P.~pinaster)",
"F. excelsior" = "italic(F.~excelsior)",
"F. sylvatica" = "italic(F.~sylvatica)",
"P. sylvestris" = "italic(P.~sylvestris)"
)
cum_plot <- ggplot(combined_data_bis, aes(x = scale(Hs), colour = Species)) +
stat_ecdf(
aes(y = after_stat(y * 100)),
geom = "line",
linewidth = 1.15
) +
scale_color_manual(
values = c(
"darkred", "darkgreen", "#56B4E9",
"orange", "#9467BD", "#A0522D"
),
labels = parse(text = species_labels)
) +
scale_y_continuous(
limits = c(0, 100),
breaks = seq(0, 100, 20)
) +
labs(
x = "Scaled genetic diversity (Hs)",
y = "Cumulative percent",
colour = "Species"
) +
theme_bw() +
theme(
axis.title.x = element_text(size = 14),
axis.text.x = element_text(size = 13),
axis.title.y = element_text(size = 14),
axis.text.y = element_text(size = 13),
legend.title = element_text(size = 14),
legend.text = element_text(size = 13),
panel.grid.minor = element_blank(),
panel.grid.major = )
plot(cum_plot)
# Save to PDF
pdf("Results/all_species/Genetic_diversity/figures/Hs_cumulativeplot_all_species.pdf", width = 8, height = 6)
print(cum_plot)
dev.off()
## png
## 2
list_species <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")
df_correlation_GD <- data.frame(
Species = character(),
Cor_Hs_PSD = numeric(),
pvalue_Hs_PSD = numeric(),
Cor_Hs_Longitude = numeric(),
pvalue_Hs_Longitude = numeric(),
Cor_Hs_Latitude = numeric(),
pvalue_Hs_Latitude = numeric()
)
for(x in 1:length(list_species)){
species <- list_species[x]
data_FST <- get(paste0("Bayescan_FST_pop_order_", species))
#GD
data_GD <- get(paste0("Genetic_diversity_",species))
data_tot <- merge(data_FST,data_GD,"Population")
data_tot$Longitude <- as.numeric(data_tot$Longitude)
data_tot$Latitude <- as.numeric(data_tot$Latitude)
assign(paste0("df_GD_PSD_",species),data_tot)
#GD
cor_Hs_PSD <- cor(data_tot$Fst,data_tot$Hs)
pvalue_Hs_PSD <- cor.test(data_tot$Fst,data_tot$Hs)
cor_Hs_Longitude <- cor(data_tot$Longitude,data_tot$Hs)
pvalue_Hs_Longitude <- cor.test(data_tot$Longitude,data_tot$Hs)
cor_Hs_Latitude <- cor(data_tot$Latitude,data_tot$Hs)
pvalue_Hs_Latitude <- cor.test(data_tot$Latitude,data_tot$Hs)
df_correlation_GD <- rbind(df_correlation_GD,data.frame(Species=species,Cor_Hs_PSD=cor_Hs_PSD,pvalue_Hs_PSD=pvalue_Hs_PSD$p.value,Cor_Hs_Longitude=cor_Hs_Longitude,pvalue_Hs_Longitude=pvalue_Hs_Longitude$p.value,Cor_Hs_Latitude=cor_Hs_Latitude,pvalue_Hs_Latitude=pvalue_Hs_Latitude$p.value))
}
df_gt_table <- df_correlation_GD %>%
dplyr::mutate(
`Hs ~ PSD` = paste0(round(Cor_Hs_PSD, 2), " (p=", signif(pvalue_Hs_PSD, 2), ")"),
`Hs ~ Longitude` = paste0(round(Cor_Hs_Longitude, 2), " (p=", signif(pvalue_Hs_Longitude, 2), ")"),
`Hs ~ Latitude` = paste0(round(Cor_Hs_Latitude, 2), " (p=", signif(pvalue_Hs_Latitude, 2), ")")
) %>%
dplyr::select(Species, `Hs ~ PSD`,`Hs ~ Longitude`,`Hs ~ Latitude`)
# Create a nice gt table
gt_table <- df_gt_table %>%
gt() %>%
tab_header(
title = "Pearson's correlation between genetic diversity and historical gene flow"
) %>%
cols_label(
Species = "Species",
`Hs ~ PSD` = "Hs ~ PSD",
`Hs ~ Longitude` = "Hs ~ Longitude",
`Hs ~ Latitude` = "Hs ~ Latitude",
) %>%
tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
) %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
heading.subtitle.font.size = 11,
table.width = pct(100)
)
gt_table
| Pearson's correlation between genetic diversity and historical gene flow | |||
| Species | Hs ~ PSD | Hs ~ Longitude | Hs ~ Latitude |
|---|---|---|---|
| Taxus | -0.77 (p=1.1e-06) | 0.46 (p=0.012) | 0.27 (p=0.16) |
| Pinea | -0.93 (p=7.4e-22) | -0.37 (p=0.0088) | -0.05 (p=0.75) |
| Pinaster | -0.72 (p=1.1e-14) | -0.29 (p=0.0072) | 0.32 (p=0.0028) |
| Sylvestris | -0.69 (p=7.8e-13) | 0.45 (p=3.1e-05) | 0.42 (p=0.00011) |
| Ash | -0.66 (p=6.3e-05) | 0.09 (p=0.63) | 0.3 (p=0.1) |
| Fagus_forg | -0.94 (p=3.7e-24) | 0.46 (p=0.00069) | 0.08 (p=0.58) |
df_all <- rbind(df_GD_PSD_Taxus,df_GD_PSD_Pinea,df_GD_PSD_Pinaster,df_GD_PSD_Sylvestris,df_GD_PSD_Fagus_forg,df_GD_PSD_Ash)
#std
cor_GDstd_all <- cor(df_all$Fst,df_all$Hs)
pval_GDstd_all <- cor.test(df_all$Fst,df_all$Hs)
cor_GDstd_all
## [1] -0.6259541
pval_GDstd_all$p.value
## [1] 1.197892e-36
Overall, combining the genetic diversity provided a very consistent results comapred to individual species with an overall negative Pearson’s correlation of -0.68 associated to a 6.23 e-34 pvalue.
#merge all data
df_GD_PSD_Taxus$Species <- factor("Taxus")
df_GD_PSD_Pinea$Species <- factor("Pinea")
df_GD_PSD_Pinaster$Species <- factor("Pinaster")
df_GD_PSD_Sylvestris$Species <- factor("Sylvestris")
df_GD_PSD_Ash$Species <- factor("Ash")
df_GD_PSD_Fagus_forg$Species <- factor("Fagus_forg")
df_merge_species <- rbind(df_GD_PSD_Taxus,df_GD_PSD_Pinea,df_GD_PSD_Pinaster,df_GD_PSD_Sylvestris,df_GD_PSD_Ash,df_GD_PSD_Fagus_forg)
#linear model
Ratio_lm_GD <- lm(Hs~Fst+Species, data=df_merge_species)
anova(Ratio_lm_GD)
## Analysis of Variance Table
##
## Response: Hs
## Df Sum Sq Mean Sq F value Pr(>F)
## Fst 1 0.33069 0.33069 770.13 < 2.2e-16 ***
## Species 5 0.37718 0.07544 175.68 < 2.2e-16 ***
## Residuals 317 0.13612 0.00043
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Ratio_lm_GD)
##
## Call:
## lm(formula = Hs ~ Fst + Species, data = df_merge_species)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.063576 -0.012525 -0.001061 0.007308 0.085494
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.217851 0.004149 52.502 < 2e-16 ***
## Fst -0.278356 0.009877 -28.183 < 2e-16 ***
## SpeciesPinea 0.123006 0.005402 22.769 < 2e-16 ***
## SpeciesPinaster 0.046559 0.004463 10.432 < 2e-16 ***
## SpeciesSylvestris 0.086207 0.004608 18.708 < 2e-16 ***
## SpeciesAsh 0.038909 0.005442 7.149 6.05e-12 ***
## SpeciesFagus_forg 0.025019 0.004956 5.049 7.52e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02072 on 317 degrees of freedom
## Multiple R-squared: 0.8387, Adjusted R-squared: 0.8357
## F-statistic: 274.8 on 6 and 317 DF, p-value: < 2.2e-16
#ratio per species
list_species <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")
all_df_lm_results_Hs_fst <- data.frame(
Species = character(),
Intercept = numeric(),
Slope = numeric(),
R2 = numeric(),
p_value = numeric()
)
for(x in 1:length(list_species)){
species <- list_species[x]
df <- get(paste0("df_GD_PSD_",species))
Ratio_lm_GD <- lm(Hs~Fst, data=df)
anova(Ratio_lm_GD)
summary_model <- summary(Ratio_lm_GD)
# Extract coefficients and stats
intercept <- coef(summary_model)[1, 1]
slope <- coef(summary_model)[2, 1]
p_value <- coef(summary_model)[2, 4]
r2 <- summary_model$r.squared
# Add to results table
all_df_lm_results_Hs_fst <- rbind(
all_df_lm_results_Hs_fst,
data.frame(
Species = species,
Intercept = intercept,
Slope = slope,
R2 = r2,
p_value = p_value))
}
all_df_lm_results_Hs_fst_long_lat <- data.frame(
Species = character(),
Intercept = numeric(),
Slope_Fst = numeric(),
R2_partial_Fst = numeric(),
p_value_Fst = numeric(),
R2_geo = numeric(),
R2_full = numeric(),
Pvalue_long = numeric(),
Pvalue_lat = numeric()
)
for (x in seq_along(list_species)) {
species <- list_species[x]
df <- get(paste0("df_GD_PSD_", species))
## Geography-only model
model_geo <- lm(Hs ~ Longitude + Latitude, data = df)
summary_geo <- summary(model_geo)
## Full model (geography + Fst)
model_full <- lm(Hs ~ Longitude + Latitude + Fst, data = df)
summary_full <- summary(model_full)
## Partial R2 for Fst
R2_partial_Fst <- summary_full$r.squared - summary_geo$r.squared
## Extract coefficients for Fst
intercept <- coef(summary_full)[1, 1]
slope_Fst <- coef(summary_full)["Fst", 1]
p_value_Fst <- coef(summary_full)["Fst", 4]
## Geography stats
R2_geo <- summary_geo$r.squared
R2_full <- summary_full$r.squared
p_value_long <- coef(summary_geo)["Longitude", 4]
p_value_lat <- coef(summary_geo)["Latitude", 4]
## Store results
all_df_lm_results_Hs_fst_long_lat <- rbind(
all_df_lm_results_Hs_fst_long_lat,
data.frame(
Species = species,
Intercept = intercept,
Slope_Fst = slope_Fst,
R2_partial_Fst = R2_partial_Fst,
p_value_Fst = p_value_Fst,
R2_geo = R2_geo,
R2_full = R2_full,
Pvalue_long = p_value_long,
Pvalue_lat = p_value_lat
)
)
}
View(all_df_lm_results_Hs_fst_long_lat)
Excluding atypical populations in term of Fst:
#merge all data
df_GD_PSD_Taxus$Species <- factor("Taxus")
df_GD_PSD_Pinea$Species <- factor("Pinea")
df_GD_PSD_Pinaster$Species <- factor("Pinaster")
df_GD_PSD_Sylvestris$Species <- factor("Sylvestris")
df_GD_PSD_Ash$Species <- factor("Ash")
df_GD_PSD_Fagus_forg$Species <- factor("Fagus_forg")
df_merge_species <- rbind(df_GD_PSD_Taxus,df_GD_PSD_Pinea,df_GD_PSD_Pinaster,df_GD_PSD_Sylvestris,df_GD_PSD_Ash,df_GD_PSD_Fagus_forg)
df_merge_species_bis <- df_merge_species %>%
dplyr::group_by(Species) %>%
dplyr::mutate(
Q1 = quantile(Fst, 0.25, na.rm = TRUE),
Q3 = quantile(Fst, 0.75, na.rm = TRUE),
IQR = Q3 - Q1,
lower = Q1 - 2.4 * IQR,
upper = Q3 + 2.4 * IQR
) %>%
filter(Fst >= lower & Fst <= upper) %>%
ungroup() %>%
dplyr::select(-Q1, -Q3, -IQR, -lower, -upper)
#ratio per species
list_species <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")
all_df_lm_results_Hs_fst_wo_outliers <- data.frame(
Species = character(),
Intercept = numeric(),
Slope = numeric(),
R2 = numeric(),
p_value = numeric()
)
for(x in 1:length(list_species)){
species <- list_species[x]
df <- df_merge_species_bis %>% filter(Species == species)
Ratio_lm_GD <- lm(Hs~Fst, data=df)
anova(Ratio_lm_GD)
summary_model <- summary(Ratio_lm_GD)
# Extract coefficients and stats
intercept <- coef(summary_model)[1, 1]
slope <- coef(summary_model)[2, 1]
p_value <- coef(summary_model)[2, 4]
r2 <- summary_model$r.squared
# Add to results table
all_df_lm_results_Hs_fst_wo_outliers <- rbind(
all_df_lm_results_Hs_fst_wo_outliers,
data.frame(
Species = species,
Intercept = intercept,
Slope = slope,
R2 = r2,
p_value = p_value))
}
View(all_df_lm_results_Hs_fst_wo_outliers)
all_df_lm_results_Hs_fst_long_lat_wo_outliers <- data.frame(
Species = character(),
Intercept = numeric(),
Slope_Fst = numeric(),
R2_partial_Fst = numeric(),
p_value_Fst = numeric(),
R2_geo = numeric(),
R2_full = numeric(),
Pvalue_long = numeric(),
Pvalue_lat = numeric()
)
for (x in seq_along(list_species)) {
species <- list_species[x]
df <- df_merge_species_bis %>% filter(Species == species)
## Geography-only model
model_geo <- lm(Hs ~ Longitude + Latitude, data = df)
summary_geo <- summary(model_geo)
## Full model (geography + Fst)
model_full <- lm(Hs ~ Longitude + Latitude + Fst, data = df)
summary_full <- summary(model_full)
## Partial R2 for Fst
R2_partial_Fst <- summary_full$r.squared - summary_geo$r.squared
## Extract coefficients for Fst
intercept <- coef(summary_full)[1, 1]
slope_Fst <- coef(summary_full)["Fst", 1]
p_value_Fst <- coef(summary_full)["Fst", 4]
## Geography stats
R2_geo <- summary_geo$r.squared
R2_full <- summary_full$r.squared
p_value_long <- coef(summary_geo)["Longitude", 4]
p_value_lat <- coef(summary_geo)["Latitude", 4]
## Store results
all_df_lm_results_Hs_fst_long_lat_wo_outliers <- rbind(
all_df_lm_results_Hs_fst_long_lat_wo_outliers,
data.frame(
Species = species,
Intercept = intercept,
Slope_Fst = slope_Fst,
R2_partial_Fst = R2_partial_Fst,
p_value_Fst = p_value_Fst,
R2_geo = R2_geo,
R2_full = R2_full,
Pvalue_long = p_value_long,
Pvalue_lat = p_value_lat
)
)
}
View(all_df_lm_results_Hs_fst_long_lat_wo_outliers)
Without atypical populations regarding Fst
df_renamed <- df_merge_species %>%
filter(Species != "Fagus")
species_labels <- c(
"Taxus" = "italic(T.~baccata)",
"Pinea" = "italic(P.~pinea)",
"Pinaster" = "italic(P.~pinaster)",
"Sylvestris" = "italic(P.~sylvestris)",
"Ash" = "italic(F.~excelsior)",
"Fagus_forg" = "italic(F.~sylvatica)"
)
# Apply labels to the data
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))
df_renamed$Species <- factor(df_renamed$Species, levels = c(
"italic(T.~baccata)",
"italic(P.~pinea)",
"italic(P.~pinaster)",
"italic(F.~excelsior)",
"italic(F.~sylvatica)",
"italic(P.~sylvestris)"
))
df_renamed <- df_renamed %>%
group_by(Species) %>%
mutate(st_Hs = as.numeric(scale(Hs))) %>%
ungroup()
df_filtered <- df_renamed %>%
dplyr::group_by(Species) %>%
dplyr::mutate(
Q1 = quantile(Fst, 0.25, na.rm = TRUE),
Q3 = quantile(Fst, 0.75, na.rm = TRUE),
IQR = Q3 - Q1,
lower = Q1 - 2.4 * IQR,
upper = Q3 + 2.4 * IQR
) %>%
filter(Fst >= lower & Fst <= upper) %>%
ungroup() %>%
dplyr::select(-Q1, -Q3, -IQR, -lower, -upper)
# Define colors and shapes using the same expressions
colors <- c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D",
"italic(F.~excelsior)" = "orange",
"italic(F.~sylvatica)" = "#9467BD"
)
shapes <- c(
"italic(T.~baccata)" = 8,
"italic(P.~pinea)" = 18,
"italic(P.~pinaster)" = 15,
"italic(P.~sylvestris)" = 17,
"italic(F.~excelsior)" = 16,
"italic(F.~sylvatica)" = 25
)
# Plot with custom colors and shapes per species
plot <- ggplot(df_filtered, aes(x = Fst, y = st_Hs, shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5)+
geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D",
"italic(F.~excelsior)" = "orange",
"italic(F.~sylvatica)" = "#9467BD"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D",
"italic(F.~excelsior)" = "orange",
"italic(F.~sylvatica)" = "#9467BD"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(P.~pinaster)" = 19,
"italic(P.~sylvestris)" = 19,
"italic(F.~excelsior)" = 19,
"italic(F.~sylvatica)" = 19
)) +
theme_minimal() +
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
labs(title = "Mixed model fit: One line per species",
x = expression(italic(F)[ST]),
y = "Hs")+
scale_y_continuous(
limits = c(-4.5, 3),
breaks = seq(-4.5, 3, by = 1.5)
)+
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GD/figures/Hs_relationship_allspecies_new_wo_outliers.pdf");
print(plot);
dev.off()
## png
## 2
plot <- ggplot(df_filtered, aes(x = Fst, y = st_Hs, shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5)+
geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D",
"italic(F.~excelsior)" = "orange",
"italic(F.~sylvatica)" = "#9467BD"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D",
"italic(F.~excelsior)" = "orange",
"italic(F.~sylvatica)" = "#9467BD"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(P.~pinaster)" = 19,
"italic(P.~sylvestris)" = 19,
"italic(F.~excelsior)" = 19,
"italic(F.~sylvatica)" = 19
)) +
theme_minimal() +
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
labs(title = "Mixed model fit: One line per species",
x = expression(italic(F)[ST]),
y = "Hs")+
scale_y_continuous(
limits = c(-4.5, 3),
breaks = seq(-4.5, 3, by = 1.5)
)+
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GD/figures/GD_relationship_allspecies_new_wo_SE_wo_outliers.pdf");
print(plot);
dev.off()
## png
## 2
df_correlation_GD <- data.frame(
Species = character(),
cor_GDI_PSD = numeric(),
pvalue_GDI_PSD = numeric()
)
# Loop over unique species in the filtered dataset
for (species in unique(df_filtered$Species)) {
df_species <- df_filtered %>%
filter(Species == species)
# Pearson correlation
cor_result <- cor.test(df_species$Fst, df_species$st_Hs)
df_correlation_GD <- rbind(
df_correlation_GD,
data.frame(
Species = species,
cor_st_Hs_PSD = cor_result$estimate,
pvalue_st_Hs_PSD = cor_result$p.value
)
)
}
df_lm_results <- data.frame(
Species = character(),
Intercept = numeric(),
Slope = numeric(),
R2 = numeric(),
p_value = numeric()
)
# Loop over unique species
for (species in unique(df_filtered$Species)) {
df_species <- df_filtered %>%
filter(Species == species)
# Fit simple linear model
model <- lm(st_Hs ~ Fst, data = df_species)
summary_model <- summary(model)
# Extract coefficients and stats
intercept <- coef(summary_model)[1, 1]
slope <- coef(summary_model)[2, 1]
p_value <- coef(summary_model)[2, 4]
r2 <- summary_model$r.squared
# Add to results table
df_lm_results <- rbind(
df_lm_results,
data.frame(
Species = species,
Intercept = intercept,
Slope = slope,
R2 = r2,
p_value = p_value
)
)
}
# Merge with your existing correlation table
df_results_all <- df_correlation_GD %>%
left_join(df_lm_results, by = "Species")
# Format nicely for viewing
df_results_all %>%
dplyr::mutate(
`st_Hs ~ Fst` = paste0(
"slope=", round(Slope, 3),
", R²=", round(R2, 2),
", p=", signif(p_value, 2)
)
) %>%
dplyr::select(Species, `st_Hs ~ Fst`) %>%
gt() %>%
tab_header(
title = "Linear model: st_Hs ~ Fst per species"
) %>%
cols_label(
Species = "Species",
`st_Hs ~ Fst` = "Model summary"
)
| Linear model: st_Hs ~ Fst per species | |
| Species | Model summary |
|---|---|
| italic(T.~baccata) | slope=-11.135, R²=0.59, p=1.1e-06 |
| italic(P.~pinea) | slope=-3.898, R²=0.87, p=7.4e-22 |
| italic(P.~pinaster) | slope=-5.922, R²=0.46, p=1.9e-12 |
| italic(P.~sylvestris) | slope=-17.024, R²=0.33, p=2.5e-08 |
| italic(F.~excelsior) | slope=-15.815, R²=0.13, p=0.065 |
| italic(F.~sylvatica) | slope=-17.607, R²=0.56, p=1.2e-09 |
df_gt_table <- df_correlation_GD %>%
dplyr::mutate(
`st_Hs ~ PSD` = paste0(round(cor_st_Hs_PSD, 2), " (p=", signif(pvalue_st_Hs_PSD, 2), ")")
) %>%
dplyr::select(Species, `st_Hs ~ PSD`)
gt_table <- df_gt_table %>%
gt() %>%
tab_header(
title = "Pearson's correlation between st_Hs and historical realised gene flow"
) %>%
cols_label(
Species = "Species",
`st_Hs ~ PSD` = "st_Hs ~ PSD"
) %>%
tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
) %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
heading.subtitle.font.size = 11,
table.width = pct(100)
)
gt_table
| Pearson's correlation between st_Hs and historical realised gene flow | |
| Species | st_Hs ~ PSD |
|---|---|
| italic(T.~baccata) | -0.77 (p=1.1e-06) |
| italic(P.~pinea) | -0.93 (p=7.4e-22) |
| italic(P.~pinaster) | -0.68 (p=1.9e-12) |
| italic(P.~sylvestris) | -0.58 (p=2.5e-08) |
| italic(F.~excelsior) | -0.35 (p=0.065) |
| italic(F.~sylvatica) | -0.75 (p=1.2e-09) |
With atypical populations regarding Fst
df_renamed <- df_merge_species %>%
filter(Species != "Fagus")
species_labels <- c(
"Taxus" = "italic(T.~baccata)",
"Pinea" = "italic(P.~pinea)",
"Pinaster" = "italic(P.~pinaster)",
"Sylvestris" = "italic(P.~sylvestris)",
"Ash" = "italic(F.~excelsior)",
"Fagus_forg" = "italic(F.~sylvatica)"
)
# Apply labels to the data
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))
df_renamed$Species <- factor(df_renamed$Species, levels = c(
"italic(T.~baccata)",
"italic(P.~pinea)",
"italic(P.~pinaster)",
"italic(F.~excelsior)",
"italic(F.~sylvatica)",
"italic(P.~sylvestris)"
))
df_renamed <- df_renamed %>%
group_by(Species) %>%
mutate(st_Hs = as.numeric(scale(Hs))) %>%
ungroup()
# Define colors and shapes using the same expressions
colors <- c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D",
"italic(F.~sylvatica)" = "#9467BD",
"italic(F.~excelsior)" = "orange"
)
shapes <- c(
"italic(T.~baccata)" = 8,
"italic(P.~pinea)" = 18,
"italic(P.~pinaster)" = 15,
"italic(P.~sylvestris)" = 17,
"italic(F.~sylvatica)" = 25,
"italic(F.~excelsior)" = 16
)
# Plot with custom colors and shapes per species
plot <- ggplot(df_renamed, aes(x = Fst, y = st_Hs, shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5)+
geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D",
"italic(F.~sylvatica)" = "#9467BD",
"italic(F.~excelsior)" = "orange"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D",
"italic(F.~sylvatica)" = "#9467BD",
"italic(F.~excelsior)" = "orange"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(P.~pinaster)" = 19,
"italic(P.~sylvestris)" = 19,
"italic(F.~sylvatica)" = 19,
"italic(F.~excelsior)" = 19
)) +
theme_minimal() +
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
labs(title = "Mixed model fit: One line per species",
x = expression(italic(F)[ST]),
y = "Hs")+
scale_y_continuous(
limits = c(-4.5, 3),
breaks = seq(-4.5, 3, by = 1.5)
)+
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GD/figures/Hs_relationship_allspecies_new.pdf");
print(plot);
dev.off()
## png
## 2
plot <- ggplot(df_renamed, aes(x = Fst, y = st_Hs, shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5)+
geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D",
"italic(F.~sylvatica)" = "#9467BD",
"italic(F.~excelsior)" = "orange"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D",
"italic(F.~sylvatica)" = "#9467BD",
"italic(F.~excelsior)" = "orange"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(P.~pinaster)" = 19,
"italic(P.~sylvestris)" = 19,
"italic(F.~sylvatica)" = 19,
"italic(F.~excelsior)" = 19
)) +
theme_minimal() +
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
labs(title = "Mixed model fit: One line per species",
x = expression(italic(F)[ST]),
y = "Hs")+
scale_y_continuous(
limits = c(-4.5, 3),
breaks = seq(-4.5, 3, by = 1.5)
)+
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GD/figures/GD_relationship_allspecies_new_wo_SE.pdf");
print(plot);
dev.off()
## png
## 2
df_correlation_GD <- data.frame(
Species = character(),
cor_GDI_PSD = numeric(),
pvalue_GDI_PSD = numeric()
)
# Loop over unique species in the filtered dataset
for (species in unique(df_renamed$Species)) {
df_species <- df_renamed %>%
filter(Species == species)
# Pearson correlation
cor_result <- cor.test(df_species$Fst, df_species$st_Hs)
df_correlation_GD <- rbind(
df_correlation_GD,
data.frame(
Species = species,
cor_st_Hs_PSD = cor_result$estimate,
pvalue_st_Hs_PSD = cor_result$p.value
)
)
}
df_lm_results <- data.frame(
Species = character(),
Intercept = numeric(),
Slope = numeric(),
R2 = numeric(),
p_value = numeric()
)
# Loop over unique species
for (species in unique(df_renamed$Species)) {
df_species <- df_renamed %>%
filter(Species == species)
# Fit simple linear model
model <- lm(st_Hs ~ Fst, data = df_species)
summary_model <- summary(model)
# Extract coefficients and stats
intercept <- coef(summary_model)[1, 1]
slope <- coef(summary_model)[2, 1]
p_value <- coef(summary_model)[2, 4]
r2 <- summary_model$r.squared
# Add to results table
df_lm_results <- rbind(
df_lm_results,
data.frame(
Species = species,
Intercept = intercept,
Slope = slope,
R2 = r2,
p_value = p_value
)
)
}
# Merge with your existing correlation table
df_results_all <- df_correlation_GD %>%
left_join(df_lm_results, by = "Species")
# Format nicely for viewing
df_results_all %>%
dplyr::mutate(
`st_Hs ~ Fst` = paste0(
"slope=", round(Slope, 3),
", R²=", round(R2, 2),
", p=", signif(p_value, 2)
)
) %>%
dplyr::select(Species, `st_Hs ~ Fst`) %>%
gt() %>%
tab_header(
title = "Linear model: st_Hs ~ Fst per species"
) %>%
cols_label(
Species = "Species",
`st_Hs ~ Fst` = "Model summary"
)
| Linear model: st_Hs ~ Fst per species | |
| Species | Model summary |
|---|---|
| italic(T.~baccata) | slope=-11.135, R²=0.59, p=1.1e-06 |
| italic(P.~pinea) | slope=-3.898, R²=0.87, p=7.4e-22 |
| italic(P.~pinaster) | slope=-5.949, R²=0.52, p=1.1e-14 |
| italic(P.~sylvestris) | slope=-17.939, R²=0.48, p=7.8e-13 |
| italic(F.~excelsior) | slope=-8.396, R²=0.43, p=6.3e-05 |
| italic(F.~sylvatica) | slope=-26.117, R²=0.88, p=3.7e-24 |
df_gt_table <- df_correlation_GD %>%
dplyr::mutate(
`st_Hs ~ PSD` = paste0(round(cor_st_Hs_PSD, 2), " (p=", signif(pvalue_st_Hs_PSD, 2), ")")
) %>%
dplyr::select(Species, `st_Hs ~ PSD`)
gt_table <- df_gt_table %>%
gt() %>%
tab_header(
title = "Pearson's correlation between st_Hs and historical realised gene flow"
) %>%
cols_label(
Species = "Species",
`st_Hs ~ PSD` = "st_Hs ~ PSD"
) %>%
tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
) %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
heading.subtitle.font.size = 11,
table.width = pct(100)
)
gt_table
| Pearson's correlation between st_Hs and historical realised gene flow | |
| Species | st_Hs ~ PSD |
|---|---|
| italic(T.~baccata) | -0.77 (p=1.1e-06) |
| italic(P.~pinea) | -0.93 (p=7.4e-22) |
| italic(P.~pinaster) | -0.72 (p=1.1e-14) |
| italic(P.~sylvestris) | -0.69 (p=7.8e-13) |
| italic(F.~excelsior) | -0.66 (p=6.3e-05) |
| italic(F.~sylvatica) | -0.94 (p=3.7e-24) |
df_renamed <- df_merge_species
species_labels <- c(
"Taxus" = "italic(T.~baccata)",
"Pinea" = "italic(P.~pinea)",
"Pinaster" = "italic(P.~pinaster)",
"Sylvestris" = "italic(P.~sylvestris)",
"Ash" = "italic(F.~excelsior)",
"Fagus_forg" = "italic(F.~sylvatica)"
)
# Create a new data frame with updated species names
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))
df_renamed$Latitude <- as.numeric(df_renamed$Latitude)
df_renamed$Longitude <- as.numeric(df_renamed$Longitude)
df_renamed <- df_renamed %>%
dplyr::group_by(Species) %>%
dplyr::mutate(
Q1 = quantile(Fst, 0.25, na.rm = TRUE),
Q3 = quantile(Fst, 0.75, na.rm = TRUE),
IQR = Q3 - Q1,
lower = Q1 - 2.4 * IQR,
upper = Q3 + 2.4 * IQR
) %>%
filter(Fst >= lower & Fst <= upper) %>%
ungroup() %>%
dplyr::select(-Q1, -Q3, -IQR, -lower, -upper)
# Define colors and shapes using the same expressions
colors <- c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)
shapes <- c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(F.~excelsior)" = 19,
"italic(P.~pinaster)" = 19,
"italic(F.~sylvatica)" = 19,
"italic(P.~sylvestris)" = 19
)
# Plot with custom colors and shapes per species
plot <- ggplot(df_renamed, aes(x = Longitude, y = scale(Hs), shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(F.~excelsior)" = 19,
"italic(P.~pinaster)" = 19,
"italic(F.~sylvatica)" = 19,
"italic(P.~sylvestris)" = 19
)) +
theme_minimal() +
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
labs(title = "Mixed model fit: One line per species",
x = "Longitude",
y = "Scaled genetic diversity (Hs)")+
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
#pdf("Results/all_species/Relationship/GL/figures/Provean/GL_relationship_allspecies_new.pdf");
# print(plot);
# dev.off()
plot <- ggplot(df_renamed, aes(x = Longitude, y = scale(Hs), shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(F.~excelsior)" = 19,
"italic(P.~pinaster)" = 19,
"italic(F.~sylvatica)" = 19,
"italic(P.~sylvestris)" = 19
)) +
theme_minimal() +
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
labs(title = "Mixed model fit: One line per species",
x = "Longitude",
y = "Scaled genetic diversity (Hs)")+
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
#pdf("Results/all_species/Relationship/GL/figures/Provean/GL_relationship_allspecies_new_wo_SE.pdf");
# print(plot);
# dev.off()
df_lm_results <- data.frame(
Species = character(),
Intercept = numeric(),
Slope = numeric(),
R2 = numeric(),
p_value = numeric()
)
# Fit LM for each species independently
for (species in unique(df_renamed$Species)) {
df_species <- df_renamed %>%
filter(Species == species)
# Skip if not enough points
if (nrow(df_species) < 3) next
# Simple linear model
model <- lm(Hs ~ Longitude, data = df_species)
summary_model <- summary(model)
df_lm_results <- rbind(
df_lm_results,
data.frame(
Species = species,
Intercept = coef(summary_model)[1, 1],
Slope = coef(summary_model)[2, 1],
R2 = summary_model$r.squared,
p_value = coef(summary_model)[2, 4]
)
)
}
# Inspect results
print(df_lm_results)
## Species Intercept Slope R2 p_value
## 1 italic(T.~baccata) 0.1657582 9.245047e-04 0.2101233153 1.238988e-02
## 2 italic(P.~pinea) 0.2454332 -2.068998e-03 0.1398541150 8.834001e-03
## 3 italic(P.~pinaster) 0.2209233 -2.088206e-03 0.1238001596 1.106736e-03
## 4 italic(P.~sylvestris) 0.2870950 3.637667e-04 0.1875743825 6.700663e-05
## 5 italic(F.~excelsior) 0.2443825 2.310125e-05 0.0001762504 9.465422e-01
## 6 italic(F.~sylvatica) 0.2312181 2.247754e-04 0.2016464493 1.544876e-03
gt_table_lm <- df_lm_results %>%
dplyr::mutate(
`Hs ~ Longitude` = paste0(
"slope=", round(Slope, 3),
", R²=", round(R2, 2),
", p=", signif(p_value, 2)
)
) %>%
dplyr::select(Species, `Hs ~ Longitude`) %>%
gt() %>%
tab_header(
title = "Linear model: Hs ~ Longitude per species"
) %>%
cols_label(
Species = "Species",
`Hs ~ Longitude` = "Model summary"
) %>%
tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
) %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
table.width = pct(100)
)
gt_table_lm
| Linear model: Hs ~ Longitude per species | |
| Species | Model summary |
|---|---|
| italic(T.~baccata) | slope=0.001, R²=0.21, p=0.012 |
| italic(P.~pinea) | slope=-0.002, R²=0.14, p=0.0088 |
| italic(P.~pinaster) | slope=-0.002, R²=0.12, p=0.0011 |
| italic(P.~sylvestris) | slope=0, R²=0.19, p=6.7e-05 |
| italic(F.~excelsior) | slope=0, R²=0, p=0.95 |
| italic(F.~sylvatica) | slope=0, R²=0.2, p=0.0015 |
### Disentangle the effect of Fst and Longitude on GDI
# Data frame to store results
df_varpart_species_lon_fst_genetic_diversity <- data.frame(
Species = character(),
Fst_unique = numeric(),
Longitude_unique = numeric(),
Shared = numeric(),
Residual = numeric(),
Total_Explained = numeric(),
Fst_p = numeric(),
Longitude_p = numeric(),
stringsAsFactors = FALSE
)
for (species in unique(df_renamed$Species)) {
df_species <- df_renamed %>% filter(Species == species)
# Skip if not enough points
if (nrow(df_species) < 3) next
# --- Full RDA to get total explained variance ---
rda_full <- rda(df_species$Hs ~ scale(Fst) + scale(Longitude), data = df_species)
total_explained <- RsquareAdj(rda_full)$adj.r.squared
# --- Partial RDA for unique fractions ---
rda_Fst <- rda(df_species$Hs ~ scale(Fst) + Condition(scale(Longitude)), data = df_species)
Fst_unique <- RsquareAdj(rda_Fst)$adj.r.squared
anova_Fst <- anova(rda_Fst, permutations = 999)
Fst_p <- anova_Fst$`Pr(>F)`[1] # p-value for Fst unique
rda_Lat <- rda(df_species$Hs ~ scale(Longitude) + Condition(scale(Fst)), data = df_species)
Longitude_unique <- RsquareAdj(rda_Lat)$adj.r.squared
anova_Lat <- anova(rda_Lat, permutations = 999)
Longitude_p <- anova_Lat$`Pr(>F)`[1] # p-value for Longitude unique
Shared <- total_explained - Fst_unique - Longitude_unique
Residual <- 1 - total_explained
Total_Explained <- total_explained
# Bind to results
df_varpart_species_lon_fst_genetic_diversity <- rbind(
df_varpart_species_lon_fst_genetic_diversity,
data.frame(
Species = species,
Fst_unique = Fst_unique,
Longitude_unique = Longitude_unique,
Shared = Shared,
Residual = Residual,
Total_Explained = Total_Explained,
Fst_p = Fst_p,
Longitude_p = Longitude_p
)
)
}
# Inspect results
print(df_varpart_species_lon_fst_genetic_diversity)
## Species Fst_unique Longitude_unique Shared Residual
## 1 italic(T.~baccata) 0.6602666 0.2654037993 -0.084535176 0.1588648
## 2 italic(P.~pinea) 0.7449377 0.0009513661 0.120203925 0.1339070
## 3 italic(P.~pinaster) 0.3340173 -0.0064242577 0.119407135 0.5529998
## 4 italic(P.~sylvestris) 0.3776214 0.2295041390 -0.052480739 0.4453552
## 5 italic(F.~excelsior) 0.0934554 -0.0361920082 -0.002086501 0.9448231
## 6 italic(F.~sylvatica) 0.4059910 0.0354165074 0.148488752 0.4101038
## Total_Explained Fst_p Longitude_p
## 1 0.8411352 0.001 0.001
## 2 0.8660930 0.001 0.258
## 3 0.4470002 0.001 0.813
## 4 0.5546448 0.001 0.001
## 5 0.0551769 0.059 0.953
## 6 0.5898962 0.001 0.028
# Optional: GT table
gt_table_vp <- df_varpart_species_lon_fst_genetic_diversity %>%
gt() %>%
tab_header(
title = "Variance Partitioning of Genetic diversity per Species Longitude"
) %>%
cols_label(
Species = "Species",
Fst_unique = "Fst (unique)",
Longitude_unique = "Longitude (unique)",
Shared = "Shared fraction",
Residual = "Residual",
Total_Explained = "Total Explained",
Fst_p = "Fst p-value",
Longitude_p = "Longitude p-value"
) %>%
fmt_number(columns = vars(Fst_p, Longitude_p), decimals = 3) %>%
tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
) %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
table.width = pct(100)
)
gt_table_vp
| Variance Partitioning of Genetic diversity per Species Longitude | |||||||
| Species | Fst (unique) | Longitude (unique) | Shared fraction | Residual | Total Explained | Fst p-value | Longitude p-value |
|---|---|---|---|---|---|---|---|
| italic(T.~baccata) | 0.6602666 | 0.2654037993 | -0.084535176 | 0.1588648 | 0.8411352 | 0.001 | 0.001 |
| italic(P.~pinea) | 0.7449377 | 0.0009513661 | 0.120203925 | 0.1339070 | 0.8660930 | 0.001 | 0.258 |
| italic(P.~pinaster) | 0.3340173 | -0.0064242577 | 0.119407135 | 0.5529998 | 0.4470002 | 0.001 | 0.813 |
| italic(P.~sylvestris) | 0.3776214 | 0.2295041390 | -0.052480739 | 0.4453552 | 0.5546448 | 0.001 | 0.001 |
| italic(F.~excelsior) | 0.0934554 | -0.0361920082 | -0.002086501 | 0.9448231 | 0.0551769 | 0.059 | 0.953 |
| italic(F.~sylvatica) | 0.4059910 | 0.0354165074 | 0.148488752 | 0.4101038 | 0.5898962 | 0.001 | 0.028 |
Plots longitude
species_labels <- c(
"Taxus" = "T. baccata",
"Pinea" = "P. pinea",
"Pinaster" = "P. pinaster",
"Sylvestris" = "P. sylvestris",
"Ash" = "F. excelsior",
"Fagus_forg" = "F. sylvatica"
)
# Create a new data frame with updated species names
df_renamed$Species <- species_labels[df_renamed$Species]
# Set Species as a factor with desired order for consistent colors/shapes/legend
df_renamed$Longitude <- as.numeric(df_renamed$Longitude)
df_renamed <- df_renamed %>%
group_by(Species) %>%
mutate(st_Hs = as.numeric(scale(Hs))) %>%
ungroup()
# Plot with custom colors and shapes per species
plot <- ggplot(df_renamed, aes(x = Longitude, y = st_Hs, shape = Species, fill = Species, color = Species)) +
geom_point(position = position_jitter(width = 0, height = 0.01), alpha = 0.6, size = 2.5) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"T. baccata" = "darkred",
"P. pinea" = "darkgreen",
"P. pinaster" = "#56B4E9",
"P. sylvestris" = "#A0522D",
"F. sylvatica" = "#9467BD",
"F. excelsior" = "orange"
)) +
scale_fill_manual(values = c(
"T. baccata" = "darkred",
"P. pinea" = "darkgreen",
"P. pinaster" = "#56B4E9",
"P. sylvestris" = "#A0522D",
"F. sylvatica" = "#9467BD",
"F. excelsior" = "orange"
)) +
scale_shape_manual(values = c(
"T. baccata" = 8,
"P. pinea" = 18,
"P. pinaster" = 15,
"P. sylvestris" = 17,
"F. sylvatica" = 25, # downward triangle (fillable)
"F. excelsior" = 16
)) +
theme_minimal() +
labs(
title = "Mixed Model Fit: One Line per Species",
x = "Longitude",
y = "Genetic diversity Hs"
) +
#scale_y_continuous(breaks = seq(0.05, 0.35, by = 0.05)) +
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GD/figures/Hs_relationship_Longitude_allspecies_new.pdf");
print(plot);
dev.off()
## png
## 2
plot <- ggplot(df_renamed, aes(x = Longitude, y = st_Hs, shape = Species, fill = Species, color = Species)) +
#geom_point(position = position_jitter(width = 0, height = 0.01), alpha = 0.6, size = 2.5) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"T. baccata" = "darkred",
"P. pinea" = "darkgreen",
"P. pinaster" = "#56B4E9",
"P. sylvestris" = "#A0522D",
"F. sylvatica" = "#9467BD",
"F. excelsior" = "orange"
)) +
scale_fill_manual(values = c(
"T. baccata" = "darkred",
"P. pinea" = "darkgreen",
"P. pinaster" = "#56B4E9",
"P. sylvestris" = "#A0522D",
"F. sylvatica" = "#9467BD",
"F. excelsior" = "orange"
)) +
scale_shape_manual(values = c(
"T. baccata" = 8,
"P. pinea" = 18,
"P. pinaster" = 15,
"P. sylvestris" = 17,
"F. sylvatica" = 25, # downward triangle (fillable)
"F. excelsior" = 16
)) +
theme_minimal() +
labs(
title = "Mixed Model Fit: One Line per Species",
x = "Longitude",
y = "Genetic diversity Hs"
) +
#scale_y_continuous(breaks = seq(0.05, 0.35, by = 0.05)) +
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GD/figures/Hs_relationship_Longitude_allspecies_wo_points_new.pdf");
print(plot);
dev.off()
## png
## 2
ordered_species <- c(
"P. sylvestris",
"F. sylvatica",
"F. excelsior",
"P. pinaster",
"P. pinea",
"T. baccata"
)
# Update factor levels
df_renamed$Species <- factor(df_renamed$Species, levels = ordered_species)
# Apply same order to species_offsets
species_offsets <- data.frame(
Species = ordered_species,
offset = seq(0, by = 6, length.out = length(ordered_species))
)
df_renamed_bis <- df_renamed %>%
left_join(species_offsets, by = "Species") %>%
mutate(offset_Hs = st_Hs + offset)
tick_vals <- -2:2
breaks <- unlist(lapply(species_offsets$offset, function(o) o + tick_vals))
labels <- rep(tick_vals, times = nrow(species_offsets))
color_vals <- c(
"T. baccata" = "darkred",
"P. pinea" = "darkgreen",
"P. pinaster" = "#56B4E9",
"P. sylvestris" = "#A0522D",
"F. sylvatica" = "#9467BD",
"F. excelsior" = "orange"
)
# Final Plot
plot <- ggplot(df_renamed_bis, aes(x = Longitude, y = offset_Hs, color = Species)) +
geom_point(alpha = 0.5, size = 2.5) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
scale_color_manual(values = color_vals) +
scale_y_continuous(
breaks = species_offsets$offset,
labels = species_offsets$Species
) +
theme_minimal() +
labs(
title = "Standardized Hs by Species",
x = "Longitude",
y = "Scaled genetic diversity"
) +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "none",
axis.text.y = element_text(face = "italic"), # <- makes species names italic
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GD/figures/Hs_relationship_Longitude_good_figure.pdf");
print(plot);
dev.off()
## png
## 2
# changed order
ordered_species <- c(
"P. sylvestris",
"F. sylvatica",
"F. excelsior",
"P. pinaster",
"P. pinea",
"T. baccata"
)
# Update factor levels
df_renamed$Species <- factor(df_renamed$Species, levels = ordered_species)
# Apply same order to species_offsets
species_offsets <- data.frame(
Species = ordered_species,
offset = seq(0, by = 6, length.out = length(ordered_species))
)
df_renamed_bis <- df_renamed %>%
left_join(species_offsets, by = "Species") %>%
mutate(offset_Hs = st_Hs + offset)
tick_vals <- -2:2
breaks <- unlist(lapply(species_offsets$offset, function(o) o + tick_vals))
labels <- rep(tick_vals, times = nrow(species_offsets))
color_vals <- c(
"T. baccata" = "darkred",
"P. pinea" = "darkgreen",
"P. pinaster" = "#56B4E9",
"P. sylvestris" = "#A0522D",
"F. sylvatica" = "#9467BD",
"F. excelsior" = "orange"
)
# Final Plot
plot <- ggplot(df_renamed_bis, aes(x = Longitude, y = offset_Hs, color = Species)) +
geom_point(alpha = 0.5, size = 2.5) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
scale_color_manual(values = color_vals) +
scale_y_continuous(
breaks = species_offsets$offset,
labels = species_offsets$Species
) +
theme_minimal() +
labs(
title = "Standardized Hs by Species",
x = "Longitude",
y = "Scaled genetic diversity"
) +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "none",
axis.text.y = element_text(face = "italic"), # <- makes species names italic
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GD/figures/Hs_relationship_Longitude_good_figure_change_order.pdf");
print(plot);
dev.off()
## png
## 2
df_renamed <- df_merge_species
species_labels <- c(
"Taxus" = "italic(T.~baccata)",
"Pinea" = "italic(P.~pinea)",
"Pinaster" = "italic(P.~pinaster)",
"Sylvestris" = "italic(P.~sylvestris)",
"Ash" = "italic(F.~excelsior)",
"Fagus_forg" = "italic(F.~sylvatica)"
)
# Create a new data frame with updated species names
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))
df_renamed$Latitude <- as.numeric(df_renamed$Latitude)
df_renamed$Longitude <- as.numeric(df_renamed$Longitude)
df_renamed <- df_renamed %>%
dplyr::group_by(Species) %>%
dplyr::mutate(
Q1 = quantile(Fst, 0.25, na.rm = TRUE),
Q3 = quantile(Fst, 0.75, na.rm = TRUE),
IQR = Q3 - Q1,
lower = Q1 - 2.4 * IQR,
upper = Q3 + 2.4 * IQR
) %>%
filter(Fst >= lower & Fst <= upper) %>%
ungroup() %>%
dplyr::select(-Q1, -Q3, -IQR, -lower, -upper)
# Define colors and shapes using the same expressions
colors <- c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)
shapes <- c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(F.~excelsior)" = 19,
"italic(P.~pinaster)" = 19,
"italic(F.~sylvatica)" = 19,
"italic(P.~sylvestris)" = 19
)
# Plot with custom colors and shapes per species
plot <- ggplot(df_renamed, aes(x = Latitude, y = scale(Hs), shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(F.~excelsior)" = 19,
"italic(P.~pinaster)" = 19,
"italic(F.~sylvatica)" = 19,
"italic(P.~sylvestris)" = 19
)) +
theme_minimal() +
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
labs(title = "Mixed model fit: One line per species",
x = "Latitude",
y = "Scaled genetic diversity (Hs)")+
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
#pdf("Results/all_species/Relationship/GL/figures/Provean/GL_relationship_allspecies_new.pdf");
# print(plot);
# dev.off()
plot <- ggplot(df_renamed, aes(x = Latitude, y = scale(Hs), shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(F.~excelsior)" = 19,
"italic(P.~pinaster)" = 19,
"italic(F.~sylvatica)" = 19,
"italic(P.~sylvestris)" = 19
)) +
theme_minimal() +
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
labs(title = "Mixed model fit: One line per species",
x = "Latitude",
y = "Scaled genetic diversity (Hs)")+
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
#pdf("Results/all_species/Relationship/GL/figures/Provean/GL_relationship_allspecies_new_wo_SE.pdf");
# print(plot);
# dev.off()
df_lm_results <- data.frame(
Species = character(),
Intercept = numeric(),
Slope = numeric(),
R2 = numeric(),
p_value = numeric()
)
# Fit LM for each species independently
for (species in unique(df_renamed$Species)) {
df_species <- df_renamed %>%
filter(Species == species)
# Skip if not enough points
if (nrow(df_species) < 3) next
# Simple linear model
model <- lm(Hs ~ Latitude, data = df_species)
summary_model <- summary(model)
df_lm_results <- rbind(
df_lm_results,
data.frame(
Species = species,
Intercept = coef(summary_model)[1, 1],
Slope = coef(summary_model)[2, 1],
R2 = summary_model$r.squared,
p_value = coef(summary_model)[2, 4]
)
)
}
# Inspect results
print(df_lm_results)
## Species Intercept Slope R2 p_value
## 1 italic(T.~baccata) 0.1347653 8.545802e-04 0.072745981 0.1570836983
## 2 italic(P.~pinea) 0.2911319 -1.499962e-03 0.002240129 0.7493873697
## 3 italic(P.~pinaster) 0.1110890 2.718498e-03 0.058057319 0.0282128155
## 4 italic(P.~sylvestris) 0.2333875 1.268864e-03 0.176524523 0.0001158027
## 5 italic(F.~excelsior) 0.2299639 2.969343e-04 0.007352633 0.6643995337
## 6 italic(F.~sylvatica) 0.2354445 -3.413152e-05 0.001108792 0.8241600280
gt_table_lm <- df_lm_results %>%
dplyr::mutate(
`Hs ~ Latitude` = paste0(
"slope=", round(Slope, 3),
", R²=", round(R2, 2),
", p=", signif(p_value, 2)
)
) %>%
dplyr::select(Species, `Hs ~ Latitude`) %>%
gt() %>%
tab_header(
title = "Linear model: Hs ~ Latitude per species"
) %>%
cols_label(
Species = "Species",
`Hs ~ Latitude` = "Model summary"
) %>%
tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
) %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
table.width = pct(100)
)
gt_table_lm
| Linear model: Hs ~ Latitude per species | |
| Species | Model summary |
|---|---|
| italic(T.~baccata) | slope=0.001, R²=0.07, p=0.16 |
| italic(P.~pinea) | slope=-0.001, R²=0, p=0.75 |
| italic(P.~pinaster) | slope=0.003, R²=0.06, p=0.028 |
| italic(P.~sylvestris) | slope=0.001, R²=0.18, p=0.00012 |
| italic(F.~excelsior) | slope=0, R²=0.01, p=0.66 |
| italic(F.~sylvatica) | slope=0, R²=0, p=0.82 |
### Disentangle the effect of Fst and Latitude on GDI
# Data frame to store results
df_varpart_species_lon_fst_genetic_diversity <- data.frame(
Species = character(),
Fst_unique = numeric(),
Longitude_unique = numeric(),
Latitude_unique = numeric(),
Shared = numeric(),
Residual = numeric(),
Total_Explained = numeric(),
Fst_p = numeric(),
Longitude_p = numeric(),
Latitude_p = numeric(),
stringsAsFactors = FALSE
)
for (species in unique(df_renamed$Species)) {
df_species <- df_renamed %>% filter(Species == species)
# Skip if not enough points
if (nrow(df_species) < 3) next
# --- Full RDA to get total explained variance ---
rda_full <- rda(df_species$Hs ~ scale(Fst) +scale(Longitude)+scale(Latitude) , data = df_species)
total_explained <- RsquareAdj(rda_full)$adj.r.squared
# --- Partial RDA for unique fractions ---
rda_Fst <- rda(df_species$Hs ~ scale(Fst) + Condition(scale(Longitude) + scale(Latitude)), data = df_species)
Fst_unique <- RsquareAdj(rda_Fst)$adj.r.squared
anova_Fst <- anova(rda_Fst, permutations = 999)
Fst_p <- anova_Fst$`Pr(>F)`[1] # p-value for Fst unique
rda_Lon <- rda(df_species$Hs ~ scale(Longitude) + Condition(scale(Fst)+scale(Latitude)), data = df_species)
Longitude_unique <- RsquareAdj(rda_Lon)$adj.r.squared
anova_Lon <- anova(rda_Lon, permutations = 999)
Longitude_p <- anova_Lon$`Pr(>F)`[1] # p-value for Latitude unique
rda_Lat <- rda(df_species$Hs ~ scale(Latitude) + Condition(scale(Fst)+scale(Longitude)), data = df_species)
Latitude_unique <- RsquareAdj(rda_Lat)$adj.r.squared
anova_Lat <- anova(rda_Lat, permutations = 999)
Latitude_p <- anova_Lat$`Pr(>F)`[1] # p-value for Latitude unique
# --- Shared and residual fractions ---
Shared <- total_explained - Fst_unique - Longitude_unique - Latitude_unique
Residual <- 1 - total_explained
Total_Explained <- total_explained
# Bind to results
df_varpart_species_lon_fst_genetic_diversity <- rbind(
df_varpart_species_lon_fst_genetic_diversity,
data.frame(
Species = species,
Fst_unique = Fst_unique,
Longitude_unique = Longitude_unique,
Latitude_unique = Latitude_unique,
Shared = Shared,
Residual = Residual,
Total_Explained = Total_Explained,
Fst_p = Fst_p,
Longitude_p = Longitude_p,
Latitude_p = Latitude_p
)
)
}
# Inspect results
print(df_varpart_species_lon_fst_genetic_diversity)
## Species Fst_unique Longitude_unique Latitude_unique
## 1 italic(T.~baccata) 0.54406358 0.3256065230 0.047897487
## 2 italic(P.~pinea) 0.73201670 0.0005769882 -0.003026506
## 3 italic(P.~pinaster) 0.24332765 -0.0062389588 -0.006507674
## 4 italic(P.~sylvestris) 0.31169545 0.1707714991 -0.005427702
## 5 italic(F.~excelsior) 0.08580789 -0.0392816956 -0.039367629
## 6 italic(F.~sylvatica) 0.51042647 0.0204461884 0.086881809
## Shared Residual Total_Explained Fst_p Longitude_p Latitude_p
## 1 -0.028534866 0.1109673 0.88903272 0.001 0.001 0.002
## 2 0.133499288 0.1369335 0.86306647 0.001 0.299 0.938
## 3 0.209911471 0.5595075 0.44049249 0.001 0.739 0.799
## 4 0.072177880 0.4507829 0.54921713 0.001 0.001 0.770
## 5 0.008652461 0.9841890 0.01581103 0.087 0.966 NA
## 6 0.059023585 0.3232220 0.67677805 0.001 0.055 0.001
# Optional: GT table
gt_table_vp <- df_varpart_species_lon_fst_genetic_diversity %>%
gt() %>%
tab_header(
title = "Variance Partitioning of Genetic diversity per Species Longitude and Latitude wo outliers Fst"
) %>%
cols_label(
Species = "Species",
Fst_unique = "Fst (unique)",
Longitude_unique = "Longitude (unique)",
Latitude_unique = "Latitude (unique)",
Shared = "Shared fraction",
Residual = "Residual",
Total_Explained = "Total Explained",
Fst_p = "Fst p-value",
Longitude_p = "Longitude p-value",
Latitude_p = "Latitude p-value"
) %>%
fmt_number(columns = vars(Fst_p,Longitude_p, Latitude_p), decimals = 3) %>%
tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
) %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
table.width = pct(100)
)
gt_table_vp
| Variance Partitioning of Genetic diversity per Species Longitude and Latitude wo outliers Fst | |||||||||
| Species | Fst (unique) | Longitude (unique) | Latitude (unique) | Shared fraction | Residual | Total Explained | Fst p-value | Longitude p-value | Latitude p-value |
|---|---|---|---|---|---|---|---|---|---|
| italic(T.~baccata) | 0.54406358 | 0.3256065230 | 0.047897487 | -0.028534866 | 0.1109673 | 0.88903272 | 0.001 | 0.001 | 0.002 |
| italic(P.~pinea) | 0.73201670 | 0.0005769882 | -0.003026506 | 0.133499288 | 0.1369335 | 0.86306647 | 0.001 | 0.299 | 0.938 |
| italic(P.~pinaster) | 0.24332765 | -0.0062389588 | -0.006507674 | 0.209911471 | 0.5595075 | 0.44049249 | 0.001 | 0.739 | 0.799 |
| italic(P.~sylvestris) | 0.31169545 | 0.1707714991 | -0.005427702 | 0.072177880 | 0.4507829 | 0.54921713 | 0.001 | 0.001 | 0.770 |
| italic(F.~excelsior) | 0.08580789 | -0.0392816956 | -0.039367629 | 0.008652461 | 0.9841890 | 0.01581103 | 0.087 | 0.966 | NA |
| italic(F.~sylvatica) | 0.51042647 | 0.0204461884 | 0.086881809 | 0.059023585 | 0.3232220 | 0.67677805 | 0.001 | 0.055 | 0.001 |
Genetic load only computed for the conifer species.
list_species <- c("Taxus_baccata","Pinus_pinea","Pinus_pinaster","Pinus_sylvestris")
list_species_name <- c("Taxus","Pinea","Pinaster","Sylvestris")
for(x in 1:length(list_species)){
species <- list_species[x]
species_name <- list_species_name[x]
data <- read.csv(paste0("Data/Species/Chapter2/GL_new_estimates/",species,"_GL.csv"),h=T,sep=";",dec=",")
data_select <- data %>% dplyr::select(Population,GLr.Provean)
data_select$GLr.Provean<- as.numeric(data_select$GLr.Provean)
#Fst
data_FST <- get(paste0("Bayescan_FST_pop_order_", species_name))
data_tot <- merge(data_FST,data_select,"Population")
assign(paste0("GL_",species_name),data_tot)
}
list_species_name <- c("Taxus","Pinea","Pinaster","Sylvestris")
# Loop to process each species and combine data
combined_data_GL <- data.frame()
for (x in 1:length(list_species_name)) {
species <- list_species_name[x]
data <- get(paste0("GL_", species))
data$Species <- species
combined_data_GL <- rbind(combined_data_GL, data)
#provean
plot <- ggplot() +
geom_density(data = data, aes(x = GLr.Provean), fill = "lightblue", alpha = 0.8) +
labs(
title = paste0("Distribution of Genetic load between population of ", species),
x = "Genetic load Provean",
y = "Density (number of populations)"
) +
theme_bw() +
theme(
panel.grid = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
strip.text = element_text(size = 11),
plot.title = element_text(hjust = 0.5)
)
print(plot)
# Save to PDF
pdf(paste0("Results/all_species/Relationship/GL_new_estimates/figures/Provean/GL_provean", species, ".pdf"))
print(plot)
dev.off()
}
combined_data_GL$Species <- factor(
combined_data_GL$Species,
levels = c("Taxus", "Pinea", "Pinaster", "Sylvestris")
)
#Provean
combined_plot <- ggplot(combined_data_GL, aes(x = log10(GLr.Provean))) +
geom_histogram(binwidth = 0.05, aes(fill = Species), color = "black", alpha = 0.8) +
scale_fill_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D")) +#"#9467BD","orange"
#scale_x_continuous(limits = c(0, 0.25), breaks = seq(0, 0.25, by = 0.05),labels = scales::label_number(accuracy = 0.01)) +
facet_wrap(~ Species, scales = "free_y", ncol = 2) + # Two columns: top = taxus & pinea; bottom = pinaster & sylvestris
labs(
title = "Distribution of genetic load values \nacross populations all species ",
x = "Log 10 Genetic load Provean",
y = "Count of populations"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
strip.text = element_text(size = 10)
)
# Show the plot
print(combined_plot)
pdf("Results/all_species/Genetic_load/figures_new_estimates/Provean/GL_Provean_all_species.pdf")
print(combined_plot)
dev.off()
## png
## 2
combined_data_GL_bis <- combined_data_GL
combined_data_GL_bis$Species <- factor(
combined_data_GL_bis$Species,
levels = c("Taxus", "Pinea", "Pinaster", "Sylvestris"),
labels = c("T. baccata", "P. pinea", "P. pinaster", "P. sylvestris")
)
beeswarm_provean <- ggplot(combined_data_GL_bis, aes(x = Species, y = GLr.Provean, color = Species)) +
geom_beeswarm(cex = 1.2, size = 2.5, alpha = 0.7) +
scale_color_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D")) +#"#9467BD","orange"
stat_summary(fun = median, geom = "crossbar", width = 0.3, color = "black", fatten = 0.13, size = 10) +
labs(
title = "Beeswarm Plot of Genetic Load estimates fro Provean",
x = "Species",
y = "Genetic Load (Provean)"
) +
scale_x_discrete(labels = c(
"T. baccata" = expression(italic("T. baccata")),
"P. pinea" = expression(italic("P. pinea")),
"P. pinaster" = expression(italic("P. pinaster")),
"P. sylvestris" = expression(italic("P. sylvestris"))
))+
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "none"
)
print(beeswarm_provean)
pdf("Results/all_species/Genetic_load/figures_new_estimates/Provean/Beeswarm_GL_Provean_all_species.pdf");
print(beeswarm_provean);
dev.off()
## png
## 2
#zscores
combined_data_GL_bis$GLr.Provean.z <- ave(combined_data_GL_bis$GLr.Provean, combined_data_GL_bis$Species,
FUN = function(x) scale(x)[,1])
beeswarm_snpeff_horiz <- ggplot(combined_data_GL_bis, aes(y = Species, x = GLr.Provean.z, color = Species)) +
geom_beeswarm(cex = 1.2, size = 2.5, alpha = 0.8) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray30") +
scale_color_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D")) +
labs(
title = "Genetic Load (SnpEff) per Species",
x = "Genetic Load (SnpEff)",
y = ""
) +
scale_x_discrete(labels = c(
"T. baccata" = expression(italic("T. baccata")),
"P. pinea" = expression(italic("P. pinea")),
"P. pinaster" = expression(italic("P. pinaster")),
"P. sylvestris" = expression(italic("P. sylvestris"))
))+
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "none",
axis.text.y = element_text(size = 11)
)
print(beeswarm_snpeff_horiz)
# Set factor levels explicitly if not already done
# Boxplot with jittered points overlaid
boxplot_jitter_plot <- ggplot(combined_data_GL_bis, aes(x = Species, y = GLr.Provean, fill = Species)) +
geom_jitter(aes(color = Species), width = 0.2, alpha = 0.6, size = 1.5) + # add jittered points
geom_boxplot(outlier.shape = NA, alpha = 0.6) +
scale_fill_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D")) +
scale_color_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D")) +
scale_x_discrete(labels = c(
"T. baccata" = expression(italic("T. baccata")),
"P. pinea" = expression(italic("P. pinea")),
"P. pinaster" = expression(italic("P. pinaster")),
"P. sylvestris" = expression(italic("P. sylvestris"))
))+
labs(
title = "Boxplot of genetic load values with jittered points by species",
x = "",
y = "Realised genetic load"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "none",
axis.text.x = element_text(angle = 0, hjust = 0.5),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
# Display plot
print(boxplot_jitter_plot)
# Save to PDF
pdf("Results/all_species/Genetic_load/figures_new_estimates/GL_boxplot_jitter_all_species.pdf", width = 8, height = 6)
print(boxplot_jitter_plot)
dev.off()
## png
## 2
# one way anova to see if groups are differents
anova_result <- aov(GLr.Provean ~ Species, data = combined_data_GL)
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 3 0.3358 0.11193 1043 <2e-16 ***
## Residuals 238 0.0256 0.00011
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# They are so we can do a Tukey post hoc to see which groups are different
tukey_result <- TukeyHSD(anova_result)
tukey_result
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = GLr.Provean ~ Species, data = combined_data_GL)
##
## $Species
## diff lwr upr p adj
## Pinea-Taxus -0.031411220 -0.037716488 -0.025105951 0.0000000
## Pinaster-Taxus -0.096757285 -0.102531306 -0.090983263 0.0000000
## Sylvestris-Taxus -0.098055316 -0.103856714 -0.092253918 0.0000000
## Pinaster-Pinea -0.065346065 -0.070196766 -0.060495364 0.0000000
## Sylvestris-Pinea -0.066644096 -0.071527353 -0.061760840 0.0000000
## Sylvestris-Pinaster -0.001298031 -0.005472851 0.002876788 0.8523071
list_species <- c("Taxus","Pinea","Pinaster","Sylvestris")
df_correlation_GL <- data.frame(
Species = character(),
Cor_Provean_PSD = numeric(),
pvalue_Provean_PSD = numeric()
)
for(x in 1:length(list_species)){
species <- list_species[x]
data <- get(paste0("GL_",species))
cor_Provean <- cor(data$Fst,data$GLr.Provean)
pval_Provean <- cor.test(data$Fst,data$GLr.Provean)
df_correlation_GL <- rbind(df_correlation_GL,data.frame(Species=species,Cor_Provean_PSD=cor_Provean,pvalue_Provean_PSD=pval_Provean$p.value))
}
df_gt_table <- df_correlation_GL %>%
dplyr::mutate(
`Provean ~ PSD` = paste0(round(Cor_Provean_PSD, 2), " (p=", signif(pvalue_Provean_PSD, 2), ")"),
) %>%
dplyr::select(Species, `Provean ~ PSD`)
# Create a nice gt table
gt_table <- df_gt_table %>%
gt() %>%
tab_header(
title = "Pearson's correlation between genetic load and historical gene flow"
) %>%
cols_label(
Species = "Species",
`Provean ~ PSD` = "Provean ~ PSD",
) %>%
tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
) %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
heading.subtitle.font.size = 11,
table.width = pct(100)
)
gt_table
| Pearson's correlation between genetic load and historical gene flow | |
| Species | Provean ~ PSD |
|---|---|
| Taxus | -0.45 (p=0.013) |
| Pinea | 0.45 (p=0.0014) |
| Pinaster | -0.32 (p=0.0035) |
| Sylvestris | -0.31 (p=0.0046) |
Genetic load values could be comparable across models because it’s a ratio so we could use raw values to compare across models
Genetic load values could be comparable across models because it’s a ratio so we could use raw values to compare across models. Therefore, we can run a linear models with the species as fixed factor to investigate the effect of historical gene flow on genetic load and if we have differences among species.
#merge all data
GL_Taxus$Species <- factor("Taxus")
GL_Pinea$Species <- factor("Pinea")
GL_Pinaster$Species <- factor("Pinaster")
GL_Sylvestris$Species <- factor("Sylvestris")
df_merge_species <- rbind(GL_Taxus,GL_Pinea,GL_Pinaster,GL_Sylvestris)
#Is there a significant difference between species?
anova_model <- aov(GLr.Provean ~ Species, data = df_merge_species)
summary(anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 3 0.3358 0.11193 1043 <2e-16 ***
## Residuals 238 0.0256 0.00011
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tukey_results <- TukeyHSD(anova_model)
tukey_results
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = GLr.Provean ~ Species, data = df_merge_species)
##
## $Species
## diff lwr upr p adj
## Pinea-Taxus -0.031411220 -0.037716488 -0.025105951 0.0000000
## Pinaster-Taxus -0.096757285 -0.102531306 -0.090983263 0.0000000
## Sylvestris-Taxus -0.098055316 -0.103856714 -0.092253918 0.0000000
## Pinaster-Pinea -0.065346065 -0.070196766 -0.060495364 0.0000000
## Sylvestris-Pinea -0.066644096 -0.071527353 -0.061760840 0.0000000
## Sylvestris-Pinaster -0.001298031 -0.005472851 0.002876788 0.8523071
Ratio_lm_Provean <- lm(GLr.Provean~Fst+Species, data=df_merge_species)
anova(Ratio_lm_Provean)
## Analysis of Variance Table
##
## Response: GLr.Provean
## Df Sum Sq Mean Sq F value Pr(>F)
## Fst 1 0.064404 0.064404 598.36 < 2.2e-16 ***
## Species 3 0.271431 0.090477 840.61 < 2.2e-16 ***
## Residuals 237 0.025509 0.000108
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Ratio_lm_Provean)
##
## Call:
## lm(formula = GLr.Provean ~ Fst + Species, data = df_merge_species)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.033727 -0.004316 -0.000147 0.002910 0.033465
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.203272 0.002086 97.442 <2e-16 ***
## Fst 0.003264 0.005090 0.641 0.522
## SpeciesPinea -0.032182 0.002720 -11.833 <2e-16 ***
## SpeciesPinaster -0.096751 0.002234 -43.299 <2e-16 ***
## SpeciesSylvestris -0.097704 0.002311 -42.282 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01037 on 237 degrees of freedom
## Multiple R-squared: 0.9294, Adjusted R-squared: 0.9282
## F-statistic: 780 on 4 and 237 DF, p-value: < 2.2e-16
palette <- c("darkred","orange","lightblue","darkgreen")
#provean
ggplotRegression <- function (fit) {
ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) +
geom_point(aes(color=Species)) +
scale_color_manual(values=c("darkred", "#E69F00", "#56B4E9","darkgreen"))+
stat_smooth(method = "lm", col = "red") +
labs(title = paste("R2 = ",signif(summary(fit)$r.squared, 5),
"Intercept =",signif(fit$coef[[1]],5 ),
" Slope =",signif(fit$coef[[2]], 5),
" P =",signif(summary(fit)$coef[2,4], 5)),
x = "Fst",
y = "Genetic load Provean")
}
plot_Provean <- ggplotRegression(Ratio_lm_Provean)
pdf("Results/all_species/Relationship/GL_new_estimates/figures/Provean/GL_relationship_allspecies.pdf");
print(plot_Provean);
dev.off()
## png
## 2
species_labels <- c(
"Taxus" = "italic(T.~baccata)",
"Pinea" = "italic(P.~pinea)",
"Pinaster" = "italic(P.~pinaster)",
"Sylvestris" = "italic(P.~sylvestris)"
)
# Apply labels to the data
df_renamed <- df_merge_species
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))
# Define colors and shapes using the same expressions
colors <- c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D"
)
shapes <- c(
"italic(T.~baccata)" = 8,
"italic(P.~pinea)" = 18,
"italic(P.~pinaster)" = 15,
"italic(P.~sylvestris)" = 17
)
# Plot with custom colors and shapes per species
plot <- ggplot(df_renamed, aes(x = Fst, y = GLr.Provean, shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(P.~pinaster)" = 19,
"italic(P.~sylvestris)" = 19
)) +
theme_minimal() +
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
labs(title = "Mixed model fit: One line per species",
x = expression(italic(F)[ST]),
y = "Realised genetic load")+
scale_y_continuous(
limits = c(0.05, 0.25),
breaks = seq(0.05, 0.25, by = 0.05)
)+
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GL_new_estimates/figures/Provean/GL_relationship_allspecies_new.pdf");
print(plot);
dev.off()
## png
## 2
plot <- ggplot(df_renamed, aes(x = Fst, y = GLr.Provean, shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(P.~pinaster)" = 19,
"italic(P.~sylvestris)" = 19
)) +
theme_minimal() +
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
labs(title = "Mixed model fit: One line per species",
x = expression(italic(F)[ST]),
y = "Realised genetic load")+
scale_y_continuous(
limits = c(0.05, 0.25),
breaks = seq(0.05, 0.25, by = 0.05)
)+
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GL_new_estimates/figures/Provean/GL_relationship_allspecies_new_wo_SE.pdf");
print(plot);
dev.off()
## png
## 2
df_lm_results <- data.frame(
Species = character(),
Intercept = numeric(),
Slope = numeric(),
R2 = numeric(),
p_value = numeric()
)
# Fit LM for each species independently
for (species in unique(df_renamed$Species)) {
df_species <- df_renamed %>%
filter(Species == species)
# Skip if not enough points
if (nrow(df_species) < 3) next
# Simple linear model
model <- lm(GLr.Provean ~ Fst, data = df_species)
summary_model <- summary(model)
df_lm_results <- rbind(
df_lm_results,
data.frame(
Species = species,
Intercept = coef(summary_model)[1, 1],
Slope = coef(summary_model)[2, 1],
R2 = summary_model$r.squared,
p_value = coef(summary_model)[2, 4]
)
)
}
# Inspect results
print(df_lm_results)
## Species Intercept Slope R2 p_value
## 1 italic(T.~baccata) 0.2140783 -0.06547539 0.20675345 0.013206130
## 2 italic(P.~pinea) 0.1627249 0.02454334 0.19987864 0.001443942
## 3 italic(P.~pinaster) 0.1123536 -0.03432457 0.09953515 0.003463933
## 4 italic(P.~sylvestris) 0.1065011 -0.01550811 0.09718620 0.004610150
gt_table_lm <- df_lm_results %>%
dplyr::mutate(
`GLr.Provean ~ Fst` = paste0(
"slope=", round(Slope, 3),
", R²=", round(R2, 2),
", p=", signif(p_value, 2)
)
) %>%
dplyr::select(Species, `GLr.Provean ~ Fst`) %>%
gt() %>%
tab_header(
title = "Linear model: GLr.Provean ~ Fst per species"
) %>%
cols_label(
Species = "Species",
`GLr.Provean ~ Fst` = "Model summary"
) %>%
tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
) %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
table.width = pct(100)
)
gt_table_lm
| Linear model: GLr.Provean ~ Fst per species | |
| Species | Model summary |
|---|---|
| italic(T.~baccata) | slope=-0.065, R²=0.21, p=0.013 |
| italic(P.~pinea) | slope=0.025, R²=0.2, p=0.0014 |
| italic(P.~pinaster) | slope=-0.034, R²=0.1, p=0.0035 |
| italic(P.~sylvestris) | slope=-0.016, R²=0.1, p=0.0046 |
We can remove the outliers
species_labels <- c(
"Taxus" = "italic(T.~baccata)",
"Pinea" = "italic(P.~pinea)",
"Pinaster" = "italic(P.~pinaster)",
"Sylvestris" = "italic(P.~sylvestris)"
)
# Apply labels to the data
df_renamed <- df_merge_species
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))
df_filtered <- df_renamed %>%
dplyr::group_by(Species) %>%
dplyr::mutate(
Q1 = quantile(Fst, 0.25, na.rm = TRUE),
Q3 = quantile(Fst, 0.75, na.rm = TRUE),
IQR = Q3 - Q1,
lower = Q1 - 2.4 * IQR,
upper = Q3 + 2.4 * IQR
) %>%
filter(Fst >= lower & Fst <= upper) %>%
ungroup() %>%
dplyr::select(-Q1, -Q3, -IQR, -lower, -upper)
# Define colors and shapes using the same expressions
colors <- c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D")
shapes <- c(
"italic(T.~baccata)" = 8,
"italic(P.~pinea)" = 18,
"italic(P.~pinaster)" = 15,
"italic(P.~sylvestris)" = 17
)
# Plot with custom colors and shapes per species
plot <- ggplot(df_filtered, aes(x = Fst, y = GLr.Provean, shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5)+
geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(P.~pinaster)" = 19,
"italic(P.~sylvestris)" = 19
)) +
theme_minimal() +
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
labs(title = "Mixed model fit: One line per species",
x = expression(italic(F)[ST]),
y = "Realised genetic load")+
scale_y_continuous(
limits = c(0.05, 0.25),
breaks = seq(0.05, 0.25, by = 0.05)
)+
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GL_new_estimates/figures/Provean/GL_relationship_allspecies_new_wo_outliers.pdf");
print(plot);
dev.off()
## png
## 2
plot <- ggplot(df_filtered, aes(x = Fst, y = GLr.Provean, shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5)+
geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(P.~pinaster)" = 19,
"italic(P.~sylvestris)" = 19
)) +
theme_minimal() +
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
labs(title = "Mixed model fit: One line per species",
x = expression(italic(F)[ST]),
y = "Realised genetic load")+
scale_y_continuous(
limits = c(0.05, 0.25),
breaks = seq(0.05, 0.25, by = 0.05)
)+
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GL_new_estimates/figures/Provean/GL_relationship_allspecies_new_wo_SE_wo_outliers.pdf");
print(plot);
dev.off()
## png
## 2
df_correlation_GD <- data.frame(
Species = character(),
cor_GDI_PSD = numeric(),
pvalue_GDI_PSD = numeric()
)
# Loop over unique species in the filtered dataset
for (species in unique(df_filtered$Species)) {
df_species <- df_filtered %>%
filter(Species == species)
# Pearson correlation
cor_result <- cor.test(df_species$Fst, df_species$GLr.Provean)
df_correlation_GD <- rbind(
df_correlation_GD,
data.frame(
Species = species,
cor_GL_PSD = cor_result$estimate,
pvalue_GL_PSD = cor_result$p.value
)
)
}
df_lm_results <- data.frame(
Species = character(),
Intercept = numeric(),
Slope = numeric(),
R2 = numeric(),
p_value = numeric()
)
# Loop over unique species
for (species in unique(df_filtered$Species)) {
df_species <- df_filtered %>%
filter(Species == species)
# Fit simple linear model
model <- lm(GLr.Provean ~ Fst, data = df_species)
summary_model <- summary(model)
# Extract coefficients and stats
intercept <- coef(summary_model)[1, 1]
slope <- coef(summary_model)[2, 1]
p_value <- coef(summary_model)[2, 4]
r2 <- summary_model$r.squared
# Add to results table
df_lm_results <- rbind(
df_lm_results,
data.frame(
Species = species,
Intercept = intercept,
Slope = slope,
R2 = r2,
p_value = p_value
)
)
}
# Merge with your existing correlation table
df_results_all <- df_correlation_GD %>%
left_join(df_lm_results, by = "Species")
# Format nicely for viewing
df_results_all %>%
dplyr::mutate(
`GLr.Provean ~ Fst` = paste0(
"slope=", round(Slope, 3),
", R²=", round(R2, 2),
", p=", signif(p_value, 2)
)
) %>%
dplyr::select(Species, `GLr.Provean ~ Fst`) %>%
gt() %>%
tab_header(
title = "Linear model: GLr.Provean ~ Fst per species"
) %>%
cols_label(
Species = "Species",
`GLr.Provean ~ Fst` = "Model summary"
)
| Linear model: GLr.Provean ~ Fst per species | |
| Species | Model summary |
|---|---|
| italic(T.~baccata) | slope=-0.065, R²=0.21, p=0.013 |
| italic(P.~pinea) | slope=0.025, R²=0.2, p=0.0014 |
| italic(P.~pinaster) | slope=-0.042, R²=0.12, p=0.0012 |
| italic(P.~sylvestris) | slope=-0.026, R²=0.18, p=9.8e-05 |
df_gt_table <- df_correlation_GD %>%
dplyr::mutate(
`GLr.Provean ~ PSD` = paste0(round(cor_GL_PSD, 2), " (p=", signif(pvalue_GL_PSD, 2), ")")
) %>%
dplyr::select(Species, `GLr.Provean ~ PSD`)
gt_table <- df_gt_table %>%
gt() %>%
tab_header(
title = "Pearson's correlation between genetic load and historical realised gene flow"
) %>%
cols_label(
Species = "Species",
`GLr.Provean ~ PSD` = "GLr.Provean ~ PSD"
) %>%
tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
) %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
heading.subtitle.font.size = 11,
table.width = pct(100)
)
gt_table
| Pearson's correlation between genetic load and historical realised gene flow | |
| Species | GLr.Provean ~ PSD |
|---|---|
| italic(T.~baccata) | -0.45 (p=0.013) |
| italic(P.~pinea) | 0.45 (p=0.0014) |
| italic(P.~pinaster) | -0.35 (p=0.0012) |
| italic(P.~sylvestris) | -0.42 (p=9.8e-05) |
list_species <- c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")
for(x in 1:length(list_species)){
name_species <- list_species[x]
data <- read.csv(paste0("Data/Species/Chapter2/GDI_past/GDI_",name_species,".csv"),sep=",",dec=",",h=T)
data <- data[, -1]
colnames(data) <- c("Population","GDI")
data$GDI <- as.numeric(data$GDI)
p <- ggplot(data, aes(x = GDI)) +
geom_density(fill = "steelblue", alpha = 0.6) +
labs(title = paste("GDI distribution -", name_species),
x = "GDI",
y = "Density") +
theme_minimal()
print(p)
assign(paste0("GDI_",name_species),data)
}
for(x in 1:length(list_species)){
species <- list_species[x]
data <- get(paste0("GDI_", species))
write.csv(data,paste0("Results/all_species/data_",species,".csv"))
}
list_species_name <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")
combined_data <- data.frame()
for (x in 1:length(list_species)) {
species <- list_species[x]
data <- get(paste0("GDI_", species))
data$Species <- species
combined_data <- rbind(combined_data, data)
}
x_breaks <- seq(0, 0.3, by = 0.05)
x_limits <- c(0, 0.3)
for (species in list_species) {
data <- subset(combined_data, Species == species)
plot <- ggplot(data, aes(x = GDI)) +
geom_histogram(binwidth = 0.05, fill = "lightblue", color = "black", alpha = 0.8) +
scale_x_continuous(limits = x_limits, breaks = x_breaks) + # fixed x-axis
# No y limit — y axis is free
labs(
title = paste0("Distribution of GDI values \nacross populations of ", species),
x = "GDI",
y = "Count of populations"
) +
theme_bw() +
theme(
panel.grid = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
strip.text = element_text(size = 11),
plot.title = element_text(hjust = 0.5)
)
print(plot)
# Save to PDF
pdf(paste0("Results/all_species/GDI/figures/GDI_past_", species, ".pdf"))
print(plot)
dev.off()
}
combined_data$Species <- factor(
combined_data$Species,
levels = c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")
)
# Facetted histogram with custom order
combined_plot <- ggplot(combined_data, aes(x = GDI)) +
geom_histogram(binwidth = 0.05, aes(fill = Species), color = "black", alpha = 0.8) +
scale_fill_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D","orange","#9467BD")) +#
scale_x_continuous(limits = c(0, 0.20), breaks = seq(0, 0.20, by = 0.05),labels = scales::label_number(accuracy = 0.01)) +
facet_wrap(~ Species, scales = "free_y", ncol = 2) +
labs(
title = "Distribution of GDI values \nacross populations all species ",
x = "GDI",
y = "Count of populations"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
strip.text = element_text(size = 10)
)
# Show the plot
print(combined_plot)
pdf(paste0("Results/all_species/GDI/figures/GDI_past_all_species_", species, ".pdf"))
print(combined_plot)
dev.off()
## png
## 2
beeswarm_provean <- ggplot(combined_data, aes(x = Species, y = GDI, color = Species)) +
geom_beeswarm(cex = 1.2, size = 2.5, alpha = 0.7) +
scale_color_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D","orange","#9467BD")) +#
stat_summary(fun = median, geom = "crossbar", width = 0.5, color = "black", fatten = 0.07, size = 17) +
labs(
title = "Beeswarm Plot of GDI",
x = "Species",
y = "GDI"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "none"
)
print(beeswarm_provean)
#pdf("Results/all_species/Genetic_load/figures/Provean/Beeswarm_GL_Provean_all_species.pdf")
#print(beeswarm_provean)
#dev.off()
#zscores
combined_data$GDI.z <- ave(combined_data$GDI, combined_data$Species,
FUN = function(x) scale(x)[,1])
beeswarm_snpeff_horiz <- ggplot(combined_data, aes(y = Species, x = GDI.z, color = Species)) +
geom_beeswarm(cex = 1.2, size = 2.5, alpha = 0.8) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray30") +
scale_color_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D","orange","#9467BD")) +
labs(
title = " zscores of GDI per Species",
x = "zscores GDI",
y = ""
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "none",
axis.text.y = element_text(size = 11)
)
print(beeswarm_snpeff_horiz)
combined_data_bis <- combined_data
combined_data_bis$Species_raw <- combined_data_bis$Species
species_labels <- c(
"Taxus" = "T. baccata",
"Pinea" = "P. pinea",
"Pinaster" = "P. pinaster",
"Ash" = "F. excelsior",
"Fagus_forg" = "F. sylvatica",
"Sylvestris" = "P. sylvestris"
)
# Apply labels
combined_data_bis$Species <- species_labels[combined_data_bis$Species_raw]
medians_graph <- combined_data_bis %>%
dplyr::group_by(Species_raw) %>%
summarise(median_GDI = median(GDI, na.rm = TRUE)) %>%
arrange(median_GDI)
ordered_species_labels <- species_labels
# Apply correct order to factors
combined_data_bis$Species <- factor(species_labels[combined_data_bis$Species_raw], levels = rev(ordered_species_labels))
medians_graph$Species_raw <- factor(species_labels[medians_graph$Species_raw], levels = rev(ordered_species_labels))
ridgeline_plot <- ggplot(combined_data_bis, aes(x = GDI, y = Species, fill = Species)) +
geom_density_ridges(scale = 1.2, alpha = 0.8, color = "black") +
scale_fill_manual(
values = c(
"T. baccata" = "darkred",
"P. pinea" = "darkgreen",
"P. pinaster" = "#56B4E9",
"P. sylvestris" = "#A0522D",
"F. sylvatica" = "#9467BD",
"F. excelsior" = "orange"
)
) +
geom_segment(
data = medians_graph,
aes(x = median_GDI, xend = median_GDI,
y = as.numeric(Species_raw) - 0.4,
yend = as.numeric(Species_raw) + 0.4),
inherit.aes = FALSE,
color = "black", size = 1.5
) +
labs(
title = "Density of GDI values per species (ordered by median)",
x = "GDI",
y = "Species"
) +
scale_y_discrete(
labels = c(
"F. excelsior" = expression(italic('F. excelsior')),
"F. sylvatica" = expression(italic('F. sylvatica')),
"P. sylvestris" = expression(italic('P. sylvestris')),
"P. pinaster" = expression(italic('P. pinaster')),
"P. pinea" = expression(italic('P. pinea')),
"T. baccata" = expression(italic('T. baccata'))
)
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5),
panel.grid.major = element_blank()
)
# Save plot
print(ridgeline_plot)
pdf("Results/all_species/GDI/figures/GDI_ridgeline_all_species.pdf", width = 8, height = 5)
print(ridgeline_plot)
dev.off()
## png
## 2
list_species <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")
df_correlation_GD <- data.frame(
Species = character(),
Cor_GDI_PSD = numeric(),
pvalue_GDI_PSD = numeric()
)
for(x in 1:length(list_species)){
species <- list_species[x]
data_FST <- get(paste0("Bayescan_FST_pop_order_", species))
#GD
data_GDI <- get(paste0("GDI_",species))
data_tot <- merge(data_FST,data_GDI,"Population")
assign(paste0("df_GDI_PSD_",species),data_tot)
#GD
cor_GDI_PSD <- cor(data_tot$Fst,data_tot$GDI)
pvalue_GDI_PSD <- cor.test(data_tot$Fst,data_tot$GDI)
df_correlation_GD <- rbind(df_correlation_GD,data.frame(Species=species,cor_GDI_PSD=cor_GDI_PSD,pvalue_GDI_PSD=pvalue_GDI_PSD$p.value))
}
df_gt_table <- df_correlation_GD %>%
dplyr::mutate(
`GDI ~ PSD` = paste0(round(cor_GDI_PSD, 2), " (p=", signif(pvalue_GDI_PSD, 2), ")")
) %>%
dplyr::select(Species, `GDI ~ PSD`)
# Create a nice gt table
gt_table <- df_gt_table %>%
gt() %>%
tab_header(
title = "Pearson's correlation between GDI and historical gene flow"
) %>%
cols_label(
Species = "Species",
`GDI ~ PSD` = "GDI ~ PSD",
) %>%
tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
) %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
heading.subtitle.font.size = 11,
table.width = pct(100)
)
gt_table
| Pearson's correlation between GDI and historical gene flow | |
| Species | GDI ~ PSD |
|---|---|
| Taxus | 0.52 (p=0.004) |
| Pinea | 0.55 (p=4.7e-05) |
| Pinaster | 0.4 (p=0.00013) |
| Sylvestris | -0.1 (p=0.39) |
| Ash | 0.48 (p=0.0061) |
| Fagus_forg | 0.55 (p=2.6e-05) |
#merge all data
df_GDI_PSD_Taxus$Species <- factor("Taxus")
df_GDI_PSD_Pinea$Species <- factor("Pinea")
df_GDI_PSD_Pinaster$Species <- factor("Pinaster")
df_GDI_PSD_Sylvestris$Species <- factor("Sylvestris")
df_GDI_PSD_Ash$Species <- factor("Ash")
df_GDI_PSD_Fagus_forg$Species <- factor("Fagus_forg")
df_merge_species_GDI <- rbind(df_GDI_PSD_Taxus,df_GDI_PSD_Pinea,df_GDI_PSD_Pinaster,df_GDI_PSD_Sylvestris,df_GDI_PSD_Ash,df_GDI_PSD_Fagus_forg)
#linear model
Ratio_lm_GDI <- lm(GDI~Fst+Species, data=df_merge_species_GDI)
anova(Ratio_lm_GDI)
## Analysis of Variance Table
##
## Response: GDI
## Df Sum Sq Mean Sq F value Pr(>F)
## Fst 1 0.037616 0.037616 45.7630 6.446e-11 ***
## Species 5 0.036907 0.007381 8.9801 5.395e-08 ***
## Residuals 317 0.260565 0.000822
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Ratio_lm_GDI)
##
## Call:
## lm(formula = GDI ~ Fst + Species, data = df_merge_species_GDI)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.061445 -0.017440 -0.005688 0.012303 0.122270
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.045937 0.005741 8.002 2.33e-14 ***
## Fst 0.097573 0.013665 7.140 6.40e-12 ***
## SpeciesPinea -0.037282 0.007474 -4.988 1.01e-06 ***
## SpeciesPinaster -0.025635 0.006175 -4.151 4.25e-05 ***
## SpeciesSylvestris -0.005529 0.006376 -0.867 0.386521
## SpeciesAsh -0.022085 0.007530 -2.933 0.003602 **
## SpeciesFagus_forg -0.023687 0.006856 -3.455 0.000626 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02867 on 317 degrees of freedom
## Multiple R-squared: 0.2224, Adjusted R-squared: 0.2077
## F-statistic: 15.11 on 6 and 317 DF, p-value: 3.203e-15
GDI plots with Fst
species_labels <- c(
"Taxus" = "italic(T.~baccata)",
"Pinea" = "italic(P.~pinea)",
"Pinaster" = "italic(P.~pinaster)",
"Sylvestris" = "italic(P.~sylvestris)",
"Ash" = "italic(F.~excelsior)",
"Fagus_forg" = "italic(F.~sylvatica)"
)
# Apply labels to the data
df_renamed <- df_merge_species_GDI
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))
# Define colors and shapes using the same expressions
colors <- c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(P.~pinaster)" = "#56B4E9",
"italic(P.~sylvestris)" = "#A0522D",
"italic(F.~sylvatica)" = "#9467BD",
"italic(F.~excelsior)" = "orange"
)
shapes <- c(
"italic(T.~baccata)" = 8,
"italic(P.~pinea)" = 18,
"italic(P.~pinaster)" = 15,
"italic(P.~sylvestris)" = 17,
"italic(F.~sylvatica)" = 25,
"italic(F.~excelsior)" = 16
)
# Plot
plot <- ggplot(df_renamed, aes(x = Fst, y = GDI, color = Species)) +
geom_point(aes(shape = Species, fill = Species),
position = position_jitter(width = 0, height = 0.01),
alpha = 0.6, size = 2.5, show.legend = TRUE) +
geom_smooth(aes(color = Species, fill = Species), method = "lm", se = TRUE, linewidth = 1, alpha = 0.2, show.legend = FALSE)+
scale_color_manual(
values = colors,
labels = parse(text = levels(df_renamed$Species))
) +
scale_fill_manual(
values = colors,
guide = "none"
) +
scale_shape_manual(
values = shapes,
guide = "none"
) +
theme_minimal() +
labs(
title = "Mixed model fit: One line per species",
x = "Fst",
y = "GDI"
) +
scale_y_continuous(breaks = seq(0.00, 0.20, by = 0.05)) +
facet_wrap(~ Species, scales = "free", ncol = 2, labeller = label_parsed) +
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text()
) +
guides(
color = guide_legend(override.aes = list(shape = 16, fill = NA))
)
print(plot)
# Save to PDF
pdf("Results/all_species/Relationship/GDI/figures/GDI_relationship_allspecies_new.pdf")
print(plot)
dev.off()
## png
## 2
Atypical populations Fst included
# Rename species for labels
species_labels <- c(
"Taxus" = "italic(T.~baccata)",
"Pinea" = "italic(P.~pinea)",
"Pinaster" = "italic(P.~pinaster)",
"Sylvestris" = "italic(P.~sylvestris)",
"Ash" = "italic(F.~excelsior)",
"Fagus_forg" = "italic(F.~sylvatica)"
)
# Update species names for plotting
df_renamed$Species <- species_labels[df_renamed$Species]
# Set factor levels in desired order
df_renamed$Species <- factor(df_renamed$Species, levels = c(
"italic(T.~baccata)",
"italic(P.~pinea)",
"italic(P.~pinaster)",
"italic(F.~excelsior)",
"italic(F.~sylvatica)",
"italic(P.~sylvestris)"
))
plot <- ggplot(df_renamed, aes(x = Fst, y = GDI, shape = Species, fill = Species, color = Species)) +
geom_point( alpha = 0.6, size = 2.5) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(F.~excelsior)" = 19,
"italic(P.~pinaster)" = 19,
"italic(F.~sylvatica)" = 19,
"italic(P.~sylvestris)" = 19
)) +
theme_minimal() +
labs(
title = "Mixed Model Fit: One Line per Species",
x = expression(italic(F)[ST]),
y = "GDI"
) +
scale_y_continuous(
limits = c(0, 0.15),
breaks = seq(0, 0.15, by = 0.05)
)+
facet_wrap(~ Species, scales = "free", ncol = 2, labeller = label_parsed) +
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GDI/figures/GDI_relationship_allspecies_new_wo_lazic.pdf");
print(plot);
dev.off()
## png
## 2
df_lm_results <- data.frame(
Species = character(),
Intercept = numeric(),
Slope = numeric(),
R2 = numeric(),
p_value = numeric()
)
# Fit LM for each species independently
for (species in unique(df_renamed$Species)) {
df_species <- df_renamed %>%
filter(Species == species)
# Skip species with too few points
if (nrow(df_species) < 3) next
# Simple linear model
model <- lm(GDI ~ Fst, data = df_species)
summary_model <- summary(model)
# Store coefficients and statistics
df_lm_results <- rbind(
df_lm_results,
data.frame(
Species = species,
Intercept = coef(summary_model)[1, 1],
Slope = coef(summary_model)[2, 1],
R2 = summary_model$r.squared,
p_value = coef(summary_model)[2, 4]
)
)
}
# Inspect results
print(df_lm_results)
## Species Intercept Slope R2 p_value
## 1 italic(T.~baccata) 0.01583221 0.28908235 0.267934165 4.029524e-03
## 2 italic(P.~pinea) 0.00862133 0.09766065 0.304852584 4.734091e-05
## 3 italic(P.~pinaster) 0.02393187 0.07418146 0.163770430 1.344716e-04
## 4 italic(P.~sylvestris) 0.04951160 -0.08555970 0.009490352 3.869296e-01
## 5 italic(F.~excelsior) 0.02179080 0.13313319 0.231777193 6.105838e-03
## 6 italic(F.~sylvatica) 0.01098127 0.37696348 0.305822674 2.572255e-05
Without atypical Fst populations
df_filtered <- df_renamed %>%
dplyr::group_by(Species) %>%
dplyr::mutate(
Q1 = quantile(Fst, 0.25, na.rm = TRUE),
Q3 = quantile(Fst, 0.75, na.rm = TRUE),
IQR = Q3 - Q1,
lower = Q1 - 2.4 * IQR,
upper = Q3 + 2.4 * IQR
) %>%
filter(Fst >= lower & Fst <= upper) %>%
ungroup() %>%
dplyr::select(-Q1, -Q3, -IQR, -lower, -upper)
species_labels <- c(
"Taxus" = "italic(T.~baccata)",
"Pinea" = "italic(P.~pinea)",
"Pinaster" = "italic(P.~pinaster)",
"Sylvestris" = "italic(P.~sylvestris)",
"Ash" = "italic(F.~excelsior)",
"Fagus_forg" = "italic(F.~sylvatica)"
)
df_renamed$Species <- species_labels[df_renamed$Species]
# Create a new data frame with updated species names
df_filtered$Species <- species_labels[df_filtered$Species]
# Set Species as a factor with desired order for consistent colors/shapes/legend
df_filtered$Species <- factor(df_filtered$Species, levels = c(
"italic(T.~baccata)",
"italic(P.~pinea)",
"italic(P.~pinaster)",
"italic(F.~excelsior)",
"italic(F.~sylvatica)",
"italic(P.~sylvestris)"
))
plot <- ggplot(df_filtered, aes(x = Fst, y = GDI, shape = Species, fill = Species, color = Species)) +
geom_point(alpha = 0.6, size = 2.5) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
scale_color_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_fill_manual(values = c(
"italic(T.~baccata)" = "darkred",
"italic(P.~pinea)" = "darkgreen",
"italic(F.~excelsior)" = "orange",
"italic(P.~pinaster)" = "#56B4E9",
"italic(F.~sylvatica)" = "#9467BD",
"italic(P.~sylvestris)" = "#A0522D"
)) +
scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
"italic(P.~pinea)" = 19,
"italic(F.~excelsior)" = 19,
"italic(P.~pinaster)" = 19,
"italic(F.~sylvatica)" = 19,
"italic(P.~sylvestris)" = 19
)) +
theme_minimal() +
labs(
title = "Mixed Model Fit: One Line per Species",
x = expression(italic(F)[ST]),
y = "GDI"
) +
scale_y_continuous(
limits = c(0, 0.15),
breaks = seq(0, 0.15, by = 0.05)
)+
facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +
theme(
plot.title = element_text(hjust = 0.5),
legend.title = element_blank(),
legend.text = element_text(face = "italic"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12)
)
print(plot)
pdf("Results/all_species/Relationship/GDI/figures/GDI_relationship_allspecies_new_wo_outliers.pdf");
print(plot);
dev.off()
## png
## 2
df_correlation_GD <- data.frame(
Species = character(),
cor_GDI_PSD = numeric(),
pvalue_GDI_PSD = numeric()
)
# Loop over unique species in the filtered dataset
for (species in unique(df_filtered$Species)) {
df_species <- df_filtered %>%
filter(Species == species)
# Pearson correlation
cor_result <- cor.test(df_species$Fst, df_species$GDI)
df_correlation_GD <- rbind(
df_correlation_GD,
data.frame(
Species = species,
cor_GDI_PSD = cor_result$estimate,
pvalue_GDI_PSD = cor_result$p.value
)
)
}
df_lm_results <- data.frame(
Species = character(),
Intercept = numeric(),
Slope = numeric(),
R2 = numeric(),
p_value = numeric()
)
# Fit LM for each species independently on the filtered dataset
for (species in unique(df_filtered$Species)) {
df_species <- df_filtered %>%
filter(Species == species)
# Skip species with too few points
if (nrow(df_species) < 3) next
# Simple linear model
model <- lm(GDI ~ Fst, data = df_species)
summary_model <- summary(model)
# Store coefficients and stats
df_lm_results <- rbind(
df_lm_results,
data.frame(
Species = species,
Intercept = coef(summary_model)[1, 1],
Slope = coef(summary_model)[2, 1],
R2 = summary_model$r.squared,
p_value = coef(summary_model)[2, 4]
)
)
}
# Inspect the results
print(df_lm_results)
## Species Intercept Slope R2 p_value
## 1 italic(T.~baccata) 0.01583221 0.289082350 0.2679341646 4.029524e-03
## 2 italic(P.~pinea) 0.00862133 0.097660648 0.3048525843 4.734091e-05
## 3 italic(P.~pinaster) 0.02690169 0.050567199 0.0730301254 1.347991e-02
## 4 italic(F.~sylvatica) 0.04647749 -0.008438308 0.0000566826 9.474960e-01
## 5 italic(P.~sylvestris) 0.02899551 -0.087471083 0.0070759232 6.704174e-01
## 6 italic(F.~excelsior) 0.01528825 0.233432367 0.0516945384 1.243016e-01
df_gt_table <- df_correlation_GD %>%
dplyr::mutate(
`GDI ~ PSD` = paste0(round(cor_GDI_PSD, 2), " (p=", signif(pvalue_GDI_PSD, 2), ")")
) %>%
dplyr::select(Species, `GDI ~ PSD`)
gt_table <- df_gt_table %>%
gt() %>%
tab_header(
title = "Pearson's correlation between GDI and historical gene flow"
) %>%
cols_label(
Species = "Species",
`GDI ~ PSD` = "GDI ~ PSD"
) %>%
tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
) %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
heading.subtitle.font.size = 11,
table.width = pct(100)
)
gt_table
| Pearson's correlation between GDI and historical gene flow | |
| Species | GDI ~ PSD |
|---|---|
| italic(T.~baccata) | 0.52 (p=0.004) |
| italic(P.~pinea) | 0.55 (p=4.7e-05) |
| italic(P.~pinaster) | 0.27 (p=0.013) |
| italic(F.~sylvatica) | -0.01 (p=0.95) |
| italic(P.~sylvestris) | -0.08 (p=0.67) |
| italic(F.~excelsior) | 0.23 (p=0.12) |
Session info
packages <- c(
"ggplot2", "dplyr", "gt", "lme4",
"sf", "rnaturalearth","vegan", "ggbeeswarm",
"ggridges"
)
info <- sessionInfo()
packages_used <- info$otherPkgs[packages]
data.frame(
Package = names(packages_used),
Version = sapply(packages_used, function(x) x$Version)
)
## Package Version
## ggplot2 ggplot2 3.5.2
## dplyr dplyr 1.1.4
## gt gt 0.11.0
## lme4 lme4 1.1-37
## sf sf 1.0-15
## rnaturalearth rnaturalearth 1.0.1
## vegan vegan 2.6-4
## ggbeeswarm ggbeeswarm 0.7.2
## ggridges ggridges 0.5.6