rm(list = ls())
knitr::opts_chunk$set(
    message = FALSE,
    warning = FALSE,
    cache = FALSE
)

This script will analyse the relationship between historical gene flow and genetic metrics of vulnerability to climate change across all species. Historical gene flow was approximated using the population-specific divergence index from BayeScan, and the three genetic indicators are genetic diversity, genetic load, and the genomic discrepancy index.

#Taxus 
meta_data_vcf_Taxus <- read.csv("Data/Species/Taxus_baccata/Samples/samples_taxus_baccata_adapcon_gentree.csv",h=T,sep=";",dec=",")
meta_data_pop_Taxus <- read.csv("Data/Species/Taxus_baccata/Populations/taxus_sample_29pop.csv",h=T,sep=";",dec=",")

names(meta_data_pop_Taxus)[names(meta_data_pop_Taxus) == "Elevation.DEM_90m."] <- "Altitude"

#Pinea
load("Data/Species/Pinus_pineaV4/Individuals/meta_data_vcf.Rdata")
meta_data_vcf_Pinea <- data.frame(meta_data_vcf[,c(1,3)],Population=meta_data_vcf$Population_ID)

load("Data/Species/Pinus_pineaV4/Population/meta_data_pop.Rdata")
meta_data_pop_Pinea <- data.frame(Country=meta_data_pop[,c(7)],Population=meta_data_pop$Population_ID,N=meta_data_pop[,c(2)],meta_data_pop[,c(4,5,6)])

#Pinaster
load("Data/Species/Pinus_pinaster_draft/Individual/meta_data_vcf_final.Rdata")
meta_data_vcf_Pinaster <- data.frame(VCF_ID=meta_data_vcf_final[,c(1)],Population=meta_data_vcf_final$Population_ID)

load("Data/Species/Pinus_pinaster_draft/Population/meta_data_pop_final.Rdata")
meta_data_pop_Pinaster <- data.frame(Country=meta_data_pop_final[,c(3)],Population=meta_data_pop_final$Population_ID,N="Na",meta_data_pop_final[,c(4,5,6)])

#Sylvestris
meta_data_vcf_Sylvestris <- read.csv("Data/Species/Pinus_sylvestris/individual/Psyl_individuals.csv",h=T,sep=";",dec=",")
meta_data_vcf_Sylvestris <- data.frame(meta_data_vcf_Sylvestris[,c(1,3,2)])

load("Data/Species/Pinus_sylvestris/population/meta_data_pop_retained_order.Rdata")
meta_data_pop_Sylvestris <- data.frame(meta_data_pop_retained_order[,c(2,1)],N="Na",meta_data_pop_retained_order[,c(3,4)],Altitude="Na")

#Ash
load("Data/Species/Fraxinus_excelsior/Sample/meta_data_vcf_final_418_structure.Rdata")
meta_data_vcf_final_418_structure_format <- meta_data_vcf_final_418_structure
meta_data_vcf_final_418_structure_format$VCF_ID <- gsub("\\.", "-", meta_data_vcf_final_418_structure$VCF_ID)
meta_data_vcf_Ash <- data.frame(meta_data_vcf_final_418_structure_format[,c(1,3,2)])
meta_data_vcf_Ash_format <- data.frame(meta_data_vcf_final_418_structure[,c(1,3,2)])
load("Data/Species/Fraxinus_excelsior/Population/meta_data_pop_final_31_structure.Rdata")
meta_data_pop_Ash <- data.frame(meta_data_pop_final_31_structure[,c(3,2)],N="Na",meta_data_pop_final_31_structure[,c(4,5)],Altitude="Na")

#fagus forgenius
load("Data/Species/Fagus_sylvatica_forgenius/Sample/meta_data_vcf_final.Rdata")
meta_data_vcf_final$Country <- "NA"
meta_data_vcf_Fagus_forg <- data.frame(meta_data_vcf_final[,c(2,3,1)])

load("Data/Species/Fagus_sylvatica_forgenius/Population/meta_data_pop_ordered.Rdata")
meta_data_pop_Fagus_forg <- data.frame(meta_data_pop_ordered[,c(5,1)],N="Na",meta_data_pop_ordered[,c(3,2)],Altitude=meta_data_pop_ordered$ALTI)

1 Load gene flow estimates

1.1 PSD from BayeScan

list_species <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")
list_species_name <- c("taxus","pinea","pinaster","sylvestris","Ash","Fagus_forg")
for( x in 1:length(list_species)){
  
  species <- list_species[x]
  species_name <- list_species_name[x]
  
data <- read.table(paste0("Results/all_species/FST_index/Data/Population_Structure/data/",species_name,"/pop_order_bayescan_",species_name,".txt"),h=T)
  
assign(paste0("popname_",species),data)
}

list_species <- c("Taxus_bis","PineaV4_bis","Pinaster_bis","Sylvestris","Ash","Fagus_forg")#,
list_species_name <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")#,

for(x in 1:length(list_species)){
  
  species <- list_species[x]
  species_name <- list_species_name[x]
  
  pop_name <- get(paste0("popname_",species_name))
  
  pop_name$BAYESCAN_POP <- paste0("Fst", pop_name$BAYESCAN_POP)
  
  df_bayecan <- read.table(paste0("Results/all_species/FST_index/Data/Population_Structure/results/Bayescan/FST_bayescan_",species,".txt"),h=T)
  
  df_bayesan_FST_mean <- data.frame(colMeans(df_bayecan[,-1]))

  df_bayesan_FST_mean$BAYESCAN_POP <- rownames(df_bayesan_FST_mean)
  df_bayesan_FST_mean <- data.frame(BAYESCAN_POP=df_bayesan_FST_mean$BAYESCAN_POP,FST=df_bayesan_FST_mean$colMeans.df_bayecan....1..)

  df_bayesan_FST_mean_merge <- merge(pop_name,df_bayesan_FST_mean,"BAYESCAN_POP")
#we can save to compare to hierfstat at the population level
Bayescan_FST_pop <- data.frame(Population=df_bayesan_FST_mean_merge$STRATA,Fst=df_bayesan_FST_mean_merge$FST)

Bayescan_FST_pop_order <- Bayescan_FST_pop[order(Bayescan_FST_pop$Population), ]

varname <- paste0("Bayescan_FST_pop_order_", species_name)

# Assign the object
assign(varname, Bayescan_FST_pop_order)

if(species =="Sylvestris"){
  
  Bayescan_FST_pop_order_Sylvestris <- Bayescan_FST_pop_order_Sylvestris %>%
    filter(Population != "CATA")
  
} else {
  
  # Save the dynamically named object to an .Rdata file
save(list = varname, file = paste0("Results/all_species/FST_index/", varname, ".Rdata"))
  }
}

1.1.1 Density plot

combined_data <- data.frame()

# Loop to process each species and combine data
for (x in 1:length(list_species_name)) {
  species <- list_species_name[x]
  data <- get(paste0("Bayescan_FST_pop_order_", species))
  data$Species <- species  # Add species name column
  combined_data <- rbind(combined_data, data)
  
  # Plot per species (optional, keeps your original individual plots)
  plot <- ggplot() + 
    geom_density(data = data, aes(x = Fst), fill = "lightblue", alpha = 0.8) +
    labs(
      title = paste0("Distribution of Population Specific Divergence Values\nacross populations of ", species),
      x = "Population Specific Divergence index", 
      y = "Density (number of populations)"
    ) +
    theme_bw() +
    theme(
      panel.grid = element_blank(),
      plot.background = element_blank(),
      panel.background = element_blank(),
      strip.text = element_text(size = 11),
      plot.title = element_text(hjust = 0.5)
    )
  
  print(plot)
  
  # Save to PDF
  pdf(paste0("Results/all_species/FST_index/figures/Bayescan_PSD_", species, ".pdf"))
  print(plot)
  dev.off()
}

# Combined density plot
combined_plot <- ggplot(combined_data, aes(x = Fst, fill = Species)) +
  geom_density(alpha = 0.6) +
  labs(
    title = "Distribution of Population Specific Divergence Values\nacross species",
    x = "Population Specific Divergence index", 
    y = "Density (number of populations)"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_fill_brewer(palette = "Set1")  # Or use manual colors with scale_fill_manual

# Display combined plot
print(combined_plot)

# Save combined plot
pdf("Results/all_species/FST_index/figures/Bayescan_PSD_combined_all_species.pdf")
print(combined_plot)
dev.off()

## png 
##   2

for(x in 1:length(list_species_name)){
  
  species <- list_species_name[x]
    data <- get(paste0("Bayescan_FST_pop_order_", species))

    write.csv(data,paste0("Results/all_species/data_",species,".csv"))
}

1.1.2 Barplot

x_breaks <- seq(0, 1, by = 0.3)
x_limits <- c(0, 1)

combined_data$Species <- factor(
  combined_data$Species,
  levels = c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")
)

combined_plot <- ggplot(combined_data, aes(x = Fst)) +
  geom_histogram(binwidth = 0.01, color = "black", alpha = 0.8, aes(fill = Species)) +
  scale_fill_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D","#9467BD","orange","yellow")) +
  scale_x_continuous(limits = x_limits, breaks = x_breaks) +  
  facet_wrap(~ Species, scales = "free_y", ncol = 2) +
  labs(
    title = "Distribution of Population Specific Divergence Values across Species",
    x = "Population Specific Divergence index",
    y = "Count of populations"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    strip.text = element_text(size = 10)
  )

# Show facetted plot
print(combined_plot)

ggsave(
  filename = "Results/all_species/FST_index/figures/Bayescan_PSD_barplot_all_species_withfagus.pdf",
  plot = combined_plot, width = 10, height = 7
)

Paper size figures

x_breaks <- seq(0, 1, by = 0.3)
x_limits <- c(0, 1) 

species_colors <- c(
    "Taxus" = "darkred",
    "Pinea" = "darkgreen",
    "Pinaster" = "#56B4E9",
    "Sylvestris" = "#A0522D",
    "Ash" = "orange",
    "Fagus_forg" = "#9467BD")

# Loop through species and create individual plots
for (sp in levels(combined_data$Species)) {
  
  species_data <- subset(combined_data, Species == sp)
  
  individual_plot <- ggplot(species_data, aes(x = Fst)) +
    geom_histogram(binwidth = 0.01, color = "black", fill = species_colors[[sp]], alpha = 0.8) +
    scale_x_continuous(limits = x_limits, breaks = x_breaks) +
    labs(
      x = expression(italic(F)[ST]),
      y = "Populations"
    ) +
    theme_bw() +
  theme(
    axis.title.x = element_text(size = 25),
    axis.title.y = element_text(size = 25),
    axis.text.x = element_text(size = 24),
    axis.text.y = element_text(size = 24)
  )
  print(individual_plot)
  # Save each plot
  ggsave(
    filename = paste0(
      "Results/all_species/FST_index/figures/",
      "Bayescan_PSD_barplot_", sp, ".pdf"
    ),
    plot = individual_plot, width = 6, height = 5
  )
}

x_breaks <- seq(0, 1, by = 1)
x_limits <- c(0, 1)  

species_colors <- c(
    "Taxus" = "darkred",
    "Pinea" = "darkgreen",
    "Pinaster" = "#56B4E9",
    "Sylvestris" = "#A0522D",
    "Ash" = "orange",
    "Fagus_forg" = "#9467BD")
# Loop through species and create individual plots
for (sp in levels(combined_data$Species)) {
  
  species_data <- subset(combined_data, Species == sp)
  
  individual_plot <- ggplot(species_data, aes(x = Fst)) +
    geom_density(data = species_data, aes(x = Fst), fill = species_colors[[sp]], alpha = 0.8) +
    scale_x_continuous(limits = x_limits, breaks = x_breaks) +
    labs(
      x = expression(italic(F)[ST])
    ) +
    theme_bw() +
  theme(
    axis.title.x = element_text(size = 28),
    axis.text.x = element_text(size = 28),
    axis.text.y=element_blank(),axis.ticks=element_blank(),
    axis.title.y=element_blank()
    
  )
  print(individual_plot)
  # Save each plot
  ggsave(
    filename = paste0(
      "Results/all_species/FST_index/figures/",
      "Bayescan_PSD_boxplot_new_", sp, ".pdf"
    ),
    plot = individual_plot, width = 6, height = 5
  )
}

1.2 Nice maps with Fst values

Good figure for Taxus, Pinea, Sylvestris and Fagus

list_species_all <- c("Taxus","Pinea","Pinaster","Sylvestris","Fagus_forg","Ash")
list_color <- c("#FFCCCC","#b6ddb6","#ADD8E6","#DEB887","#e8defa","#FFE5B4")
list_point_size <- c(1.5,0.85,1.9,0.6,1.4,1.15)

for(x in seq_along(list_species_all)) {
  
  species <- list_species_all[x]
  shapefile_color <- list_color[x]
  meta_data <- get(paste0("meta_data_pop_", species))
  fst_data <- subset(combined_data, Species == species)
  fst_data_coord <- merge(fst_data, meta_data[, c(2,5,4)], by = "Population")
  fst_data_coord$Longitude <- as.numeric(fst_data_coord$Longitude)
  fst_data_coord$Latitude <- as.numeric(fst_data_coord$Latitude)
  fst_sf <- st_as_sf(fst_data_coord, coords = c("Longitude", "Latitude"), crs = 4326)
  fst_proj <- st_transform(fst_sf, crs = 3035)
  coords <- st_coordinates(fst_proj)
  fst_data_coord$x <- coords[, 1]
  fst_data_coord$y <- coords[, 2]
  shapefile <- get(paste0("shapefile_", species))
  admin <- ne_countries(scale = "medium", returnclass = "sf")
  admin_proj <- st_transform(admin, crs = 3035) 
  
  lab_no_trailing_zeros <- function(x) {
  sub("\\.?0+$", "", sprintf("%.2f", x))
  }
  
  # Plot
  map_pop <- ggplot() + 
    geom_sf(data = admin_proj, fill = gray(0.92), size = 0.2, color = "white") +
    geom_sf(data = shapefile, fill = shapefile_color, color = "gray60", size = 0.2) +
    geom_point( data = fst_data_coord, aes(x = x, y = y, fill = Fst),
                shape = 21, 
                color = "black",
                size = 3.3, 
                alpha = 1 ) + 
    scale_fill_gradient(
  low = "#f7f5f5",
  high = "#E51932",
  labels = lab_no_trailing_zeros,
  breaks = scales::pretty_breaks(5)  
)+ # Gradient based on Fst
    scale_size_continuous(range = c(1, 5)) +
    coord_sf( 
      crs = 3035, 
      xlim = c(2379349, 7002838), 
      ylim = c(1029721, 4519987), 
      expand = FALSE ) + 
    labs( title = paste("Genetic Structure for", species), 
          x = "Longitude", y = "Latitude", 
          fill = expression(italic(F)[ST]) ) + 
    theme_minimal() + 
    theme( 
      panel.background = element_rect(fill = "azure", color = NA), 
      plot.title = element_text(face = "bold", hjust = 0.5) ) 
  
  print(map_pop) 
  pdf(paste0("Results/all_species/FST_index/maps/Map_Fst_",species,".pdf"))
  print(map_pop)
  dev.off() 
  }

Good figure for Ash and Pinaster

list_species_all <- c("Taxus","Pinea","Pinaster","Sylvestris","Fagus_forg","Ash")
list_color <- c("#FFCCCC","#d9f2d9","#ADD8E6","#DEB887","#e8defa","#FFE5B4")
list_point_size <- c(1.5,0.85,1.9,0.6,1.4,1.15)

for(x in seq_along(list_species_all)) {
  
  species <- list_species_all[x]
  shapefile_color <- list_color[x]
  
  # Load metadata and FST data
  meta_data <- get(paste0("meta_data_pop_", species))
  fst_data <- subset(combined_data, Species == species)
  
  # Merge FST with coordinates
  fst_data_coord <- merge(fst_data, meta_data[, c(2,5,4)], by = "Population")
  fst_data_coord$Longitude <- as.numeric(fst_data_coord$Longitude)
  fst_data_coord$Latitude <- as.numeric(fst_data_coord$Latitude)
  
  # Convert to sf object and project
  fst_sf <- st_as_sf(fst_data_coord, coords = c("Longitude", "Latitude"), crs = 4326)
  fst_proj <- st_transform(fst_sf, crs = 3035)
  coords <- st_coordinates(fst_proj)
  fst_data_coord$x <- coords[,1]
  fst_data_coord$y <- coords[,2]
  
  min_val <- min(fst_data_coord$Fst, na.rm = TRUE)
  max_val <- max(fst_data_coord$Fst, na.rm = TRUE)
  
  # Compute raw step
  raw_step <- (max_val - min_val)/4
  
  # Choose a "nice" step from predefined options
  nice_steps <- c(0.01, 0.02, 0.05, 0.1, 0.2, 0.5)
  step <- nice_steps[which.min(abs(nice_steps - raw_step))]
  
  # Compute breaks
  break_start <- floor(min_val/step)*step
  break_end <- ceiling(max_val/step)*step
  breaks_nice <- seq(break_start, break_end, by = step)
  
  # Force exactly 5 breaks (4 classes)
  if(length(breaks_nice) < 5){
    breaks_nice <- seq(break_start, break_end, length.out = 5)
  }
  if(length(breaks_nice) > 5){
    breaks_nice <- breaks_nice[seq(1, length(breaks_nice), length.out = 5)]
  }
  
  # Create factor for reference
  fst_data_coord$Fst_class <- cut(fst_data_coord$Fst,
                                  breaks = breaks_nice,
                                  include.lowest = TRUE,
                                  right = TRUE)
  
  # Load shapefile and admin boundaries
  shapefile <- get(paste0("shapefile_", species))
  admin <- ne_countries(scale = "medium", returnclass = "sf")
  admin_proj <- st_transform(admin, crs = 3035)
  
  map_pop <- ggplot() +
    geom_sf(data = admin_proj, fill = gray(0.92), size = 0.2, color = "white") +
    geom_sf(data = shapefile, fill = shapefile_color, color = "gray60", size = 0.2) +
    geom_point(
      data = fst_data_coord,
      aes(x = x, y = y, fill = Fst),  # Use actual FST for gradient
      shape = 21,
      color = "black",
      size = 3.3,
      alpha = 1
    ) +
    scale_fill_gradient(
      low = "#f7f5f5",
  high = "#E51932",
      breaks = breaks_nice  # Show exactly 4 intervals in legend
    ) +
    scale_size_continuous(range = c(1, 5)) +
    coord_sf(
      crs = 3035,
      xlim = c(2379349, 7002838),
      ylim = c(1029721, 4519987),
      expand = FALSE
    ) +
    labs(
      title = paste("Genetic Structure for", species),
      x = "Longitude",
      y = "Latitude",
      fill = expression(italic(F)[ST])
    ) +
    theme_minimal() +
    theme(
      panel.background = element_rect(fill = "azure", color = NA),
      plot.title = element_text(face = "bold", hjust = 0.5)
    )
  
  print(map_pop)
  
  pdf(paste0("Results/all_species/FST_index/maps/Map_Fst_good_Ash_Pinaster_",species,".pdf"))
  print(map_pop)
  dev.off()
}

Plot for Sylvestris with all the populations

list_species_all <- c("Sylvestris")
list_color <- c("#DEB887")

for(x in seq_along(list_species_all)) {
  
  species <- list_species_all[x]
  shapefile_color <- list_color[x]
  meta_data <- get(paste0("meta_data_pop_", species))
  fst_data <- subset(combined_data, Species == species)
  
  fst_data_coord <- merge(fst_data, meta_data[, c(2,5,4)], by = "Population")
  fst_data_coord$Longitude <- as.numeric(fst_data_coord$Longitude)
  fst_data_coord$Latitude <- as.numeric(fst_data_coord$Latitude)
  
  fst_sf <- st_as_sf(fst_data_coord, coords = c("Longitude", "Latitude"), crs = 4326)
  fst_proj <- st_transform(fst_sf, crs = 3035)
  coords <- st_coordinates(fst_proj)
  fst_data_coord$x <- coords[, 1]
  fst_data_coord$y <- coords[, 2]
  
  shapefile <- get(paste0("shapefile_", species))
  admin <- ne_countries(scale = "medium", returnclass = "sf")
  admin_proj <- st_transform(admin, crs = 3035)
  
  # Plot
  map_pop <- ggplot() +
    geom_sf(data = admin_proj, fill = gray(0.92), size = 0.2, color = "white") +
    geom_sf(data = shapefile, fill = shapefile_color, color = "gray60", size = 0.2) +
    geom_point(
  data = fst_data_coord,
  aes(x = x, y = y, fill = Fst),
  shape = 21,                      
  color = "black",                
  size = 3.3,
  alpha = 1
) +
    scale_fill_gradient(low = "#f7f5f5", high = "#E51932") + 
    scale_size_continuous(range = c(1, 5)) +           
    coord_sf(
      crs = 3035,
      xlim = c(2379349, 9000000),
      ylim = c(1029721, 9000000),
      expand = FALSE
    ) +
    labs(
      title = paste("Genetic Structure for", species),
      x = "Longitude", y = "Latitude",
      fill = expression(italic(F)[ST])
    ) +
    theme_minimal() +
    theme(
      panel.background = element_rect(fill = "azure", color = NA),
      plot.title = element_text(face = "bold", hjust = 0.5)
    )
  
  print(map_pop)
  
  pdf(paste0("Results/all_species/FST_index/maps/Map_Fst_",species,"_supplementary_russian_pop.pdf"))
print(map_pop)
dev.off()
  
}

2 Genetic diversity

2.1 Load genetic diversity

list_species <- c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")

for(x in 1:length(list_species)){

  name_species <- list_species[x]
  meta_data <- get(paste0("meta_data_pop_",name_species))
  
data <- read.csv(paste0("Results/all_species/Genetic_diversity/",name_species,"/Hs_",name_species,".csv"),sep=",",dec=",",h=T)
data <- data[, -1]

data$Hs <- as.numeric(data$Hs)

data_full <- merge(data,meta_data[,c(2,4,5)],"Population")

assign(paste0("Genetic_diversity_",name_species),data_full)
}

for(x in 1:length(list_species_name)){
  species <- list_species_name[x]
    data <- get(paste0("Genetic_diversity_", species))

    write.csv(data,paste0("Results/all_species/data_",species,".csv"))
}

combined_data <- data.frame()

for (x in 1:length(list_species)) {
  species <- list_species[x]
  data <- get(paste0("Genetic_diversity_", species))
  data$Species <- species
  combined_data <- rbind(combined_data, data)
}

x_breaks <- seq(0, 0.5, by = 0.05)
x_limits <- c(0, 0.5)

for (species in list_species) {
  data <- subset(combined_data, Species == species)
  
  plot <- ggplot(data, aes(x = Hs)) + 
    geom_histogram(binwidth = 0.05, fill = "lightblue", color = "black", alpha = 0.8) +
    scale_x_continuous(limits = x_limits, breaks = x_breaks) +  # fixed x-axis
    # No y limit — y axis is free
    labs(
      title = paste0("Distribution of Hs values \nacross populations of ", species),
      x = "Hs genetic diversity index", 
      y = "Count of populations"
    ) +
    theme_bw() +
    theme(
      panel.grid = element_blank(),
      plot.background = element_blank(),
      panel.background = element_blank(),
      strip.text = element_text(size = 11),
      plot.title = element_text(hjust = 0.5)
    )
  
  print(plot)
  
   # Save to PDF
  pdf(paste0("Results/all_species/Genetic_diversity/figures/Hs_hierfstat_genetic_diversity_", species, ".pdf"))
  print(plot)
  dev.off()
}

combined_data$Species <- factor(
  combined_data$Species,
  levels = c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")
)

# Facetted histogram with custom order
combined_plot <- ggplot(combined_data, aes(x = Hs)) +
  geom_histogram(binwidth = 0.05, aes(fill = Species), color = "black", alpha = 0.8) +
      scale_fill_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D","#9467BD","orange","yellow")) +#,"darkviolet"
  scale_x_continuous(limits = c(0, 0.35), breaks = seq(0, 0.35, by = 0.05),labels = scales::label_number(accuracy = 0.01)) +
  facet_wrap(~ Species, scales = "free_y", ncol = 2) +  # Two columns: top = taxus & pinea; bottom = pinaster & sylvestris
  labs(
    title = "Distribution of Hs values \nacross populations all species ",
    x = "Hs genetic diversity index",
    y = "Count of populations"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    strip.text = element_text(size = 10)
  )

# Show the plot
print(combined_plot)

pdf(paste0("Results/all_species/Genetic_diversity/figures/Hs_hierfstat_genetic_diversity_all_species_", species, ".pdf"))
  print(combined_plot)
  dev.off()

## png 
##   2

combined_data_v1 <- combined_data
combined_data_v1$Species <- factor(
  combined_data_v1$Species,
  levels = c("Taxus", "Pinea", "Pinaster", "Ash", "Fagus_forg","Sylvestris"),
  labels = c("T. baccata", "P. pinea", "P. pinaster", "F. excelsior", "F. sylvatica", "P. sylvestris")
)

# Boxplot with jittered points overlaid
boxplot_jitter_plot <- ggplot(combined_data_v1, aes(x = Species, y = Hs, fill = Species)) +
  geom_jitter(aes(color = Species), width = 0.2, alpha = 0.6, size = 1.5) + 
  geom_boxplot(outlier.shape = NA, alpha = 0.6) +
  scale_fill_manual(values = c("darkred", "darkgreen", "#56B4E9","orange","#9467BD","#A0522D")) +
  scale_color_manual(values = c("darkred", "darkgreen", "#56B4E9","orange","#9467BD","#A0522D")) +
  scale_x_discrete(labels = c(
  "T. baccata" = expression(italic("T. baccata")),
  "P. pinea" = expression(italic("P. pinea")),
  "P. pinaster" = expression(italic("P. pinaster")),
  "F. excelsior" = expression(italic("F. excelsior")),
  "F. sylvatica" = expression(italic("F. sylvatica")),
  "P. sylvestris" = expression(italic("P. sylvestris"))
))+
  labs(
    title = "Boxplot of Hs values with jittered points by species",
    x = "Species",
    y = "Hs genetic diversity index"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Display plot
print(boxplot_jitter_plot)

# Save to PDF
pdf("Results/all_species/Genetic_diversity/figures/Hs_boxplot_jitter_all_species.pdf", width = 8, height = 6)
print(boxplot_jitter_plot)
dev.off()

## png 
##   2

Cumulative distribution functions genetic diversity

combined_data_bis <- combined_data

combined_data_bis$Species <- factor(
  combined_data_bis$Species,
  levels = c("Taxus", "Pinea", "Pinaster", "Ash", "Fagus_forg","Sylvestris"),
  labels = c("T. baccata", "P. pinea", "P. pinaster", "F. excelsior", "F. sylvatica", "P. sylvestris")
)

species_labels <- c(
  "T. baccata" = "italic(T.~baccata)",
  "P. pinea" = "italic(P.~pinea)",
  "P. pinaster" = "italic(P.~pinaster)",
  "F. excelsior" = "italic(F.~excelsior)",
  "F. sylvatica" = "italic(F.~sylvatica)",
  "P. sylvestris" = "italic(P.~sylvestris)"
)

cum_plot <- ggplot(combined_data_bis, aes(x = scale(Hs), colour = Species)) +
  stat_ecdf(
    aes(y = after_stat(y * 100)),
    geom = "line",
    linewidth = 1.15
  ) +
  scale_color_manual(
    values = c(
      "darkred", "darkgreen", "#56B4E9",
      "orange", "#9467BD", "#A0522D"
    ),
    labels = parse(text = species_labels)
  ) +
  scale_y_continuous(
    limits = c(0, 100),
    breaks = seq(0, 100, 20)
  ) +
  labs(
    x = "Scaled genetic diversity (Hs)",
    y = "Cumulative percent",
    colour = "Species"
  ) +
   theme_bw() +
  theme(
    axis.title.x = element_text(size = 14),
    axis.text.x = element_text(size = 13),
    axis.title.y = element_text(size = 14),
    axis.text.y = element_text(size = 13),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 13),
    panel.grid.minor = element_blank(),
  panel.grid.major = )

plot(cum_plot)

# Save to PDF
pdf("Results/all_species/Genetic_diversity/figures/Hs_cumulativeplot_all_species.pdf", width = 8, height = 6)
print(cum_plot)
dev.off()

## png 
##   2

2.2 Association with gene flow

2.2.1 Pearson correlation

2.2.1.1 Species per spcies

list_species <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")

df_correlation_GD <- data.frame(
  Species = character(),
  Cor_Hs_PSD = numeric(),
  pvalue_Hs_PSD = numeric(),
  Cor_Hs_Longitude = numeric(),
  pvalue_Hs_Longitude = numeric(),
  Cor_Hs_Latitude = numeric(),
  pvalue_Hs_Latitude = numeric()
)

for(x in 1:length(list_species)){
  
  species <- list_species[x]

  data_FST <- get(paste0("Bayescan_FST_pop_order_", species))
 
 #GD
  data_GD <- get(paste0("Genetic_diversity_",species))
  data_tot <- merge(data_FST,data_GD,"Population")
  data_tot$Longitude <- as.numeric(data_tot$Longitude)
  data_tot$Latitude <- as.numeric(data_tot$Latitude)
  
  assign(paste0("df_GD_PSD_",species),data_tot)

 #GD
  cor_Hs_PSD <- cor(data_tot$Fst,data_tot$Hs)
  pvalue_Hs_PSD <- cor.test(data_tot$Fst,data_tot$Hs)
  
  cor_Hs_Longitude <- cor(data_tot$Longitude,data_tot$Hs)
  pvalue_Hs_Longitude <- cor.test(data_tot$Longitude,data_tot$Hs)

  cor_Hs_Latitude <- cor(data_tot$Latitude,data_tot$Hs)
  pvalue_Hs_Latitude <- cor.test(data_tot$Latitude,data_tot$Hs)

  df_correlation_GD <- rbind(df_correlation_GD,data.frame(Species=species,Cor_Hs_PSD=cor_Hs_PSD,pvalue_Hs_PSD=pvalue_Hs_PSD$p.value,Cor_Hs_Longitude=cor_Hs_Longitude,pvalue_Hs_Longitude=pvalue_Hs_Longitude$p.value,Cor_Hs_Latitude=cor_Hs_Latitude,pvalue_Hs_Latitude=pvalue_Hs_Latitude$p.value))
}

df_gt_table <- df_correlation_GD %>%
  dplyr::mutate(
    `Hs ~ PSD` = paste0(round(Cor_Hs_PSD, 2), " (p=", signif(pvalue_Hs_PSD, 2), ")"),
     `Hs ~ Longitude` = paste0(round(Cor_Hs_Longitude, 2), " (p=", signif(pvalue_Hs_Longitude, 2), ")"),
     `Hs ~ Latitude` = paste0(round(Cor_Hs_Latitude, 2), " (p=", signif(pvalue_Hs_Latitude, 2), ")")
  ) %>%
  dplyr::select(Species, `Hs ~ PSD`,`Hs ~ Longitude`,`Hs ~ Latitude`)

# Create a nice gt table
gt_table <- df_gt_table %>%
  gt() %>%
  tab_header(
    title = "Pearson's correlation between genetic diversity and historical gene flow"
  ) %>%
  cols_label(
    Species = "Species",
    `Hs ~ PSD` = "Hs ~ PSD",
    `Hs ~ Longitude` = "Hs ~ Longitude",
    `Hs ~ Latitude` = "Hs ~ Latitude",
  ) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    heading.subtitle.font.size = 11,
    table.width = pct(100)
  )
gt_table

Species	Hs ~ PSD	Hs ~ Longitude	Hs ~ Latitude
Pearson's correlation between genetic diversity and historical gene flow
Taxus	-0.77 (p=1.1e-06)	0.46 (p=0.012)	0.27 (p=0.16)
Pinea	-0.93 (p=7.4e-22)	-0.37 (p=0.0088)	-0.05 (p=0.75)
Pinaster	-0.72 (p=1.1e-14)	-0.29 (p=0.0072)	0.32 (p=0.0028)
Sylvestris	-0.69 (p=7.8e-13)	0.45 (p=3.1e-05)	0.42 (p=0.00011)
Ash	-0.66 (p=6.3e-05)	0.09 (p=0.63)	0.3 (p=0.1)
Fagus_forg	-0.94 (p=3.7e-24)	0.46 (p=0.00069)	0.08 (p=0.58)

2.2.1.2 Combined species

df_all <- rbind(df_GD_PSD_Taxus,df_GD_PSD_Pinea,df_GD_PSD_Pinaster,df_GD_PSD_Sylvestris,df_GD_PSD_Fagus_forg,df_GD_PSD_Ash)

#std
cor_GDstd_all <- cor(df_all$Fst,df_all$Hs)
pval_GDstd_all <- cor.test(df_all$Fst,df_all$Hs)

cor_GDstd_all

## [1] -0.6259541

pval_GDstd_all$p.value

## [1] 1.197892e-36

Overall, combining the genetic diversity provided a very consistent results comapred to individual species with an overall negative Pearson’s correlation of -0.68 associated to a 6.23 e-34 pvalue.

2.2.2 Linear models

#merge all data
df_GD_PSD_Taxus$Species <- factor("Taxus")
df_GD_PSD_Pinea$Species <- factor("Pinea")
df_GD_PSD_Pinaster$Species <- factor("Pinaster")
df_GD_PSD_Sylvestris$Species <- factor("Sylvestris")
df_GD_PSD_Ash$Species <- factor("Ash")
df_GD_PSD_Fagus_forg$Species <- factor("Fagus_forg")

df_merge_species <- rbind(df_GD_PSD_Taxus,df_GD_PSD_Pinea,df_GD_PSD_Pinaster,df_GD_PSD_Sylvestris,df_GD_PSD_Ash,df_GD_PSD_Fagus_forg)

#linear model
Ratio_lm_GD <- lm(Hs~Fst+Species, data=df_merge_species)

anova(Ratio_lm_GD)

## Analysis of Variance Table
## 
## Response: Hs
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## Fst         1 0.33069 0.33069  770.13 < 2.2e-16 ***
## Species     5 0.37718 0.07544  175.68 < 2.2e-16 ***
## Residuals 317 0.13612 0.00043                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(Ratio_lm_GD)

## 
## Call:
## lm(formula = Hs ~ Fst + Species, data = df_merge_species)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.063576 -0.012525 -0.001061  0.007308  0.085494 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.217851   0.004149  52.502  < 2e-16 ***
## Fst               -0.278356   0.009877 -28.183  < 2e-16 ***
## SpeciesPinea       0.123006   0.005402  22.769  < 2e-16 ***
## SpeciesPinaster    0.046559   0.004463  10.432  < 2e-16 ***
## SpeciesSylvestris  0.086207   0.004608  18.708  < 2e-16 ***
## SpeciesAsh         0.038909   0.005442   7.149 6.05e-12 ***
## SpeciesFagus_forg  0.025019   0.004956   5.049 7.52e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02072 on 317 degrees of freedom
## Multiple R-squared:  0.8387, Adjusted R-squared:  0.8357 
## F-statistic: 274.8 on 6 and 317 DF,  p-value: < 2.2e-16

#ratio per species
list_species <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")

all_df_lm_results_Hs_fst <- data.frame(
    Species = character(),
    Intercept = numeric(),
    Slope = numeric(),
    R2 = numeric(),
    p_value = numeric()
)

for(x in 1:length(list_species)){
  
  species <- list_species[x]
  df <- get(paste0("df_GD_PSD_",species))
  
  Ratio_lm_GD <- lm(Hs~Fst, data=df)
  anova(Ratio_lm_GD)
 summary_model <- summary(Ratio_lm_GD)
  
  # Extract coefficients and stats
  intercept <- coef(summary_model)[1, 1]
  slope <- coef(summary_model)[2, 1]
  p_value <- coef(summary_model)[2, 4]
  r2 <- summary_model$r.squared
  
  # Add to results table
  all_df_lm_results_Hs_fst <- rbind(
    all_df_lm_results_Hs_fst,
    data.frame(
      Species = species,
      Intercept = intercept,
      Slope = slope,
      R2 = r2,
      p_value = p_value))
  
}

all_df_lm_results_Hs_fst_long_lat <- data.frame(
  Species = character(),
  Intercept = numeric(),
  Slope_Fst = numeric(),
  R2_partial_Fst = numeric(),
  p_value_Fst = numeric(),
  R2_geo = numeric(),
  R2_full = numeric(),
  Pvalue_long = numeric(),
  Pvalue_lat = numeric()
)

for (x in seq_along(list_species)) {

  species <- list_species[x]
  df <- get(paste0("df_GD_PSD_", species))

  ## Geography-only model
  model_geo <- lm(Hs ~ Longitude + Latitude, data = df)
  summary_geo <- summary(model_geo)

  ## Full model (geography + Fst)
  model_full <- lm(Hs ~ Longitude + Latitude + Fst, data = df)
  summary_full <- summary(model_full)

  ## Partial R2 for Fst
  R2_partial_Fst <- summary_full$r.squared - summary_geo$r.squared

  ## Extract coefficients for Fst
  intercept <- coef(summary_full)[1, 1]
  slope_Fst <- coef(summary_full)["Fst", 1]
  p_value_Fst <- coef(summary_full)["Fst", 4]

  ## Geography stats
  R2_geo <- summary_geo$r.squared
  R2_full <- summary_full$r.squared
  p_value_long <- coef(summary_geo)["Longitude", 4]
  p_value_lat  <- coef(summary_geo)["Latitude", 4]

  ## Store results
  all_df_lm_results_Hs_fst_long_lat <- rbind(
    all_df_lm_results_Hs_fst_long_lat,
    data.frame(
      Species = species,
      Intercept = intercept,
      Slope_Fst = slope_Fst,
      R2_partial_Fst = R2_partial_Fst,
      p_value_Fst = p_value_Fst,
      R2_geo = R2_geo,
      R2_full = R2_full,
      Pvalue_long = p_value_long,
      Pvalue_lat = p_value_lat
    )
  )
}

View(all_df_lm_results_Hs_fst_long_lat)

Excluding atypical populations in term of Fst:

#merge all data
df_GD_PSD_Taxus$Species <- factor("Taxus")
df_GD_PSD_Pinea$Species <- factor("Pinea")
df_GD_PSD_Pinaster$Species <- factor("Pinaster")
df_GD_PSD_Sylvestris$Species <- factor("Sylvestris")
df_GD_PSD_Ash$Species <- factor("Ash")
df_GD_PSD_Fagus_forg$Species <- factor("Fagus_forg")

df_merge_species <- rbind(df_GD_PSD_Taxus,df_GD_PSD_Pinea,df_GD_PSD_Pinaster,df_GD_PSD_Sylvestris,df_GD_PSD_Ash,df_GD_PSD_Fagus_forg)

df_merge_species_bis <- df_merge_species %>%
  dplyr::group_by(Species) %>%
  dplyr::mutate(
    Q1 = quantile(Fst, 0.25, na.rm = TRUE),
    Q3 = quantile(Fst, 0.75, na.rm = TRUE),
    IQR = Q3 - Q1,
    lower = Q1 - 2.4 * IQR,
    upper = Q3 + 2.4 * IQR
  ) %>%
  filter(Fst >= lower & Fst <= upper) %>%
  ungroup() %>%
  dplyr::select(-Q1, -Q3, -IQR, -lower, -upper)

#ratio per species
list_species <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")

all_df_lm_results_Hs_fst_wo_outliers <- data.frame(
    Species = character(),
    Intercept = numeric(),
    Slope = numeric(),
    R2 = numeric(),
    p_value = numeric()
)

for(x in 1:length(list_species)){
  
  
  species <- list_species[x]
  df <- df_merge_species_bis %>% filter(Species == species)
  
  Ratio_lm_GD <- lm(Hs~Fst, data=df)
  anova(Ratio_lm_GD)
 summary_model <- summary(Ratio_lm_GD)
  
  # Extract coefficients and stats
  intercept <- coef(summary_model)[1, 1]
  slope <- coef(summary_model)[2, 1]
  p_value <- coef(summary_model)[2, 4]
  r2 <- summary_model$r.squared
  
  # Add to results table
  all_df_lm_results_Hs_fst_wo_outliers <- rbind(
    all_df_lm_results_Hs_fst_wo_outliers,
    data.frame(
      Species = species,
      Intercept = intercept,
      Slope = slope,
      R2 = r2,
      p_value = p_value))
  
}

View(all_df_lm_results_Hs_fst_wo_outliers)

all_df_lm_results_Hs_fst_long_lat_wo_outliers <- data.frame(
  Species = character(),
  Intercept = numeric(),
  Slope_Fst = numeric(),
  R2_partial_Fst = numeric(),
  p_value_Fst = numeric(),
  R2_geo = numeric(),
  R2_full = numeric(),
  Pvalue_long = numeric(),
  Pvalue_lat = numeric()
)

for (x in seq_along(list_species)) {

  species <- list_species[x]
  df <- df_merge_species_bis %>% filter(Species == species)

  ## Geography-only model
  model_geo <- lm(Hs ~ Longitude + Latitude, data = df)
  summary_geo <- summary(model_geo)

  ## Full model (geography + Fst)
  model_full <- lm(Hs ~ Longitude + Latitude + Fst, data = df)
  summary_full <- summary(model_full)

  ## Partial R2 for Fst
  R2_partial_Fst <- summary_full$r.squared - summary_geo$r.squared

  ## Extract coefficients for Fst
  intercept <- coef(summary_full)[1, 1]
  slope_Fst <- coef(summary_full)["Fst", 1]
  p_value_Fst <- coef(summary_full)["Fst", 4]

  ## Geography stats
  R2_geo <- summary_geo$r.squared
  R2_full <- summary_full$r.squared
  p_value_long <- coef(summary_geo)["Longitude", 4]
  p_value_lat  <- coef(summary_geo)["Latitude", 4]

  ## Store results
  all_df_lm_results_Hs_fst_long_lat_wo_outliers <- rbind(
    all_df_lm_results_Hs_fst_long_lat_wo_outliers,
    data.frame(
      Species = species,
      Intercept = intercept,
      Slope_Fst = slope_Fst,
      R2_partial_Fst = R2_partial_Fst,
      p_value_Fst = p_value_Fst,
      R2_geo = R2_geo,
      R2_full = R2_full,
      Pvalue_long = p_value_long,
      Pvalue_lat = p_value_lat
    )
  )
}

View(all_df_lm_results_Hs_fst_long_lat_wo_outliers)

Without atypical populations regarding Fst

df_renamed <- df_merge_species %>%
  filter(Species != "Fagus")

species_labels <- c(
  "Taxus" = "italic(T.~baccata)",
  "Pinea" = "italic(P.~pinea)",
  "Pinaster" = "italic(P.~pinaster)",
  "Sylvestris" = "italic(P.~sylvestris)",
  "Ash" = "italic(F.~excelsior)",
  "Fagus_forg" = "italic(F.~sylvatica)"
)

# Apply labels to the data
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))

df_renamed$Species <- factor(df_renamed$Species, levels = c(
  "italic(T.~baccata)",
  "italic(P.~pinea)",
  "italic(P.~pinaster)",
 "italic(F.~excelsior)",
  "italic(F.~sylvatica)",
  "italic(P.~sylvestris)"
))


df_renamed <- df_renamed %>%
  group_by(Species) %>%
  mutate(st_Hs = as.numeric(scale(Hs))) %>%
  ungroup()

df_filtered <- df_renamed %>%
  dplyr::group_by(Species) %>%
  dplyr::mutate(
    Q1 = quantile(Fst, 0.25, na.rm = TRUE),
    Q3 = quantile(Fst, 0.75, na.rm = TRUE),
    IQR = Q3 - Q1,
    lower = Q1 - 2.4 * IQR,
    upper = Q3 + 2.4 * IQR
  ) %>%
  filter(Fst >= lower & Fst <= upper) %>%
  ungroup() %>%
  dplyr::select(-Q1, -Q3, -IQR, -lower, -upper)

# Define colors and shapes using the same expressions
colors <- c(
  "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D",
  "italic(F.~excelsior)" = "orange",
  "italic(F.~sylvatica)" = "#9467BD"
)

shapes <- c(
  "italic(T.~baccata)" = 8,
  "italic(P.~pinea)" = 18,
  "italic(P.~pinaster)" = 15,
  "italic(P.~sylvestris)" = 17,
  "italic(F.~excelsior)" = 16,
  "italic(F.~sylvatica)" = 25
)

# Plot with custom colors and shapes per species
plot <- ggplot(df_filtered, aes(x = Fst, y = st_Hs, shape = Species, fill = Species, color = Species)) +
  geom_point(alpha = 0.6, size = 2.5)+
  geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D",
  "italic(F.~excelsior)" = "orange",
  "italic(F.~sylvatica)" = "#9467BD"
  )) +
  scale_fill_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D",
  "italic(F.~excelsior)" = "orange",
  "italic(F.~sylvatica)" = "#9467BD"
  )) +
   scale_shape_manual(values = c(
    "italic(T.~baccata)" = 19,
  "italic(P.~pinea)" = 19,
  "italic(P.~pinaster)" = 19,
  "italic(P.~sylvestris)" = 19,
  "italic(F.~excelsior)" = 19,
  "italic(F.~sylvatica)" = 19
  )) +
  theme_minimal() +
   facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
  labs(title = "Mixed model fit: One line per species",
       x = expression(italic(F)[ST]),
       y = "Hs")+
    scale_y_continuous(
  limits = c(-4.5, 3),
  breaks = seq(-4.5, 3, by = 1.5)
)+
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

pdf("Results/all_species/Relationship/GD/figures/Hs_relationship_allspecies_new_wo_outliers.pdf");
  print(plot);
  dev.off()

## png 
##   2

plot <- ggplot(df_filtered, aes(x = Fst, y = st_Hs, shape = Species, fill = Species, color = Species)) +
 geom_point(alpha = 0.6, size = 2.5)+
  geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D",
  "italic(F.~excelsior)" = "orange",
  "italic(F.~sylvatica)" = "#9467BD"
  )) +
  scale_fill_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D",
  "italic(F.~excelsior)" = "orange",
  "italic(F.~sylvatica)" = "#9467BD"
  )) +
  scale_shape_manual(values = c(
    "italic(T.~baccata)" = 19,
  "italic(P.~pinea)" = 19,
  "italic(P.~pinaster)" = 19,
  "italic(P.~sylvestris)" = 19,
  "italic(F.~excelsior)" = 19,
  "italic(F.~sylvatica)" = 19
  )) +
  theme_minimal() +
   facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
  labs(title = "Mixed model fit: One line per species",
       x = expression(italic(F)[ST]),
       y = "Hs")+
  scale_y_continuous(
  limits = c(-4.5, 3),
  breaks = seq(-4.5, 3, by = 1.5)
)+
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

pdf("Results/all_species/Relationship/GD/figures/GD_relationship_allspecies_new_wo_SE_wo_outliers.pdf");
  print(plot);
  dev.off()

## png 
##   2

df_correlation_GD <- data.frame(
  Species = character(),
  cor_GDI_PSD = numeric(),
  pvalue_GDI_PSD = numeric()
)

# Loop over unique species in the filtered dataset
for (species in unique(df_filtered$Species)) {
  
  df_species <- df_filtered %>%
    filter(Species == species)
  
  # Pearson correlation
  cor_result <- cor.test(df_species$Fst, df_species$st_Hs)
  
  df_correlation_GD <- rbind(
    df_correlation_GD,
    data.frame(
      Species = species,
      cor_st_Hs_PSD = cor_result$estimate,
      pvalue_st_Hs_PSD = cor_result$p.value
    )
  )
}

df_lm_results <- data.frame(
  Species = character(),
  Intercept = numeric(),
  Slope = numeric(),
  R2 = numeric(),
  p_value = numeric()
)

# Loop over unique species
for (species in unique(df_filtered$Species)) {
  
  df_species <- df_filtered %>%
    filter(Species == species)
  
  # Fit simple linear model
  model <- lm(st_Hs ~ Fst, data = df_species)
  summary_model <- summary(model)
  
  # Extract coefficients and stats
  intercept <- coef(summary_model)[1, 1]
  slope <- coef(summary_model)[2, 1]
  p_value <- coef(summary_model)[2, 4]
  r2 <- summary_model$r.squared
  
  # Add to results table
  df_lm_results <- rbind(
    df_lm_results,
    data.frame(
      Species = species,
      Intercept = intercept,
      Slope = slope,
      R2 = r2,
      p_value = p_value
    )
  )
}

# Merge with your existing correlation table
df_results_all <- df_correlation_GD %>%
  left_join(df_lm_results, by = "Species")

# Format nicely for viewing
df_results_all %>%
  dplyr::mutate(
    `st_Hs ~ Fst` = paste0(
      "slope=", round(Slope, 3),
      ", R²=", round(R2, 2),
      ", p=", signif(p_value, 2)
    )
  ) %>%
  dplyr::select(Species, `st_Hs ~ Fst`) %>%
  gt() %>%
  tab_header(
    title = "Linear model: st_Hs ~ Fst per species"
  ) %>%
  cols_label(
    Species = "Species",
    `st_Hs ~ Fst` = "Model summary"
  )

Species	Model summary
Linear model: st_Hs ~ Fst per species
italic(T.~baccata)	slope=-11.135, R²=0.59, p=1.1e-06
italic(P.~pinea)	slope=-3.898, R²=0.87, p=7.4e-22
italic(P.~pinaster)	slope=-5.922, R²=0.46, p=1.9e-12
italic(P.~sylvestris)	slope=-17.024, R²=0.33, p=2.5e-08
italic(F.~excelsior)	slope=-15.815, R²=0.13, p=0.065
italic(F.~sylvatica)	slope=-17.607, R²=0.56, p=1.2e-09

  df_gt_table <- df_correlation_GD %>%
  dplyr::mutate(
    `st_Hs ~ PSD` = paste0(round(cor_st_Hs_PSD, 2), " (p=", signif(pvalue_st_Hs_PSD, 2), ")")
  ) %>%
  dplyr::select(Species, `st_Hs ~ PSD`)

gt_table <- df_gt_table %>%
  gt() %>%
  tab_header(
    title = "Pearson's correlation between st_Hs and historical realised gene flow"
  ) %>%
  cols_label(
    Species = "Species",
    `st_Hs ~ PSD` = "st_Hs ~ PSD"
  ) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    heading.subtitle.font.size = 11,
    table.width = pct(100)
  )

gt_table

Species	st_Hs ~ PSD
Pearson's correlation between st_Hs and historical realised gene flow
italic(T.~baccata)	-0.77 (p=1.1e-06)
italic(P.~pinea)	-0.93 (p=7.4e-22)
italic(P.~pinaster)	-0.68 (p=1.9e-12)
italic(P.~sylvestris)	-0.58 (p=2.5e-08)
italic(F.~excelsior)	-0.35 (p=0.065)
italic(F.~sylvatica)	-0.75 (p=1.2e-09)

With atypical populations regarding Fst

df_renamed <- df_merge_species %>%
  filter(Species != "Fagus")

species_labels <- c(
  "Taxus" = "italic(T.~baccata)",
  "Pinea" = "italic(P.~pinea)",
  "Pinaster" = "italic(P.~pinaster)",
  "Sylvestris" = "italic(P.~sylvestris)",
  "Ash" = "italic(F.~excelsior)",
  "Fagus_forg" = "italic(F.~sylvatica)"
)

# Apply labels to the data
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))

df_renamed$Species <- factor(df_renamed$Species, levels = c(
  "italic(T.~baccata)",
  "italic(P.~pinea)",
  "italic(P.~pinaster)",
 "italic(F.~excelsior)",
  "italic(F.~sylvatica)",
  "italic(P.~sylvestris)"
))

df_renamed <- df_renamed %>%
  group_by(Species) %>%
  mutate(st_Hs = as.numeric(scale(Hs))) %>%
  ungroup()

# Define colors and shapes using the same expressions
colors <- c(
  "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D",
  "italic(F.~sylvatica)" = "#9467BD",
  "italic(F.~excelsior)" = "orange"
)

shapes <- c(
  "italic(T.~baccata)" = 8,
  "italic(P.~pinea)" = 18,
  "italic(P.~pinaster)" = 15,
  "italic(P.~sylvestris)" = 17,
  "italic(F.~sylvatica)" = 25,
  "italic(F.~excelsior)" = 16
)

# Plot with custom colors and shapes per species
plot <- ggplot(df_renamed, aes(x = Fst, y = st_Hs, shape = Species, fill = Species, color = Species)) +
  geom_point(alpha = 0.6, size = 2.5)+
  geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D",
  "italic(F.~sylvatica)" = "#9467BD",
  "italic(F.~excelsior)" = "orange"
  )) +
  scale_fill_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D",
  "italic(F.~sylvatica)" = "#9467BD",
  "italic(F.~excelsior)" = "orange"
  )) +
   scale_shape_manual(values = c(
    "italic(T.~baccata)" = 19,
  "italic(P.~pinea)" = 19,
  "italic(P.~pinaster)" = 19,
  "italic(P.~sylvestris)" = 19,
  "italic(F.~sylvatica)" = 19,
  "italic(F.~excelsior)" = 19
  )) +
  theme_minimal() +
   facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
  labs(title = "Mixed model fit: One line per species",
       x = expression(italic(F)[ST]),
       y = "Hs")+
    scale_y_continuous(
  limits = c(-4.5, 3),
  breaks = seq(-4.5, 3, by = 1.5)
)+
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

pdf("Results/all_species/Relationship/GD/figures/Hs_relationship_allspecies_new.pdf");
  print(plot);
  dev.off()

## png 
##   2

plot <- ggplot(df_renamed, aes(x = Fst, y = st_Hs, shape = Species, fill = Species, color = Species)) +
 geom_point(alpha = 0.6, size = 2.5)+
  geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D",
  "italic(F.~sylvatica)" = "#9467BD",
  "italic(F.~excelsior)" = "orange"
  )) +
  scale_fill_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D",
  "italic(F.~sylvatica)" = "#9467BD",
  "italic(F.~excelsior)" = "orange"
  )) +
  scale_shape_manual(values = c(
    "italic(T.~baccata)" = 19,
  "italic(P.~pinea)" = 19,
  "italic(P.~pinaster)" = 19,
  "italic(P.~sylvestris)" = 19,
  "italic(F.~sylvatica)" = 19,
  "italic(F.~excelsior)" = 19
  )) +
  theme_minimal() +
   facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
  labs(title = "Mixed model fit: One line per species",
       x = expression(italic(F)[ST]),
       y = "Hs")+
  scale_y_continuous(
  limits = c(-4.5, 3),
  breaks = seq(-4.5, 3, by = 1.5)
)+
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

pdf("Results/all_species/Relationship/GD/figures/GD_relationship_allspecies_new_wo_SE.pdf");
  print(plot);
  dev.off()

## png 
##   2

df_correlation_GD <- data.frame(
  Species = character(),
  cor_GDI_PSD = numeric(),
  pvalue_GDI_PSD = numeric()
)

# Loop over unique species in the filtered dataset
for (species in unique(df_renamed$Species)) {
  
  df_species <- df_renamed %>%
    filter(Species == species)
  
  # Pearson correlation
  cor_result <- cor.test(df_species$Fst, df_species$st_Hs)
  
  df_correlation_GD <- rbind(
    df_correlation_GD,
    data.frame(
      Species = species,
      cor_st_Hs_PSD = cor_result$estimate,
      pvalue_st_Hs_PSD = cor_result$p.value
    )
  )
}

df_lm_results <- data.frame(
  Species = character(),
  Intercept = numeric(),
  Slope = numeric(),
  R2 = numeric(),
  p_value = numeric()
)

# Loop over unique species
for (species in unique(df_renamed$Species)) {
  
  df_species <- df_renamed %>%
    filter(Species == species)
  
  # Fit simple linear model
  model <- lm(st_Hs ~ Fst, data = df_species)
  summary_model <- summary(model)
  
  # Extract coefficients and stats
  intercept <- coef(summary_model)[1, 1]
  slope <- coef(summary_model)[2, 1]
  p_value <- coef(summary_model)[2, 4]
  r2 <- summary_model$r.squared
  
  # Add to results table
  df_lm_results <- rbind(
    df_lm_results,
    data.frame(
      Species = species,
      Intercept = intercept,
      Slope = slope,
      R2 = r2,
      p_value = p_value
    )
  )
}

# Merge with your existing correlation table
df_results_all <- df_correlation_GD %>%
  left_join(df_lm_results, by = "Species")

# Format nicely for viewing
df_results_all %>%
  dplyr::mutate(
    `st_Hs ~ Fst` = paste0(
      "slope=", round(Slope, 3),
      ", R²=", round(R2, 2),
      ", p=", signif(p_value, 2)
    )
  ) %>%
  dplyr::select(Species, `st_Hs ~ Fst`) %>%
  gt() %>%
  tab_header(
    title = "Linear model: st_Hs ~ Fst per species"
  ) %>%
  cols_label(
    Species = "Species",
    `st_Hs ~ Fst` = "Model summary"
  )

Species	Model summary
Linear model: st_Hs ~ Fst per species
italic(T.~baccata)	slope=-11.135, R²=0.59, p=1.1e-06
italic(P.~pinea)	slope=-3.898, R²=0.87, p=7.4e-22
italic(P.~pinaster)	slope=-5.949, R²=0.52, p=1.1e-14
italic(P.~sylvestris)	slope=-17.939, R²=0.48, p=7.8e-13
italic(F.~excelsior)	slope=-8.396, R²=0.43, p=6.3e-05
italic(F.~sylvatica)	slope=-26.117, R²=0.88, p=3.7e-24

  df_gt_table <- df_correlation_GD %>%
  dplyr::mutate(
    `st_Hs ~ PSD` = paste0(round(cor_st_Hs_PSD, 2), " (p=", signif(pvalue_st_Hs_PSD, 2), ")")
  ) %>%
  dplyr::select(Species, `st_Hs ~ PSD`)

gt_table <- df_gt_table %>%
  gt() %>%
  tab_header(
    title = "Pearson's correlation between st_Hs and historical realised gene flow"
  ) %>%
  cols_label(
    Species = "Species",
    `st_Hs ~ PSD` = "st_Hs ~ PSD"
  ) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    heading.subtitle.font.size = 11,
    table.width = pct(100)
  )

gt_table

Species	st_Hs ~ PSD
Pearson's correlation between st_Hs and historical realised gene flow
italic(T.~baccata)	-0.77 (p=1.1e-06)
italic(P.~pinea)	-0.93 (p=7.4e-22)
italic(P.~pinaster)	-0.72 (p=1.1e-14)
italic(P.~sylvestris)	-0.69 (p=7.8e-13)
italic(F.~excelsior)	-0.66 (p=6.3e-05)
italic(F.~sylvatica)	-0.94 (p=3.7e-24)

2.2.3 Graph Hs latitude/Longitude

2.2.3.1 Longitude

df_renamed <- df_merge_species

species_labels <- c(
  "Taxus" = "italic(T.~baccata)",
  "Pinea" = "italic(P.~pinea)",
  "Pinaster" = "italic(P.~pinaster)",
  "Sylvestris" = "italic(P.~sylvestris)",
    "Ash" = "italic(F.~excelsior)",
  "Fagus_forg" = "italic(F.~sylvatica)"  
)

# Create a new data frame with updated species names
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))
df_renamed$Latitude <- as.numeric(df_renamed$Latitude)
df_renamed$Longitude <- as.numeric(df_renamed$Longitude)

df_renamed <- df_renamed %>%
  dplyr::group_by(Species) %>%
  dplyr::mutate(
    Q1 = quantile(Fst, 0.25, na.rm = TRUE),
    Q3 = quantile(Fst, 0.75, na.rm = TRUE),
    IQR = Q3 - Q1,
    lower = Q1 - 2.4 * IQR,
    upper = Q3 + 2.4 * IQR
  ) %>%
  filter(Fst >= lower & Fst <= upper) %>%
  ungroup() %>%
  dplyr::select(-Q1, -Q3, -IQR, -lower, -upper)

# Define colors and shapes using the same expressions
colors <- c(
   "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
)

shapes <- c(
  "italic(T.~baccata)" = 19,
    "italic(P.~pinea)" = 19,
    "italic(F.~excelsior)" = 19,
    "italic(P.~pinaster)" = 19,
    "italic(F.~sylvatica)" = 19,
    "italic(P.~sylvestris)" = 19
)

# Plot with custom colors and shapes per species
plot <- ggplot(df_renamed, aes(x = Longitude, y = scale(Hs), shape = Species, fill = Species, color = Species)) +
  geom_point(alpha = 0.6, size = 2.5) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
  "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
)) +
  scale_fill_manual(values = c(
  "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
)) +
   scale_shape_manual(values = c(
  "italic(T.~baccata)" = 19,
    "italic(P.~pinea)" = 19,
    "italic(F.~excelsior)" = 19,
    "italic(P.~pinaster)" = 19,
    "italic(F.~sylvatica)" = 19,
    "italic(P.~sylvestris)" = 19
)) +
  theme_minimal() +
   facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
  labs(title = "Mixed model fit: One line per species",
       x = "Longitude",
       y = "Scaled genetic diversity (Hs)")+
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

#pdf("Results/all_species/Relationship/GL/figures/Provean/GL_relationship_allspecies_new.pdf");
#  print(plot);
#  dev.off()

plot <- ggplot(df_renamed, aes(x = Longitude, y = scale(Hs), shape = Species, fill = Species, color = Species)) +
  geom_point(alpha = 0.6, size = 2.5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
  "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
)) +
  scale_fill_manual(values = c(
  "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
)) +
  scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
    "italic(P.~pinea)" = 19,
    "italic(F.~excelsior)" = 19,
    "italic(P.~pinaster)" = 19,
    "italic(F.~sylvatica)" = 19,
    "italic(P.~sylvestris)" = 19
)) +
  theme_minimal() +
   facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
  labs(title = "Mixed model fit: One line per species",
       x = "Longitude",
       y = "Scaled genetic diversity (Hs)")+
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

#pdf("Results/all_species/Relationship/GL/figures/Provean/GL_relationship_allspecies_new_wo_SE.pdf");
#  print(plot);
#  dev.off()

df_lm_results <- data.frame(
  Species = character(),
  Intercept = numeric(),
  Slope = numeric(),
  R2 = numeric(),
  p_value = numeric()
)

# Fit LM for each species independently
for (species in unique(df_renamed$Species)) {
  
  df_species <- df_renamed %>%
    filter(Species == species)
  
  # Skip if not enough points
  if (nrow(df_species) < 3) next
  
  # Simple linear model
  model <- lm(Hs ~ Longitude, data = df_species)
  summary_model <- summary(model)
  
  df_lm_results <- rbind(
    df_lm_results,
    data.frame(
      Species = species,
      Intercept = coef(summary_model)[1, 1],
      Slope = coef(summary_model)[2, 1],
      R2 = summary_model$r.squared,
      p_value = coef(summary_model)[2, 4]
    )
  )
}

# Inspect results
print(df_lm_results)

##                 Species Intercept         Slope           R2      p_value
## 1    italic(T.~baccata) 0.1657582  9.245047e-04 0.2101233153 1.238988e-02
## 2      italic(P.~pinea) 0.2454332 -2.068998e-03 0.1398541150 8.834001e-03
## 3   italic(P.~pinaster) 0.2209233 -2.088206e-03 0.1238001596 1.106736e-03
## 4 italic(P.~sylvestris) 0.2870950  3.637667e-04 0.1875743825 6.700663e-05
## 5  italic(F.~excelsior) 0.2443825  2.310125e-05 0.0001762504 9.465422e-01
## 6  italic(F.~sylvatica) 0.2312181  2.247754e-04 0.2016464493 1.544876e-03

gt_table_lm <- df_lm_results %>%
  dplyr::mutate(
    `Hs ~ Longitude` = paste0(
      "slope=", round(Slope, 3),
      ", R²=", round(R2, 2),
      ", p=", signif(p_value, 2)
    )
  ) %>%
  dplyr::select(Species, `Hs ~ Longitude`) %>%
  gt() %>%
  tab_header(
    title = "Linear model: Hs ~ Longitude per species"
  ) %>%
  cols_label(
    Species = "Species",
    `Hs ~ Longitude` = "Model summary"
  ) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    table.width = pct(100)
  )

gt_table_lm

Species	Model summary
Linear model: Hs ~ Longitude per species
italic(T.~baccata)	slope=0.001, R²=0.21, p=0.012
italic(P.~pinea)	slope=-0.002, R²=0.14, p=0.0088
italic(P.~pinaster)	slope=-0.002, R²=0.12, p=0.0011
italic(P.~sylvestris)	slope=0, R²=0.19, p=6.7e-05
italic(F.~excelsior)	slope=0, R²=0, p=0.95
italic(F.~sylvatica)	slope=0, R²=0.2, p=0.0015

### Disentangle the effect of Fst and Longitude on GDI

# Data frame to store results
df_varpart_species_lon_fst_genetic_diversity <- data.frame(
  Species = character(),
  Fst_unique = numeric(),
  Longitude_unique = numeric(),
  Shared = numeric(),
  Residual = numeric(),
  Total_Explained = numeric(),
  Fst_p = numeric(),
  Longitude_p = numeric(),
  stringsAsFactors = FALSE
)

for (species in unique(df_renamed$Species)) {
  
  df_species <- df_renamed %>% filter(Species == species)
  
  # Skip if not enough points
  if (nrow(df_species) < 3) next
  
  # --- Full RDA to get total explained variance ---
  rda_full <- rda(df_species$Hs ~ scale(Fst) + scale(Longitude), data = df_species)
  total_explained <- RsquareAdj(rda_full)$adj.r.squared
  
  # --- Partial RDA for unique fractions ---
  rda_Fst <- rda(df_species$Hs ~ scale(Fst) + Condition(scale(Longitude)), data = df_species)
  Fst_unique <- RsquareAdj(rda_Fst)$adj.r.squared
  anova_Fst <- anova(rda_Fst, permutations = 999)
  Fst_p <- anova_Fst$`Pr(>F)`[1]  # p-value for Fst unique
  
  rda_Lat <- rda(df_species$Hs ~ scale(Longitude) + Condition(scale(Fst)), data = df_species)
  Longitude_unique <- RsquareAdj(rda_Lat)$adj.r.squared
  anova_Lat <- anova(rda_Lat, permutations = 999)
  Longitude_p <- anova_Lat$`Pr(>F)`[1]  # p-value for Longitude unique
  
  Shared <- total_explained - Fst_unique - Longitude_unique
  Residual <- 1 - total_explained
  Total_Explained <- total_explained
  
  # Bind to results
  df_varpart_species_lon_fst_genetic_diversity <- rbind(
    df_varpart_species_lon_fst_genetic_diversity,
    data.frame(
      Species = species,
      Fst_unique = Fst_unique,
      Longitude_unique = Longitude_unique,
      Shared = Shared,
      Residual = Residual,
      Total_Explained = Total_Explained,
      Fst_p = Fst_p,
      Longitude_p = Longitude_p
    )
  )
}

# Inspect results
print(df_varpart_species_lon_fst_genetic_diversity)

##                 Species Fst_unique Longitude_unique       Shared  Residual
## 1    italic(T.~baccata)  0.6602666     0.2654037993 -0.084535176 0.1588648
## 2      italic(P.~pinea)  0.7449377     0.0009513661  0.120203925 0.1339070
## 3   italic(P.~pinaster)  0.3340173    -0.0064242577  0.119407135 0.5529998
## 4 italic(P.~sylvestris)  0.3776214     0.2295041390 -0.052480739 0.4453552
## 5  italic(F.~excelsior)  0.0934554    -0.0361920082 -0.002086501 0.9448231
## 6  italic(F.~sylvatica)  0.4059910     0.0354165074  0.148488752 0.4101038
##   Total_Explained Fst_p Longitude_p
## 1       0.8411352 0.001       0.001
## 2       0.8660930 0.001       0.258
## 3       0.4470002 0.001       0.813
## 4       0.5546448 0.001       0.001
## 5       0.0551769 0.059       0.953
## 6       0.5898962 0.001       0.028

# Optional: GT table
gt_table_vp <- df_varpart_species_lon_fst_genetic_diversity %>%
  gt() %>%
  tab_header(
    title = "Variance Partitioning of Genetic diversity per Species Longitude"
  ) %>%
  cols_label(
    Species = "Species",
    Fst_unique = "Fst (unique)",
   Longitude_unique = "Longitude (unique)",
    Shared = "Shared fraction",
    Residual = "Residual",
    Total_Explained = "Total Explained",
    Fst_p = "Fst p-value",
    Longitude_p = "Longitude p-value"
  ) %>%
  fmt_number(columns = vars(Fst_p, Longitude_p), decimals = 3) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    table.width = pct(100)
  )

gt_table_vp

Species	Fst (unique)	Longitude (unique)	Shared fraction	Residual	Total Explained	Fst p-value	Longitude p-value
Variance Partitioning of Genetic diversity per Species Longitude
italic(T.~baccata)	0.6602666	0.2654037993	-0.084535176	0.1588648	0.8411352	0.001	0.001
italic(P.~pinea)	0.7449377	0.0009513661	0.120203925	0.1339070	0.8660930	0.001	0.258
italic(P.~pinaster)	0.3340173	-0.0064242577	0.119407135	0.5529998	0.4470002	0.001	0.813
italic(P.~sylvestris)	0.3776214	0.2295041390	-0.052480739	0.4453552	0.5546448	0.001	0.001
italic(F.~excelsior)	0.0934554	-0.0361920082	-0.002086501	0.9448231	0.0551769	0.059	0.953
italic(F.~sylvatica)	0.4059910	0.0354165074	0.148488752	0.4101038	0.5898962	0.001	0.028

Plots longitude

species_labels <- c(
  "Taxus" = "T. baccata",
  "Pinea" = "P. pinea",
  "Pinaster" = "P. pinaster",
  "Sylvestris" = "P. sylvestris",
  "Ash" = "F. excelsior",
  "Fagus_forg" = "F. sylvatica"
)
# Create a new data frame with updated species names
df_renamed$Species <- species_labels[df_renamed$Species]

# Set Species as a factor with desired order for consistent colors/shapes/legend

df_renamed$Longitude <- as.numeric(df_renamed$Longitude)

df_renamed <- df_renamed %>%
  group_by(Species) %>%
  mutate(st_Hs = as.numeric(scale(Hs))) %>%
  ungroup()
# Plot with custom colors and shapes per species
plot <- ggplot(df_renamed, aes(x = Longitude, y = st_Hs, shape = Species, fill = Species, color = Species)) +
  geom_point(position = position_jitter(width = 0, height = 0.01), alpha = 0.6, size = 2.5) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
    "T. baccata" = "darkred",
    "P. pinea" = "darkgreen",
    "P. pinaster" = "#56B4E9",
    "P. sylvestris" = "#A0522D",
    "F. sylvatica" = "#9467BD",
    "F. excelsior" = "orange"
  )) +
  scale_fill_manual(values = c(
    "T. baccata" = "darkred",
    "P. pinea" = "darkgreen",
    "P. pinaster" = "#56B4E9",
    "P. sylvestris" = "#A0522D",
    "F. sylvatica" = "#9467BD",
    "F. excelsior" = "orange"
  )) +
  scale_shape_manual(values = c(
    "T. baccata" = 8,
    "P. pinea" = 18,
    "P. pinaster" = 15,
    "P. sylvestris" = 17,
    "F. sylvatica" = 25,   # downward triangle (fillable)
    "F. excelsior" = 16
  )) +
  theme_minimal() +
  labs(
    title = "Mixed Model Fit: One Line per Species",
    x = "Longitude",
    y = "Genetic diversity Hs"
  ) +
  #scale_y_continuous(breaks = seq(0.05, 0.35, by = 0.05)) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )
print(plot)

pdf("Results/all_species/Relationship/GD/figures/Hs_relationship_Longitude_allspecies_new.pdf");
  print(plot);
  dev.off()

## png 
##   2

plot <- ggplot(df_renamed, aes(x = Longitude, y = st_Hs, shape = Species, fill = Species, color = Species)) +
  #geom_point(position = position_jitter(width = 0, height = 0.01), alpha = 0.6, size = 2.5) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
    "T. baccata" = "darkred",
    "P. pinea" = "darkgreen",
    "P. pinaster" = "#56B4E9",
    "P. sylvestris" = "#A0522D",
    "F. sylvatica" = "#9467BD",
    "F. excelsior" = "orange"
  )) +
  scale_fill_manual(values = c(
    "T. baccata" = "darkred",
    "P. pinea" = "darkgreen",
    "P. pinaster" = "#56B4E9",
    "P. sylvestris" = "#A0522D",
    "F. sylvatica" = "#9467BD",
    "F. excelsior" = "orange"
  )) +
  scale_shape_manual(values = c(
    "T. baccata" = 8,
    "P. pinea" = 18,
    "P. pinaster" = 15,
    "P. sylvestris" = 17,
    "F. sylvatica" = 25,   # downward triangle (fillable)
    "F. excelsior" = 16
  )) +
  theme_minimal() +
  labs(
    title = "Mixed Model Fit: One Line per Species",
    x = "Longitude",
    y = "Genetic diversity Hs"
  ) +
  #scale_y_continuous(breaks = seq(0.05, 0.35, by = 0.05)) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )
print(plot)

pdf("Results/all_species/Relationship/GD/figures/Hs_relationship_Longitude_allspecies_wo_points_new.pdf");
  print(plot);
  dev.off()

## png 
##   2

ordered_species <- c(
  "P. sylvestris",
  "F. sylvatica",
  "F. excelsior",
  "P. pinaster",
  "P. pinea",
  "T. baccata"
)

# Update factor levels
df_renamed$Species <- factor(df_renamed$Species, levels = ordered_species)

# Apply same order to species_offsets
species_offsets <- data.frame(
  Species = ordered_species,
  offset = seq(0, by = 6, length.out = length(ordered_species))
)

df_renamed_bis <- df_renamed %>%
  left_join(species_offsets, by = "Species") %>%
  mutate(offset_Hs = st_Hs + offset)

tick_vals <- -2:2
breaks <- unlist(lapply(species_offsets$offset, function(o) o + tick_vals))
labels <- rep(tick_vals, times = nrow(species_offsets))

color_vals <- c(
  "T. baccata" = "darkred",
  "P. pinea" = "darkgreen",
  "P. pinaster" = "#56B4E9",
  "P. sylvestris" = "#A0522D",
  "F. sylvatica" = "#9467BD",
  "F. excelsior" = "orange"
)

# Final Plot
plot <- ggplot(df_renamed_bis, aes(x = Longitude, y = offset_Hs, color = Species)) +
  geom_point(alpha = 0.5, size = 2.5) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
  scale_color_manual(values = color_vals) +
  scale_y_continuous(
    breaks = species_offsets$offset,
    labels = species_offsets$Species
  ) +
  theme_minimal() +
  labs(
    title = "Standardized Hs by Species",
    x = "Longitude",
    y = "Scaled genetic diversity"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none",
    axis.text.y = element_text(face = "italic"),  # <- makes species names italic
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
    )
print(plot)

pdf("Results/all_species/Relationship/GD/figures/Hs_relationship_Longitude_good_figure.pdf");
  print(plot);
  dev.off()

## png 
##   2

# changed order
ordered_species <- c(
  "P. sylvestris",
  "F. sylvatica",
  "F. excelsior",
  "P. pinaster",
  "P. pinea",
  "T. baccata"
)

# Update factor levels
df_renamed$Species <- factor(df_renamed$Species, levels = ordered_species)

# Apply same order to species_offsets
species_offsets <- data.frame(
  Species = ordered_species,
  offset = seq(0, by = 6, length.out = length(ordered_species))
)

df_renamed_bis <- df_renamed %>%
  left_join(species_offsets, by = "Species") %>%
  mutate(offset_Hs = st_Hs + offset)

tick_vals <- -2:2
breaks <- unlist(lapply(species_offsets$offset, function(o) o + tick_vals))
labels <- rep(tick_vals, times = nrow(species_offsets))

color_vals <- c(
  "T. baccata" = "darkred",
  "P. pinea" = "darkgreen",
  "P. pinaster" = "#56B4E9",
  "P. sylvestris" = "#A0522D",
  "F. sylvatica" = "#9467BD",
  "F. excelsior" = "orange"
)

# Final Plot
plot <- ggplot(df_renamed_bis, aes(x = Longitude, y = offset_Hs, color = Species)) +
  geom_point(alpha = 0.5, size = 2.5) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
  scale_color_manual(values = color_vals) +
  scale_y_continuous(
    breaks = species_offsets$offset,
    labels = species_offsets$Species
  ) +
  theme_minimal() +
  labs(
    title = "Standardized Hs by Species",
    x = "Longitude",
    y = "Scaled genetic diversity"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none",
    axis.text.y = element_text(face = "italic"),  # <- makes species names italic
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
    )
print(plot)

pdf("Results/all_species/Relationship/GD/figures/Hs_relationship_Longitude_good_figure_change_order.pdf");
  print(plot);
  dev.off()

## png 
##   2

2.2.3.2 Latitude

df_renamed <- df_merge_species

species_labels <- c(
  "Taxus" = "italic(T.~baccata)",
  "Pinea" = "italic(P.~pinea)",
  "Pinaster" = "italic(P.~pinaster)",
  "Sylvestris" = "italic(P.~sylvestris)",
    "Ash" = "italic(F.~excelsior)",
  "Fagus_forg" = "italic(F.~sylvatica)"  
)

# Create a new data frame with updated species names
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))
df_renamed$Latitude <- as.numeric(df_renamed$Latitude)
df_renamed$Longitude <- as.numeric(df_renamed$Longitude)

df_renamed <- df_renamed %>%
  dplyr::group_by(Species) %>%
  dplyr::mutate(
    Q1 = quantile(Fst, 0.25, na.rm = TRUE),
    Q3 = quantile(Fst, 0.75, na.rm = TRUE),
    IQR = Q3 - Q1,
    lower = Q1 - 2.4 * IQR,
    upper = Q3 + 2.4 * IQR
  ) %>%
  filter(Fst >= lower & Fst <= upper) %>%
  ungroup() %>%
  dplyr::select(-Q1, -Q3, -IQR, -lower, -upper)

# Define colors and shapes using the same expressions
colors <- c(
   "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
)

shapes <- c(
  "italic(T.~baccata)" = 19,
    "italic(P.~pinea)" = 19,
    "italic(F.~excelsior)" = 19,
    "italic(P.~pinaster)" = 19,
    "italic(F.~sylvatica)" = 19,
    "italic(P.~sylvestris)" = 19
)

# Plot with custom colors and shapes per species
plot <- ggplot(df_renamed, aes(x = Latitude, y = scale(Hs), shape = Species, fill = Species, color = Species)) +
  geom_point(alpha = 0.6, size = 2.5) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
  "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
)) +
  scale_fill_manual(values = c(
  "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
)) +
   scale_shape_manual(values = c(
  "italic(T.~baccata)" = 19,
    "italic(P.~pinea)" = 19,
    "italic(F.~excelsior)" = 19,
    "italic(P.~pinaster)" = 19,
    "italic(F.~sylvatica)" = 19,
    "italic(P.~sylvestris)" = 19
)) +
  theme_minimal() +
   facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
  labs(title = "Mixed model fit: One line per species",
       x = "Latitude",
       y = "Scaled genetic diversity (Hs)")+
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

#pdf("Results/all_species/Relationship/GL/figures/Provean/GL_relationship_allspecies_new.pdf");
#  print(plot);
#  dev.off()

plot <- ggplot(df_renamed, aes(x = Latitude, y = scale(Hs), shape = Species, fill = Species, color = Species)) +
  geom_point(alpha = 0.6, size = 2.5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
  "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
)) +
  scale_fill_manual(values = c(
  "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
)) +
  scale_shape_manual(values = c(
"italic(T.~baccata)" = 19,
    "italic(P.~pinea)" = 19,
    "italic(F.~excelsior)" = 19,
    "italic(P.~pinaster)" = 19,
    "italic(F.~sylvatica)" = 19,
    "italic(P.~sylvestris)" = 19
)) +
  theme_minimal() +
   facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
  labs(title = "Mixed model fit: One line per species",
       x = "Latitude",
       y = "Scaled genetic diversity (Hs)")+
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

#pdf("Results/all_species/Relationship/GL/figures/Provean/GL_relationship_allspecies_new_wo_SE.pdf");
#  print(plot);
#  dev.off()

df_lm_results <- data.frame(
  Species = character(),
  Intercept = numeric(),
  Slope = numeric(),
  R2 = numeric(),
  p_value = numeric()
)

# Fit LM for each species independently
for (species in unique(df_renamed$Species)) {
  
  df_species <- df_renamed %>%
    filter(Species == species)
  
  # Skip if not enough points
  if (nrow(df_species) < 3) next
  
  # Simple linear model
  model <- lm(Hs ~ Latitude, data = df_species)
  summary_model <- summary(model)
  
  df_lm_results <- rbind(
    df_lm_results,
    data.frame(
      Species = species,
      Intercept = coef(summary_model)[1, 1],
      Slope = coef(summary_model)[2, 1],
      R2 = summary_model$r.squared,
      p_value = coef(summary_model)[2, 4]
    )
  )
}

# Inspect results
print(df_lm_results)

##                 Species Intercept         Slope          R2      p_value
## 1    italic(T.~baccata) 0.1347653  8.545802e-04 0.072745981 0.1570836983
## 2      italic(P.~pinea) 0.2911319 -1.499962e-03 0.002240129 0.7493873697
## 3   italic(P.~pinaster) 0.1110890  2.718498e-03 0.058057319 0.0282128155
## 4 italic(P.~sylvestris) 0.2333875  1.268864e-03 0.176524523 0.0001158027
## 5  italic(F.~excelsior) 0.2299639  2.969343e-04 0.007352633 0.6643995337
## 6  italic(F.~sylvatica) 0.2354445 -3.413152e-05 0.001108792 0.8241600280

gt_table_lm <- df_lm_results %>%
  dplyr::mutate(
    `Hs ~ Latitude` = paste0(
      "slope=", round(Slope, 3),
      ", R²=", round(R2, 2),
      ", p=", signif(p_value, 2)
    )
  ) %>%
  dplyr::select(Species, `Hs ~ Latitude`) %>%
  gt() %>%
  tab_header(
    title = "Linear model: Hs ~ Latitude per species"
  ) %>%
  cols_label(
    Species = "Species",
    `Hs ~ Latitude` = "Model summary"
  ) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    table.width = pct(100)
  )

gt_table_lm

Species	Model summary
Linear model: Hs ~ Latitude per species
italic(T.~baccata)	slope=0.001, R²=0.07, p=0.16
italic(P.~pinea)	slope=-0.001, R²=0, p=0.75
italic(P.~pinaster)	slope=0.003, R²=0.06, p=0.028
italic(P.~sylvestris)	slope=0.001, R²=0.18, p=0.00012
italic(F.~excelsior)	slope=0, R²=0.01, p=0.66
italic(F.~sylvatica)	slope=0, R²=0, p=0.82

### Disentangle the effect of Fst and Latitude on GDI

# Data frame to store results
df_varpart_species_lon_fst_genetic_diversity <- data.frame(
  Species = character(),
  Fst_unique = numeric(),
  Longitude_unique = numeric(),
  Latitude_unique = numeric(),
  Shared = numeric(),
  Residual = numeric(),
  Total_Explained = numeric(),
  Fst_p = numeric(),
  Longitude_p = numeric(),
  Latitude_p = numeric(),
  stringsAsFactors = FALSE
)

for (species in unique(df_renamed$Species)) {
  
  df_species <- df_renamed %>% filter(Species == species)
  
  # Skip if not enough points
  if (nrow(df_species) < 3) next
  
  # --- Full RDA to get total explained variance ---
  rda_full <- rda(df_species$Hs ~ scale(Fst) +scale(Longitude)+scale(Latitude) , data = df_species)
  total_explained <- RsquareAdj(rda_full)$adj.r.squared
  
  # --- Partial RDA for unique fractions ---
  rda_Fst <- rda(df_species$Hs ~ scale(Fst) + Condition(scale(Longitude) + scale(Latitude)), data = df_species)
  Fst_unique <- RsquareAdj(rda_Fst)$adj.r.squared
  anova_Fst <- anova(rda_Fst, permutations = 999)
  Fst_p <- anova_Fst$`Pr(>F)`[1]  # p-value for Fst unique
  
  rda_Lon <- rda(df_species$Hs ~ scale(Longitude) + Condition(scale(Fst)+scale(Latitude)), data = df_species)
  Longitude_unique <- RsquareAdj(rda_Lon)$adj.r.squared
  anova_Lon <- anova(rda_Lon, permutations = 999)
  Longitude_p <- anova_Lon$`Pr(>F)`[1]  # p-value for Latitude unique
  
  rda_Lat <- rda(df_species$Hs ~ scale(Latitude) + Condition(scale(Fst)+scale(Longitude)), data = df_species)
  Latitude_unique <- RsquareAdj(rda_Lat)$adj.r.squared
  anova_Lat <- anova(rda_Lat, permutations = 999)
  Latitude_p <- anova_Lat$`Pr(>F)`[1]  # p-value for Latitude unique
  
  # --- Shared and residual fractions ---
  Shared <- total_explained - Fst_unique - Longitude_unique - Latitude_unique
  Residual <- 1 - total_explained
  Total_Explained <- total_explained
  
  # Bind to results
  df_varpart_species_lon_fst_genetic_diversity <- rbind(
    df_varpart_species_lon_fst_genetic_diversity,
    data.frame(
      Species = species,
      Fst_unique = Fst_unique,
      Longitude_unique = Longitude_unique,
      Latitude_unique = Latitude_unique,
      Shared = Shared,
      Residual = Residual,
      Total_Explained = Total_Explained,
      Fst_p = Fst_p,
      Longitude_p = Longitude_p,
      Latitude_p = Latitude_p
    )
  )
}

# Inspect results
print(df_varpart_species_lon_fst_genetic_diversity)

##                 Species Fst_unique Longitude_unique Latitude_unique
## 1    italic(T.~baccata) 0.54406358     0.3256065230     0.047897487
## 2      italic(P.~pinea) 0.73201670     0.0005769882    -0.003026506
## 3   italic(P.~pinaster) 0.24332765    -0.0062389588    -0.006507674
## 4 italic(P.~sylvestris) 0.31169545     0.1707714991    -0.005427702
## 5  italic(F.~excelsior) 0.08580789    -0.0392816956    -0.039367629
## 6  italic(F.~sylvatica) 0.51042647     0.0204461884     0.086881809
##         Shared  Residual Total_Explained Fst_p Longitude_p Latitude_p
## 1 -0.028534866 0.1109673      0.88903272 0.001       0.001      0.002
## 2  0.133499288 0.1369335      0.86306647 0.001       0.299      0.938
## 3  0.209911471 0.5595075      0.44049249 0.001       0.739      0.799
## 4  0.072177880 0.4507829      0.54921713 0.001       0.001      0.770
## 5  0.008652461 0.9841890      0.01581103 0.087       0.966         NA
## 6  0.059023585 0.3232220      0.67677805 0.001       0.055      0.001

# Optional: GT table
gt_table_vp <- df_varpart_species_lon_fst_genetic_diversity %>%
  gt() %>%
  tab_header(
    title = "Variance Partitioning of Genetic diversity per Species Longitude and Latitude wo outliers Fst"
  ) %>%
  cols_label(
    Species = "Species",
    Fst_unique = "Fst (unique)",
    Longitude_unique = "Longitude (unique)",
   Latitude_unique = "Latitude (unique)",
    Shared = "Shared fraction",
    Residual = "Residual",
    Total_Explained = "Total Explained",
    Fst_p = "Fst p-value",
   Longitude_p = "Longitude p-value",
    Latitude_p = "Latitude p-value"
  ) %>%
  fmt_number(columns = vars(Fst_p,Longitude_p, Latitude_p), decimals = 3) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    table.width = pct(100)
  )

gt_table_vp

Species	Fst (unique)	Longitude (unique)	Latitude (unique)	Shared fraction	Residual	Total Explained	Fst p-value	Longitude p-value	Latitude p-value
Variance Partitioning of Genetic diversity per Species Longitude and Latitude wo outliers Fst
italic(T.~baccata)	0.54406358	0.3256065230	0.047897487	-0.028534866	0.1109673	0.88903272	0.001	0.001	0.002
italic(P.~pinea)	0.73201670	0.0005769882	-0.003026506	0.133499288	0.1369335	0.86306647	0.001	0.299	0.938
italic(P.~pinaster)	0.24332765	-0.0062389588	-0.006507674	0.209911471	0.5595075	0.44049249	0.001	0.739	0.799
italic(P.~sylvestris)	0.31169545	0.1707714991	-0.005427702	0.072177880	0.4507829	0.54921713	0.001	0.001	0.770
italic(F.~excelsior)	0.08580789	-0.0392816956	-0.039367629	0.008652461	0.9841890	0.01581103	0.087	0.966	NA
italic(F.~sylvatica)	0.51042647	0.0204461884	0.086881809	0.059023585	0.3232220	0.67677805	0.001	0.055	0.001

3 Genetic load

Genetic load only computed for the conifer species.

3.1 Load genetic load

list_species <- c("Taxus_baccata","Pinus_pinea","Pinus_pinaster","Pinus_sylvestris")
list_species_name <- c("Taxus","Pinea","Pinaster","Sylvestris")

for(x in 1:length(list_species)){
  
  species <- list_species[x]
  species_name <- list_species_name[x]
  
  data <- read.csv(paste0("Data/Species/Chapter2/GL_new_estimates/",species,"_GL.csv"),h=T,sep=";",dec=",")
  data_select <- data %>% dplyr::select(Population,GLr.Provean)
  data_select$GLr.Provean<- as.numeric(data_select$GLr.Provean)

  #Fst
  data_FST <- get(paste0("Bayescan_FST_pop_order_", species_name))
  data_tot <- merge(data_FST,data_select,"Population")
  assign(paste0("GL_",species_name),data_tot)
}

3.2 Graphical visualisation

list_species_name <- c("Taxus","Pinea","Pinaster","Sylvestris")

# Loop to process each species and combine data
combined_data_GL <- data.frame()

for (x in 1:length(list_species_name)) {
  species <- list_species_name[x]
  data <- get(paste0("GL_", species))
  data$Species <- species
  combined_data_GL <- rbind(combined_data_GL, data)
  
  #provean
  plot <- ggplot() + 
    geom_density(data = data, aes(x = GLr.Provean), fill = "lightblue", alpha = 0.8) +
    labs(
      title = paste0("Distribution of Genetic load between population of ", species),
      x = "Genetic load Provean", 
      y = "Density (number of populations)"
    ) +
    theme_bw() +
    theme(
      panel.grid = element_blank(),
      plot.background = element_blank(),
      panel.background = element_blank(),
      strip.text = element_text(size = 11),
      plot.title = element_text(hjust = 0.5)
    )
  
  print(plot)
  
  # Save to PDF
  pdf(paste0("Results/all_species/Relationship/GL_new_estimates/figures/Provean/GL_provean", species, ".pdf"))
  print(plot)
  dev.off()
}

combined_data_GL$Species <- factor(
  combined_data_GL$Species,
  levels = c("Taxus", "Pinea", "Pinaster", "Sylvestris")
)


#Provean
combined_plot <- ggplot(combined_data_GL, aes(x = log10(GLr.Provean))) +
  geom_histogram(binwidth = 0.05, aes(fill = Species), color = "black", alpha = 0.8) +
  scale_fill_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D")) +#"#9467BD","orange"
  #scale_x_continuous(limits = c(0, 0.25), breaks = seq(0, 0.25, by = 0.05),labels = scales::label_number(accuracy = 0.01)) +
  facet_wrap(~ Species, scales = "free_y", ncol = 2) +  # Two columns: top = taxus & pinea; bottom = pinaster & sylvestris
  labs(
    title = "Distribution of genetic load values \nacross populations all species ",
    x = "Log 10 Genetic load Provean",
    y = "Count of populations"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    strip.text = element_text(size = 10)
  )

# Show the plot
print(combined_plot)

pdf("Results/all_species/Genetic_load/figures_new_estimates/Provean/GL_Provean_all_species.pdf")
  print(combined_plot)
  dev.off()

## png 
##   2

combined_data_GL_bis <- combined_data_GL
combined_data_GL_bis$Species <- factor(
  combined_data_GL_bis$Species,
  levels = c("Taxus", "Pinea", "Pinaster", "Sylvestris"),
  labels = c("T. baccata", "P. pinea", "P. pinaster", "P. sylvestris")
)

beeswarm_provean <- ggplot(combined_data_GL_bis, aes(x = Species, y = GLr.Provean, color = Species)) +
  geom_beeswarm(cex = 1.2, size = 2.5, alpha = 0.7) +
  scale_color_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D")) +#"#9467BD","orange"
  stat_summary(fun = median, geom = "crossbar", width = 0.3, color = "black", fatten = 0.13, size = 10) +
  labs(
    title = "Beeswarm Plot of Genetic Load estimates fro Provean",
    x = "Species",
    y = "Genetic Load (Provean)"
  ) +
  scale_x_discrete(labels = c(
  "T. baccata" = expression(italic("T. baccata")),
  "P. pinea" = expression(italic("P. pinea")),
  "P. pinaster" = expression(italic("P. pinaster")),
  "P. sylvestris" = expression(italic("P. sylvestris"))
))+
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

print(beeswarm_provean)

pdf("Results/all_species/Genetic_load/figures_new_estimates/Provean/Beeswarm_GL_Provean_all_species.pdf");
  print(beeswarm_provean);
  dev.off()

## png 
##   2

#zscores
combined_data_GL_bis$GLr.Provean.z <- ave(combined_data_GL_bis$GLr.Provean, combined_data_GL_bis$Species,
                                      FUN = function(x) scale(x)[,1])

beeswarm_snpeff_horiz <- ggplot(combined_data_GL_bis, aes(y = Species, x = GLr.Provean.z, color = Species)) +
  geom_beeswarm(cex = 1.2, size = 2.5, alpha = 0.8) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray30") +
  scale_color_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D")) +
  labs(
    title = "Genetic Load (SnpEff) per Species",
    x = "Genetic Load (SnpEff)",
    y = ""
  ) +
  scale_x_discrete(labels = c(
  "T. baccata" = expression(italic("T. baccata")),
  "P. pinea" = expression(italic("P. pinea")),
  "P. pinaster" = expression(italic("P. pinaster")),
  "P. sylvestris" = expression(italic("P. sylvestris"))
))+
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none",
    axis.text.y = element_text(size = 11)
  )
print(beeswarm_snpeff_horiz)

# Set factor levels explicitly if not already done

# Boxplot with jittered points overlaid
boxplot_jitter_plot <- ggplot(combined_data_GL_bis, aes(x = Species, y = GLr.Provean, fill = Species)) +
  geom_jitter(aes(color = Species), width = 0.2, alpha = 0.6, size = 1.5) +  # add jittered points
  geom_boxplot(outlier.shape = NA, alpha = 0.6) +
  scale_fill_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D")) +
  scale_color_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D")) +
  scale_x_discrete(labels = c(
  "T. baccata" = expression(italic("T. baccata")),
  "P. pinea" = expression(italic("P. pinea")),
  "P. pinaster" = expression(italic("P. pinaster")),
  "P. sylvestris" = expression(italic("P. sylvestris"))
))+
  labs(
    title = "Boxplot of genetic load values with jittered points by species",
    x = "",
    y = "Realised genetic load"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none",
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

# Display plot
print(boxplot_jitter_plot)

# Save to PDF
pdf("Results/all_species/Genetic_load/figures_new_estimates/GL_boxplot_jitter_all_species.pdf", width = 8, height = 6)
print(boxplot_jitter_plot)
dev.off()

## png 
##   2

# one way anova to see if groups are differents
anova_result <- aov(GLr.Provean ~ Species, data = combined_data_GL)
summary(anova_result)

##              Df Sum Sq Mean Sq F value Pr(>F)    
## Species       3 0.3358 0.11193    1043 <2e-16 ***
## Residuals   238 0.0256 0.00011                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# They are so we can do a Tukey post hoc to see which groups are different
tukey_result <- TukeyHSD(anova_result)
tukey_result

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = GLr.Provean ~ Species, data = combined_data_GL)
## 
## $Species
##                             diff          lwr          upr     p adj
## Pinea-Taxus         -0.031411220 -0.037716488 -0.025105951 0.0000000
## Pinaster-Taxus      -0.096757285 -0.102531306 -0.090983263 0.0000000
## Sylvestris-Taxus    -0.098055316 -0.103856714 -0.092253918 0.0000000
## Pinaster-Pinea      -0.065346065 -0.070196766 -0.060495364 0.0000000
## Sylvestris-Pinea    -0.066644096 -0.071527353 -0.061760840 0.0000000
## Sylvestris-Pinaster -0.001298031 -0.005472851  0.002876788 0.8523071

3.3 Association with historical gene flow

3.3.1 Pearson correlation

3.3.1.1 species per species correlation

list_species <- c("Taxus","Pinea","Pinaster","Sylvestris")

df_correlation_GL <- data.frame(
  Species = character(),
  Cor_Provean_PSD = numeric(),
  pvalue_Provean_PSD = numeric()
)

for(x in 1:length(list_species)){
  
  species <- list_species[x]
data <- get(paste0("GL_",species))

cor_Provean <- cor(data$Fst,data$GLr.Provean)
pval_Provean <- cor.test(data$Fst,data$GLr.Provean)


df_correlation_GL <- rbind(df_correlation_GL,data.frame(Species=species,Cor_Provean_PSD=cor_Provean,pvalue_Provean_PSD=pval_Provean$p.value))
  
}

df_gt_table <- df_correlation_GL %>%
  dplyr::mutate(
    `Provean ~ PSD` = paste0(round(Cor_Provean_PSD, 2), " (p=", signif(pvalue_Provean_PSD, 2), ")"),
  ) %>%
  dplyr::select(Species, `Provean ~ PSD`)

# Create a nice gt table
gt_table <- df_gt_table %>%
  gt() %>%
  tab_header(
    title = "Pearson's correlation between genetic load and historical gene flow"
  ) %>%
  cols_label(
    Species = "Species",
    `Provean ~ PSD` = "Provean ~ PSD",
  ) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    heading.subtitle.font.size = 11,
    table.width = pct(100)
  )

gt_table

Species	Provean ~ PSD
Pearson's correlation between genetic load and historical gene flow
Taxus	-0.45 (p=0.013)
Pinea	0.45 (p=0.0014)
Pinaster	-0.32 (p=0.0035)
Sylvestris	-0.31 (p=0.0046)

3.3.1.2 Combined species

Genetic load values could be comparable across models because it’s a ratio so we could use raw values to compare across models

3.3.2 Linear models

Genetic load values could be comparable across models because it’s a ratio so we could use raw values to compare across models. Therefore, we can run a linear models with the species as fixed factor to investigate the effect of historical gene flow on genetic load and if we have differences among species.

#merge all data
GL_Taxus$Species <- factor("Taxus")
GL_Pinea$Species <- factor("Pinea")
GL_Pinaster$Species <- factor("Pinaster")
GL_Sylvestris$Species <- factor("Sylvestris")

df_merge_species <- rbind(GL_Taxus,GL_Pinea,GL_Pinaster,GL_Sylvestris)

#Is there a significant difference between species? 

anova_model <- aov(GLr.Provean ~ Species, data = df_merge_species)
summary(anova_model)

##              Df Sum Sq Mean Sq F value Pr(>F)    
## Species       3 0.3358 0.11193    1043 <2e-16 ***
## Residuals   238 0.0256 0.00011                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

tukey_results <- TukeyHSD(anova_model)
tukey_results

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = GLr.Provean ~ Species, data = df_merge_species)
## 
## $Species
##                             diff          lwr          upr     p adj
## Pinea-Taxus         -0.031411220 -0.037716488 -0.025105951 0.0000000
## Pinaster-Taxus      -0.096757285 -0.102531306 -0.090983263 0.0000000
## Sylvestris-Taxus    -0.098055316 -0.103856714 -0.092253918 0.0000000
## Pinaster-Pinea      -0.065346065 -0.070196766 -0.060495364 0.0000000
## Sylvestris-Pinea    -0.066644096 -0.071527353 -0.061760840 0.0000000
## Sylvestris-Pinaster -0.001298031 -0.005472851  0.002876788 0.8523071

Ratio_lm_Provean <- lm(GLr.Provean~Fst+Species, data=df_merge_species)

anova(Ratio_lm_Provean)

## Analysis of Variance Table
## 
## Response: GLr.Provean
##            Df   Sum Sq  Mean Sq F value    Pr(>F)    
## Fst         1 0.064404 0.064404  598.36 < 2.2e-16 ***
## Species     3 0.271431 0.090477  840.61 < 2.2e-16 ***
## Residuals 237 0.025509 0.000108                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(Ratio_lm_Provean)

## 
## Call:
## lm(formula = GLr.Provean ~ Fst + Species, data = df_merge_species)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.033727 -0.004316 -0.000147  0.002910  0.033465 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.203272   0.002086  97.442   <2e-16 ***
## Fst                0.003264   0.005090   0.641    0.522    
## SpeciesPinea      -0.032182   0.002720 -11.833   <2e-16 ***
## SpeciesPinaster   -0.096751   0.002234 -43.299   <2e-16 ***
## SpeciesSylvestris -0.097704   0.002311 -42.282   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01037 on 237 degrees of freedom
## Multiple R-squared:  0.9294, Adjusted R-squared:  0.9282 
## F-statistic:   780 on 4 and 237 DF,  p-value: < 2.2e-16

palette <- c("darkred","orange","lightblue","darkgreen")

#provean
ggplotRegression <- function (fit) {

ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) + 
  geom_point(aes(color=Species)) +
  scale_color_manual(values=c("darkred", "#E69F00", "#56B4E9","darkgreen"))+
  stat_smooth(method = "lm", col = "red") +
  labs(title = paste("R2 = ",signif(summary(fit)$r.squared, 5),
                     "Intercept =",signif(fit$coef[[1]],5 ),
                     " Slope =",signif(fit$coef[[2]], 5),
                     " P =",signif(summary(fit)$coef[2,4], 5)),
       x = "Fst", 
       y = "Genetic load Provean")
}
plot_Provean <- ggplotRegression(Ratio_lm_Provean)

pdf("Results/all_species/Relationship/GL_new_estimates/figures/Provean/GL_relationship_allspecies.pdf");
  print(plot_Provean);
  dev.off()

## png 
##   2

species_labels <- c(
  "Taxus" = "italic(T.~baccata)",
  "Pinea" = "italic(P.~pinea)",
  "Pinaster" = "italic(P.~pinaster)",
  "Sylvestris" = "italic(P.~sylvestris)"
)

# Apply labels to the data
df_renamed <- df_merge_species
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))

# Define colors and shapes using the same expressions
colors <- c(
  "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D"
)

shapes <- c(
  "italic(T.~baccata)" = 8,
  "italic(P.~pinea)" = 18,
  "italic(P.~pinaster)" = 15,
  "italic(P.~sylvestris)" = 17
)

# Plot with custom colors and shapes per species
plot <- ggplot(df_renamed, aes(x = Fst, y = GLr.Provean, shape = Species, fill = Species, color = Species)) +
  geom_point(alpha = 0.6, size = 2.5) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D"
  )) +
  scale_fill_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D"
  )) +
   scale_shape_manual(values = c(
    "italic(T.~baccata)" = 19,
  "italic(P.~pinea)" = 19,
  "italic(P.~pinaster)" = 19,
  "italic(P.~sylvestris)" = 19
  )) +
  theme_minimal() +
   facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
  labs(title = "Mixed model fit: One line per species",
       x = expression(italic(F)[ST]),
       y = "Realised genetic load")+
  scale_y_continuous(
  limits = c(0.05, 0.25),
  breaks = seq(0.05, 0.25, by = 0.05)
)+
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

pdf("Results/all_species/Relationship/GL_new_estimates/figures/Provean/GL_relationship_allspecies_new.pdf");
  print(plot);
  dev.off()

## png 
##   2

plot <- ggplot(df_renamed, aes(x = Fst, y = GLr.Provean, shape = Species, fill = Species, color = Species)) +
  geom_point(alpha = 0.6, size = 2.5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D"
  )) +
  scale_fill_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D"
  )) +
  scale_shape_manual(values = c(
    "italic(T.~baccata)" = 19,
  "italic(P.~pinea)" = 19,
  "italic(P.~pinaster)" = 19,
  "italic(P.~sylvestris)" = 19
  )) +
  theme_minimal() +
   facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
  labs(title = "Mixed model fit: One line per species",
       x = expression(italic(F)[ST]),
       y = "Realised genetic load")+
  scale_y_continuous(
  limits = c(0.05, 0.25),
  breaks = seq(0.05, 0.25, by = 0.05)
)+
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

pdf("Results/all_species/Relationship/GL_new_estimates/figures/Provean/GL_relationship_allspecies_new_wo_SE.pdf");
  print(plot);
  dev.off()

## png 
##   2

df_lm_results <- data.frame(
  Species = character(),
  Intercept = numeric(),
  Slope = numeric(),
  R2 = numeric(),
  p_value = numeric()
)

# Fit LM for each species independently
for (species in unique(df_renamed$Species)) {
  
  df_species <- df_renamed %>%
    filter(Species == species)
  
  # Skip if not enough points
  if (nrow(df_species) < 3) next
  
  # Simple linear model
  model <- lm(GLr.Provean ~ Fst, data = df_species)
  summary_model <- summary(model)
  
  df_lm_results <- rbind(
    df_lm_results,
    data.frame(
      Species = species,
      Intercept = coef(summary_model)[1, 1],
      Slope = coef(summary_model)[2, 1],
      R2 = summary_model$r.squared,
      p_value = coef(summary_model)[2, 4]
    )
  )
}

# Inspect results
print(df_lm_results)

##                 Species Intercept       Slope         R2     p_value
## 1    italic(T.~baccata) 0.2140783 -0.06547539 0.20675345 0.013206130
## 2      italic(P.~pinea) 0.1627249  0.02454334 0.19987864 0.001443942
## 3   italic(P.~pinaster) 0.1123536 -0.03432457 0.09953515 0.003463933
## 4 italic(P.~sylvestris) 0.1065011 -0.01550811 0.09718620 0.004610150

gt_table_lm <- df_lm_results %>%
  dplyr::mutate(
    `GLr.Provean ~ Fst` = paste0(
      "slope=", round(Slope, 3),
      ", R²=", round(R2, 2),
      ", p=", signif(p_value, 2)
    )
  ) %>%
  dplyr::select(Species, `GLr.Provean ~ Fst`) %>%
  gt() %>%
  tab_header(
    title = "Linear model: GLr.Provean ~ Fst per species"
  ) %>%
  cols_label(
    Species = "Species",
    `GLr.Provean ~ Fst` = "Model summary"
  ) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    table.width = pct(100)
  )
gt_table_lm

Species	Model summary
Linear model: GLr.Provean ~ Fst per species
italic(T.~baccata)	slope=-0.065, R²=0.21, p=0.013
italic(P.~pinea)	slope=0.025, R²=0.2, p=0.0014
italic(P.~pinaster)	slope=-0.034, R²=0.1, p=0.0035
italic(P.~sylvestris)	slope=-0.016, R²=0.1, p=0.0046

We can remove the outliers

species_labels <- c(
  "Taxus" = "italic(T.~baccata)",
  "Pinea" = "italic(P.~pinea)",
  "Pinaster" = "italic(P.~pinaster)",
  "Sylvestris" = "italic(P.~sylvestris)"
)

# Apply labels to the data
df_renamed <- df_merge_species
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))

df_filtered <- df_renamed %>%
  dplyr::group_by(Species) %>%
  dplyr::mutate(
    Q1 = quantile(Fst, 0.25, na.rm = TRUE),
    Q3 = quantile(Fst, 0.75, na.rm = TRUE),
    IQR = Q3 - Q1,
    lower = Q1 - 2.4 * IQR,
    upper = Q3 + 2.4 * IQR
  ) %>%
  filter(Fst >= lower & Fst <= upper) %>%
  ungroup() %>%
  dplyr::select(-Q1, -Q3, -IQR, -lower, -upper)

# Define colors and shapes using the same expressions
colors <- c(
  "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D")

shapes <- c(
  "italic(T.~baccata)" = 8,
  "italic(P.~pinea)" = 18,
  "italic(P.~pinaster)" = 15,
  "italic(P.~sylvestris)" = 17
)

# Plot with custom colors and shapes per species
plot <- ggplot(df_filtered, aes(x = Fst, y = GLr.Provean, shape = Species, fill = Species, color = Species)) +
  geom_point(alpha = 0.6, size = 2.5)+
  geom_smooth(method = "lm", se = TRUE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D"
  )) +
  scale_fill_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D"
  )) +
   scale_shape_manual(values = c(
    "italic(T.~baccata)" = 19,
  "italic(P.~pinea)" = 19,
  "italic(P.~pinaster)" = 19,
  "italic(P.~sylvestris)" = 19
  )) +
  theme_minimal() +
   facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
  labs(title = "Mixed model fit: One line per species",
       x = expression(italic(F)[ST]),
       y = "Realised genetic load")+
    scale_y_continuous(
  limits = c(0.05, 0.25),
  breaks = seq(0.05, 0.25, by = 0.05)
)+
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

pdf("Results/all_species/Relationship/GL_new_estimates/figures/Provean/GL_relationship_allspecies_new_wo_outliers.pdf");
  print(plot);
  dev.off()

## png 
##   2

plot <- ggplot(df_filtered, aes(x = Fst, y = GLr.Provean, shape = Species, fill = Species, color = Species)) +
 geom_point(alpha = 0.6, size = 2.5)+
  geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D"
  )) +
  scale_fill_manual(values = c(
    "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D"
  )) +
  scale_shape_manual(values = c(
    "italic(T.~baccata)" = 19,
  "italic(P.~pinea)" = 19,
  "italic(P.~pinaster)" = 19,
  "italic(P.~sylvestris)" = 19
  )) +
  theme_minimal() +
   facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +#
  labs(title = "Mixed model fit: One line per species",
       x = expression(italic(F)[ST]),
       y = "Realised genetic load")+
   scale_y_continuous(
  limits = c(0.05, 0.25),
  breaks = seq(0.05, 0.25, by = 0.05)
)+
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

pdf("Results/all_species/Relationship/GL_new_estimates/figures/Provean/GL_relationship_allspecies_new_wo_SE_wo_outliers.pdf");
  print(plot);
  dev.off()

## png 
##   2

df_correlation_GD <- data.frame(
  Species = character(),
  cor_GDI_PSD = numeric(),
  pvalue_GDI_PSD = numeric()
)

# Loop over unique species in the filtered dataset
for (species in unique(df_filtered$Species)) {
  
  df_species <- df_filtered %>%
    filter(Species == species)
  
  # Pearson correlation
  cor_result <- cor.test(df_species$Fst, df_species$GLr.Provean)
  
  df_correlation_GD <- rbind(
    df_correlation_GD,
    data.frame(
      Species = species,
      cor_GL_PSD = cor_result$estimate,
      pvalue_GL_PSD = cor_result$p.value
    )
  )
}

df_lm_results <- data.frame(
  Species = character(),
  Intercept = numeric(),
  Slope = numeric(),
  R2 = numeric(),
  p_value = numeric()
)

# Loop over unique species
for (species in unique(df_filtered$Species)) {
  
  df_species <- df_filtered %>%
    filter(Species == species)
  
  # Fit simple linear model
  model <- lm(GLr.Provean ~ Fst, data = df_species)
  summary_model <- summary(model)
  
  # Extract coefficients and stats
  intercept <- coef(summary_model)[1, 1]
  slope <- coef(summary_model)[2, 1]
  p_value <- coef(summary_model)[2, 4]
  r2 <- summary_model$r.squared
  
  # Add to results table
  df_lm_results <- rbind(
    df_lm_results,
    data.frame(
      Species = species,
      Intercept = intercept,
      Slope = slope,
      R2 = r2,
      p_value = p_value
    )
  )
}

# Merge with your existing correlation table
df_results_all <- df_correlation_GD %>%
  left_join(df_lm_results, by = "Species")

# Format nicely for viewing
df_results_all %>%
  dplyr::mutate(
    `GLr.Provean ~ Fst` = paste0(
      "slope=", round(Slope, 3),
      ", R²=", round(R2, 2),
      ", p=", signif(p_value, 2)
    )
  ) %>%
  dplyr::select(Species, `GLr.Provean ~ Fst`) %>%
  gt() %>%
  tab_header(
    title = "Linear model: GLr.Provean ~ Fst per species"
  ) %>%
  cols_label(
    Species = "Species",
    `GLr.Provean ~ Fst` = "Model summary"
  )

Species	Model summary
Linear model: GLr.Provean ~ Fst per species
italic(T.~baccata)	slope=-0.065, R²=0.21, p=0.013
italic(P.~pinea)	slope=0.025, R²=0.2, p=0.0014
italic(P.~pinaster)	slope=-0.042, R²=0.12, p=0.0012
italic(P.~sylvestris)	slope=-0.026, R²=0.18, p=9.8e-05

  df_gt_table <- df_correlation_GD %>%
  dplyr::mutate(
    `GLr.Provean ~ PSD` = paste0(round(cor_GL_PSD, 2), " (p=", signif(pvalue_GL_PSD, 2), ")")
  ) %>%
  dplyr::select(Species, `GLr.Provean ~ PSD`)

gt_table <- df_gt_table %>%
  gt() %>%
  tab_header(
    title = "Pearson's correlation between genetic load and historical realised gene flow"
  ) %>%
  cols_label(
    Species = "Species",
    `GLr.Provean ~ PSD` = "GLr.Provean ~ PSD"
  ) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    heading.subtitle.font.size = 11,
    table.width = pct(100)
  )
gt_table

Species	GLr.Provean ~ PSD
Pearson's correlation between genetic load and historical realised gene flow
italic(T.~baccata)	-0.45 (p=0.013)
italic(P.~pinea)	0.45 (p=0.0014)
italic(P.~pinaster)	-0.35 (p=0.0012)
italic(P.~sylvestris)	-0.42 (p=9.8e-05)

4 GDI

list_species <- c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")

for(x in 1:length(list_species)){

  name_species <- list_species[x]
  
data <- read.csv(paste0("Data/Species/Chapter2/GDI_past/GDI_",name_species,".csv"),sep=",",dec=",",h=T)
data <- data[, -1]
colnames(data) <- c("Population","GDI")

data$GDI <- as.numeric(data$GDI)

p <- ggplot(data, aes(x = GDI)) +
    geom_density(fill = "steelblue", alpha = 0.6) +
    labs(title = paste("GDI distribution -", name_species),
         x = "GDI",
         y = "Density") +
    theme_minimal()

  print(p)
assign(paste0("GDI_",name_species),data)
}

for(x in 1:length(list_species)){
  
  species <- list_species[x]
    data <- get(paste0("GDI_", species))

    write.csv(data,paste0("Results/all_species/data_",species,".csv"))
}

4.0.1 Graph

list_species_name <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")

combined_data <- data.frame()

for (x in 1:length(list_species)) {
  species <- list_species[x]
  data <- get(paste0("GDI_", species))
  data$Species <- species
  combined_data <- rbind(combined_data, data)
}

x_breaks <- seq(0, 0.3, by = 0.05)
x_limits <- c(0, 0.3)

for (species in list_species) {
  data <- subset(combined_data, Species == species)
  
  plot <- ggplot(data, aes(x = GDI)) + 
    geom_histogram(binwidth = 0.05, fill = "lightblue", color = "black", alpha = 0.8) +
    scale_x_continuous(limits = x_limits, breaks = x_breaks) +  # fixed x-axis
    # No y limit — y axis is free
    labs(
      title = paste0("Distribution of GDI values \nacross populations of ", species),
      x = "GDI", 
      y = "Count of populations"
    ) +
    theme_bw() +
    theme(
      panel.grid = element_blank(),
      plot.background = element_blank(),
      panel.background = element_blank(),
      strip.text = element_text(size = 11),
      plot.title = element_text(hjust = 0.5)
    )
  
  print(plot)
  
   # Save to PDF
  pdf(paste0("Results/all_species/GDI/figures/GDI_past_", species, ".pdf"))
  print(plot)
  dev.off()
}

combined_data$Species <- factor(
  combined_data$Species,
  levels = c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")
)

# Facetted histogram with custom order
combined_plot <- ggplot(combined_data, aes(x = GDI)) +
  geom_histogram(binwidth = 0.05, aes(fill = Species), color = "black", alpha = 0.8) +
      scale_fill_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D","orange","#9467BD")) +#
  scale_x_continuous(limits = c(0, 0.20), breaks = seq(0, 0.20, by = 0.05),labels = scales::label_number(accuracy = 0.01)) +
  facet_wrap(~ Species, scales = "free_y", ncol = 2) +
  labs(
    title = "Distribution of GDI values \nacross populations all species ",
    x = "GDI",
    y = "Count of populations"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    strip.text = element_text(size = 10)
  )

# Show the plot
print(combined_plot)

pdf(paste0("Results/all_species/GDI/figures/GDI_past_all_species_", species, ".pdf"))
  print(combined_plot)
  dev.off()

## png 
##   2

beeswarm_provean <- ggplot(combined_data, aes(x = Species, y = GDI, color = Species)) +
  geom_beeswarm(cex = 1.2, size = 2.5, alpha = 0.7) +
  scale_color_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D","orange","#9467BD")) +#
  stat_summary(fun = median, geom = "crossbar", width = 0.5, color = "black", fatten = 0.07, size = 17) +
  labs(
    title = "Beeswarm Plot of GDI",
    x = "Species",
    y = "GDI"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )
print(beeswarm_provean)

#pdf("Results/all_species/Genetic_load/figures/Provean/Beeswarm_GL_Provean_all_species.pdf")
  #print(beeswarm_provean)
  #dev.off()

#zscores
combined_data$GDI.z <- ave(combined_data$GDI, combined_data$Species,
                                      FUN = function(x) scale(x)[,1])

beeswarm_snpeff_horiz <- ggplot(combined_data, aes(y = Species, x = GDI.z, color = Species)) +
  geom_beeswarm(cex = 1.2, size = 2.5, alpha = 0.8) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray30") +
  scale_color_manual(values = c("darkred", "darkgreen", "#56B4E9", "#A0522D","orange","#9467BD")) +
  labs(
    title = " zscores of GDI per Species",
    x = "zscores GDI",
    y = ""
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none",
    axis.text.y = element_text(size = 11)
  )
print(beeswarm_snpeff_horiz)

combined_data_bis <- combined_data

combined_data_bis$Species_raw <- combined_data_bis$Species

species_labels <- c(
  "Taxus" = "T. baccata",
  "Pinea" = "P. pinea",
  "Pinaster" = "P. pinaster",
  "Ash" = "F. excelsior",
  "Fagus_forg" = "F. sylvatica",
  "Sylvestris" = "P. sylvestris"
)

# Apply labels
combined_data_bis$Species <- species_labels[combined_data_bis$Species_raw]

medians_graph <- combined_data_bis %>%
  dplyr::group_by(Species_raw) %>%
  summarise(median_GDI = median(GDI, na.rm = TRUE)) %>%
  arrange(median_GDI)

ordered_species_labels <- species_labels

# Apply correct order to factors
combined_data_bis$Species <- factor(species_labels[combined_data_bis$Species_raw], levels = rev(ordered_species_labels))

medians_graph$Species_raw <- factor(species_labels[medians_graph$Species_raw], levels = rev(ordered_species_labels))

ridgeline_plot <- ggplot(combined_data_bis, aes(x = GDI, y = Species, fill = Species)) +
  geom_density_ridges(scale = 1.2, alpha = 0.8, color = "black") +
  scale_fill_manual(
    values = c(
      "T. baccata" = "darkred",
      "P. pinea" = "darkgreen",
      "P. pinaster" = "#56B4E9",
      "P. sylvestris" = "#A0522D",
      "F. sylvatica" = "#9467BD",
      "F. excelsior" = "orange"
    )
  ) +
  geom_segment(
    data = medians_graph,
    aes(x = median_GDI, xend = median_GDI,
        y = as.numeric(Species_raw) - 0.4,
        yend = as.numeric(Species_raw) + 0.4),
    inherit.aes = FALSE,
    color = "black", size = 1.5
  ) +
  labs(
    title = "Density of GDI values per species (ordered by median)",
    x = "GDI",
    y = "Species"
  ) +
  scale_y_discrete(
    labels = c(
      "F. excelsior" = expression(italic('F. excelsior')),
      "F. sylvatica" = expression(italic('F. sylvatica')),
      "P. sylvestris" = expression(italic('P. sylvestris')),
      "P. pinaster" = expression(italic('P. pinaster')),
      "P. pinea" = expression(italic('P. pinea')),
      "T. baccata" = expression(italic('T. baccata'))
    )
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.grid.major = element_blank()
  )

# Save plot
print(ridgeline_plot)

pdf("Results/all_species/GDI/figures/GDI_ridgeline_all_species.pdf", width = 8, height = 5)
print(ridgeline_plot)
dev.off()

## png 
##   2

list_species <- c("Taxus","Pinea","Pinaster","Sylvestris","Ash","Fagus_forg")

df_correlation_GD <- data.frame(
  Species = character(),
  Cor_GDI_PSD = numeric(),
  pvalue_GDI_PSD = numeric()
)

for(x in 1:length(list_species)){
  
  species <- list_species[x]

  data_FST <- get(paste0("Bayescan_FST_pop_order_", species))
 
 #GD
  data_GDI <- get(paste0("GDI_",species))
  data_tot <- merge(data_FST,data_GDI,"Population")
  
  assign(paste0("df_GDI_PSD_",species),data_tot)

 #GD
  cor_GDI_PSD <- cor(data_tot$Fst,data_tot$GDI)
  pvalue_GDI_PSD <- cor.test(data_tot$Fst,data_tot$GDI)

  df_correlation_GD <- rbind(df_correlation_GD,data.frame(Species=species,cor_GDI_PSD=cor_GDI_PSD,pvalue_GDI_PSD=pvalue_GDI_PSD$p.value))
}

df_gt_table <- df_correlation_GD %>%
  dplyr::mutate(
    `GDI ~ PSD` = paste0(round(cor_GDI_PSD, 2), " (p=", signif(pvalue_GDI_PSD, 2), ")")
  ) %>%
  dplyr::select(Species, `GDI ~ PSD`)

# Create a nice gt table
gt_table <- df_gt_table %>%
  gt() %>%
  tab_header(
    title = "Pearson's correlation between GDI and historical gene flow"
  ) %>%
  cols_label(
    Species = "Species",
    `GDI ~ PSD` = "GDI ~ PSD",
  ) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    heading.subtitle.font.size = 11,
    table.width = pct(100)
  )
gt_table

Species	GDI ~ PSD
Pearson's correlation between GDI and historical gene flow
Taxus	0.52 (p=0.004)
Pinea	0.55 (p=4.7e-05)
Pinaster	0.4 (p=0.00013)
Sylvestris	-0.1 (p=0.39)
Ash	0.48 (p=0.0061)
Fagus_forg	0.55 (p=2.6e-05)

4.1 Linear association

#merge all data
df_GDI_PSD_Taxus$Species <- factor("Taxus")
df_GDI_PSD_Pinea$Species <- factor("Pinea")
df_GDI_PSD_Pinaster$Species <- factor("Pinaster")
df_GDI_PSD_Sylvestris$Species <- factor("Sylvestris")
df_GDI_PSD_Ash$Species <- factor("Ash")
df_GDI_PSD_Fagus_forg$Species <- factor("Fagus_forg")


df_merge_species_GDI <- rbind(df_GDI_PSD_Taxus,df_GDI_PSD_Pinea,df_GDI_PSD_Pinaster,df_GDI_PSD_Sylvestris,df_GDI_PSD_Ash,df_GDI_PSD_Fagus_forg)

#linear model
Ratio_lm_GDI <- lm(GDI~Fst+Species, data=df_merge_species_GDI)

anova(Ratio_lm_GDI)

## Analysis of Variance Table
## 
## Response: GDI
##            Df   Sum Sq  Mean Sq F value    Pr(>F)    
## Fst         1 0.037616 0.037616 45.7630 6.446e-11 ***
## Species     5 0.036907 0.007381  8.9801 5.395e-08 ***
## Residuals 317 0.260565 0.000822                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(Ratio_lm_GDI)

## 
## Call:
## lm(formula = GDI ~ Fst + Species, data = df_merge_species_GDI)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.061445 -0.017440 -0.005688  0.012303  0.122270 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.045937   0.005741   8.002 2.33e-14 ***
## Fst                0.097573   0.013665   7.140 6.40e-12 ***
## SpeciesPinea      -0.037282   0.007474  -4.988 1.01e-06 ***
## SpeciesPinaster   -0.025635   0.006175  -4.151 4.25e-05 ***
## SpeciesSylvestris -0.005529   0.006376  -0.867 0.386521    
## SpeciesAsh        -0.022085   0.007530  -2.933 0.003602 ** 
## SpeciesFagus_forg -0.023687   0.006856  -3.455 0.000626 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02867 on 317 degrees of freedom
## Multiple R-squared:  0.2224, Adjusted R-squared:  0.2077 
## F-statistic: 15.11 on 6 and 317 DF,  p-value: 3.203e-15

GDI plots with Fst

species_labels <- c(
  "Taxus" = "italic(T.~baccata)",
  "Pinea" = "italic(P.~pinea)",
  "Pinaster" = "italic(P.~pinaster)",
  "Sylvestris" = "italic(P.~sylvestris)",
  "Ash" = "italic(F.~excelsior)",
  "Fagus_forg" = "italic(F.~sylvatica)"
)

# Apply labels to the data
df_renamed <- df_merge_species_GDI
df_renamed$Species <- species_labels[df_renamed$Species]
df_renamed$Species <- factor(df_renamed$Species, levels = unique(species_labels))

# Define colors and shapes using the same expressions
colors <- c(
  "italic(T.~baccata)" = "darkred",
  "italic(P.~pinea)" = "darkgreen",
  "italic(P.~pinaster)" = "#56B4E9",
  "italic(P.~sylvestris)" = "#A0522D",
  "italic(F.~sylvatica)" = "#9467BD",
  "italic(F.~excelsior)" = "orange"
)

shapes <- c(
  "italic(T.~baccata)" = 8,
  "italic(P.~pinea)" = 18,
  "italic(P.~pinaster)" = 15,
  "italic(P.~sylvestris)" = 17,
  "italic(F.~sylvatica)" = 25,
  "italic(F.~excelsior)" = 16
)

# Plot
plot <- ggplot(df_renamed, aes(x = Fst, y = GDI, color = Species)) +
  geom_point(aes(shape = Species, fill = Species),
             position = position_jitter(width = 0, height = 0.01),
             alpha = 0.6, size = 2.5, show.legend = TRUE) +
  geom_smooth(aes(color = Species, fill = Species), method = "lm", se = TRUE, linewidth = 1, alpha = 0.2, show.legend = FALSE)+
  scale_color_manual(
    values = colors,
    labels = parse(text = levels(df_renamed$Species))
  ) +
  scale_fill_manual(
    values = colors,
    guide = "none"
  ) +
  scale_shape_manual(
    values = shapes,
    guide = "none"
  ) +
  theme_minimal() +
  labs(
    title = "Mixed model fit: One line per species",
    x = "Fst",
    y = "GDI"
  ) +
  scale_y_continuous(breaks = seq(0.00, 0.20, by = 0.05)) +
  facet_wrap(~ Species, scales = "free", ncol = 2, labeller = label_parsed) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text()
  ) +
  guides(
    color = guide_legend(override.aes = list(shape = 16, fill = NA))
  )

print(plot)

# Save to PDF
pdf("Results/all_species/Relationship/GDI/figures/GDI_relationship_allspecies_new.pdf")
print(plot)
dev.off()

## png 
##   2

Atypical populations Fst included

# Rename species for labels
species_labels <- c(
  "Taxus" = "italic(T.~baccata)",
  "Pinea" = "italic(P.~pinea)",
  "Pinaster" = "italic(P.~pinaster)",
  "Sylvestris" = "italic(P.~sylvestris)",
    "Ash" = "italic(F.~excelsior)",
  "Fagus_forg" = "italic(F.~sylvatica)" 
)

# Update species names for plotting
df_renamed$Species <- species_labels[df_renamed$Species]

# Set factor levels in desired order
df_renamed$Species <- factor(df_renamed$Species, levels = c(
  "italic(T.~baccata)",
  "italic(P.~pinea)",
  "italic(P.~pinaster)",
  "italic(F.~excelsior)",
  "italic(F.~sylvatica)",
  "italic(P.~sylvestris)"
))

plot <- ggplot(df_renamed, aes(x = Fst, y = GDI, shape = Species, fill = Species, color = Species)) +
  geom_point( alpha = 0.6, size = 2.5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
  scale_color_manual(values = c(
    "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
  )) +
  scale_fill_manual(values = c(
    "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
  )) +
  scale_shape_manual(values = c(
    "italic(T.~baccata)" = 19,
    "italic(P.~pinea)" = 19,
    "italic(F.~excelsior)" = 19,
    "italic(P.~pinaster)" = 19,
    "italic(F.~sylvatica)" = 19,
    "italic(P.~sylvestris)" = 19
  )) +
  theme_minimal() +
  labs(
    title = "Mixed Model Fit: One Line per Species",
    x = expression(italic(F)[ST]),
    y = "GDI"
  ) +
  scale_y_continuous(
  limits = c(0, 0.15),
  breaks = seq(0, 0.15, by = 0.05)
)+
  facet_wrap(~ Species, scales = "free", ncol = 2, labeller = label_parsed) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

print(plot)

pdf("Results/all_species/Relationship/GDI/figures/GDI_relationship_allspecies_new_wo_lazic.pdf");
  print(plot);
  dev.off()

## png 
##   2

df_lm_results <- data.frame(
  Species = character(),
  Intercept = numeric(),
  Slope = numeric(),
  R2 = numeric(),
  p_value = numeric()
)

# Fit LM for each species independently
for (species in unique(df_renamed$Species)) {
  
  df_species <- df_renamed %>%
    filter(Species == species)
  
  # Skip species with too few points
  if (nrow(df_species) < 3) next
  
  # Simple linear model
  model <- lm(GDI ~ Fst, data = df_species)
  summary_model <- summary(model)
  
  # Store coefficients and statistics
  df_lm_results <- rbind(
    df_lm_results,
    data.frame(
      Species = species,
      Intercept = coef(summary_model)[1, 1],
      Slope = coef(summary_model)[2, 1],
      R2 = summary_model$r.squared,
      p_value = coef(summary_model)[2, 4]
    )
  )
}

# Inspect results
print(df_lm_results)

##                 Species  Intercept       Slope          R2      p_value
## 1    italic(T.~baccata) 0.01583221  0.28908235 0.267934165 4.029524e-03
## 2      italic(P.~pinea) 0.00862133  0.09766065 0.304852584 4.734091e-05
## 3   italic(P.~pinaster) 0.02393187  0.07418146 0.163770430 1.344716e-04
## 4 italic(P.~sylvestris) 0.04951160 -0.08555970 0.009490352 3.869296e-01
## 5  italic(F.~excelsior) 0.02179080  0.13313319 0.231777193 6.105838e-03
## 6  italic(F.~sylvatica) 0.01098127  0.37696348 0.305822674 2.572255e-05

Without atypical Fst populations

df_filtered <- df_renamed %>%
  dplyr::group_by(Species) %>%
  dplyr::mutate(
    Q1 = quantile(Fst, 0.25, na.rm = TRUE),
    Q3 = quantile(Fst, 0.75, na.rm = TRUE),
    IQR = Q3 - Q1,
    lower = Q1 - 2.4 * IQR,
    upper = Q3 + 2.4 * IQR
  ) %>%
  filter(Fst >= lower & Fst <= upper) %>%
  ungroup() %>%
  dplyr::select(-Q1, -Q3, -IQR, -lower, -upper)

species_labels <- c(
  "Taxus" = "italic(T.~baccata)",
  "Pinea" = "italic(P.~pinea)",
  "Pinaster" = "italic(P.~pinaster)",
  "Sylvestris" = "italic(P.~sylvestris)",
  "Ash" = "italic(F.~excelsior)",
  "Fagus_forg" = "italic(F.~sylvatica)"
)

df_renamed$Species <- species_labels[df_renamed$Species]


# Create a new data frame with updated species names
df_filtered$Species <- species_labels[df_filtered$Species]


# Set Species as a factor with desired order for consistent colors/shapes/legend
df_filtered$Species <- factor(df_filtered$Species, levels = c(
  "italic(T.~baccata)",
  "italic(P.~pinea)",
  "italic(P.~pinaster)",
  "italic(F.~excelsior)",
  "italic(F.~sylvatica)",
  "italic(P.~sylvestris)"
))


plot <- ggplot(df_filtered, aes(x = Fst, y = GDI, shape = Species, fill = Species, color = Species)) +
  geom_point(alpha = 0.6, size = 2.5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1, show.legend = FALSE) +
   scale_color_manual(values = c(
    "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
  )) +
  scale_fill_manual(values = c(
    "italic(T.~baccata)" = "darkred",
    "italic(P.~pinea)" = "darkgreen",
    "italic(F.~excelsior)" = "orange",
    "italic(P.~pinaster)" = "#56B4E9",
    "italic(F.~sylvatica)" = "#9467BD",
    "italic(P.~sylvestris)" = "#A0522D"
  )) +
  scale_shape_manual(values = c(
    "italic(T.~baccata)" = 19,
    "italic(P.~pinea)" = 19,
    "italic(F.~excelsior)" = 19,
    "italic(P.~pinaster)" = 19,
    "italic(F.~sylvatica)" = 19,
    "italic(P.~sylvestris)" = 19
  )) +
  theme_minimal() +
  labs(
    title = "Mixed Model Fit: One Line per Species",
    x = expression(italic(F)[ST]),
    y = "GDI"
  ) +
  scale_y_continuous(
  limits = c(0, 0.15),
  breaks = seq(0, 0.15, by = 0.05)
)+
  facet_wrap(~ Species, scales = "free", ncol = 2,labeller = label_parsed) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(face = "italic"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )
print(plot)

pdf("Results/all_species/Relationship/GDI/figures/GDI_relationship_allspecies_new_wo_outliers.pdf");
  print(plot);
  dev.off()

## png 
##   2

  df_correlation_GD <- data.frame(
  Species = character(),
  cor_GDI_PSD = numeric(),
  pvalue_GDI_PSD = numeric()
)

# Loop over unique species in the filtered dataset
for (species in unique(df_filtered$Species)) {
  
  df_species <- df_filtered %>%
    filter(Species == species)
  
  # Pearson correlation
  cor_result <- cor.test(df_species$Fst, df_species$GDI)
  
  df_correlation_GD <- rbind(
    df_correlation_GD,
    data.frame(
      Species = species,
      cor_GDI_PSD = cor_result$estimate,
      pvalue_GDI_PSD = cor_result$p.value
    )
  )
}
  
  df_lm_results <- data.frame(
  Species = character(),
  Intercept = numeric(),
  Slope = numeric(),
  R2 = numeric(),
  p_value = numeric()
)

# Fit LM for each species independently on the filtered dataset
for (species in unique(df_filtered$Species)) {
  
  df_species <- df_filtered %>%
    filter(Species == species)
  
  # Skip species with too few points
  if (nrow(df_species) < 3) next
  
  # Simple linear model
  model <- lm(GDI ~ Fst, data = df_species)
  summary_model <- summary(model)
  
  # Store coefficients and stats
  df_lm_results <- rbind(
    df_lm_results,
    data.frame(
      Species = species,
      Intercept = coef(summary_model)[1, 1],
      Slope = coef(summary_model)[2, 1],
      R2 = summary_model$r.squared,
      p_value = coef(summary_model)[2, 4]
    )
  )
}

# Inspect the results
print(df_lm_results)

##                 Species  Intercept        Slope           R2      p_value
## 1    italic(T.~baccata) 0.01583221  0.289082350 0.2679341646 4.029524e-03
## 2      italic(P.~pinea) 0.00862133  0.097660648 0.3048525843 4.734091e-05
## 3   italic(P.~pinaster) 0.02690169  0.050567199 0.0730301254 1.347991e-02
## 4  italic(F.~sylvatica) 0.04647749 -0.008438308 0.0000566826 9.474960e-01
## 5 italic(P.~sylvestris) 0.02899551 -0.087471083 0.0070759232 6.704174e-01
## 6  italic(F.~excelsior) 0.01528825  0.233432367 0.0516945384 1.243016e-01

  df_gt_table <- df_correlation_GD %>%
  dplyr::mutate(
    `GDI ~ PSD` = paste0(round(cor_GDI_PSD, 2), " (p=", signif(pvalue_GDI_PSD, 2), ")")
  ) %>%
  dplyr::select(Species, `GDI ~ PSD`)

gt_table <- df_gt_table %>%
  gt() %>%
  tab_header(
    title = "Pearson's correlation between GDI and historical gene flow"
  ) %>%
  cols_label(
    Species = "Species",
    `GDI ~ PSD` = "GDI ~ PSD"
  ) %>%
  tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_column_labels(everything())
  ) %>%
  tab_options(
    table.font.size = "small",
    heading.title.font.size = 14,
    heading.subtitle.font.size = 11,
    table.width = pct(100)
  )
gt_table

Species	GDI ~ PSD
Pearson's correlation between GDI and historical gene flow
italic(T.~baccata)	0.52 (p=0.004)
italic(P.~pinea)	0.55 (p=4.7e-05)
italic(P.~pinaster)	0.27 (p=0.013)
italic(F.~sylvatica)	-0.01 (p=0.95)
italic(P.~sylvestris)	-0.08 (p=0.67)
italic(F.~excelsior)	0.23 (p=0.12)

Session info

packages <- c(
  "ggplot2", "dplyr", "gt", "lme4",
  "sf", "rnaturalearth","vegan", "ggbeeswarm",
  "ggridges"
)
info <- sessionInfo()
packages_used <- info$otherPkgs[packages]
data.frame(
  Package = names(packages_used),
  Version = sapply(packages_used, function(x) x$Version)
)

##                     Package Version
## ggplot2             ggplot2   3.5.2
## dplyr                 dplyr   1.1.4
## gt                       gt  0.11.0
## lme4                   lme4  1.1-37
## sf                       sf  1.0-15
## rnaturalearth rnaturalearth   1.0.1
## vegan                 vegan   2.6-4
## ggbeeswarm       ggbeeswarm   0.7.2
## ggridges           ggridges   0.5.6

Relationships_historical_demography_genetic_metrics

Thomas Francisco

2025-06-16