rm(list = ls())
knitr::opts_chunk$set(
    message = FALSE,
    warning = FALSE,
    cache = FALSE
)

This script will calculate Nei’s genetic diversity (Hs) for all species and explain how Fst was computed using BayeScan software.

First, however, we need to format all the data in a unique way.

#Taxus 
meta_data_vcf_Taxus <- read.csv("Data/Species/Taxus_baccata/Samples/samples_taxus_baccata_adapcon_gentree.csv",h=T,sep=";",dec=",")
meta_data_pop_Taxus <- read.csv("Data/Species/Taxus_baccata/Populations/taxus_sample_29pop.csv",h=T,sep=";",dec=",")
names(meta_data_pop_Taxus)[names(meta_data_pop_Taxus) == "Elevation.DEM_90m."] <- "Altitude"

#Pinea
load("Data/Species/Pinus_pineaV4/Individuals/meta_data_vcf.Rdata")
meta_data_vcf_PineaV4 <- data.frame(meta_data_vcf[,c(1,3)],Population=meta_data_vcf$Population_ID)

load("Data/Species/Pinus_pineaV4/Population/meta_data_pop.Rdata")
meta_data_pop_PineaV4 <- data.frame(Country=meta_data_pop[,c(7)],Population=meta_data_pop$Population_ID,N=meta_data_pop[,c(2)],meta_data_pop[,c(4,5,6)])

#Pinaster
load("Data/Species/Pinus_pinaster_draft/Individual/meta_data_vcf_final.Rdata")
meta_data_vcf_Pinaster <- data.frame(VCF_ID=meta_data_vcf_final[,c(1)],Population=meta_data_vcf_final$Population_ID)

load("Data/Species/Pinus_pinaster_draft/Population/meta_data_pop_final.Rdata")
meta_data_pop_Pinaster <- data.frame(Country=meta_data_pop_final[,c(3)],Population=meta_data_pop_final$Population_ID,N="Na",meta_data_pop_final[,c(4,5,6)])

#Sylvestris
meta_data_vcf_Sylvestris <- read.csv("Data/Species/Pinus_sylvestris/individual/Psyl_individuals.csv",h=T,sep=";",dec=",")
meta_data_vcf_Sylvestris <- data.frame(meta_data_vcf_Sylvestris[,c(1,3,2)])

load("Data/Species/Pinus_sylvestris/population/meta_data_pop_retained_order.Rdata")
meta_data_pop_Sylvestris <- data.frame(meta_data_pop_retained_order[,c(2,1)],N="Na",meta_data_pop_retained_order[,c(3,4)],Altitude="Na")

#Ash
load("Data/Species/Fraxinus_excelsior/Sample/meta_data_vcf_final_418_structure.Rdata")
meta_data_vcf_final_418_structure_format <- meta_data_vcf_final_418_structure
meta_data_vcf_final_418_structure_format$VCF_ID <- gsub("\\.", "-", meta_data_vcf_final_418_structure$VCF_ID)
meta_data_vcf_Ash <- data.frame(meta_data_vcf_final_418_structure_format[,c(1,3,2)])
meta_data_vcf_Ash_format <- data.frame(meta_data_vcf_final_418_structure[,c(1,3,2)])

load("Data/Species/Fraxinus_excelsior/Population/meta_data_pop_final_31_structure.Rdata")
meta_data_pop_Ash <- data.frame(meta_data_pop_final_31_structure[,c(3,2)],N="Na",meta_data_pop_final_31_structure[,c(4,5)],Altitude="Na")

#Fagus forg
load("Data/Species/Fagus_sylvatica_forgenius/Sample/meta_data_vcf_final.Rdata")
meta_data_vcf_final$Country <- "NA"
meta_data_vcf_Fagus_forg <- data.frame(meta_data_vcf_final[,c(2,3,1)])

load("Data/Species/Fagus_sylvatica_forgenius/Population/meta_data_pop_ordered.Rdata")
meta_data_pop_Fagus_forg <- data.frame(meta_data_pop_ordered[,c(5,1)],N="Na",meta_data_pop_ordered[,c(2,3)],Altitude=meta_data_pop_ordered$ALTI)
list_genind <- c("genind_Taxus_filtered","genind_PineaV4","genind_Pinaster","genind_Ash","Genind_Fagus_forg")

for(x in 1:length(list_genind)){
  
  load(paste0("Results/all_species/FST_index/genepop/", 
                     list_genind[x], ".Rdata"))
}

Specific format for P. sylvestris and F. excelsior

pop_map_sylvestris <- read.table("Results/all_species/FST_index/genepop/popname_Sylvestris.txt")
#load genetic data
load("Data/Species/Pinus_sylvestris/genomic/new_genomic_data/Psyl_2024.filt.Rdata")

#extract genomic info for each individual
df <- data.frame(obj$tab)

genind_Sylvestris <- obj

popmap_sylvestris_genind <- data.frame(Ind=indNames(obj),Pop=obj$pop)

colnames(popmap_sylvestris_genind) <- c("INDIVIDUALS","POP_ID")

identical(indNames(obj), popmap_sylvestris_genind$INDIVIDUALS)
## [1] TRUE
genind_Ash@loc.n.all <- rep(2, length(genind_Ash@loc.n.all))

genind_Ash$all.names <- lapply(genind_Ash$all.names, function(x) x[x != "GT"])

n_loci <- length(genind_Ash@loc.fac)

# Indices of every 3rd locus factor (3rd column per SNP)
indices_to_remove <- seq(3, n_loci, by = 3)

# Filter loc.fac to keep only 1st & 2nd columns per SNP
genind_Ash@loc.fac <- genind_Ash@loc.fac[-indices_to_remove]

# Check validity
validObject(genind_Ash)
## [1] TRUE
#removed individuals not in the popmap
pop_map_Ash <- read.table("Results/all_species/FST_index/genepop/popname_Ash.txt")
colnames(pop_map_Ash) <- c("INDIVIDUALS","POP_ID")
pop_map_Ash$INDIVIDUALS <- gsub("\\.", "-", pop_map_Ash$INDIVIDUALS)

#remove individuals fitered out and add population info 
inds_to_keep <- pop_map_Ash$INDIVIDUALS

genind_Ash_filtered <- genind_Ash[indNames(genind_Ash) %in% inds_to_keep, ]

pop_order <- match(indNames(genind_Ash_filtered), pop_map_Ash$INDIVIDUALS)

identical(indNames(genind_Ash_filtered), pop_map_Ash$INDIVIDUALS[pop_order])
## [1] TRUE
# Assigner les populations
pop(genind_Ash_filtered) <- pop_map_Ash$POP_ID[pop_order]

1 Compute genetic diversity

1.1 Hierfstat Hs

list_genind <- c("genind_Taxus_filtered","genind_PineaV4","genind_Pinaster","genind_Sylvestris","genind_Ash_filtered","genind_Fagus_forg")#,
list_species <- c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")

for(x in 1:length(list_genind)){
  genind <- get(list_genind[x])
  name_species <- list_species[x]
  
Hs <- Hs(genind)

Hs <- data.frame(Hs)

Hs$Population <- row.names(Hs)

Hs <- data.frame(Population= Hs$Population, Hs=Hs$Hs)

assign(paste0("Hs_",name_species),Hs)

write.csv(Hs,paste0("Results/all_species/Genetic_diversity/",name_species,"/Hs_",name_species,".csv"),sep=";",dec=",")
}

Load genetic diversity

list_species <- c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")

for(x in 1:length(list_species)){
  name_species <- list_species[x]
  data <- read.csv(paste0("Results/all_species/Genetic_diversity/",name_species,"/Hs_",name_species,".csv"),sep=",",dec=",")
  data <- data[,-1]
  data$Population <- as.factor(data$Population)
  data$Hs <- as.numeric(data$Hs)
  assign(paste0("Hs_",name_species),data)
}

1.2 Graphical visualisation

combined_data <- data.frame()

# Step 1: Gather all data
for (x in 1:length(list_species)) {
  species <- list_species[x]
  data <- get(paste0("Hs_", species))
  data$Species <- species
  combined_data <- rbind(combined_data, data)
}

# Step 2: Define fixed x-axis settings only
x_breaks <- seq(0, 0.5, by = 0.05)
x_limits <- c(0, 0.5)

# Step 3: Plot each species (y-axis is free, x-axis is fixed)
for (species in list_species) {
  data <- subset(combined_data, Species == species)
  
  plot <- ggplot(data, aes(x = Hs)) + 
    geom_histogram(binwidth = 0.05, fill = "lightblue", color = "black", alpha = 0.8) +
    scale_x_continuous(limits = x_limits, breaks = x_breaks) +  # fixed x-axis
    # No y limit — y axis is free
    labs(
      title = paste0("Distribution of Hs values \nacross populations of ", species),
      x = "Hs genetic diversity index", 
      y = "Count of populations"
    ) +
    theme_bw() +
    theme(
      panel.grid = element_blank(),
      plot.background = element_blank(),
      panel.background = element_blank(),
      strip.text = element_text(size = 11),
      plot.title = element_text(hjust = 0.5)
    )
  
  print(plot)
  
   # Save to PDF
  pdf(paste0("Results/all_species/Genetic_diversity/figures/Hs_hierfstat_genetic_diversity_", species, ".pdf"))
  print(plot)
  dev.off()
}

combined_data$Species <- factor(
  combined_data$Species,
  levels = c("Taxus", "Pinea", "Pinaster", "Sylvestris","Fagus","Ash","Fagus_forg")
)

# Facetted histogram with custom order
combined_plot <- ggplot(combined_data, aes(x = Hs, fill = Species)) +
  geom_histogram(binwidth = 0.05, color = "black", alpha = 0.8) +
  scale_fill_manual(values = c("darkred", "#008", "#56B4E9", "darkgreen", "darkviolet","orange","yellow")) +
  scale_x_continuous(limits = c(0, 0.5), breaks = seq(0, 0.5, by = 0.05)) +
  facet_wrap(~ Species, scales = "free_y", ncol = 2) +
  labs(
    title = "Distribution of Hs values \nacross populations all species",
    x = "Hs genetic diversity index",
    y = "Count of populations"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    strip.text = element_text(size = 10)
  )

# Show the plot
print(combined_plot)

pdf(paste0("Results/all_species/Genetic_diversity/figures/Hs_hierfstat_genetic_diversity_all_species_", species, ".pdf"))
  print(combined_plot)
  dev.off()
## png 
##   2

2 BayeScan Fst

We used the genind files and ran the code in Linux for each species: sudo ./BayeScan2.1_linux64bits fichier_please.txt -o fst_bayescan -fstat

3 Djost Fst

list_genind <- c("genind_Taxus_filtered","genind_PineaV4","genind_Pinaster","Genind_Fagus_forg","genind_Ash_filtered","genind_Sylvestris")

list_species <- c("Taxus","Pinea","Pinaster","Fagus_forg","Ash","Sylvestris")
#for(x in 1:length(list_genind)){
  
#  species <- list_species[x]
#  genind <- get(list_genind[x])
  
#  Djost<-mmod::pairwise_D(genind, hsht_mean= "arithmetic")
# D_matrix <- as.matrix(Djost)
  
  # Mean Jost’s D per population (excluding self)
#  mean_D_per_pop <- rowMeans(D_matrix, na.rm = TRUE)
  
#  mean_D_per_pop_df <- data.frame(mean_D_per_pop)
#mean_D_per_pop_df$Population <- row.names(mean_D_per_pop_df)

#assign(paste0("Djost_",species),mean_D_per_pop_df[,c(2,1)])

#save_name <- paste0("Djost_mean_perpop_", species, ".RData")
#  save(list = paste0("Djost_", species), file = paste0("Results/all_species/FST_index/D_jost/",save_name))

#}

4 Correlation between BayeScan and Djost Fst

list_species <- c("Taxus","Pinea","Pinaster","Fagus_forg","Ash","Sylvestris")
for(x in 1:length(list_genind)){
  
  species <- list_species[x]
  
  Djost<- get(load(paste0("Results/all_species/FST_index/D_jost/Djost_mean_perpop_",species,".Rdata")))
  
  # Mean Jost’s D per population (excluding self)
load_path <- paste0("Results/all_species/FST_index/Bayescan_FST_pop_order_", species, ".Rdata")
  load(load_path)

  # Dynamically get the Bayescan data (assuming consistent variable naming)
  bayescan_varname <- paste0("Bayescan_FST_pop_order_", species)
  bayescan_df <- get(bayescan_varname)
  
  # Merge
  merge_df <- merge(Djost, bayescan_df, by = "Population")
  names(merge_df)[2] <- "Djost"
  # Compute and print correlation matrix
  cat("\n==== Correlation for", species, "====\n")
  print(cor(merge_df[ , -1], use = "complete.obs"))
}
## 
## ==== Correlation for Taxus ====
##           Djost       Fst
## Djost 1.0000000 0.9316979
## Fst   0.9316979 1.0000000
## 
## ==== Correlation for Pinea ====
##           Djost       Fst
## Djost 1.0000000 0.9439741
## Fst   0.9439741 1.0000000
## 
## ==== Correlation for Pinaster ====
##           Djost       Fst
## Djost 1.0000000 0.9664855
## Fst   0.9664855 1.0000000
## 
## ==== Correlation for Fagus_forg ====
##           Djost       Fst
## Djost 1.0000000 0.9933868
## Fst   0.9933868 1.0000000
## 
## ==== Correlation for Ash ====
##           Djost       Fst
## Djost 1.0000000 0.9632712
## Fst   0.9632712 1.0000000
## 
## ==== Correlation for Sylvestris ====
##           Djost       Fst
## Djost 1.0000000 0.9101426
## Fst   0.9101426 1.0000000

Session info

packages <- c(
  "hierfstat", "dplyr", "genepop", "writexl",
  "mmod", "ggplot2","reshape2","textshape","tidyr",
  "vcfR","readr","radiator"
)
info <- sessionInfo()
packages_used <- info$otherPkgs[packages]
data.frame(
  Package = names(packages_used),
  Version = sapply(packages_used, function(x) x$Version)
)
##             Package Version
## hierfstat hierfstat  0.5-11
## dplyr         dplyr   1.1.4
## genepop     genepop   1.2.2
## writexl     writexl   1.5.0
## mmod           mmod   1.3.3
## ggplot2     ggplot2   3.5.2
## reshape2   reshape2   1.4.4
## textshape textshape   1.7.5
## tidyr         tidyr   1.3.1
## vcfR           vcfR  1.15.0
## readr         readr   2.1.5
## radiator   radiator   1.3.6