rm(list = ls())
knitr::opts_chunk$set(
message = FALSE,
warning = FALSE,
cache = FALSE
)
This script will calculate Nei’s genetic diversity (Hs) for all species and explain how Fst was computed using BayeScan software.
First, however, we need to format all the data in a unique way.
#Taxus
meta_data_vcf_Taxus <- read.csv("Data/Species/Taxus_baccata/Samples/samples_taxus_baccata_adapcon_gentree.csv",h=T,sep=";",dec=",")
meta_data_pop_Taxus <- read.csv("Data/Species/Taxus_baccata/Populations/taxus_sample_29pop.csv",h=T,sep=";",dec=",")
names(meta_data_pop_Taxus)[names(meta_data_pop_Taxus) == "Elevation.DEM_90m."] <- "Altitude"
#Pinea
load("Data/Species/Pinus_pineaV4/Individuals/meta_data_vcf.Rdata")
meta_data_vcf_PineaV4 <- data.frame(meta_data_vcf[,c(1,3)],Population=meta_data_vcf$Population_ID)
load("Data/Species/Pinus_pineaV4/Population/meta_data_pop.Rdata")
meta_data_pop_PineaV4 <- data.frame(Country=meta_data_pop[,c(7)],Population=meta_data_pop$Population_ID,N=meta_data_pop[,c(2)],meta_data_pop[,c(4,5,6)])
#Pinaster
load("Data/Species/Pinus_pinaster_draft/Individual/meta_data_vcf_final.Rdata")
meta_data_vcf_Pinaster <- data.frame(VCF_ID=meta_data_vcf_final[,c(1)],Population=meta_data_vcf_final$Population_ID)
load("Data/Species/Pinus_pinaster_draft/Population/meta_data_pop_final.Rdata")
meta_data_pop_Pinaster <- data.frame(Country=meta_data_pop_final[,c(3)],Population=meta_data_pop_final$Population_ID,N="Na",meta_data_pop_final[,c(4,5,6)])
#Sylvestris
meta_data_vcf_Sylvestris <- read.csv("Data/Species/Pinus_sylvestris/individual/Psyl_individuals.csv",h=T,sep=";",dec=",")
meta_data_vcf_Sylvestris <- data.frame(meta_data_vcf_Sylvestris[,c(1,3,2)])
load("Data/Species/Pinus_sylvestris/population/meta_data_pop_retained_order.Rdata")
meta_data_pop_Sylvestris <- data.frame(meta_data_pop_retained_order[,c(2,1)],N="Na",meta_data_pop_retained_order[,c(3,4)],Altitude="Na")
#Ash
load("Data/Species/Fraxinus_excelsior/Sample/meta_data_vcf_final_418_structure.Rdata")
meta_data_vcf_final_418_structure_format <- meta_data_vcf_final_418_structure
meta_data_vcf_final_418_structure_format$VCF_ID <- gsub("\\.", "-", meta_data_vcf_final_418_structure$VCF_ID)
meta_data_vcf_Ash <- data.frame(meta_data_vcf_final_418_structure_format[,c(1,3,2)])
meta_data_vcf_Ash_format <- data.frame(meta_data_vcf_final_418_structure[,c(1,3,2)])
load("Data/Species/Fraxinus_excelsior/Population/meta_data_pop_final_31_structure.Rdata")
meta_data_pop_Ash <- data.frame(meta_data_pop_final_31_structure[,c(3,2)],N="Na",meta_data_pop_final_31_structure[,c(4,5)],Altitude="Na")
#Fagus forg
load("Data/Species/Fagus_sylvatica_forgenius/Sample/meta_data_vcf_final.Rdata")
meta_data_vcf_final$Country <- "NA"
meta_data_vcf_Fagus_forg <- data.frame(meta_data_vcf_final[,c(2,3,1)])
load("Data/Species/Fagus_sylvatica_forgenius/Population/meta_data_pop_ordered.Rdata")
meta_data_pop_Fagus_forg <- data.frame(meta_data_pop_ordered[,c(5,1)],N="Na",meta_data_pop_ordered[,c(2,3)],Altitude=meta_data_pop_ordered$ALTI)
list_genind <- c("genind_Taxus_filtered","genind_PineaV4","genind_Pinaster","genind_Ash","Genind_Fagus_forg")
for(x in 1:length(list_genind)){
load(paste0("Results/all_species/FST_index/genepop/",
list_genind[x], ".Rdata"))
}
Specific format for P. sylvestris and F. excelsior
pop_map_sylvestris <- read.table("Results/all_species/FST_index/genepop/popname_Sylvestris.txt")
#load genetic data
load("Data/Species/Pinus_sylvestris/genomic/new_genomic_data/Psyl_2024.filt.Rdata")
#extract genomic info for each individual
df <- data.frame(obj$tab)
genind_Sylvestris <- obj
popmap_sylvestris_genind <- data.frame(Ind=indNames(obj),Pop=obj$pop)
colnames(popmap_sylvestris_genind) <- c("INDIVIDUALS","POP_ID")
identical(indNames(obj), popmap_sylvestris_genind$INDIVIDUALS)
## [1] TRUE
genind_Ash@loc.n.all <- rep(2, length(genind_Ash@loc.n.all))
genind_Ash$all.names <- lapply(genind_Ash$all.names, function(x) x[x != "GT"])
n_loci <- length(genind_Ash@loc.fac)
# Indices of every 3rd locus factor (3rd column per SNP)
indices_to_remove <- seq(3, n_loci, by = 3)
# Filter loc.fac to keep only 1st & 2nd columns per SNP
genind_Ash@loc.fac <- genind_Ash@loc.fac[-indices_to_remove]
# Check validity
validObject(genind_Ash)
## [1] TRUE
#removed individuals not in the popmap
pop_map_Ash <- read.table("Results/all_species/FST_index/genepop/popname_Ash.txt")
colnames(pop_map_Ash) <- c("INDIVIDUALS","POP_ID")
pop_map_Ash$INDIVIDUALS <- gsub("\\.", "-", pop_map_Ash$INDIVIDUALS)
#remove individuals fitered out and add population info
inds_to_keep <- pop_map_Ash$INDIVIDUALS
genind_Ash_filtered <- genind_Ash[indNames(genind_Ash) %in% inds_to_keep, ]
pop_order <- match(indNames(genind_Ash_filtered), pop_map_Ash$INDIVIDUALS)
identical(indNames(genind_Ash_filtered), pop_map_Ash$INDIVIDUALS[pop_order])
## [1] TRUE
# Assigner les populations
pop(genind_Ash_filtered) <- pop_map_Ash$POP_ID[pop_order]
list_genind <- c("genind_Taxus_filtered","genind_PineaV4","genind_Pinaster","genind_Sylvestris","genind_Ash_filtered","genind_Fagus_forg")#,
list_species <- c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")
for(x in 1:length(list_genind)){
genind <- get(list_genind[x])
name_species <- list_species[x]
Hs <- Hs(genind)
Hs <- data.frame(Hs)
Hs$Population <- row.names(Hs)
Hs <- data.frame(Population= Hs$Population, Hs=Hs$Hs)
assign(paste0("Hs_",name_species),Hs)
write.csv(Hs,paste0("Results/all_species/Genetic_diversity/",name_species,"/Hs_",name_species,".csv"),sep=";",dec=",")
}
Load genetic diversity
list_species <- c("Taxus", "Pinea", "Pinaster", "Sylvestris","Ash","Fagus_forg")
for(x in 1:length(list_species)){
name_species <- list_species[x]
data <- read.csv(paste0("Results/all_species/Genetic_diversity/",name_species,"/Hs_",name_species,".csv"),sep=",",dec=",")
data <- data[,-1]
data$Population <- as.factor(data$Population)
data$Hs <- as.numeric(data$Hs)
assign(paste0("Hs_",name_species),data)
}
combined_data <- data.frame()
# Step 1: Gather all data
for (x in 1:length(list_species)) {
species <- list_species[x]
data <- get(paste0("Hs_", species))
data$Species <- species
combined_data <- rbind(combined_data, data)
}
# Step 2: Define fixed x-axis settings only
x_breaks <- seq(0, 0.5, by = 0.05)
x_limits <- c(0, 0.5)
# Step 3: Plot each species (y-axis is free, x-axis is fixed)
for (species in list_species) {
data <- subset(combined_data, Species == species)
plot <- ggplot(data, aes(x = Hs)) +
geom_histogram(binwidth = 0.05, fill = "lightblue", color = "black", alpha = 0.8) +
scale_x_continuous(limits = x_limits, breaks = x_breaks) + # fixed x-axis
# No y limit — y axis is free
labs(
title = paste0("Distribution of Hs values \nacross populations of ", species),
x = "Hs genetic diversity index",
y = "Count of populations"
) +
theme_bw() +
theme(
panel.grid = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
strip.text = element_text(size = 11),
plot.title = element_text(hjust = 0.5)
)
print(plot)
# Save to PDF
pdf(paste0("Results/all_species/Genetic_diversity/figures/Hs_hierfstat_genetic_diversity_", species, ".pdf"))
print(plot)
dev.off()
}
combined_data$Species <- factor(
combined_data$Species,
levels = c("Taxus", "Pinea", "Pinaster", "Sylvestris","Fagus","Ash","Fagus_forg")
)
# Facetted histogram with custom order
combined_plot <- ggplot(combined_data, aes(x = Hs, fill = Species)) +
geom_histogram(binwidth = 0.05, color = "black", alpha = 0.8) +
scale_fill_manual(values = c("darkred", "#008", "#56B4E9", "darkgreen", "darkviolet","orange","yellow")) +
scale_x_continuous(limits = c(0, 0.5), breaks = seq(0, 0.5, by = 0.05)) +
facet_wrap(~ Species, scales = "free_y", ncol = 2) +
labs(
title = "Distribution of Hs values \nacross populations all species",
x = "Hs genetic diversity index",
y = "Count of populations"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
strip.text = element_text(size = 10)
)
# Show the plot
print(combined_plot)
pdf(paste0("Results/all_species/Genetic_diversity/figures/Hs_hierfstat_genetic_diversity_all_species_", species, ".pdf"))
print(combined_plot)
dev.off()
## png
## 2
We used the genind files and ran the code in Linux for each species: sudo ./BayeScan2.1_linux64bits fichier_please.txt -o fst_bayescan -fstat
list_genind <- c("genind_Taxus_filtered","genind_PineaV4","genind_Pinaster","Genind_Fagus_forg","genind_Ash_filtered","genind_Sylvestris")
list_species <- c("Taxus","Pinea","Pinaster","Fagus_forg","Ash","Sylvestris")
#for(x in 1:length(list_genind)){
# species <- list_species[x]
# genind <- get(list_genind[x])
# Djost<-mmod::pairwise_D(genind, hsht_mean= "arithmetic")
# D_matrix <- as.matrix(Djost)
# Mean Jost’s D per population (excluding self)
# mean_D_per_pop <- rowMeans(D_matrix, na.rm = TRUE)
# mean_D_per_pop_df <- data.frame(mean_D_per_pop)
#mean_D_per_pop_df$Population <- row.names(mean_D_per_pop_df)
#assign(paste0("Djost_",species),mean_D_per_pop_df[,c(2,1)])
#save_name <- paste0("Djost_mean_perpop_", species, ".RData")
# save(list = paste0("Djost_", species), file = paste0("Results/all_species/FST_index/D_jost/",save_name))
#}
list_species <- c("Taxus","Pinea","Pinaster","Fagus_forg","Ash","Sylvestris")
for(x in 1:length(list_genind)){
species <- list_species[x]
Djost<- get(load(paste0("Results/all_species/FST_index/D_jost/Djost_mean_perpop_",species,".Rdata")))
# Mean Jost’s D per population (excluding self)
load_path <- paste0("Results/all_species/FST_index/Bayescan_FST_pop_order_", species, ".Rdata")
load(load_path)
# Dynamically get the Bayescan data (assuming consistent variable naming)
bayescan_varname <- paste0("Bayescan_FST_pop_order_", species)
bayescan_df <- get(bayescan_varname)
# Merge
merge_df <- merge(Djost, bayescan_df, by = "Population")
names(merge_df)[2] <- "Djost"
# Compute and print correlation matrix
cat("\n==== Correlation for", species, "====\n")
print(cor(merge_df[ , -1], use = "complete.obs"))
}
##
## ==== Correlation for Taxus ====
## Djost Fst
## Djost 1.0000000 0.9316979
## Fst 0.9316979 1.0000000
##
## ==== Correlation for Pinea ====
## Djost Fst
## Djost 1.0000000 0.9439741
## Fst 0.9439741 1.0000000
##
## ==== Correlation for Pinaster ====
## Djost Fst
## Djost 1.0000000 0.9664855
## Fst 0.9664855 1.0000000
##
## ==== Correlation for Fagus_forg ====
## Djost Fst
## Djost 1.0000000 0.9933868
## Fst 0.9933868 1.0000000
##
## ==== Correlation for Ash ====
## Djost Fst
## Djost 1.0000000 0.9632712
## Fst 0.9632712 1.0000000
##
## ==== Correlation for Sylvestris ====
## Djost Fst
## Djost 1.0000000 0.9101426
## Fst 0.9101426 1.0000000
Session info
packages <- c(
"hierfstat", "dplyr", "genepop", "writexl",
"mmod", "ggplot2","reshape2","textshape","tidyr",
"vcfR","readr","radiator"
)
info <- sessionInfo()
packages_used <- info$otherPkgs[packages]
data.frame(
Package = names(packages_used),
Version = sapply(packages_used, function(x) x$Version)
)
## Package Version
## hierfstat hierfstat 0.5-11
## dplyr dplyr 1.1.4
## genepop genepop 1.2.2
## writexl writexl 1.5.0
## mmod mmod 1.3.3
## ggplot2 ggplot2 3.5.2
## reshape2 reshape2 1.4.4
## textshape textshape 1.7.5
## tidyr tidyr 1.3.1
## vcfR vcfR 1.15.0
## readr readr 2.1.5
## radiator radiator 1.3.6