1 Introduction

In this script, we studied the population structure of T. baccata using the population structure dataset, which contains 8252 SNPs, 452 trees and 4.37% of NAs. Population structure was assessed using principal component analysis (PCA) and STRUCTURE software.

2 Distribution map

2.1 Common garden and range-wide populations

First, we visualised the populations in geographical space to gain preliminary insight into spatial structure before examining genetic structure. We plotted the 29 populations used for calculating genomic indices, as well as the 26 populations planted in experimental site (clonal bank), which are used to evaluate genomic offset predictions.

#Load metadata
meta_data_vcf <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Samples/samples_taxus_baccata_adapcon_gentree.csv",h=T,sep=";",dec=",")

meta_data_pop <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Populations/taxus_sample_29pop.csv",h=T,sep=";",dec=",")

meta_data_pop_order <- meta_data_pop[order(meta_data_pop$Population),]

shapefile <- st_read("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/shapefiles/Taxus_baccata_plg_clip.shp")

## Reading layer `Taxus_baccata_plg_clip' from data source 
##   `C:\Users\tfrancisco\Documents\Thesis\Data\Species\Taxus_baccata\shapefiles\Taxus_baccata_plg_clip.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 55 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -10.11699 ymin: 32.58137 xmax: 55.22354 ymax: 62.89065
## Geodetic CRS:  WGS 84

#Clonal bank location
clonal_bank_lat <- 41.23
clonal_bank_long <- -4.0125

#Meta_data Clonal bank populations
meta_data_CG <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Populations/Taxus_Spain_pop.csv",h=T,sep=";",dec=",")
meta_data_CG$Lat <- as.numeric(meta_data_CG$Lat)
meta_data_CG$Lon <- as.numeric(meta_data_CG$Lon)

#29 pop
meta_data_pop_order$Latitude <- as.numeric(meta_data_pop_order$Latitude)
meta_data_pop_order$Longitude<- as.numeric(meta_data_pop_order$Longitude)

meta_data_pop_order$pop_abbrev <- c("ANC","BLI","BOM","BRA","BUJA","BUJG","CAR","CED","PDT","UMB","GOR","HAR","HHR","JUR","CHO","OLY","VOU","PAT","RAS","RWM","BAU","SAV","SDF","SUE","UNS","VAL","VIS","WAL","YEW")

admin <- ne_countries(scale = "medium", returnclass = "sf")

plot <- ggplot(data = meta_data_pop_order) + 
  geom_sf(data = admin, fill = gray(0.92), size = 0) +
  geom_sf(data = shapefile, fill = "#d9f2d9", color = "black", size = 0.2) +  # Adding shapefile
  geom_point(data=meta_data_CG,aes(x = Lon, y = Lat), fill = "black", size = 2, shape = 17, color = "#FF99FF") +
  geom_point(aes(x = Longitude, y = Latitude), fill = "black", size = 2, shape = 21, color = "black") +
  geom_star(aes(x = clonal_bank_long, y = clonal_bank_lat), color = "red", size = 3.5, fill = "red", starshape = 1,show.legend=TRUE) +
  geom_sf(data = admin, fill = NA, size = 0.1) +  
  coord_sf(xlim = c(-10.5, 30), ylim = c(35, 62), expand = FALSE) +
  geom_text(data = meta_data_pop_order, aes(x = Longitude, y = Latitude+0.7, label = pop_abbrev,fontface = "bold"), size = 2.5, color = "black") +
  annotation_scale(location = "br", width_hint = 0.3) + # Scale bar at bottom left
  annotation_north_arrow(location = "tl", which_north = "true", 
                         pad_x = unit(-0.2, "cm"), pad_y = unit(0.08, "cm"),
                         style = north_arrow_fancy_orienteering) + # Compass rose
  xlab("Longitude") + ylab("Latitude") +
  theme_bw(base_size = 11) +
  theme(
    legend.position = "none", # Remove the legend
    strip.text = element_text(size = 11),
    plot.title = element_text(hjust = 0.5, color = "Black", face = "italic")
  ) 

print(plot)

## Warning in geom_star(aes(x = clonal_bank_long, y = clonal_bank_lat), color = "red", : All aesthetics have length 1, but the data has 29 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.

## Scale on map varies by more than 10%, scale bar may be inaccurate

#pdf("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/map_sampled/Distribution map_CG_Adapt_new.pdf");print(plot);dev.off()

2.2 All populations (CG+range-wide+SSR)

3 PCA

First, we performed a PCA to examine population structure at both the individual and population levels. We also assessed whether the structure of certain individuals or populations was influenced by the percentage of missing data. To do this, point sizes on the PCA plots were scaled according to the proportion of missing data, while point positions reflected genetic composition. PCA at the individual level was conducted using the PCADAPT function, and at the population level using RDA without covariables (effectively performing PCA; see the RDA_candidate_detection script for details on redundancy analysis).

3.1 Individual-level

We first performed a scree plot analysis to determine how much of the genetic variation is explained by the PCs axis.

#Selection number of axis => screeplot
#Initial parameters
load(file = "C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/genetic_new/structure/Dataset_PCA_8252SNP_452IND.Rdata")
gen_data_new <- Dataset_PCA_8252SNP_452IND[,-1]; row.names(gen_data_new) <- Dataset_PCA_8252SNP_452IND$VCF_ID
data <- gen_data_new
k <- 10 #Its the number of groups set arbitrary

#Function
screeplot_genetic <- function(data,k){
  
  data_PCa <- data  %>% dplyr::select(-c("na_percentage_indiv")) %>% t() %>% data.frame()
#Format for pcadapt
data_pcadapt <- read.pcadapt(data_PCa)

#Perform the Pca
Pcadapt_results <-pcadapt(data_pcadapt,K=k,method = "mahalanobis")

#Choose the number of axis
 #Create ggplot object
 screeplot_data <- data.frame(
    PC_axis = seq_along(Pcadapt_results$singular.values),
    Explained_variance = Pcadapt_results$singular.values^2*100
  )
 
 screeplot_data$PC_axis <- as.factor(screeplot_data$PC_axis)

  gg_screeplot <- ggplot(screeplot_data, aes(x = PC_axis, y = Explained_variance)) +
    geom_bar(stat = "identity", fill = "darkgrey") +
    labs(title = "PCA Scree Plot", x = "Principal Component", y = "Explained Variance (%)") +
    theme_minimal(base_size = 14) +
    theme(plot.title = element_text(hjust = 0.5, face = "bold"))
  
  #  geom_bar(stat = "identity") +
  # labs(x = "PC axis", y = "Explained Variance (%)") +
   # theme_minimal()
  
  
  #Display the screeplot
  gg_screeplot
  print(gg_screeplot)
  
  #Save
pdf("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/PCA/screeplot_PCA_indiv_level.pdf");print(gg_screeplot);dev.off()

  return(Pcadapt_results)
}

Pcadapt_object <- screeplot_genetic(data,k)

Interpretation: The first two PCA axes were retained as the most informative for detecting global patterns of population structure.

Plot of the PCA.

#Meta_data
meta_data_vcf_452 <- meta_data_vcf[meta_data_vcf$VCF_ID %in% row.names(gen_data_new),]

#Initial parameters
data <-Pcadapt_object #Dataframe coming from the return of the screeplot_genetic function 
names <- gen_data_new #Initial dataframe
axis <- 2 #Number of retained axis based on the screeplot
meta_data <- meta_data_vcf_452#Meta data


#Color per country

#For loop to do it for multiple PC axis like 1-2 but also 1-3, 2-3 etc
 Pca_genetic <- function(data, names, num_axes, meta_data) {
   #We can create 2 loops that would perform the Pca_function for each combination of axis retained: 
  for (i in 1:(num_axes - 1)) { #Here, the first loop will select axis between 1 and numb_axis-1 so basically, if we take 3 axis, i will take values of 1 then 2, so PCx will ether be 1 or 2.
    for (j in (i + 1):num_axes) { #Here, the second loop within the first one will take the values of j from i +1 to num axis so if num axis=3, we will have j ranging from 2 to 3 when i=1 and j=3 when i =2
      #In summary, the loops perform the function for all combinations of axes: 1–2, 1–3, and 2–3 when three axes are retained.
      
      PCx <- i
      PCy <- j
      
      #Calculate explained variance for chosen PCs
      explained_variance <- data.frame(t(round((data$singular.values[PCx:PCy]^2) * 100, digits = 1)))
      colnames(explained_variance) <- c("PCx", "PCy")
      
      #store the scores along the retained PC axis in a dataframe
      score_Pca_imputed <- data.frame(data$scores[, c(PCx,PCy)], row.names (names))
      colnames(score_Pca_imputed) <- c(paste0("PC", PCx), paste0("PC", PCy), "VCF_ID")
      
      #Add country, population information
      PCa_df_imputed <- merge(score_Pca_imputed, meta_data, "VCF_ID")
      
      #Genetic PCA
      ggplot_representation <- ggplot() +
        geom_point(data = PCa_df_imputed, aes(PCa_df_imputed[,2],PCa_df_imputed[,3],size = names$na_percentage, color = Country)) +
        scale_colour_manual(name = "Country",
                            values = c("orangered3", "gold2", "darkorchid3", "navyblue", "turquoise2", "green3", "blue", "red", "black", "gray", "orange", "darkgreen"),guide = guide_legend(override.aes = list(size = 4))) +
        scale_size(name = "Missing data (%)", breaks = c(0, 5, 10, 14), labels = c("0", "5", "10", "15")) +
        xlab(paste0("PC", PCx, " ", "(", explained_variance$PCx, "%", ")")) + 
        ylab(paste0("PC", PCy, " ", "(", explained_variance$PCy, "%", ")"))+
        theme_bw(base_size = 11) +
        theme(legend.position = "right", 
              legend.title.align = 0.5,  # Center the title above the legend
              panel.grid = element_blank(), 
              strip.text = element_text(size = 11),
              plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
              axis.title.x = element_text(size = 14),
              axis.title.y = element_text(size = 14),
              axis.text.x = element_text(size = 12),
              axis.text.y = element_text(size = 12),
              legend.title = element_text(size = 14),
              legend.text = element_text(size = 12),
              legend.key = element_rect(fill = "transparent", color = NA))
      
      print(ggplot_representation)
#Save
pdf("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/PCA/PCA_indiv_level_color_countries.pdf");print(ggplot_representation);dev.off()      
      
    }
  }
 }
 
#Run the function 
Pca_genetic(data,names,axis,meta_data)

## Warning: The `legend.title.align` argument of `theme()` is deprecated as of ggplot2
## 3.5.0.
## ℹ Please use theme(legend.title = element_text(hjust)) instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Pca_genetic <- function(data, names, num_axes, meta_data) {
   
  for (i in 1:(num_axes - 1)) { 
    for (j in (i + 1):num_axes) { 
      
      PCx <- i
      PCy <- j
      
      explained_variance <- data.frame(t(round((data$singular.values[PCx:PCy]^2) * 100, digits = 1)))
      colnames(explained_variance) <- c("PCx", "PCy")
      
      score_Pca_imputed <- data.frame(data$scores[, c(PCx,PCy)], row.names (names))
      colnames(score_Pca_imputed) <- c(paste0("PC", PCx), paste0("PC", PCy), "VCF_ID")
      
      PCa_df_imputed <- merge(score_Pca_imputed, meta_data, "VCF_ID")
      
      ggplot_representation <- ggplot() +
        geom_point(data = PCa_df_imputed, aes(PCa_df_imputed[,2],PCa_df_imputed[,3],size = names$na_percentage, color = Population)) +
        scale_color_manual(name="Country", values=c("#1f77b4","navyblue", "#2ca02c", "#d62728", "#9467bd",
            "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf",
            "blue", "#ffbb78", "#98df8a", "#ff9896", "#c5b0d5",
            "#c49c94", "#f7b6d2", "black", "#dbdb8d", "#9edae5",
            "darkgreen", "#c49c94", "#c7c7c7","orangered3","green3" ,
            "gold2","white", "#ff7f0e","darkorchid3" )
            ,guide = guide_legend(override.aes = list(size = 4))) +
        scale_size(name = "Missing data (%)", breaks = c(0, 5, 10, 14), labels = c("0", "5", "10", "15")) +
        xlab(paste0("PC", PCx, " ", "(", explained_variance$PCx, "%", ")")) + 
        ylab(paste0("PC", PCy, " ", "(", explained_variance$PCy, "%", ")"))+
        theme_bw(base_size = 11) +
        theme(legend.position = "right", 
              legend.title.align = 0.5,  # Center the title above the legend
              panel.grid = element_blank(), 
              strip.text = element_text(size = 11),
              plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
              axis.title.x = element_text(size = 14),
              axis.title.y = element_text(size = 14),
              axis.text.x = element_text(size = 12),
              axis.text.y = element_text(size = 12),
              legend.title = element_text(size = 14),
              legend.text = element_text(size = 12),
              legend.key = element_rect(fill = "transparent", color = NA))
      
      print(ggplot_representation)
      
      #Save
pdf("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/PCA/PCA_indiv_level_color_populations.pdf");print(ggplot_representation);dev.off()    
    }
  }
}

#run the function 
Pca_genetic(data,names,axis,meta_data)

Without the percentage of missing data

#Meta_data
meta_data_vcf_452 <- meta_data_vcf[meta_data_vcf$VCF_ID %in% row.names(gen_data_new),]

#Initial parameters
data <-Pcadapt_object #Dataframe coming from the return of the screeplot_genetic function 
names <- gen_data_new #Initial dataframe
axis <- 2 #Number of retained axis based on the screeplot
meta_data <- meta_data_vcf_452#Meta data


#Color per country

#For loop to do it for multiple PC axis like 1-2 but also 1-3, 2-3 etc
 Pca_genetic <- function(data, names, num_axes, meta_data) {
   #We can create 2 loops that would perform the Pca_function for each combination of axis retained: 
  for (i in 1:(num_axes - 1)) { #Here, the first loop will select axis between 1 and numb_axis-1 so basically, if we take 3 axis, i will take values of 1 then 2, so PCx will ether be 1 or 2.
    for (j in (i + 1):num_axes) { #Here, the second loop within the first one will take the values of j from i +1 to num axis so if num axis=3, we will have j ranging from 2 to 3 when i=1 and j=3 when i =2
      #In summary, the loops perform the function for all combinations of axes: 1–2, 1–3, and 2–3 when three axes are retained. 
      
      PCx <- i
      PCy <- j
      
      #Calculate explained variance for chosen PCs
      explained_variance <- data.frame(t(round((data$singular.values[PCx:PCy]^2) * 100, digits = 1)))
      colnames(explained_variance) <- c("PCx", "PCy")
      
      #Store the scores along the retained PC axis in a dataframe
      score_Pca_imputed <- data.frame(data$scores[, c(PCx,PCy)], row.names (names))
      colnames(score_Pca_imputed) <- c(paste0("PC", PCx), paste0("PC", PCy), "VCF_ID")
      
      #Add country, population information
      PCa_df_imputed <- merge(score_Pca_imputed, meta_data, "VCF_ID")
      
      #Genetic PCA
      ggplot_representation <- ggplot() +
        geom_point(data = PCa_df_imputed, aes(PCa_df_imputed[,2],PCa_df_imputed[,3], color = Country,size=2.5)) +
        scale_size_identity(guide = "none") +  # Remove size legend
        scale_colour_manual(name = "Country",
                            values = c("orangered3", "gold2", "darkorchid3", "navyblue", "turquoise2", "green3", "blue", "red", "black", "gray", "orange", "darkgreen"),
                            guide = guide_legend(override.aes = list(size = 4))) +  #Match legend +
        xlab(paste0("PC", PCx, " ", "(", explained_variance$PCx, "%", ")")) + 
        ylab(paste0("PC", PCy, " ", "(", explained_variance$PCy, "%", ")"))+
        theme_bw(base_size = 11) +
        theme(legend.position = "right", 
              legend.title.align = 0.5,  #Center the title above the legend
              panel.grid = element_blank(), 
              strip.text = element_text(size = 11),
              plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
              axis.title.x = element_text(size = 14),
              axis.title.y = element_text(size = 14),
              axis.text.x = element_text(size = 12),
              axis.text.y = element_text(size = 12),
              legend.title = element_text(size = 14),
              legend.text = element_text(size = 12),
              legend.key = element_rect(fill = "transparent", color = NA))
      
      print(ggplot_representation)
#Save
pdf("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/PCA/PCA_indiv_level_color_countries_wo_label_NA.pdf");print(ggplot_representation);dev.off()      
      
    }
  }
 }
 
#Run the function 
Pca_genetic(data,names,axis,meta_data)

3.2 Population-level

Finally, we calculated PCA at the population level to obtain population scores, which are used to account for population structure in pRDA GEA analyses. Because allelic frequencies were used, the PCADAPT function could not be applied, as it does not support allelic frequency data. Instead, we used the RDA function, which performs PCA equivalently when no covariates are included.

As a first step, we checked the number of principal components (PCs) to retain.

load("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/genetic_new/data_allelic_frequencies_non_imp_T_adapcon_gentree_452_8252snps.Rdata")
gen_data_new <- data_allelic_frequencies_non_imp_T_adapcon_gentree_452_8252snps
data <- gen_data_new
k <- 10

screeplot_genetic <- function(data, k){
  
  #Perform PCA
  RDA_structure_genetic <- rda(data, scale = TRUE)
  
  #Compute explained variance
  explained_variance <- (RDA_structure_genetic$CA$eig / sum(RDA_structure_genetic$CA$eig)) * 100
  explained_variance_df <- data.frame(PC = seq_along(explained_variance), Variance = explained_variance)
  
  #Create scree plot using explained variance
  screeplot_graph <- ggplot(explained_variance_df[1:k, ], aes(x = factor(PC), y = Variance)) +
    geom_bar(stat = "identity", fill = "darkgrey") +
    labs(title = "PCA Scree Plot", x = "Principal Component", y = "Explained Variance (%)") +
    theme_minimal(base_size = 14) +
    theme(plot.title = element_text(hjust = 0.5, face = "bold"))
  
  #Save the plot
  ggsave("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/PCA/screeplot_PCA_Pop_level.pdf",
         plot = screeplot_graph, width = 6, height = 4)
  
  return(screeplot_graph)
}

#Run the function and save the object
Pcadapt_object <- screeplot_genetic(data, 10)

#Display the plot
print(Pcadapt_object)

RDA_structure_genetic <- rda(data, scale=T)

Interpretation: The first two axes were retained for population-level PCA.

PCA at the population level:

meta_data_vcf_452 <- meta_data_vcf[meta_data_vcf$Population %in% row.names(gen_data_new),]

#Initial parameters
#data <-Pcadapt_object #dataframe coming from the return of the screeplot_genetic function 

axis <- 2 #Number of retained axis based on the screeplot
meta_data <- meta_data_vcf_452#Meta data

#For loop to do it for multiple PC axis like 1-2 but also 1-3, 2-3 etc
 Pca_genetic <- function(data, names, num_axes, meta_data) {
   #We can create 2 loops that would perform the Pca_function for each combination of axis retained: 
  for (i in 1:(num_axes - 1)) { #Here, the first loop will select axis between 1 and numb_axis-1 so basically, if we take 3 axis, i will take values of 1 then 2, so PCx will ether be 1 or 2.
    for (j in (i + 1):num_axes) { #Here, the second loop within the first one will take the values of j from i +1 to num axis so if num axis=3, we will have j ranging from 2 to 3 when i=1 and j=3 when i =2
      #In summary, the loops perform the function for all combinations of axes: 1–2, 1–3, and 2–3 when three axes are retained. 
      
      PCx <- i
      PCy <- j
      
      #Calculate explained variance for chosen PCs
      explained_variance <- data.frame(eigenvals(RDA_structure_genetic))
      proportion_variance <- data.frame(t(explained_variance[c(PCx,PCy),] / sum(explained_variance) *100)) %>% round(digit=1)
      colnames(proportion_variance) <- c("PCx", "PCy")
      
      #Store the scores along the retained PC axis in a dataframe
      store_score_rda <- scores(RDA_structure_genetic, axes = c(PCx, PCy),display="sites" ) %>% data.frame(row.names(data))
      colnames(store_score_rda) <- c(paste0("PC", PCx), paste0("PC", PCy), "Population")
      
      #Add country, population information
      PCa_score_T_adapcon_gentree <- merge(store_score_rda, meta_data, "Population") %>% group_by(Population) %>% slice_head(n = 1) %>% ungroup() %>% as.data.frame()  #Keep only the first row of each groups

      #Genetic PCA
      ggplot_representation <- ggplot() +
        geom_point(data = PCa_score_T_adapcon_gentree,   
                   aes(PCa_score_T_adapcon_gentree[,2], 
                       PCa_score_T_adapcon_gentree[,3], 
                       size = 4, 
                       color = PCa_score_T_adapcon_gentree$Country)) +
        scale_size_identity(guide = "none") +  #Remove size legend
        scale_colour_manual(name = "Country",
                            values = c("orangered3", "gold2", "darkorchid3", "navyblue", 
                                       "turquoise2", "green3", "blue", "red", "black", 
                                       "gray", "orange", "darkgreen"),
                            guide = guide_legend(override.aes = list(size = 4))) +  #Match legend point size to plot
        xlab(paste0("PC", PCx, " ", "(", proportion_variance$PCx, "%", ")")) + 
        ylab(paste0("PC", PCy, " ", "(", proportion_variance$PCy, "%", ")")) +
        theme_bw(base_size = 11) +
        theme(legend.position = "right", 
              legend.title.align = 0.5,  #Center the title above the legend
              panel.grid = element_blank(), 
              strip.text = element_text(size = 11),
              plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
              axis.title.x = element_text(size = 14),
              axis.title.y = element_text(size = 14),
              axis.text.x = element_text(size = 12),
              axis.text.y = element_text(size = 12),
              legend.title = element_text(size = 14),
              legend.text = element_text(size = 12),
              legend.key = element_rect(fill = "transparent", color = NA))

print(ggplot_representation)

 #Save
pdf("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/PCA/PCA_Pop_level.pdf");print(ggplot_representation);dev.off()

      return(PCa_score_T_adapcon_gentree)
    }
  }
 }
#Run the function 
PCa_score_T_adapcon_gentree<-Pca_genetic(data,names,axis,meta_data)

Finally, the table of population scores along the PCA axes was saved for use in variance partitioning and GEA pRDA analyses.

Overall interpretation: The patterns observed at the population level are nearly identical to those found in the individual-level PCA. This consistency is expected when genetic structure is found across populaitons and provides a check for potential errors during the calculation of allelic frequencies or other processing steps.

4 STRUCTURE software

The next step involved analysing population structure using STRUCTURE, a Bayesian clustering approach that employs Markov Chain Monte Carlo (MCMC) iterations. Initially, individuals are randomly assigned to K groups, where K is the number of clusters chosen. Allele frequencies are then estimated for each group, and individual membership coefficients (Q-values) are calculated. Individuals are re-assigned to groups based on their Q-values, and each MCMC iteration updates allele frequencies and Q-values to maximize the likelihood of the data.

Because the initial assignments are random, a burn-in period is applied to allow the MCMC to stabilize before calculating final Q-values. Subsequent iterations after burn-in are used to compute the mean Q-values for each individual. Multiple runs for each K can be performed to ensure convergence and consistency of results across runs.

We performed the analysis in Genotoul cluster using:
- 8252 SNPs
- 452 indiv
- K between 2 and 10
- 100 000 burning
- 500 000 iterations (after the burning)
- 10 MCMC per K

Subsequently, we used STRUCTURE Harvester on Linux to estimate the optimal number of clusters (K) using the Evanno method and log-likelihood [ln(K)] values, and to output results in CLUMPP format. Because group labels (1, 2, etc.) may vary across runs for the same K, we used CLUMPP to reassign individuals to consistent group labels, enabling calculation of mean Q-values per individual across runs.

Finally, we visualised the results using the CLUMPAK web interface, which allowed plotting of delta K (Evanno method) and ln(K) values, as STRUCTURE Harvester does not provide graphical outputs. Traditional STRUCTURE plots showing individual Q-values were also generated. In this script, our focus was to plot the mean Q-value per population on a map to visualize population structure.

The first step involved loading the order of individuals from the STRUCTURE analysis to correctly assign their Q-values.

name <- read.table("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/clumpp_output/data_split_try.txt")

Next, we calculated the mean Q-value per population for each K and visualised the results on a map.

for(i in 2:10){
  input <- paste0("K",i)
  data <- read.table(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/clumpp_output/",input,"_mean.txt"))
  
  k_full <- data.frame(name[,c(1)],data[,-c(1:5)])
  colnames(k_full) <- c("VCF_ID", paste0("Group",6:ncol(data)-5))
  
  #To maintain consistency in the colors of the main groups across different K values, columns representing the groups were ordered consistently for each K based on the values in the first row. This ensures that the same group occupies the same position and color across visualizations, preserving consistency for the main gene pools.
  
  sorted_columns <- names(k_full[,-1])[order(-apply(k_full[,-1], 2, function(x) x[1]))]

#Reorder columns based on the maximum value to keep consistency with the color of the groups
k_full_bis <- k_full[, sorted_columns, drop = FALSE]

#add ID again
k_full_final <- data.frame(k_full[,1],k_full_bis);colnames(k_full_final) <- c("VCF_ID", paste0("Group",6:ncol(data)-5))
  
  k_with_pop <- merge(k_full_final,meta_data_vcf,"VCF_ID") %>% merge(meta_data_pop,"Population")
  
  k_final <- k_with_pop %>% dplyr::select(-c("Country.y","N","Elevation.DEM_90m."))
  
  mean_admixture_pop <- k_final %>% 
   group_by(Population) %>% 
  mutate(across(where(is.numeric), mean)) %>% 
  slice_head(n = 1) %>% 
  ungroup()
  
  mean_admixture_pop$Longitude=as.numeric(mean_admixture_pop$Longitude);mean_admixture_pop$Latitude=as.numeric(mean_admixture_pop$Latitude)
  
  mean_admixture_pop$Population_bis <- c("Anc","Blind","Bohin","Brank","Buja_A","Buja_G","Cardo","Castle","Fonte","Foresta","Gorenj","Harm","Horn","Jura","Cholo","Olym","Vour","Pater","Rasc","Rouds","Baume","Saro","Serra","Sueve","Unska","Valdi","Vise","Wal","Yew")
  
  groups <- paste0("Group",6:ncol(data)-5)
  admin <- ne_countries(scale = "medium", returnclass = "sf")
my.colors<- c("#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd","#7f7f7f","#bcbd22", "#c49c94", "#f7b6d2", "darkgreen")
#Plot the map with scatterpie
map_pop <- ggplot() + 
  geom_sf(data = admin, fill = gray(0.92), size = 0) +#Background of the map
  geom_scatterpie(
    data = mean_admixture_pop,
    aes(x = Longitude, y = Latitude),
    cols = groups, #Adjust columns accordingly to the groups
    size = 0,#Size of the bold (=gras)
    alpha = 1, #Transparence
  ) +
  geom_text(
   data = mean_admixture_pop,
    aes(x = Longitude, y = Latitude, label = meta_data_pop_order$pop_abbrev,fontface = "bold"),
    size = 3, #Size of the label text
    vjust = 2 #Vertical justification for the label
  ) +
  scale_fill_manual(values=my.colors)+
  coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +#Extension of the map
  annotation_scale(location = "br", width_hint = 0.3) + #Scale bar at bottom left
  annotation_north_arrow(location = "tl", which_north = "true", 
                         pad_x = unit(-0.2, "cm"), pad_y = unit(0.08, "cm"),
                         style = north_arrow_fancy_orienteering) + #Compass rose
  theme_minimal() +
  labs(title = paste0("Genetic Structure for"," ", input," ", "groups"),fill="STRUCTURE groups")

print(map_pop)
 #Save
pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/GEA_new_var/Structure/Map_",input,"_STRUCTURE.pdf"));print(map_pop);dev.off()
}

Interpretation: Based on the initial CLUMPAK results and biological relevance, we originally selected K = 3, as it appeared to capture the largest pattern of population structure. This K value was intended for imputing missing genomic data within the three groups. However, after including K = 1 in the analysis, it became evident that the previous procedure had not tested K = 2. With this correction, the best-supported number of clusters was found to be K = 2, rather than 3.

library(ggplot2)
library(sf)
library(scatterpie)
library(dplyr)
library(ggrepel)
library(rnaturalearth)
library(rnaturalearthdata)

## 
## Attachement du package : 'rnaturalearthdata'

## L'objet suivant est masqué depuis 'package:rnaturalearth':
## 
##     countries110

#Load map
admin <- ne_countries(scale = "medium", returnclass = "sf")

#Target projection (ETRS89 / LAEA Europe - EPSG:3035)
lambert_crs <- 3035

#Project background map
admin_proj <- st_transform(admin, crs = lambert_crs)

#Prepare STRUCTURE data
K <- 2
input <- paste0("K", K)
data <- read.table(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/clumpp_output/", input, "_mean.txt"))

k_full <- data.frame(name[, 1], data[, -c(1:5)])
colnames(k_full) <- c("VCF_ID", paste0("Group", 1:(ncol(k_full) - 1)))
sorted_columns <- names(k_full[, -1])[order(-apply(k_full[, -1], 2, function(x) x[1]))]
k_full_bis <- k_full[, sorted_columns, drop = FALSE]
k_full_final <- data.frame(k_full[, 1], k_full_bis)
colnames(k_full_final) <- c("VCF_ID", paste0("Group", 1:(ncol(k_full_final) - 1)))

k_with_pop <- merge(k_full_final, meta_data_vcf, by = "VCF_ID") %>%
  merge(meta_data_pop, by = "Population") %>%
  select(-c("Country.y", "N", "Elevation.DEM_90m."))

mean_admixture_pop <- k_with_pop %>%
  group_by(Population) %>%
  mutate(across(where(is.numeric), mean)) %>%
  slice_head(n = 1) %>%
  ungroup()

mean_admixture_pop$Longitude <- as.numeric(mean_admixture_pop$Longitude)
mean_admixture_pop$Latitude <- as.numeric(mean_admixture_pop$Latitude)

#label cleanup
mean_admixture_pop$Population_bis <- c("Anc", "Blind", "Bohin", "Brank", "Buja_A", "Buja_G",
                                       "Cardo", "Castle", "Fonte", "Foresta", "Gorenj", "Harm",
                                       "Horn", "Jura", "Cholo", "Olym", "Vour", "Pater", "Rasc",
                                       "Rouds", "Baume", "Saro", "Serra", "Sueve", "Unska", "Valdi",
                                       "Vise", "Wal", "Yew")[1:nrow(mean_admixture_pop)]

#Convert to sf and project
pies_sf <- st_as_sf(mean_admixture_pop, coords = c("Longitude", "Latitude"), crs = 4326)
pies_proj <- st_transform(pies_sf, crs = lambert_crs)
coords <- st_coordinates(pies_proj)

mean_admixture_pop$x <- coords[, 1]
mean_admixture_pop$y <- coords[, 2]

#Group names
groups <- grep("^Group", colnames(mean_admixture_pop), value = TRUE)

#Colors
my.colors <- c("#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
               "#7f7f7f", "#bcbd22", "#c49c94", "#f7b6d2", "darkgreen")


plot <- ggplot() +
  geom_sf(data = admin_proj, fill = gray(0.90), color = "white", size = 0.2) +
  geom_sf(data = shapefile, fill = "#d9f2d9", color = "gray60", size = 0.2) +  #Adding shapefile
   geom_scatterpie(
    data = mean_admixture_pop,
    aes(x = x, y = y),
    cols = groups,
    pie_scale = 1.2  #Increase this value for larger pies
  ) +
  #geom_text_repel(
   # data = mean_admixture_pop,
   # aes(x = x, y = y, label = Population_bis),
   # size = 3
  #) +
  scale_fill_manual(values = my.colors) +
 coord_sf(crs = lambert_crs,
         xlim = c(2379349, 7002838),
         ylim = c(1029721, 4519987),
         expand = FALSE)+
  theme_minimal() +
  theme(
    panel.background = element_rect(fill = "azure", color = NA),
    plot.title = element_text(face = "bold", hjust = 0.5)
  ) +
  labs(
    title = paste0("Genetic Structure for ", input, " groups"),
    fill = "STRUCTURE groups"
  )
print(plot)

pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/GEA_new_var/Structure/Map_chapter2_",input,"_STRUCTURE.pdf"));print(plot);dev.off()

## png 
##   2

We also generated STRUCTURE barplots to visualize the contribution of each individual to the inferred clusters within populations.

for(i in 2:3){
  input <- paste0("K", i)

  #Load STRUCTURE results
  data <- read.table(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/clumpp_output/", input, "_mean.txt"))
  
  k_full <- data.frame(name[,c(1)], data[,-c(1:5)])
  colnames(k_full) <- c("VCF_ID", paste0("Group", 1:(ncol(k_full)-1)))  #Adjust for group numbering
  
  #Reorder groups based on first row's values for color consistency
  sorted_columns <- names(k_full[,-1])[order(-apply(k_full[,-1], 2, function(x) x[1]))]
  k_full_bis <- k_full[, sorted_columns, drop = FALSE]

  #Add ID again
  k_full_final <- data.frame(k_full[,1], k_full_bis)
  colnames(k_full_final) <- c("VCF_ID", paste0("Group", 1:(ncol(data)-5)))
  
  #Merge with metadata, using population abbreviations from meta_data_pop_order
  k_with_pop <- merge(k_full_final, meta_data_vcf, by = "VCF_ID") %>% 
    merge(meta_data_pop, by = "Population") %>%
    merge(meta_data_pop_order[,c(2,7)], by = "Population")

  k_final <- k_with_pop %>% 
    dplyr::select(-c("Country.y", "N", "Elevation.DEM_90m."))

  #Identify the major group for each individual (the group with highest ancestry)
  k_final$Assigned_Group <- apply(k_final %>% select(starts_with("Group")), 1, function(x) {
    paste0("Group", which.max(x))  #Assign the group with the highest proportion
  })

  #Order individuals by Assigned Group, then by Population
  k_final <- k_final %>%
    arrange(Assigned_Group, Population)

  #Create a population label that appears only once per group
  k_final <- k_final %>%
    group_by(Assigned_Group, Population) %>%
    mutate(Population_Label = ifelse(row_number() == 1, as.character(pop_abbrev), "")) %>%  #Use abbreviation
    ungroup()

  #Convert to long format for ggplot
  k_long <- k_final %>%
    pivot_longer(cols = starts_with("Group"), names_to = "Group", values_to = "Admixture") %>%
    mutate(Population = factor(Population, levels = unique(k_final$Population)))  #Ensure correct order
  
  my.colors <- c("#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#7f7f7f", "#bcbd22", "#c49c94", "#f7b6d2", "darkgreen")

  #Create the STRUCTURE bar plot
  structure_plot <- ggplot(k_long, aes(x = factor(VCF_ID, levels = k_final$VCF_ID), y = Admixture, fill = Group)) +
    geom_bar(stat = "identity", width = 1) +
    scale_fill_manual(values = my.colors) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),  #Angled text for bias effect
          axis.ticks.x = element_blank(),
          panel.grid.major = element_blank(), 
          panel.grid.minor = element_blank()) +
    scale_x_discrete(labels = k_final$Population_Label) +  #Show population codes instead of country
    labs(title = paste0("STRUCTURE Barplot for ", input, " Groups"),
         x = "Populations", y = "Admixture Proportion", fill = "STRUCTURE Groups")

  print(structure_plot)
#Save
pdf(paste0("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/figures/STRUCTURE_barplotK",i,".pdf"));print(structure_plot);dev.off()
}

Finally, we saved the STRUCTURE scores for K = 3.

#Run the function only for K=3 and save the resulting dataframe
data <- read.table("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/clumpp_output/K3_mean.txt")
  
  k_full <- data.frame(name[,c(1)],data[,-c(1:5)])
  colnames(k_full) <- c("VCF_ID", paste0("Group",6:ncol(data)-5))
  
#Ensure consistent colors for main groups across K values
#Order columns (groups) the same way for each K using the first row values
#This keeps group positions consistent and maintains color consistency across main gene pools
  
  sorted_columns <- names(k_full[,-1])[order(-apply(k_full[,-1], 2, function(x) x[1]))]

#Reorder columns based on the maximum value to keep consistency with the color of the groups
k_full_bis <- k_full[, sorted_columns, drop = FALSE]

#Add ID again
k_full_final <- data.frame(k_full[,1],k_full_bis);colnames(k_full_final) <- c("VCF_ID", paste0("Group",6:ncol(data)-5))
  k_with_pop <- merge(k_full_final,meta_data_vcf,"VCF_ID") %>% merge(meta_data_pop,"Population")
  k_final <- k_with_pop %>% dplyr::select(-c("Country.y","N","Elevation.DEM_90m."))
  
  mean_admixture_pop <- k_final %>% 
   group_by(Population) %>% 
  mutate(across(where(is.numeric), mean)) %>% 
  slice_head(n = 1) %>% 
  ungroup() %>% 
  mutate(across(where(is.numeric),scale))
  
  Cluster_score_STRUCTURE_T_Adapcon_gentree <- mean_admixture_pop[,-c(2,6:8)]
  
  save(Cluster_score_STRUCTURE_T_Adapcon_gentree,file="C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/GEA/Structure_proxy/Cluster_score_STRUCTURE_T_Adapcon_gentree.Rdata")
  
write_xlsx(k_final,"C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/GEA/Structure_proxy/k_final.xlsx")

#Run the function only for K=3 and save the resulting dataframe
data <- read.table("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/STRUCTURE/clumpp_output/K2_mean.txt")
  
  k_full <- data.frame(name[,c(1)],data[,-c(1:5)])
  colnames(k_full) <- c("VCF_ID", paste0("Group",6:ncol(data)-5))
  
#Ensure consistent colors for main groups across K values
#Order columns (groups) the same way for each K using the first row values
#This keeps group positions consistent and maintains color consistency across main gene pools
  
  sorted_columns <- names(k_full[,-1])[order(-apply(k_full[,-1], 2, function(x) x[1]))]

#Reorder columns based on the maximum value to keep consistency with the color of the groups
k_full_bis <- k_full[, sorted_columns, drop = FALSE]

#Add ID again
k_full_final <- data.frame(k_full[,1],k_full_bis);colnames(k_full_final) <- c("VCF_ID", paste0("Group",6:ncol(data)-5))
  k_with_pop <- merge(k_full_final,meta_data_vcf,"VCF_ID") %>% merge(meta_data_pop,"Population")
  k_final <- k_with_pop %>% dplyr::select(-c("Country.y","N","Elevation.DEM_90m."))
  
  mean_admixture_pop <- k_final %>% 
   group_by(Population) %>% 
  mutate(across(where(is.numeric), mean)) %>% 
  slice_head(n = 1) %>% 
  ungroup() %>% 
  mutate(across(where(is.numeric),scale))
  
  Cluster_score_STRUCTURE_T_Adapcon_gentreeK2 <- mean_admixture_pop[,-c(2,5:7)]
  
  save(Cluster_score_STRUCTURE_T_Adapcon_gentreeK2,file="C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/GEA/Structure_proxy/Cluster_score_STRUCTURE_T_Adapcon_gentreeK2.Rdata")
  
write_xlsx(k_final,"C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/GEA/Structure_proxy/k2_final.xlsx")

5 Fst

We mapped the FST values across populations using the European environmental classification to visualise spatial patterns of genetic differentiation. FST was calculated using the population-specific divergence index as implemented in BayeScan.

meta_data_pop <- read.csv("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/Populations/taxus_sample_29pop.csv",h=T,sep=";",dec=",")

#Alphabetic order
meta_data_pop_order <- meta_data_pop[order(meta_data_pop$Population),]
meta_data_pop_order$Latitude <- as.numeric(meta_data_pop_order$Latitude);meta_data_pop_order$Longitude <- as.numeric(meta_data_pop_order$Longitude)

load("C:/Users/tfrancisco/Documents/Thesis/Results/all_species/FST_index/Bayescan_FST_pop_order_Taxus_bis.Rdata")

meta_data_pop_order$pop_abbrev <- c("ANC","BLI","BOM","BRA","BUJA","BUJG","CAR","CED","PDT","UMB","GOR","HAR","HHR","JUR","CHO","OLY","VOU","PAT","RAS","RWM","BAU","SAV","SDF","SUE","UNS","VAL","VIS","WAL","YEW")

data_Fst <- merge(meta_data_pop_order,Bayescan_FST_pop_order_Taxus_bis,"Population")

We overlaid the map with the environmental stratification from Metzger (2018), downloaded from the EEA Geospatial Data Catalogue.

library(ggplot2)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)

#Load shapefile
ens <- st_read("C:/Users/tfrancisco/Documents/Thesis/Data/Species/Taxus_baccata/enviro_map/EnSv8/ens_v8.shp")

## Reading layer `ens_v8' from data source 
##   `C:\Users\tfrancisco\Documents\Thesis\Data\Species\Taxus_baccata\enviro_map\EnSv8\ens_v8.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 84 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 2636000 ymin: 1217000 xmax: 6526000 ymax: 5421000
## Projected CRS: ETRS89-extended / LAEA Europe

zone_colors <- c(
  "Alpine North"            = "#A020F0",  # purple
  "Boreal"                  = "#006400",
  "Nemoral"                 = "#9ACD32",
  "Atlantic North"          = "#00CED1",
  "Alpine South"            = "#AFEEEE",
  "Continental"             = "#32CD32",
  "Atlantic Central"        = "#87CEFA",
  "Pannonian"               = "#556B2F",
  "Lusitanian"              = "#4682B4",
  "Anatolian"               = "#A52A2A",
  "Mediterranean Mountains"= "#D2B48C",
  "Mediterranean North"    = "#FFA500",
  "Mediterranean South"    = "#8B4513"
)

code_to_name <- c("ALN" = "Alpine North", 
                  "BOR" = "Boreal",
                  "NEM" = "Nemoral",
                  "ATN" = "Atlantic North",
                  "ALS" = "Alpine South",
                  "CON" = "Continental",
                  "ATC" = "Atlantic Central",
                  "PAN" = "Pannonian",
                  "LUS" = "Lusitanian",
                  "ANA" = "Anatolian",
                  "MDM" = "Mediterranean Mountains",
                  "MDN" = "Mediterranean North",
                  "MDS" = "Mediterranean South")

#Reduce climates
code_to_name <- c("ALN" = "Alpine", 
                  "BOR" = "Boreo-Nemoral",
                  "NEM" = "Boreo-Nemoral",
                  "ATN" = "Atlantic",
                  "ALS" = "Alpine",
                  "CON" = "Continental",
                  "ATC" = "Atlantic",
                  "PAN" = "Missing",
                  "LUS" = "Atlantic",
                  "ANA" = "Missing",
                  "MDM" = "Mediterranean Mountains",
                  "MDN" = "Mediterranean",
                  "MDS" = "Mediterranean")

zone_colors <- c(
  "Alpine"            = "#A020F0",  # purple
  "Boreo-Nemoral"                  = "#AFEEEE",
  "Atlantic"          = "#00CED1",
  "Continental"             = "#32CD32",
  "Mediterranean Mountains"= "#8B4513",
  "Mediterranean"    = "#FFD700",
    "Missing" = "white"

)


ens$EnS_name <- sapply(substr(ens$EnS_name, 1, 3), function(code) code_to_name[code])

ens$EnS_name <- factor(ens$EnS_name, levels = c(
  "Alpine", "Boreo-Nemoral", "Atlantic", "Continental", 
  "Mediterranean Mountains", "Mediterranean","Missing"
))

#Load country borders for context map
admin <- ne_countries(scale = "medium", returnclass = "sf")

plot <- ggplot() + 
  geom_sf(data = admin, fill = gray(0.92), size = 0.1) +  #Background countries
  geom_sf(data = ens, aes(fill = EnS_name), color = "black", size = 0.1) +  # Environmental zones
  scale_fill_manual(
    values = zone_colors,
    name = "Environmental Zones"
  ) +
  coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +  # Crop to Europe
  xlab("Longitude") + ylab("Latitude") +
  ggtitle("Environmental Zones of Europe") +
  geom_point(data = data_Fst, 
           aes(x = Longitude, y = Latitude, size = Fst), 
           shape = 21, fill = "red", color = "black", alpha = 1)+
  theme_bw(base_size = 11) +
  theme(
    legend.position = "right",
    panel.grid = element_blank(),
    strip.text = element_text(size = 11),
    plot.title = element_text(hjust = 0.5, color = "Black", face = "italic")
  )

print(plot)

plot <- ggplot() + 
  geom_sf(data = admin, fill = gray(0.92), size = 0.1) +
  geom_sf(data = ens, aes(fill = EnS_name), color = NA, size = 0.1, alpha = 0.30) +
  scale_fill_manual(values = zone_colors, name = "Environmental Zones") +
  geom_point(data = data_Fst, 
             aes(x = Longitude, y = Latitude, size = Fst), 
             shape = 21, fill = "red", color = "black", alpha = 1) +
  scale_size_continuous(
    name = "Fst",
    breaks = range(data_Fst$Fst),
    labels = c("0.01", "0.27")
  ) +
  geom_text(data = data_Fst, aes(x = Longitude, y = Latitude+0.7, label = pop_abbrev,fontface = "bold"), size = 3.0, color = "black",fontface = "bold") +
  coord_sf(xlim = c(-10, 30), ylim = c(35, 62), expand = FALSE) +
  xlab("Longitude") + ylab("Latitude") +
  ggtitle("Environmental Zones of Europe") +
  annotation_scale(location = "br", width_hint = 0.3) + # Scale bar at bottom left
  annotation_north_arrow(location = "tl", which_north = "true", 
                         pad_x = unit(-0.2, "cm"), pad_y = unit(0.08, "cm"),
                         style = north_arrow_fancy_orienteering) + # Compass rose
  theme_bw(base_size = 11) +
  theme(
    legend.position = "right",
    panel.grid = element_blank(),
    strip.text = element_text(size = 11),
    plot.title = element_text(hjust = 0.5, color = "Black", face = "italic")
  )

print(plot)

## Scale on map varies by more than 10%, scale bar may be inaccurate

pdf("C:/Users/tfrancisco/Documents/Thesis/Results/species/taxus/map_sampled/Envi_zones_and_Fst_new.pdf");print(plot);dev.off()

## Scale on map varies by more than 10%, scale bar may be inaccurate

## png 
##   2

devtools::session_info()

## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.2 (2023-10-31 ucrt)
##  os       Windows 11 x64 (build 26100)
##  system   x86_64, mingw32
##  ui       RTerm
##  language (EN)
##  collate  French_France.utf8
##  ctype    French_France.utf8
##  tz       Europe/Paris
##  date     2025-09-22
##  pandoc   3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package           * version    date (UTC) lib source
##  bigassertr          0.1.6      2023-01-10 [1] CRAN (R 4.3.2)
##  bigutilsr           0.3.4      2021-04-13 [1] CRAN (R 4.3.2)
##  bslib               0.6.1      2023-11-28 [1] CRAN (R 4.3.2)
##  cachem              1.0.8      2023-05-01 [1] CRAN (R 4.3.1)
##  class               7.3-22     2023-05-03 [2] CRAN (R 4.3.2)
##  classInt            0.4-10     2023-09-05 [1] CRAN (R 4.3.2)
##  cli                 3.6.1      2023-03-23 [1] CRAN (R 4.3.1)
##  cluster             2.1.6      2023-12-01 [2] CRAN (R 4.3.2)
##  codetools           0.2-19     2023-02-01 [2] CRAN (R 4.3.2)
##  colorspace          2.1-0      2023-01-23 [1] CRAN (R 4.3.1)
##  crayon              1.5.3      2024-06-20 [1] CRAN (R 4.3.3)
##  DBI                 1.2.2      2024-02-16 [1] CRAN (R 4.3.2)
##  devtools            2.4.5      2022-10-11 [1] CRAN (R 4.3.3)
##  digest              0.6.33     2023-07-07 [1] CRAN (R 4.3.1)
##  dplyr             * 1.1.4      2023-11-17 [1] CRAN (R 4.3.2)
##  e1071               1.7-14     2023-12-06 [1] CRAN (R 4.3.2)
##  ellipsis            0.3.2      2021-04-29 [1] CRAN (R 4.3.1)
##  evaluate            0.23       2023-11-01 [1] CRAN (R 4.3.2)
##  farver              2.1.2      2024-05-13 [1] CRAN (R 4.3.3)
##  fastmap             1.1.1      2023-02-24 [1] CRAN (R 4.3.1)
##  fs                  1.6.3      2023-07-20 [1] CRAN (R 4.3.1)
##  generics            0.1.4      2025-05-09 [1] CRAN (R 4.3.2)
##  ggforce             0.4.1      2022-10-04 [1] CRAN (R 4.3.2)
##  ggfun               0.1.4      2024-01-19 [1] CRAN (R 4.3.2)
##  ggplot2           * 3.5.2      2025-04-09 [1] CRAN (R 4.3.2)
##  ggrepel           * 0.9.5      2024-01-10 [1] CRAN (R 4.3.2)
##  ggspatial         * 1.1.9      2023-08-17 [1] CRAN (R 4.3.3)
##  ggstar            * 1.0.4      2022-11-08 [1] CRAN (R 4.3.3)
##  glue                1.7.0      2024-01-09 [1] CRAN (R 4.3.2)
##  gridExtra           2.3        2017-09-09 [1] CRAN (R 4.3.3)
##  gtable              0.3.6      2024-10-25 [1] CRAN (R 4.3.3)
##  highr               0.10       2022-12-22 [1] CRAN (R 4.3.1)
##  htmltools           0.5.7      2023-11-03 [1] CRAN (R 4.3.2)
##  htmlwidgets         1.6.4      2023-12-06 [1] CRAN (R 4.3.2)
##  httpuv              1.6.13     2023-12-06 [1] CRAN (R 4.3.2)
##  httr                1.4.7      2023-08-15 [1] CRAN (R 4.3.2)
##  jquerylib           0.1.4      2021-04-26 [1] CRAN (R 4.3.1)
##  jsonlite            1.8.8      2023-12-04 [1] CRAN (R 4.3.2)
##  KernSmooth          2.23-22    2023-07-10 [2] CRAN (R 4.3.2)
##  knitr               1.45       2023-10-30 [1] CRAN (R 4.3.2)
##  labeling            0.4.3      2023-08-29 [1] CRAN (R 4.3.1)
##  later               1.3.2      2023-12-06 [1] CRAN (R 4.3.2)
##  lattice           * 0.22-5     2023-10-24 [2] CRAN (R 4.3.2)
##  LEA               * 3.14.0     2023-10-24 [1] Bioconductor
##  lifecycle           1.0.4      2023-11-07 [1] CRAN (R 4.3.2)
##  magrittr            2.0.3      2022-03-30 [1] CRAN (R 4.3.1)
##  MASS                7.3-60.0.1 2024-01-13 [2] CRAN (R 4.3.2)
##  Matrix              1.6-5      2024-01-11 [1] CRAN (R 4.3.2)
##  memoise             2.0.1      2021-11-26 [1] CRAN (R 4.3.1)
##  mgcv                1.9-1      2023-12-21 [2] CRAN (R 4.3.2)
##  mime                0.12       2021-09-28 [1] CRAN (R 4.3.1)
##  miniUI              0.1.1.1    2018-05-18 [1] CRAN (R 4.3.2)
##  munsell             0.5.1      2024-04-01 [1] CRAN (R 4.3.3)
##  nlme                3.1-164    2023-11-27 [2] CRAN (R 4.3.2)
##  pcadapt           * 4.3.5      2023-08-29 [1] CRAN (R 4.3.2)
##  permute           * 0.9-7      2022-01-27 [1] CRAN (R 4.3.2)
##  pillar              1.10.2     2025-04-05 [1] CRAN (R 4.3.3)
##  pkgbuild            1.4.3      2023-12-10 [1] CRAN (R 4.3.2)
##  pkgconfig           2.0.3      2019-09-22 [1] CRAN (R 4.3.2)
##  pkgload             1.3.4      2024-01-16 [1] CRAN (R 4.3.2)
##  polyclip            1.10-6     2023-09-27 [1] CRAN (R 4.3.1)
##  profvis             0.3.8      2023-05-02 [1] CRAN (R 4.3.2)
##  promises            1.2.1      2023-08-10 [1] CRAN (R 4.3.2)
##  proxy               0.4-27     2022-06-09 [1] CRAN (R 4.3.2)
##  purrr               1.0.2      2023-08-10 [1] CRAN (R 4.3.2)
##  R6                  2.6.1      2025-02-15 [1] CRAN (R 4.3.3)
##  ragg                1.2.7      2023-12-11 [1] CRAN (R 4.3.2)
##  RColorBrewer      * 1.1-3      2022-04-03 [1] CRAN (R 4.3.1)
##  Rcpp                1.0.11     2023-07-06 [1] CRAN (R 4.3.1)
##  remotes             2.5.0      2024-03-17 [1] CRAN (R 4.3.3)
##  rlang               1.1.3      2024-01-10 [1] CRAN (R 4.3.2)
##  rmarkdown           2.25       2023-09-18 [1] CRAN (R 4.3.1)
##  rnaturalearth     * 1.0.1      2023-12-15 [1] CRAN (R 4.3.2)
##  rnaturalearthdata * 1.0.0      2024-02-09 [1] CRAN (R 4.3.2)
##  RSpectra            0.16-1     2022-04-24 [1] CRAN (R 4.3.2)
##  rstudioapi          0.15.0     2023-07-07 [1] CRAN (R 4.3.2)
##  sass                0.4.8      2023-12-06 [1] CRAN (R 4.3.2)
##  scales              1.3.0      2023-11-28 [1] CRAN (R 4.3.2)
##  scatterpie        * 0.2.1      2023-06-07 [1] CRAN (R 4.3.2)
##  sessioninfo         1.2.2      2021-12-06 [1] CRAN (R 4.3.2)
##  sf                * 1.0-15     2023-12-18 [1] CRAN (R 4.3.2)
##  shiny               1.8.0      2023-11-17 [1] CRAN (R 4.3.2)
##  stringi             1.8.3      2023-12-11 [1] CRAN (R 4.3.2)
##  stringr             1.5.1      2023-11-14 [1] CRAN (R 4.3.2)
##  systemfonts         1.0.5      2023-10-09 [1] CRAN (R 4.3.2)
##  terra               1.7-71     2024-01-31 [1] CRAN (R 4.3.2)
##  textshaping         0.3.7      2023-10-09 [1] CRAN (R 4.3.2)
##  tibble              3.2.1      2023-03-20 [1] CRAN (R 4.3.2)
##  tidyr             * 1.3.1      2024-01-24 [1] CRAN (R 4.3.2)
##  tidyselect          1.2.1      2024-03-11 [1] CRAN (R 4.3.3)
##  tweenr              2.0.2      2022-09-06 [1] CRAN (R 4.3.2)
##  units               0.8-5      2023-11-28 [1] CRAN (R 4.3.2)
##  urlchecker          1.0.1      2021-11-30 [1] CRAN (R 4.3.2)
##  usethis             2.2.3      2024-02-19 [1] CRAN (R 4.3.2)
##  vctrs               0.6.5      2023-12-01 [1] CRAN (R 4.3.2)
##  vegan             * 2.6-4      2022-10-11 [1] CRAN (R 4.3.2)
##  withr               3.0.2      2024-10-28 [1] CRAN (R 4.3.3)
##  writexl           * 1.5.0      2024-02-09 [1] CRAN (R 4.3.2)
##  xfun                0.41       2023-11-01 [1] CRAN (R 4.3.2)
##  xtable              1.8-4      2019-04-21 [1] CRAN (R 4.3.2)
##  yaml                2.3.8      2023-12-11 [1] CRAN (R 4.3.2)
## 
##  [1] C:/Users/tfrancisco/AppData/Local/R/win-library/4.3
##  [2] C:/Users/tfrancisco/AppData/Local/Programs/R/R-4.3.2/library
## 
## ──────────────────────────────────────────────────────────────────────────────

6 Draft

not using admixture from LEA package anymore but script below will do it if needed

Population_structure

Thomas Francisco

2024-03-14