Overview

The following analyses correspond to project “Hide-and-seek in the tundra: Accounting for cryptic species uncovers molecular variation in moss clonal populations”. The microbial communities of the moss Racomitrium lanuginosum were analyzed using metabarcoding of the 16S rRNA (V3-V4 regions).

The phyloseq object and most of the files used for analyses correspond to those of the article: Escolástico-Ortiz, D.A., Blasi, C., Bellenger, JP., Derome, N. & Villarreal-A, J.C. 2023. Differentially abundant bacteria drive the N2-fixation of a widespread moss in the forest-tundra transition zone. Symbiosis 90, 193–211.

0. Load libraries and packages

library(readr);packageVersion("readr")
library(ggplot2);packageVersion("ggplot2")
library(ggpubr)
library(tidyverse)
library(RColorBrewer)
library(viridisLite)
theme_set(theme_bw())
# install.packages("FSA")
library(FSA); packageVersion("FSA")
# install.packages("multcompView")
library(multcompView); packageVersion("multcompView")
library(vegan);packageVersion("vegan")
# Installing Phyloseq
#if (!require("BiocManager", quietly = TRUE))
#  install.packages("BiocManager")

#BiocManager::install("phyloseq")
#BiocManager::install("S4Vectors")
# install.packages("speedyseq")
#install.packages("devtools")

library(phyloseq); packageVersion("phyloseq")

# install.packages("remotes")
#remotes::install_github("mikemc/speedyseq")
#install.packages("picante")
library(picante); packageVersion("picante")
library(tidyr)
library(MASS)

1. Data processing

We load the data to create the phyloseq objects: ASV tables, Taxonomy tables, Sample data and R object previously produced.

# write.table(ps_moss.norm@tax_table,"Tax_table_moss_genotype-microbiome.txt",sep= "\t",  row.names = TRUE)

# Updated taxonomy table. Information added to ASV without genus identification.
# Load the data
Tax_table_moss_16S <- read_tsv("Tax_table_moss_genotype-microbiome.tsv")
## Rows: 1322 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (7): #OTU ID, Kingdom, Phylum, Class, Order, Family, Genus
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Define row names
Tax_table_moss_16S<-Tax_table_moss_16S %>%
  tibble::column_to_rownames("#OTU ID")

#Transform to matrix, otherwise the row names will not match.
Tax_table_moss_16S<- as.matrix(Tax_table_moss_16S)
Tax_table_moss_16S<- tax_table(Tax_table_moss_16S)

# Insert table
phylo_moss2@tax_table <-Tax_table_moss_16S
phylo_moss2
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1322 taxa and 62 samples ]
## sample_data() Sample Data:       [ 62 samples by 8 sample variables ]
## tax_table()   Taxonomy Table:    [ 1322 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 1322 reference sequences ]
# Update the information adding the genetic group of each moss sample.
Gen_MLG <- read.table("Microbiome_genotype_MLG.txt",head=T)

nrow(Gen_MLG)  # Check the number of rows in Gen_MLG
## [1] 62
sample_names(phylo_moss2)  # Check the sample names in the phyloseq object
##  [1] "JC2791"     "JC2792"     "JC2807"     "JC2926"     "JC2938"    
##  [6] "JC2939"     "JC2948"     "JC2949"     "JC2950"     "JC2951"    
## [11] "JC2952"     "JC2953"     "JC2957"     "JC2960"     "JC2961"    
## [16] "JC3085"     "JC3212"     "JC3213"     "JC3214"     "JC3215"    
## [21] "JC2790-16S" "JC2793-16S" "JC2794-16S" "JC2795-16S" "JC2796-16S"
## [26] "JC2797-16S" "JC2798-16S" "JC2800-16S" "JC2801-16S" "JC2802-16S"
## [31] "JC2803-16S" "JC2804-16S" "JC2805-16S" "JC2806-16S" "JC2927-16S"
## [36] "JC2929-16S" "JC2930-16S" "JC2933-16S" "JC2936-16S" "JC2937-16S"
## [41] "JC2940-16S" "JC2941-16S" "JC2942-16S" "JC2943-16S" "JC2944-16S"
## [46] "JC2945-16S" "JC2946-16S" "JC2947-16S" "JC2955-16S" "JC2956-16S"
## [51] "JC2958-16S" "JC2959-16S" "JC2962-16S" "JC2963-16S" "JC3084-16S"
## [56] "JC3086-16S" "JC3087-16S" "JC3088-16S" "JC3089-16S" "JC3090-16S"
## [61] "JC3210-16S" "JC3217-16S"
length(sample_names(phylo_moss2))  # Check the number of samples
## [1] 62
Gen_MLG <- Gen_MLG[match(sample_names(phylo_moss2), row.names(Gen_MLG)), ]

sample_data(phylo_moss2)$Genotype <- Gen_MLG[,1]
sample_data(phylo_moss2)$MLG <- as.factor(Gen_MLG[,2])
sample_data(phylo_moss2)$Lat <- Gen_MLG[,3]
sample_data(phylo_moss2)$Lon <- Gen_MLG[,4]
sample_data(phylo_moss2)$GBS <- Gen_MLG[,5]
sample_data(phylo_moss2)$ID <- Gen_MLG[,6]
sample_data(phylo_moss2)$Gen_Habitat <- Gen_MLG[,7]

# Subset data for samples with genotyping-related information
Bacteria_gbs<- subset_samples(phylo_moss2, GBS=="TRUE")
table(tax_table(Bacteria_gbs)[,"Phylum"], exclude = NULL)
## 
##    Acidobacteria   Actinobacteria  Armatimonadetes    Bacteroidetes 
##              189              223               93               17 
##       Chlamydiae      Chloroflexi    Cyanobacteria              FBP 
##                2              104               49                1 
##       Firmicutes Gemmatimonadetes  Patescibacteria   Planctomycetes 
##                1                9                2              132 
##   Proteobacteria  Verrucomicrobia            WPS-2 
##              377               48               75
Bacteria_gbs <-prune_taxa(taxa_sums(Bacteria_gbs) >=1,Bacteria_gbs)
Bacteria_gbs
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1322 taxa and 53 samples ]
## sample_data() Sample Data:       [ 53 samples by 15 sample variables ]
## tax_table()   Taxonomy Table:    [ 1322 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 1322 reference sequences ]
# Modify the 'Habitat' variable directly in the phyloseq object
Bacteria_gbs@sam_data$Habitat <- recode(Bacteria_gbs@sam_data$Habitat, 
                                        Forest_tundra = "Forest tundra", 
                                        Shrub_tundra = "Shrub tundra")

2. Alpha diversity

2.1 Rarefaction

Now, we rarefied the data using the minimum number of sequences for all samples to equally compare alpha diversity among groups.

# Rarefaction curves
otu_tab_16S <- otu_table(Bacteria_gbs)
class(otu_tab_16S) <- "matrix" # as.matrix() will do nothing
## Warning in class(otu_tab_16S) <- "matrix": Setting class(x) to "matrix" sets
## attribute to NULL; result will no longer be an S4 object
## you get a warning here, but this is what we need to have
otu_tab_16S <- t(otu_tab_16S) # transpose observations to rows

rare_16S <- rarecurve(otu_tab_16S, step=50, lwd=1, ylab="OTU",  label=F)

sort(sample_sums(Bacteria_gbs))
##     JC2950 JC3210-16S     JC3212 JC2806-16S JC2945-16S     JC2938 JC2805-16S 
##       1073       1650       2205       2380       2577       2630       2919 
## JC2796-16S JC2927-16S     JC2948     JC3085 JC2958-16S JC2803-16S JC2801-16S 
##       3073       3462       3599       4052       4173       4355       4416 
## JC3090-16S     JC2953 JC3089-16S JC2795-16S JC2947-16S JC2794-16S JC2944-16S 
##       4421       4759       4808       4874       4985       5005       5126 
##     JC2949 JC2963-16S JC2962-16S     JC2960     JC2951 JC2790-16S JC2937-16S 
##       5472       5542       5597       6064       6151       6216       6233 
##     JC2807 JC2804-16S     JC2926     JC2791 JC2802-16S JC2800-16S JC3087-16S 
##       6566       6646       6892       7081       7504       7705       7817 
## JC2798-16S     JC3214 JC2956-16S JC3086-16S JC2955-16S     JC2961 JC2793-16S 
##       7964       8105       8354       8408       9204       9329       9340 
##     JC2939 JC2930-16S     JC2952 JC2940-16S JC2933-16S JC2797-16S JC2943-16S 
##       9793      10834      11245      11577      12917      13326      14892 
## JC3088-16S JC2929-16S     JC2792 JC2959-16S 
##      15413      16528      16940      17384
# Rarefied data without replacement
set.seed(1)
ps.rarefied_16S_gbs <- rarefy_even_depth(Bacteria_gbs, rngseed=1, sample.size=min(sample_sums(Bacteria_gbs)), replace=F) # Check the minimum sample size
## `set.seed(1)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(1); .Random.seed` for the full vector
## ...
## 23OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
# Check the number of sequences per sample
sample_sums(ps.rarefied_16S_gbs) # 1073
##     JC2791     JC2792     JC2807     JC2926     JC2938     JC2939     JC2948 
##       1073       1073       1073       1073       1073       1073       1073 
##     JC2949     JC2950     JC2951     JC2952     JC2953     JC2960     JC2961 
##       1073       1073       1073       1073       1073       1073       1073 
##     JC3085     JC3212     JC3214 JC2790-16S JC2793-16S JC2794-16S JC2795-16S 
##       1073       1073       1073       1073       1073       1073       1073 
## JC2796-16S JC2797-16S JC2798-16S JC2800-16S JC2801-16S JC2802-16S JC2803-16S 
##       1073       1073       1073       1073       1073       1073       1073 
## JC2804-16S JC2805-16S JC2806-16S JC2927-16S JC2929-16S JC2930-16S JC2933-16S 
##       1073       1073       1073       1073       1073       1073       1073 
## JC2937-16S JC2940-16S JC2943-16S JC2944-16S JC2945-16S JC2947-16S JC2955-16S 
##       1073       1073       1073       1073       1073       1073       1073 
## JC2956-16S JC2958-16S JC2959-16S JC2962-16S JC2963-16S JC3086-16S JC3087-16S 
##       1073       1073       1073       1073       1073       1073       1073 
## JC3088-16S JC3089-16S JC3090-16S JC3210-16S 
##       1073       1073       1073       1073

2.2 Alpha diversity estimates

# Diversity indexes
Shannon.16S <- diversity(otu_table(ps.rarefied_16S_gbs), index = "shannon")
Simpson.16S <- diversity(otu_table(ps.rarefied_16S_gbs), index = "simpson")
Pielou.16S <- Shannon.16S/log(specnumber(otu_table(ps.rarefied_16S_gbs)))
variables_16S <- sample_data(ps.rarefied_16S_gbs)

# Merge into one dataframe
Alpha_16S <- data.frame(variables_16S,Shannon.16S,Simpson.16S,Pielou.16S)
names(Alpha_16S)[names(Alpha_16S) == "Shannon.16S"] <- "Shannon"
names(Alpha_16S)[names(Alpha_16S) == "Simpson.16S"] <- "Simpson"
names(Alpha_16S)[names(Alpha_16S) == "Pielou.16S"] <- "Pielou"
Alpha_16S$Plot <- as.factor(Alpha_16S$Plot)

# Faith phylogenetic diversity
library(phangorn)
## 
## Attaching package: 'phangorn'
## The following objects are masked from 'package:vegan':
## 
##     diversity, treedist
class(rooted_tree_moss) # Check if the tree is rooted
## [1] "phylo"
phangorn::getRoot(rooted_tree_moss)
## [1] 1323
Faith_16S<- pd(otu_table(ps.rarefied_16S_gbs),rooted_tree_moss,include.root=F)
Alpha_16S <-data.frame(Alpha_16S,Faith_16S)
names(Alpha_16S)[names(Alpha_16S) == "PD"] <- "Faith"
names(Alpha_16S)[names(Alpha_16S) == "SR"] <- "Observed"

# Check the nunmber of samples per category
table(Alpha_16S$Habitat,Alpha_16S$Genotype)
##                
##                 AB CD
##   Forest tundra 12 19
##   Shrub tundra   7 15
# Manipulate data
data_long_16S_gen <- Alpha_16S %>%
  pivot_longer(cols = c(Shannon,Pielou,Faith),
               names_to = "Alpha_Index",
               values_to = "Alpha_Value")

# Order levels
data_long_16S_gen$Alpha_Index<-factor(data_long_16S_gen$Alpha_Index, levels = c("Shannon", "Pielou","Faith"))

# Create plot
Alpha_GBS_16S <- ggplot(data_long_16S_gen, aes(x = Habitat, y = Alpha_Value, fill = Genotype)) +
  geom_boxplot() +
  facet_wrap(~ Alpha_Index, scales = "free_y") +
  labs(title = "16S rRNA gene",
       x = NULL,
       y = "Alpha diversity value",
       fill = "Genetic group") +
  scale_fill_manual(values=c("gray","#4d4d4d")) +
  theme(legend.position = "bottom",  # Placing legend at the bottom
        plot.title = element_text(face= "bold"),  # Centering plot title
        panel.grid.major = element_blank(),  # Remove major grid lines
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(colour = "black"),
        axis.line.x.top = element_line(colour = "black"),
        strip.text = element_text(size=10, face= "bold"),
        strip.background=element_rect(colour="white", fill = NA),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Alpha_GBS_16S

# We can save the image in high quality formats

# ggsave("Figure_Clonality_Alpha_diversity_16S_Gen_habitat_rarefied.tiff", units="mm", width=160, height=150, dpi=300, compression = "lzw")

2.3 Effect of variables on alpha diversity (Linear Models and Linear Mixed Models)

We can evaluate the effect of Habitat and Genetic group, an their interaction on alpha diversity while accounting for potential random effects.

# 1. Choosing the GLMM family
# Since alpha diversity indices (e.g., Shannon, Simpson) are continuous variables, you can use:
# - Gaussian (normal) distribution with identity link (if residuals are normal).
# - Gamma or inverse Gaussian if data is right-skewed.
# - Poisson or negative binomial for count-based indices (e.g., Observed richness).

# 2. Running the LMMs (Linear Mixed Models)
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
## 
##     lmList
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
# Models

# Linear Mixed Model
# lmm_Shannon <- lmer(Shannon ~ Habitat * Genotype + (1 | Plot), data = Alpha_16S)
# VarCorr(lmm_Shannon) # Variance is 0 and then "is.singular". Try a linear model

# Linear model
lm_Shannon <- lm(Shannon ~ Habitat * Genotype , data = Alpha_16S)

# lmm_Pielou <- lmer(Pielou ~ Habitat * Genotype + (1 | Plot), data = Alpha_16S)
# VarCorr(lmm_Pielou) # Variance is 0 and then "is.singular". Try a linear model.

# Linear model
lm_Pielou <- lm(Pielou ~ Habitat * Genotype , data = Alpha_16S)

# Linear Mixed Model
lmm_Faith <- lmer(Faith ~ Habitat * Genotype + (1 | Plot), data = Alpha_16S)

# Checking the model summary
summary(lm_Shannon)
## 
## Call:
## lm(formula = Shannon ~ Habitat * Genotype, data = Alpha_16S)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.96421 -0.14625  0.00121  0.19523  0.57283 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     4.93755    0.08045  61.375   <2e-16 ***
## HabitatShrub tundra            -0.29055    0.13254  -2.192   0.0331 *  
## GenotypeCD                     -0.05847    0.10276  -0.569   0.5719    
## HabitatShrub tundra:GenotypeCD  0.22692    0.16381   1.385   0.1722    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2787 on 49 degrees of freedom
## Multiple R-squared:  0.09775,    Adjusted R-squared:  0.04251 
## F-statistic:  1.77 on 3 and 49 DF,  p-value: 0.1653
summary(lm_Pielou)
## 
## Call:
## lm(formula = Pielou ~ Habitat * Genotype, data = Alpha_16S)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15355 -0.01095  0.01026  0.02395  0.06351 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.88771    0.01178  75.349   <2e-16 ***
## HabitatShrub tundra            -0.01885    0.01941  -0.971    0.336    
## GenotypeCD                     -0.01193    0.01505  -0.793    0.432    
## HabitatShrub tundra:GenotypeCD  0.02981    0.02399   1.243    0.220    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04081 on 49 degrees of freedom
## Multiple R-squared:  0.03063,    Adjusted R-squared:  -0.02872 
## F-statistic: 0.516 on 3 and 49 DF,  p-value: 0.6732
summary(lmm_Faith )
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Faith ~ Habitat * Genotype + (1 | Plot)
##    Data: Alpha_16S
## 
## REML criterion at convergence: 150.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.83329 -0.68182  0.07466  0.66820  1.50114 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Plot     (Intercept) 0.3239   0.5692  
##  Residual             0.7867   0.8870  
## Number of obs: 53, groups:  Plot, 24
## 
## Fixed effects:
##                                Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)                     8.65477    0.31690 39.94266  27.311   <2e-16
## HabitatShrub tundra            -1.01272    0.51981 42.21767  -1.948   0.0581
## GenotypeCD                     -0.13996    0.36830 46.24375  -0.380   0.7057
## HabitatShrub tundra:GenotypeCD -0.04383    0.58509 45.02241  -0.075   0.9406
##                                   
## (Intercept)                    ***
## HabitatShrub tundra            .  
## GenotypeCD                        
## HabitatShrub tundra:GenotypeCD    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) HbttSt GntyCD
## HbttShrbtnd -0.610              
## GenotypeCD  -0.707  0.431       
## HbttStn:GCD  0.445 -0.745 -0.629
# Load the DHARMa package
# install.packages("DHARMa")  # If not installed
library(DHARMa)
## Warning: package 'DHARMa' was built under R version 4.4.2
## This is DHARMa 0.4.7. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
#  p > 0.05 indicates normality
#  p > 0.05 means variance is equal

# Shannon
res_lm_Shannon <- simulateResiduals(lm_Shannon)
plot(res_lm_Shannon)

# Pielou
res_lm_Pielou <- simulateResiduals(lm_Pielou)
plot(res_lm_Pielou)

# Faith
res_lmm_Faith <- simulateResiduals(lmm_Faith)
plot(res_lmm_Faith)

# Key assumption checks
# The residuals should follow a uniform distribution.
# If deviations exist, consider non-Gaussian families.
# testUniformity(res_lmm_Shannon) # Example

# Dispersion Test (Overdispersion)
# Detects if variance is higher than expected.
# Overdispersion suggests a negative binomial or quasi-Poisson model may be better.
# testDispersion(res_lmm_Shannon) # Example

# Check Overall Significan with ANOVA
anova(lm_Shannon)
anova(lm_Pielou)
anova(lmm_Faith)

3. Microbial profiling

For bacterial profiles, we use the non-rarefied ASV table. In order to plot the bacterial profile, we need to change the format of the data.

# Transform to relative abundance
Bacteria_gbs.ra<- transform_sample_counts(Bacteria_gbs, function(x){x / sum(x)})
# taxa_sums(Bacteria_gbs.ra) # To check if the count were transformed

# Format the data
Bacteria_gbs.ra_table <- psmelt(Bacteria_gbs.ra)

Now, we identify the most abundant phyla. We will group all phyla with an abundance lower than 0.01% as “Other”.

# Calculate total relative abundance for each Phylum. This will result in more than 1.00 per phylum because it sums the relative abundance per sample not per Phylum.
phylum_abundance_1 <- aggregate(Abundance ~ Phylum, data = Bacteria_gbs.ra_table, FUN = sum)

# Insert a column of the relative abundance per Phylum
phylum_abundance_1 <- mutate(phylum_abundance_1,percent = Abundance / sum(Abundance))

# Check the total abundance and percentage
sum(phylum_abundance_1$Abundance)
## [1] 53
sum(phylum_abundance_1$percent)
## [1] 1
# Identify Phyla with less than 0.01% abundance
low_abundance_phyla_1 <- phylum_abundance_1$Phylum[phylum_abundance_1$percent < 0.01]

# Group Phyla with less than 0.01% abundance as "Other (<0.01%)"
Bacteria_gbs.ra_table$Grouped_Phylum <- ifelse(Bacteria_gbs.ra_table$Phylum %in% low_abundance_phyla_1, "Other", Bacteria_gbs.ra_table$Phylum)
Bacteria_gbs.ra_table$Grouped_Phylum <-as.factor(Bacteria_gbs.ra_table$Grouped_Phylum)

# Calculate total abundance for each grouped Phylum
grouped_phylum_abundance_1 <- aggregate(Abundance ~ Grouped_Phylum, data = Bacteria_gbs.ra_table, FUN = sum)

# Reorder levels of Grouped_Phylum based on abundance
Bacteria_gbs.ra_table$Grouped_Phylum <- factor(Bacteria_gbs.ra_table$Grouped_Phylum, 
                                                       levels = grouped_phylum_abundance_1$Grouped_Phylum[order(desc(grouped_phylum_abundance_1$Abundance))])

# Reorder Phyla< 0.01
Bacteria_gbs.ra_table$Grouped_Phylum <- relevel(Bacteria_gbs.ra_table$Grouped_Phylum,
                                                        "Other")

# Subgroup to obtain relative abundance per group
Bacteria_gbs.ra_table_Samples <-Bacteria_gbs.ra_table %>%
  group_by(Sample) %>%
  mutate(Abundance = Abundance / sum(Abundance))

We plot the most abundant phyla

# Plot relative abundance per Phylum
BarPlot_bacteria_gbs_phylum <- Bacteria_gbs.ra_table_Samples %>%
  ggplot(aes(x = Sample, y = Abundance, fill = Grouped_Phylum)) +
  geom_bar(stat = "identity", position="stack") +
  labs(x = "Samples",
       y = "Relative Abundance",title= "16S rRNA gene",fill = "Phylum") +
  facet_grid(~ Habitat+Genotype, scales = "free", space='free') +
  theme_bw()+
  scale_fill_manual(values=Contrasting_palette_1)+
  theme(
    axis.text.y = element_text(size = 8,),
    axis.text.x = element_blank(),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank(),
    panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
    strip.background=element_rect(colour="white")
  )

BarPlot_bacteria_gbs_phylum

# ggsave("Figure_Microbial_profile_Genotype_16S_Phylum.tiff", units="mm", width=180, height=120, dpi=150, compression = "lzw")

Now, we can find the most abundant genera with a similar analysis.

# Calculate total relative abundance for each Genus. This will result in more than 1.00 per genus because it sums the relative abundance per sample not per Genus.
genus_abundance_3 <- aggregate(Abundance ~ Genus, data = Bacteria_gbs.ra_table, FUN = sum)

# Insert a column of the total relative abundance per Genus
genus_abundance_3 <- mutate(genus_abundance_3, percent = Abundance / sum(Abundance))

# Total ASV counts
sum(genus_abundance_3$Abundance)
## [1] 53
# Total relative abundance of all genera. Should be 1.
sum(genus_abundance_3$percent)
## [1] 1
# Identify the top 20 most abundant genera
top_20_genera_Moss_16S <- genus_abundance_3 %>%
  arrange(desc(Abundance)) %>%
  head(20) %>%
  pull(Genus)

# Group genera not in the top 20 as "Others"
Bacteria_gbs.ra_table$Grouped_Genus <- ifelse(
  Bacteria_gbs.ra_table$Genus %in% top_20_genera_Moss_16S, 
  Bacteria_gbs.ra_table$Genus, 
  "Others"
)

# Calculate total abundance for each grouped Genus
grouped_genus_abundance_3 <- aggregate(Abundance ~ Grouped_Genus, data = Bacteria_gbs.ra_table, FUN = sum)

# Reorder levels of Grouped_Genus based on abundance
Bacteria_gbs.ra_table$Grouped_Genus <- factor(Bacteria_gbs.ra_table$Grouped_Genus, 
                                                 levels = grouped_genus_abundance_3$Grouped_Genus[order(desc(grouped_genus_abundance_3$Abundance))])

Bacteria_gbs.ra_table$Grouped_Genus <- relevel(Bacteria_gbs.ra_table$Grouped_Genus,
                                                  "Others",)

#Change the name of the taxa.
Bacteria_gbs.ra_table<- Bacteria_gbs.ra_table %>%
  mutate(Grouped_Genus = recode(Grouped_Genus, 
                                `genus_Acidobacteriaceae_(Subgroup_1)` = "genus_Acidobacteriaceae_S1"))

# Subgroup to obtain relative abundance per group
Bacteria_gbs.ra_table_Samples <-Bacteria_gbs.ra_table %>%
  group_by(Sample) %>%
  mutate(Abundance = Abundance / sum(Abundance))

We plot the most abundant bacterial genera.

# Produce the plot
BarPlot_bacteria_gbs_genus <- Bacteria_gbs.ra_table_Samples %>%
  ggplot(aes(x = Sample, y = Abundance, fill = Grouped_Genus)) +
  geom_bar(stat = "identity", position="stack") +
  labs(x = "Samples",
       y = "Relative Abundance",title = "16S rRNA gene",fill = "Genus") +
  facet_grid(~ Habitat+Genotype, scales = "free", space='free') +
  theme_bw()+
  scale_fill_manual(values=Contrasting_palette_1)+
  theme(
    axis.text.y = element_text(size = 8,),
    axis.text.x = element_blank(),
    legend.text = element_text(size = 8),
    legend.key.size = unit(0.5, 'cm'),
    strip.text = element_text(size = 8),
    panel.background = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank(),
    panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
    strip.background=element_rect(colour="gray"),
  ) + guides(fill=guide_legend(ncol =1))

BarPlot_bacteria_gbs_genus

# ggsave("Figure_1-1_Relative_abundance_moss_Genotype_Habitat_Genus_16S.tiff", units="mm", width=230, height=200, dpi=300, compression = "lzw")

# ggsave("Figure_1-1_Relative_abundance_moss_Genotype_Habitat_Genus_16S.svg", units="mm", width=230, height=200, dpi=300)

We could present the same bar plot by pooling all the samples per habitat and genetic group

# Plot the mean per habitat and genetic group

# Normalize abundance within each Habitat and Genetic group
Bacteria_gbs.ra_table_Samples_Habitat_Genetic <- Bacteria_gbs.ra_table_Samples %>%
  group_by(Habitat,Genotype) %>%
  mutate(Abundance = Abundance / sum(Abundance))

# Produce the plot
BarPlot_bacteria_gbs_genus_Habitat_genotype <- Bacteria_gbs.ra_table_Samples_Habitat_Genetic  %>%
  ggplot(aes(x = Genotype, y = Abundance, fill = Grouped_Genus)) +
  geom_bar(stat = "identity", position="stack") +
  labs(x = "Genetic group",y = "Relative Abundance",fill = "Genus") +
  facet_grid(~ Habitat, scales = "free", space='free') +
  theme_bw()+
  scale_fill_manual(values=Contrasting_palette_1)+
  theme(
    axis.text.y = element_text(size = 8,),
    axis.ticks.x = element_blank(),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.key.size = unit(0.3, 'cm'),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
    strip.background=element_rect(colour=NA),
  ) + guides(fill=guide_legend(ncol =1))

BarPlot_bacteria_gbs_genus_Habitat_genotype 

# ggsave("Figure_1-2_Relative_abundance_moss_Genotype_Habitat_Genus_16S.tiff", units="mm", width=230, height=200, dpi=300, compression = "lzw")

# ggsave("Figure_1-2_Relative_abundance_moss_Genotype_Habitat_Genus_16S.svg", units="mm", width=230, height=200, dpi=300)

4. Beta diversity

We analyse the beta diversity of the bacterial community.We use the non-rarefied data but we normalize the ASV counts using Geometric Mean Pairwise Ratios (GMPR).

4.1 Ordination (NMDS)

We evaluate the variation of the bacterial communities using a non-metric multidimensional scaling ordination.

# NMDS using Bray-Curtis. Genotype clustering
# Ordination using normalized data

# Normalization (GMPR)
# Extract and transform the non rarefaction ASV table
ASV_table_Bacteria_gbs <- as(t(otu_table(Bacteria_gbs)), "matrix")

# Calculating the GMPR size factors
# install.packages("GUniFrac")
library(GUniFrac)
## Registered S3 method overwritten by 'rmutil':
##   method         from
##   print.response httr
ASV_table_Bacteria_gbs_GMPR_factors <- GMPR(ASV_table_Bacteria_gbs)

# Normalize using the size factors. Divide the sample counts by the size factors.
ASV_table_Bacteria_gbs_normalized_GMPR <- sweep(ASV_table_Bacteria_gbs, 2, ASV_table_Bacteria_gbs_GMPR_factors[colnames(ASV_table_Bacteria_gbs)], FUN = "/")

# Calculate Bray-Distance matrix
bray_dist_1 <- vegdist(t(ASV_table_Bacteria_gbs_normalized_GMPR), method = "bray", binary = FALSE)

# Check quality of the data
library(pheatmap)
pheatmap(as.matrix(bray_dist_1))

# Plot relative abundance of the top taxa
# plot_bar(Bacteria_gbs, x = "Sample", fill = "Phylum") +
#   theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Transform to ASV_Table
ASV_table_Bacteria_gbs_normalized_GMPR<- otu_table(ASV_table_Bacteria_gbs_normalized_GMPR,taxa_are_rows = TRUE)

# Phyloseq normalized (GMPR)
ps_Bacteria_gbs_norm_GMPR <-phyloseq(ASV_table_Bacteria_gbs_normalized_GMPR,Tax_table_moss_16S,phylo_moss2@sam_data)
ps_Bacteria_gbs_norm_GMPR
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1322 taxa and 53 samples ]
## sample_data() Sample Data:       [ 53 samples by 15 sample variables ]
## tax_table()   Taxonomy Table:    [ 1322 taxa by 6 taxonomic ranks ]
ps_Bacteria_gbs_norm_GMPR <- subset_taxa(ps_Bacteria_gbs_norm_GMPR , !is.na(Phylum))
ps_Bacteria_gbs_norm_GMPR
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1322 taxa and 53 samples ]
## sample_data() Sample Data:       [ 53 samples by 15 sample variables ]
## tax_table()   Taxonomy Table:    [ 1322 taxa by 6 taxonomic ranks ]

Using the Bray-Curtis matrix of the normalized ASV counts, we perform a NMDS ordination.

# NMDS
set.seed(123456)
ord.nmds.bray_16S <- ordinate(ps_Bacteria_gbs_norm_GMPR, method="NMDS", distance = bray_dist_1) # Using the Bray-Curtis distances that we manually estimate
## Run 0 stress 0.1909486 
## Run 1 stress 0.1909436 
## ... New best solution
## ... Procrustes: rmse 0.002023078  max resid 0.01064772 
## Run 2 stress 0.1909438 
## ... Procrustes: rmse 0.0001359846  max resid 0.0006681172 
## ... Similar to previous best
## Run 3 stress 0.2444351 
## Run 4 stress 0.1909815 
## ... Procrustes: rmse 0.01126807  max resid 0.06592888 
## Run 5 stress 0.1909436 
## ... Procrustes: rmse 1.565541e-05  max resid 7.494192e-05 
## ... Similar to previous best
## Run 6 stress 0.1918951 
## Run 7 stress 0.1917329 
## Run 8 stress 0.1909437 
## ... Procrustes: rmse 0.000117161  max resid 0.0005789228 
## ... Similar to previous best
## Run 9 stress 0.1931847 
## Run 10 stress 0.2224044 
## Run 11 stress 0.1909815 
## ... Procrustes: rmse 0.0112785  max resid 0.06599578 
## Run 12 stress 0.1909814 
## ... Procrustes: rmse 0.01126288  max resid 0.06595266 
## Run 13 stress 0.1917329 
## Run 14 stress 0.1919353 
## Run 15 stress 0.1917329 
## Run 16 stress 0.1989801 
## Run 17 stress 0.2224044 
## Run 18 stress 0.1948079 
## Run 19 stress 0.1909814 
## ... Procrustes: rmse 0.01126919  max resid 0.06593713 
## Run 20 stress 0.1909436 
## ... Procrustes: rmse 7.336368e-06  max resid 2.838137e-05 
## ... Similar to previous best
## *** Best solution repeated 4 times
ord.nmds.bray_16S$stress
## [1] 0.1909436
stressplot(ord.nmds.bray_16S)

# Create the plot
NMDS.BC_Bacteria_gbs <- plot_ordination(ps_Bacteria_gbs_norm_GMPR, ord.nmds.bray_16S, color="Genotype", shape = "Habitat")
NMDS.BC_Bacteria_gbs <- NMDS.BC_Bacteria_gbs + 
  stat_ellipse(type = "norm", linetype = 2) +
  theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank()) + 
  geom_point(size=2) +
  scale_color_manual(values=c("gray","#363636"))+
  labs(caption="2D, stress =  0.1909", color = "Genetic group") + 
  guides(color = guide_legend(order = 1), shape = guide_legend(order = 2))

NMDS.BC_Bacteria_gbs

# ggsave("Figure_2_NMDS_moss_Genotype_Habitat_16S.tiff", units="mm", width=230, height=120, dpi=150, compression = "lzw")

4.2 PERMANOVA

We evaluate how the variation is affected by the habitat and genetic group (genotype).

permanova_16S_gen<- adonis2(t(ps_Bacteria_gbs_norm_GMPR@otu_table) ~ ps_Bacteria_gbs_norm_GMPR@sam_data$Habitat * ps_Bacteria_gbs_norm_GMPR@sam_data$Genotype, permutations = perm, method = "bray",strata = ps_Bacteria_gbs_norm_GMPR@sam_data$Plot)
permanova_16S_gen
# strata = Microbiome_gbs@sam_data$Plot 
# This adjustment ensures that the permutations are performed within each plot, accounting for the potential non-independence of samples within the same plot.

# Check variance heterogenity or dispersion among groups
set.seed(123)
BC_16S_beta_disper_1 <- betadisper(bray_dist_1,ps_Bacteria_gbs_norm_GMPR@sam_data$Gen_Habitat)
permutest(BC_16S_beta_disper_1)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups     3 0.008299 0.0027662 0.5968    999  0.624
## Residuals 49 0.227121 0.0046351
# The test indicate that the there is not enough proofs 
# to confirm that the homogeneity of multivariate dispersion between groups differ

5. Differentially abundant bacteria

We can identify groups that are more abundant between habitats or genetic groups. In this case, we use ANCOMBC-2 to identify differential abundant bacterial genera with bias corrections.

We use the non-rarefied count because ANCOMBC-2 normalize the data before run the analysis.

#if (!require("BiocManager", quietly = TRUE))
#  install.packages("BiocManager")
#BiocManager::install("ANCOMBC")

library(ANCOMBC)

# Check the number of samples per category
table(Bacteria_gbs@sam_data$Habitat,Bacteria_gbs@sam_data$Genotype)
##                
##                 AB CD
##   Forest tundra 12 19
##   Shrub tundra   7 15
# Check the number of sequences per sample
sum_seq <- rowSums(Bacteria_gbs@otu_table)
plot(sort(sum_seq), ylim=c(0,25000), main=c("Number of counts per each sample"), xlab=c("Sample position"))

# Run ANCOM-BC2 for the whole dataset
set.seed(123)
out_16S_gbs  = ancombc2(
  data = Bacteria_gbs,
  tax_level = "Genus",
  fix_formula = "Habitat + Genotype",
  group = "Genotype",
  p_adj_method = "fdr",
  global = TRUE,
  verbose = TRUE
)
## Warning: The group variable has < 3 categories 
## The multi-group comparisons (global/pairwise/dunnet/trend) will be deactivated
## Obtaining initial estimates ...
## Estimating sample-specific biases ...
## Loading required package: foreach
## 
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## Loading required package: rngtools
## Sensitivity analysis for pseudo-count addition to 0s: ...
## ANCOM-BC2 primary results ...
# Family level
# out_16S_gbs_Family  = ancombc2(
#   data = Bacteria_gbs,
#   tax_level = "Family",
#   fix_formula = "Habitat + Genotype",
#   group = "Genotype",
#   p_adj_method = "fdr",
#   global = TRUE,
#   verbose = TRUE
# )

# prv_cut = 0.05, In this case, we already filtered ASVs by prevalence (previous step in Escolastico-Ortiz et al. 2023. Symbiosis.).
# Library size cut less than 1000 reads.
# If we applied the struc_zero we will find taxa that occur differentially based on presence/absence.

# Create a new data frame with the results
res_16S_gbs <- out_16S_gbs$res

# Family level
# res_16S_gbs_Family <- out_16S_gbs_Family$res

# bias_correct_log_table <- out_16S_gbs$bias_correct_log_table
# bias_correct_log_table[is.na(bias_correct_log_table)] = 0

# It is important to acknowledge that the estimation of sampling fractions in ANCOM-BC2 is limited to an additive constant. 
# This means that only the difference between bias-corrected log abundances is meaningful, rather than the absolute values themselves.

# By default, ANCOM-BC2 does not add pseudo-counts to zero counts, which can 
# result in NAs in the bias-corrected log abundances. Users have the option to 
# either leave the NAs as they are or replace them with zeros. 
# This replacement is equivalent to adding pseudo-counts of ones to the zero counts.

# The p-values from ANCOM-BC2, both with and without sensitivity analysis, exhibit a nearly uniform distribution.
# Notably, enabling sensitivity analysis significantly reduces the number of significant p-values, thereby decreasing the risk of false positives.

Now, we create a function that extract the results.

plot_differential_abundance <- function(df_Group, res_data, taxon_col, lfc_col, q_col, grouping_var, 
                                       passed_col = NULL) {
  
  # Create another data frame with the relevant grouping results
  df_group_res <- res_data %>%
    dplyr::select({{taxon_col}}, contains({{grouping_var}}), {{passed_col}})
  
# Add association based on significance, using custom group names
  df_group_res <- df_group_res %>%
    dplyr::mutate(
      association = dplyr::case_when(
        !!rlang::sym(q_col) < 0.05 & !!rlang::sym(lfc_col) > 0 ~ "Enriched",
        !!rlang::sym(q_col) < 0.05 & !!rlang::sym(lfc_col) < 0 ~ "Depleted",
        TRUE ~ "Not significant"
      ),
      font = ifelse(!is.null(passed_col) & !!rlang::sym(passed_col), "bold", "plain"),
      color = ifelse(!is.null(passed_col) & !!rlang::sym(passed_col), "blue", "black"),
    )
  
  # Extract differentially abundant groups
  results_sig <- df_group_res[df_group_res[[q_col]] < 0.05,]
  table(results_sig$association)
  
  # Order the data
  results_sig <- results_sig[order(results_sig$association, results_sig[[lfc_col]]),]
  desired_order <- results_sig[[taxon_col]]
  
  # Convert 'Genera' (taxon_col) to a factor with the specified order
  results_sig[[taxon_col]] <- factor(results_sig[[taxon_col]], levels = desired_order)

# Return the results as a list
  return(list(df_group_res = df_group_res, results_sig = results_sig))
}

# Example usage:
# plot_differential_abundance(df_Genotype, res_16S_gbs, "taxon", "lfc_GenotypeCD", "q_GenotypeCD", "Genotype", passed_col= "passed_ss_GenotypeCD")

We use the results of our function to plot the differentially abundant taxa.

# Run Script to produce the data
Results_ANCOMBC2_16S <- plot_differential_abundance(df_Genotype, res_16S_gbs, "taxon", "lfc_GenotypeCD", "q_GenotypeCD", "Genotype", passed_col= "passed_ss_GenotypeCD")

# Family level
# Results_ANCOMBC2_Family <- plot_differential_abundance(df_Genotype, res_16S_gbs_Family, "taxon", "lfc_GenotypeCD", "q_GenotypeCD", "Genotype", passed_col= "passed_ss_GenotypeCD")

# Store only the significant taxa data
Result_sig_ANCOMBC2_16S <- Results_ANCOMBC2_16S$results_sig

# Plot the significant differential abundant taxa
library(ggrepel)  # For better label positioning

# Extract the labels for the significant taxa
sig_labels_16S <- ifelse(Results_ANCOMBC2_16S$df_group_res$diff_GenotypeCD == TRUE,Results_ANCOMBC2_16S$df_group_res$taxon, NA)

# Create plot for significant abundant taxa
Plot_significant_taxa_16S <- ggplot(Results_ANCOMBC2_16S$df_group_res, aes(x = lfc_GenotypeCD, y = -log10(q_GenotypeCD))) +
  geom_point(aes(color = diff_GenotypeCD)) +
  scale_color_manual(values = c("grey", "#4d9221")) +
  labs(x = "Log2 Fold Change", y = "-Log10 Adjusted p-value",color= "Differentially abundant", title= "16S rRNA gene") +
  theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank()) +
  geom_vline(xintercept = 0, color = "darkgrey", linewidth = 0.5) +
  geom_vline(xintercept = -1, linetype = "dashed", color = "darkgrey", linewidth = 0.5) +  # LFC = -1
  geom_vline(xintercept = 1, linetype = "dashed", color = "darkgrey", linewidth = 0.5) +   # LFC = 1
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "darkgrey", linewidth = 0.5) +  # P-value = 0.05 threshold
  geom_text_repel(aes(label = sig_labels_16S), na.rm = TRUE,size = 2.5)  # Only labels significant genes

Plot_significant_taxa_16S
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# ggsave("Figure_Significant_DA_taxa_Genotype_16S.tiff", units="mm", width=200, height=150, dpi=150, compression = "lzw")


# We plot the log2 fold changes in all the differential abundant genera of the genetic group CD compare to AB.

# Order levels according to logfold changes
Result_sig_ANCOMBC2_16S$association <- factor(Result_sig_ANCOMBC2_16S$association, 
                                                 levels = c("Enriched", "Depleted"))

# Plot log2 fold changes
Plot_lfc <- ggplot(Result_sig_ANCOMBC2_16S, aes(x= lfc_GenotypeCD, y = taxon, color = association)) +
  geom_point() + 
  geom_segment(aes(x = 0, xend = lfc_GenotypeCD, y = taxon, yend = taxon)) +
  geom_vline(xintercept = 0, size = 0.3) + 
  xlab("log2 Fold Change") + ylab(NULL) +
  labs(title="16S rRNA gene (CD compared to AB)") +
  theme_classic() +
  scale_color_manual(values = c("Depleted" = "#d73027", "Enriched" = "#4575b4")) +  # Custom colors
  theme(
    legend.title = element_blank(),
    axis.text.y = element_text(face = Result_sig_ANCOMBC2_16S$font,color = Result_sig_ANCOMBC2_16S$color))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Vectorized input to `element_text()` is not officially supported.
## ℹ Results may be unexpected or may change in future versions of ggplot2.
Plot_lfc

# ggsave("Figure_3_ANCOMBC2_moss_Genotype_16S.tiff", units="mm", width=180, height=150, dpi=150, compression = "lzw")

We can extract the sequences that belong to those genera and use BLASTn to explore their identity.

# if (!requireNamespace("Biostrings", quietly = TRUE)) {
#   install.packages("Biostrings")
# }

library(Biostrings)

# Step 1: Extract taxonomy and filter by genus
tax_table_df <- as.data.frame(tax_table(Bacteria_gbs))  # Convert taxonomy table to dataframe
Target_asvs <- rownames(tax_table_df[tax_table_df$Genus == "genus_Fimbriimonadaceae", , drop = FALSE]) # Example the DA genera
Target_asvs
## [1] "ASV2179" "ASV7219" "ASV7221"
# Step 2: Extract sequences for the ASVs
if (!is.null(refseq(Bacteria_gbs))) {
  Target_seqs <- refseq(Bacteria_gbs)[Target_asvs]  # Extract sequences
} else {
  stop("No reference sequences found in the phyloseq object.")
}

# View the sequences
Target_seqs
## DNAStringSet object of length 3:
##     width seq                                               names               
## [1]   421 GCTGCAGTTGGGAATCTTGCACA...GGGTAGCAAACAGGATTAGATAC ASV2179
## [2]   421 GCAGCAGTTGGGAATCTTGCACA...GGGTAGCAAACAGGATTAGATAC ASV7219
## [3]   421 GCTGCAGTTGGGAATCTTGCACA...GGGTAGCAAACAGGATTAGATAC ASV7221
# Define output file
output_fasta <- "Target_asvs.fasta"

# Convert sequences to DNAStringSet
Target_dna <- DNAStringSet(Target_seqs)

# Write to FASTA file
writeXStringSet(Target_dna, filepath = output_fasta)

Session information

sessionInfo()
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_Canada.utf8  LC_CTYPE=English_Canada.utf8   
## [3] LC_MONETARY=English_Canada.utf8 LC_NUMERIC=C                   
## [5] LC_TIME=English_Canada.utf8    
## 
## time zone: America/Toronto
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] Biostrings_2.72.1   GenomeInfoDb_1.40.1 XVector_0.44.0     
##  [4] IRanges_2.38.1      S4Vectors_0.42.1    BiocGenerics_0.50.0
##  [7] ggrepel_0.9.5       doRNG_1.8.6         rngtools_1.5.2     
## [10] foreach_1.5.2       ANCOMBC_2.2.2       pheatmap_1.0.12    
## [13] GUniFrac_1.8        DHARMa_0.4.7        lmerTest_3.1-3     
## [16] lme4_1.1-35.5       Matrix_1.7-0        phangorn_2.12.1    
## [19] MASS_7.3-61         picante_1.8.2       nlme_3.1-165       
## [22] ape_5.8             phyloseq_1.48.0     vegan_2.6-6.1      
## [25] lattice_0.22-6      permute_0.9-7       multcompView_0.1-10
## [28] FSA_0.9.5           viridisLite_0.4.2   RColorBrewer_1.1-3 
## [31] lubridate_1.9.3     forcats_1.0.0       stringr_1.5.1      
## [34] dplyr_1.1.4         purrr_1.0.2         tidyr_1.3.1        
## [37] tibble_3.2.1        tidyverse_2.0.0     ggpubr_0.6.0       
## [40] ggplot2_3.5.1       readr_2.1.5        
## 
## loaded via a namespace (and not attached):
##   [1] fs_1.6.5                        matrixStats_1.3.0              
##   [3] DirichletMultinomial_1.46.0     httr_1.4.7                     
##   [5] doParallel_1.0.17               numDeriv_2016.8-1.1            
##   [7] tools_4.4.1                     backports_1.5.0                
##   [9] utf8_1.2.4                      R6_2.5.1                       
##  [11] lazyeval_0.2.2                  mgcv_1.9-1                     
##  [13] rhdf5filters_1.16.0             withr_3.0.2                    
##  [15] gridExtra_2.3                   cli_3.6.3                      
##  [17] Biobase_2.64.0                  sandwich_3.1-1                 
##  [19] labeling_0.4.3                  sass_0.4.9                     
##  [21] mvtnorm_1.3-1                   proxy_0.4-27                   
##  [23] yulab.utils_0.1.7               foreign_0.8-87                 
##  [25] scater_1.32.1                   decontam_1.24.0                
##  [27] readxl_1.4.3                    rstudioapi_0.17.1              
##  [29] generics_0.1.3                  gtools_3.9.5                   
##  [31] vroom_1.6.5                     car_3.1-3                      
##  [33] inline_0.3.19                   biomformat_1.32.0              
##  [35] ggbeeswarm_0.7.2                fansi_1.0.6                    
##  [37] DescTools_0.99.57               DECIPHER_3.0.0                 
##  [39] abind_1.4-8                     lifecycle_1.0.4                
##  [41] multcomp_1.4-26                 yaml_2.3.10                    
##  [43] carData_3.0-5                   SummarizedExperiment_1.34.0    
##  [45] rhdf5_2.48.0                    SparseArray_1.4.8              
##  [47] grid_4.4.1                      crayon_1.5.3                   
##  [49] beachmat_2.20.0                 pillar_1.9.0                   
##  [51] knitr_1.48                      GenomicRanges_1.56.1           
##  [53] statip_0.2.3                    boot_1.3-31                    
##  [55] gld_2.6.6                       codetools_0.2-20               
##  [57] fastmatch_1.1-4                 glue_1.7.0                     
##  [59] data.table_1.15.4               MultiAssayExperiment_1.30.3    
##  [61] vctrs_0.6.5                     treeio_1.28.0                  
##  [63] Rdpack_2.6.1                    cellranger_1.1.0               
##  [65] gtable_0.3.6                    cachem_1.1.0                   
##  [67] xfun_0.46                       rbibutils_2.3                  
##  [69] S4Arrays_1.4.1                  modeest_2.4.0                  
##  [71] survival_3.7-0                  timeDate_4041.110              
##  [73] SingleCellExperiment_1.26.0     iterators_1.0.14               
##  [75] bluster_1.14.0                  statmod_1.5.0                  
##  [77] gmp_0.7-5                       TH.data_1.1-2                  
##  [79] gap_1.6                         bit64_4.0.5                    
##  [81] fBasics_4041.97                 bslib_0.8.0                    
##  [83] irlba_2.3.5.1                   vipor_0.4.7                    
##  [85] rpart_4.1.23                    DBI_1.2.3                      
##  [87] colorspace_2.1-1                Hmisc_5.2-0                    
##  [89] nnet_7.3-19                     ade4_1.7-22                    
##  [91] Exact_3.3                       tidyselect_1.2.1               
##  [93] timeSeries_4041.111             bit_4.0.5                      
##  [95] compiler_4.4.1                  htmlTable_2.4.3                
##  [97] BiocNeighbors_1.22.0            expm_1.0-0                     
##  [99] DelayedArray_0.30.1             checkmate_2.3.2                
## [101] scales_1.3.0                    quadprog_1.5-8                 
## [103] spatial_7.3-17                  digest_0.6.36                  
## [105] minqa_1.2.8                     rmarkdown_2.29                 
## [107] htmltools_0.5.8.1               pkgconfig_2.0.3                
## [109] base64enc_0.1-3                 sparseMatrixStats_1.16.0       
## [111] MatrixGenerics_1.16.0           highr_0.11                     
## [113] stabledist_0.7-2                fastmap_1.2.0                  
## [115] rlang_1.1.4                     htmlwidgets_1.6.4              
## [117] UCSC.utils_1.0.0                DelayedMatrixStats_1.26.0      
## [119] farver_2.1.2                    jquerylib_0.1.4                
## [121] zoo_1.8-12                      jsonlite_1.8.8                 
## [123] energy_1.7-12                   BiocParallel_1.38.0            
## [125] BiocSingular_1.20.0             magrittr_2.0.3                 
## [127] Formula_1.2-5                   scuttle_1.14.0                 
## [129] GenomeInfoDbData_1.2.12         Rhdf5lib_1.26.0                
## [131] munsell_0.5.1                   Rcpp_1.0.13                    
## [133] viridis_0.6.5                   CVXR_1.0-14                    
## [135] stringi_1.8.4                   rootSolve_1.8.2.4              
## [137] stable_1.1.6                    zlibbioc_1.50.0                
## [139] plyr_1.8.9                      parallel_4.4.1                 
## [141] lmom_3.2                        splines_4.4.1                  
## [143] multtest_2.60.0                 hms_1.1.3                      
## [145] igraph_2.0.3                    ggsignif_0.6.4                 
## [147] reshape2_1.4.4                  ScaledMatrix_1.12.0            
## [149] rmutil_1.1.10                   evaluate_1.0.1                 
## [151] nloptr_2.1.1                    tzdb_0.4.0                     
## [153] clue_0.3-65                     rsvd_1.0.5                     
## [155] broom_1.0.7                     gap.datasets_0.0.6             
## [157] Rmpfr_0.9-5                     e1071_1.7-16                   
## [159] tidytree_0.4.6                  rstatix_0.7.2                  
## [161] class_7.3-22                    gsl_2.1-8                      
## [163] beeswarm_0.4.0                  cluster_2.1.6                  
## [165] TreeSummarizedExperiment_2.12.0 timechange_0.3.0               
## [167] mia_1.12.0