#! /usr/bin/env Rscript
library(microbiome)
library(vegan)
library("ggpubr") # required for stat_compare_means
library(SpiecEasi)
library(Matrix)

colorgroup="Group"

otu_table=read.table(file="otu_table.txt",sep="\t",header=TRUE)
tax_table=read.table(file="tax_table.txt",sep="\t",header=TRUE)
meta_table=read.table(file="meta_table.txt",sep="\t",header=TRUE,colClasses="factor",row.names=1)
meta_table[ncol(meta_table)+1]=meta_table[ncol(meta_table)] #nmds plot will have no color if only one column

colnames(meta_table)[which(colnames(meta_table)==colorgroup)]="Group"

otu=otu_table(as.matrix(otu_table),taxa_are_rows=TRUE)
tax=tax_table(as.matrix(tax_table))
sampledata=sample_data(meta_table)
physeq=phyloseq(otu,tax,sampledata)
#reorder factors based on order in Group column
sample_data(physeq)$Group<-factor(sample_data(physeq)$Group, level=unique(sample_data(physeq)$Group))

################ diversity plot functions and outputs #######################


# Function to compute alpha/beta diversity summaries, save plots/tables, and prepare physeq1 for networks
run_diversity_outputs <- function(psglom, taxcolumn, fld, level, group_col) {
	##########################
	ps1=psglom
	tax_table(ps1)[,1]=taxcolumn
	tax_otu=merge(tax_table(ps1)[,1],otu_table(ps1),by="row.names")
	colnames(tax_otu)[1]="TaxonID"
	colnames(tax_otu)[2]=level
	write.table(tax_otu,paste0(fld,"tax_otu_count_",level,".txt"),sep="\t",quote=F,row.names=F)
	write.csv(tax_otu,paste0(fld,"tax_otu_count_",level,".csv"),quote=F,row.names=F)
	###
	ps1 <- microbiome::transform(psglom, "compositional")
	tax_table(ps1)[,1]=taxcolumn
	tax_otu=merge(tax_table(ps1)[,1],otu_table(ps1),by="row.names")
	colnames(tax_otu)[1]="TaxonID"
	colnames(tax_otu)[2]=level
	write.table(tax_otu,paste0(fld,"tax_otu_rel_",level,".txt"),sep="\t",quote=F,row.names=F)
	write.csv(tax_otu,paste0(fld,"tax_otu_rel_",level,".csv"),quote=F,row.names=F)
	###
	ps1 <- microbiome::transform(psglom, "clr")
	tax_table(ps1)[,1]=taxcolumn
	tax_otu=merge(tax_table(ps1)[,1],otu_table(ps1),by="row.names")
	colnames(tax_otu)[1]="TaxonID"
	colnames(tax_otu)[2]=level
	write.table(tax_otu,paste0(fld,"tax_otu_clr_",level,".txt"),sep="\t",quote=F,row.names=F)
	write.csv(tax_otu,paste0(fld,"tax_otu_clr_",level,".csv"),quote=F,row.names=F)
	##############################
	# Alpha diversity
	alpha <- estimate_richness(psglom, measures = c("Observed","Shannon","Simpson"))
	alpha_out <- data.frame(SampleID = rownames(alpha), alpha, row.names = NULL, check.names = FALSE)
	write.table(alpha_out,
		file = paste0(fld, "alpha_observed_shannon_simpson_", level, ".txt"),
		sep = "\t", quote = FALSE, row.names = FALSE)
	p_rich <- plot_richness(psglom, x = group_col,
		measures = c("Observed","Shannon","Simpson"),
		title = paste0(level, " Richness"),
		color = group_col) +
		geom_boxplot() +
		theme_bw() +
		xlab(group_col) +
		stat_compare_means(label.y = 1.5, size = 3)
	svg(paste0(fld,"alpha_diversity_plots_",level,".svg"), width=8, height=4); print(p_rich); dev.off()
	pdf(paste0(fld,"alpha_diversity_plots_",level,".pdf"), width=8, height=4); print(p_rich); dev.off()
	png(paste0(fld,"alpha_diversity_plots_",level,".png"), width=800, height=400); print(p_rich); dev.off()
	# Beta diversity - Bray-Curtis
	dist_bray <- distance(psglom, method = "bray")
	write.table(as.matrix(dist_bray),
		paste0(fld,"beta_distance_bray_",level,".txt"),
		sep = "\t", quote = FALSE, col.names = NA)
	ad_bray <- adonis2(dist_bray ~ get(group_col),
		data = as(sample_data(psglom), "data.frame"),
		na.action = na.pass)
	ad_bray_pval <- ad_bray$`Pr(>F)`[1]
	ord_bray_nmds <- ordinate(psglom, "NMDS", "bray")
	p_nmds_bray <- plot_ordination(psglom, ord_bray_nmds, type="samples", color=group_col) +
		geom_point(size=2) +
		ggtitle("NMDS Plot with Bray-Curtis Dissimilarity Metric",
		subtitle = paste0("PERMANOVA p = ", signif(ad_bray_pval, 3))) +
		stat_ellipse() +
		theme_bw()
	svg(paste0(fld,"beta_NMDS_bray_",level,".svg"),width=6,height=4); print(p_nmds_bray); dev.off()
	pdf(paste0(fld,"beta_NMDS_bray_",level,".pdf"),width=6,height=4); print(p_nmds_bray); dev.off()
	png(paste0(fld,"beta_NMDS_bray_",level,".png"),width=600,height=400); print(p_nmds_bray); dev.off()
	ord_bray_pcoa <- ordinate(psglom, "PCoA", "bray")
	p_pcoa_bray <- plot_ordination(psglom, ord_bray_pcoa, type="samples", color=group_col) +
		geom_point(size=2) +
		ggtitle("PCoA Plot with Bray-Curtis Dissimilarity Metric",
				 subtitle = paste0("PERMANOVA p = ", signif(ad_bray_pval, 3))) +
		stat_ellipse() +
		theme_bw()
	svg(paste0(fld,"beta_PCoA_bray_",level,".svg"),width=6,height=4); print(p_pcoa_bray); dev.off()
	pdf(paste0(fld,"beta_PCoA_bray_",level,".pdf"),width=6,height=4); print(p_pcoa_bray); dev.off()
	png(paste0(fld,"beta_PCoA_bray_",level,".png"),width=600,height=400); print(p_pcoa_bray); dev.off()
	# Beta diversity - CLR + Euclidean (Aitchison)
	physeq_clr <- microbiome::transform(psglom, "clr")
	dist_clr <- distance(physeq_clr, method = "euclidean")
	write.table(as.matrix(dist_clr),
		paste0(fld,"beta_distance_clr_euclidean_",level,".txt"),
		sep = "\t", quote = FALSE, col.names = NA)
	ad_clr <- adonis2(dist_clr ~ get(group_col),
		data = as(sample_data(physeq_clr), "data.frame"))
	ad_clr_pval <- ad_clr$`Pr(>F)`[1]
	ord_clr_nmds <- ordinate(physeq_clr, "NMDS", "euclidean")
	p_nmds_clr <- plot_ordination(physeq_clr, ord_clr_nmds, type="samples", color=group_col) +
		geom_point(size=2) +
		ggtitle("NMDS CLR Plot with Aitchison(Euclidean) Distance Metric",
		subtitle = paste0("PERMANOVA p = ", signif(ad_clr_pval, 3))) +
		stat_ellipse() +
		theme_bw()
	svg(paste0(fld,"beta_NMDS_clr_euclidean_",level,".svg"),width=6,height=4); print(p_nmds_clr); dev.off()
	pdf(paste0(fld,"beta_NMDS_clr_euclidean_",level,".pdf"),width=6,height=4); print(p_nmds_clr); dev.off()
	png(paste0(fld,"beta_NMDS_clr_euclidean_",level,".png"),width=600,height=400); print(p_nmds_clr); dev.off()
	ord_clr_pcoa <- ordinate(physeq_clr, "PCoA", "euclidean")
	p_pcoa_clr <- plot_ordination(physeq_clr, ord_clr_pcoa, type="samples", color=group_col) +
		geom_point(size=2) +
		ggtitle("PCoA CLR Plot with Aitchison(Euclidean) Distance Metric",
		subtitle = paste0("PERMANOVA p = ", signif(ad_clr_pval, 3))) +
		stat_ellipse() +
		theme_bw()
	svg(paste0(fld,"beta_PCoA_clr_euclidean_",level,".svg"),width=6,height=4); print(p_pcoa_clr); dev.off()
	pdf(paste0(fld,"beta_PCoA_clr_euclidean_",level,".pdf"),width=6,height=4); print(p_pcoa_clr); dev.off()
	png(paste0(fld,"beta_PCoA_clr_euclidean_",level,".png"),width=600,height=400); print(p_pcoa_clr); dev.off()
}


#############################################################################

dir.create("all_group",showWarnings = FALSE)
fld="all_group/"

group_col="Group"

level="Species"
psglom <- physeq
spnames <- paste(tax_table(physeq)[,6], tax_table(physeq)[,7], sep = " ")
tax_table(psglom)[,7] <- spnames
pstax=tax_table(psglom)
taxcolumn<-paste(pstax[,1],pstax[,2],pstax[,3],pstax[,4],pstax[,5],pstax[,6],pstax[,7],sep=";")
run_diversity_outputs(psglom, taxcolumn, fld, level, group_col)


level="Genus"
psglom<-tax_glom(physeq,taxrank=level)
pstax=tax_table(psglom)
taxcolumn<-paste(pstax[,1],pstax[,2],pstax[,3],pstax[,4],pstax[,5],pstax[,6],sep=";")
run_diversity_outputs(psglom, taxcolumn, fld, level, group_col)

level="Family"
psglom<-tax_glom(physeq,taxrank=level)
pstax=tax_table(psglom)
taxcolumn<-paste(pstax[,1],pstax[,2],pstax[,3],pstax[,4],pstax[,5],sep=";")
run_diversity_outputs(psglom, taxcolumn, fld, level, group_col)

level="Order"
psglom<-tax_glom(physeq,taxrank=level)
pstax=tax_table(psglom)
taxcolumn<-paste(pstax[,1],pstax[,2],pstax[,3],pstax[,4],sep=";")
run_diversity_outputs(psglom, taxcolumn, fld, level, group_col)

level="Class"
psglom<-tax_glom(physeq,taxrank=level)
pstax=tax_table(psglom)
taxcolumn<-paste(pstax[,1],pstax[,2],pstax[,3],sep=";")
run_diversity_outputs(psglom, taxcolumn, fld, level, group_col)

level="Phylum"
psglom<-tax_glom(physeq,taxrank=level)
pstax=tax_table(psglom)
taxcolumn<-paste(pstax[,1],pstax[,2],sep=";")
run_diversity_outputs(psglom, taxcolumn, fld, level, group_col)


#######################################################################################

physeq1 <- physeq
spnames <- paste(taxa_names(physeq1), tax_table(physeq)[,6], tax_table(physeq)[,7], sep = " ")
tax_table(physeq1)[,7] <- spnames
# SpiecEasi network plots
se.mb <- spiec.easi(physeq1, method='mb', lambda.min.ratio=1e-2,nlambda=20, icov.select.params=list(rep.num=50))
ig2.mb <- adj2igraph(se.mb$refit$stars,vertex.attr=list(name=taxa_names(physeq1)))
p_network.mb <- plot_network(ig2.mb, physeq1, type='taxa', color="Species",title="Species Network Association - MB")+ theme(legend.position = "bottom") + theme(legend.text = element_text(size=5)) + guides(colour = guide_legend(ncol=3,bycol=TRUE,override.aes = list(size=2))) + theme(legend.key.size = unit(0.5,"line"))
legend.mb    <- get_legend(p_network.mb)
legend.mb    <- as_ggplot(legend.mb)
p_net.mb     <- plot_network(ig2.mb, physeq1, type='taxa', color="Species",title="Species Network Association - MB") + theme(legend.position = "none")
#p_net_leg.mb <- ggarrange(p_net.mb,as_ggplot(legend.mb) + rremove("x.text"), ncol=2, widths = c(1, 1))
pdf(paste0(fld,"network_mb_species.pdf"), width=12); p_net.mb; dev.off()
svg(paste0(fld,"network_mb_species.svg"), width=12); p_net.mb; dev.off()
png(paste0(fld,"network_mb_species.png"), width=900, height=900); p_net.mb; dev.off()
pdf(paste0(fld,"network_mb_legend_species.pdf"), width=12); legend.mb; dev.off()
svg(paste0(fld,"network_mb_legend_species.svg"), width=12); legend.mb; dev.off()
png(paste0(fld,"network_mb_legend_species.png"), width=900,height=900); legend.mb; dev.off()

cutoff<-0.5
ps.sparcc<-sparcc(t(otu_table(physeq1)))
sparcc.graph <- abs(ps.sparcc$Cor) >= cutoff
diag(sparcc.graph) <- 0
sparcc.graph <- Matrix(sparcc.graph, sparse=TRUE)
ig2.sparcc <- adj2igraph(sparcc.graph,vertex.attr=list(name=taxa_names(physeq1)))

p_network.sparcc <- plot_network(ig2.sparcc, physeq1, type='taxa',color="Species", title=paste0("Species Network Association - SPARCC (Correlation >= ",cutoff,")"))+ theme(legend.position = "bottom") + theme(legend.text = element_text(size=8)) + guides(colour = guide_legend(ncol=3,bycol=TRUE,override.aes = list(size=2))) + theme(legend.key.size = unit(0.5,"line"))
legend.sparcc    <- get_legend(p_network.sparcc)
legend.sparcc    <- as_ggplot(legend.sparcc)
p_net.sparcc     <- plot_network(ig2.sparcc, physeq1, type='taxa',color="Species", title=paste0("Species Network Association - SPARCC (Correlation >= ",cutoff,")")) + theme(legend.position = "none") 
#p_net_leg.sparcc <- ggarrange(p_net.sparcc,as_ggplot(legend.sparcc) + rremove("x.text"), nrow=2, widths = c(1, 1))

svg(paste0(fld,"network_sparcc.svg"), width=12)
p_net.sparcc
dev.off()
svg(paste0(fld,"network_sparcc_legend.svg"), width=12)
legend.sparcc
dev.off()

pdf(paste0(fld,"network_sparcc.pdf"), width=12)
p_net.sparcc
dev.off()
pdf(paste0(fld,"network_sparcc_legend.pdf"), width=12)
legend.sparcc
dev.off()

png(paste0(fld,"network_sparcc.png"), width=900,height=900)
p_net.sparcc
dev.off()
png(paste0(fld,"network_sparcc_legend.png"), width=900,height=900)
legend.sparcc
dev.off()



