Tutorial 14: Binning and Post-binning Workflow
This tutorial focuses on metagenomic binning and post-binning utilities. External tool execution is opt-in via environment variables.
Setup
From the Mycelia base directory, convert this tutorial to a notebook:
julia --project=. -e 'import Literate; Literate.notebook("tutorials/14_binning_workflow.jl", "tutorials/notebooks", execute=false)'import Pkg
if isinteractive()
Pkg.activate("..")
end
import Mycelia
import DataFramesConfiguration (opt-in external tools)
To run external binners, set:
MYCELIA_RUN_EXTERNAL=trueMYCELIA_BINNING_CONTIGS(FASTA)MYCELIA_BINNING_DEPTH(JGI depth table)MYCELIA_BINNING_TAXONOMY(taxonomy TSV for TaxVAMB/Taxometer)MYCELIA_BINNING_COVERAGE_TABLE(coverage table)MYCELIA_BINNING_BAM_PATH(BAM file or directory for COMEBin)MYCELIA_BINNING_GRAPH(assembly graph for MetaCoAG)MYCELIA_BINNING_MAPPING(mapping/coverage file for MetaCoAG)MYCELIA_BINNING_GENOMES(comma-separated FASTA list for dRep)MYCELIA_BINNING_BINS_DIRS(comma-separated bin directories for MAGmax)
These inputs are intentionally user-supplied to keep the tutorial lightweight.
const RUN_EXTERNAL = get(ENV, "MYCELIA_RUN_EXTERNAL", "false") == "true"
const contigs_fasta = get(ENV, "MYCELIA_BINNING_CONTIGS", "")
const depth_file = get(ENV, "MYCELIA_BINNING_DEPTH", "")
const taxonomy_file = get(ENV, "MYCELIA_BINNING_TAXONOMY", "")
const coverage_table = get(ENV, "MYCELIA_BINNING_COVERAGE_TABLE", "")
const bam_path = get(ENV, "MYCELIA_BINNING_BAM_PATH", "")
const assembly_graph = get(ENV, "MYCELIA_BINNING_GRAPH", "")
const mapping_file = get(ENV, "MYCELIA_BINNING_MAPPING", "")
const genomes = filter(!isempty, strip.(split(get(ENV, "MYCELIA_BINNING_GENOMES", ""), ',')))
const bins_dirs = filter(!isempty, strip.(split(get(ENV, "MYCELIA_BINNING_BINS_DIRS", ""), ',')))Part 1: Parser Utilities (no external tools needed)
The binning module includes parsers for common output tables.
println("=== Binning Tutorial ===")
println("Parser utilities (no external tools required)")
tmp_assignments = tempname()
open(tmp_assignments, "w") do io
write(io, "contig\tbin\tlength\n")
write(io, "ctg1\tbinA\t1500\n")
write(io, "ctg2\tbinB\t2400\n")
end
assignments_df = Mycelia.parse_bin_assignments(tmp_assignments)
println("Parsed assignments:")
println(assignments_df)
tmp_drep = tempname()
open(tmp_drep, "w") do io
write(io, "genome,secondary_cluster,representative,ani\n")
write(io, "g1,1,g1,0.99\n")
write(io, "g2,1,g1,0.99\n")
end
drep_df = Mycelia.parse_drep_clusters(tmp_drep)
println("Parsed dRep clusters:")
println(drep_df)Part 2: Binning (opt-in external tools)
The wrapper calls below require external tools and inputs.
if RUN_EXTERNAL
println("\n=== External binning runs (opt-in) ===")
if isfile(contigs_fasta) && isfile(depth_file)
println("Running VAMB and MetaBAT2...")
tmp_root = mktempdir()
outdir_vamb = joinpath(tmp_root, "vamb_out")
outdir_metabat = joinpath(tmp_root, "metabat_out")
try
Mycelia.run_vamb(contigs_fasta=contigs_fasta, depth_file=depth_file, outdir=outdir_vamb)
Mycelia.run_metabat2(contigs_fasta=contigs_fasta, depth_file=depth_file, outdir=outdir_metabat)
finally
rm(tmp_root; recursive=true, force=true)
end
else
println("Skipping VAMB/MetaBAT2 (missing contigs/depth inputs).")
end
if isfile(contigs_fasta) && isfile(depth_file) && isfile(taxonomy_file)
println("Running TaxVAMB and Taxometer...")
tmp_root = mktempdir()
outdir_taxvamb = joinpath(tmp_root, "taxvamb_out")
outdir_taxometer = joinpath(tmp_root, "taxometer_out")
try
Mycelia.run_taxvamb(
contigs_fasta=contigs_fasta,
depth_file=depth_file,
taxonomy_file=taxonomy_file,
outdir=outdir_taxvamb
)
Mycelia.run_taxometer(
contigs_fasta=contigs_fasta,
depth_file=depth_file,
taxonomy_file=taxonomy_file,
outdir=outdir_taxometer
)
finally
rm(tmp_root; recursive=true, force=true)
end
else
println("Skipping TaxVAMB/Taxometer (missing contigs/depth/taxonomy inputs).")
end
if isfile(contigs_fasta) && isfile(assembly_graph) && isfile(mapping_file)
println("Running MetaCoAG...")
outdir_metacoag = mktempdir()
try
Mycelia.run_metacoag(
contigs_fasta=contigs_fasta,
assembly_graph=assembly_graph,
mapping_file=mapping_file,
outdir=outdir_metacoag
)
finally
rm(outdir_metacoag; recursive=true, force=true)
end
else
println("Skipping MetaCoAG (missing contigs/graph/mapping inputs).")
end
if isfile(contigs_fasta) && isfile(coverage_table)
println("Skipping GenomeFace; wrapper disabled until genomeface executable is available again.")
else
println("Skipping GenomeFace (missing contigs/coverage inputs; wrapper disabled until executable is available).")
end
if isfile(contigs_fasta) && !isempty(bam_path) && (isfile(bam_path) || isdir(bam_path))
println("Running COMEBin...")
outdir_comebin = mktempdir()
try
Mycelia.run_comebin(
contigs_fasta=contigs_fasta,
bam_path=bam_path,
outdir=outdir_comebin
)
finally
rm(outdir_comebin; recursive=true, force=true)
end
else
println("Skipping COMEBin (missing contigs/BAM inputs).")
end
else
println("\nExternal tools are opt-in. Set MYCELIA_RUN_EXTERNAL=true to run binning wrappers.")
endPart 3: Post-binning (opt-in external tools)
dRep and MAGmax require external tools and user-provided inputs.
if RUN_EXTERNAL
if !isempty(genomes) && all(isfile, genomes)
println("Running dRep...")
outdir_drep = mktempdir()
try
Mycelia.run_drep_dereplicate(genomes=genomes, outdir=outdir_drep)
finally
rm(outdir_drep; recursive=true, force=true)
end
else
println("Skipping dRep (missing MYCELIA_BINNING_GENOMES inputs).")
end
if !isempty(bins_dirs) && all(isdir, bins_dirs)
println("Running MAGmax...")
outdir_magmax = mktempdir()
try
Mycelia.run_magmax_merge(bins_dirs=bins_dirs, outdir=outdir_magmax)
finally
rm(outdir_magmax; recursive=true, force=true)
end
else
println("Skipping MAGmax (missing MYCELIA_BINNING_BINS_DIRS inputs).")
end
end