version = "1.11" container_url = "lbmc/samtools:${version}" process index_fasta { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" input: tuple val(file_id), path(fasta) output: tuple val(file_id), path("*.fai"), emit: index script: """ samtools faidx ${fasta} """ } filter_bam_quality_threshold = 30 process filter_bam_quality { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" input: tuple val(file_id), path(bam) output: tuple val(file_id), path("*_filtered.bam"), emit: bam script: """ samtools view -@ ${task.cpus} -hb ${bam} -q ${filter_bam_quality_threshold} > \ ${bam.simpleName}_filtered.bam """ } process filter_bam { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" input: tuple val(file_id), path(bam) path bed output: tuple val(file_id), path("*_filtered.bam"), emit: bam script: """ samtools view -@ ${task.cpus} -hb ${bam} -L ${bed} > \ ${bam.simpleName}_filtered.bam """ } process filter_bam_mapped { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" input: tuple val(file_id), path(bam) output: tuple val(file_id), path("*_mapped.bam"), emit: bam script: """ samtools view -@ ${task.cpus} -F 4 -hb ${bam} > \ ${bam.simpleName}_mapped.bam """ } process filter_bam_unmapped { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" input: tuple val(file_id), path(bam) output: tuple val(file_id), path("*_unmapped.bam"), emit: bam script: """ samtools view -@ ${task.cpus} -f 4 -hb ${bam} > ${bam.simpleName}_unmapped.bam """ } process index_bam { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" input: tuple val(file_id), path(bam) output: tuple val(file_id), path(bam), emit: bam tuple val(file_id), path("*.bam.bai"), emit: bam_idx script: """ samtools index ${bam} """ } process sort_bam { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" input: tuple val(file_id), path(bam) output: tuple val(file_id), path("*.bam*"), emit: bam script: """ samtools sort -@ ${task.cpus} -O BAM -o ${bam.simpleName}_sorted.bam ${bam} """ } process split_bam { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" input: tuple val(file_id), path(bam) output: tuple val(file_id), path("*_forward.bam*"), emit: bam_forward tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse script: """ samtools view --@ ${Math.round(task.cpus/2)} \ -hb -F 0x10 ${bam} > ${bam.simpleName}_forward.bam & samtools view --@ ${Math.round(task.cpus/2)} \ -hb -f 0x10 ${bam} > ${bam.simpleName}_reverse.bam """ } process merge_bam { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" cpus = 2 input: tuple val(first_file_id), path(first_bam) tuple val(second_file_id), path(second_bam) output: tuple val(file_id), path("*.bam*"), emit: bam script: """ samtools merge ${first_bam} ${second_bam} \ ${first_bam.simpleName}_${second_file.simpleName}.bam """ } process merge_multi_bam { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" cpus = 2 input: tuple val(file_id), path(bams) output: tuple val(file_id), path("*_merged.bam*"), emit: bam script: """ samtools merge ${bams} \ ${bams[0].simpleName}_merged.bam """ } process stats_bam { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" cpus = 2 input: tuple val(file_id), path(bam) output: tuple val(file_id), path("*.tsv"), emit: tsv script: """ samtools flagstat -@ ${task.cpus} -O tsv ${bam} > ${bam.simpleName}_stats.tsv """ }