/*
 * small pipeline to build a training dataset from whole genome data
 *
 * input:
 *  - fasta
 *  - fastq
 *  - chromosome
 *  - start position
 *  - stop position
 *
 * output:
 *  - sort fasta
 *  - sort fastq
 */

// Both fastq parameters are optional; an empty string disables the
// corresponding (paired-end / single-end) branch of the pipeline.
params.fastq_paired = ""
params.fastq_single = ""

log.info "fasta files : ${params.fasta}"
log.info "fastq paired files : ${params.fastq_paired}"
log.info "fastq single files : ${params.fastq_single}"
log.info "chromosome : ${params.chromosome}"
log.info "start position : ${params.start}"
log.info "stop position : ${params.stop}"

Channel
  .fromPath( params.fasta )
  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
  .set { fasta_file }

// Write a one-line BED file describing the requested region
// (chromosome, start, stop separated by tabs).
process build_synthetic_bed {
  tag "${chromosome}:${start}-${stop}"
  cpus 4

  input:
    val chromosome from params.chromosome
    val start from params.start
    val stop from params.stop

  output:
    file "*.bed" into bed_files

  script:
"""
echo "${chromosome}\t${start}\t${stop}" > synthetic.bed
"""
}

// Extract the region described by the BED file from every input fasta.
process fasta_from_bed {
  tag "${fasta.baseName}"
  cpus 4
  publishDir "results/training/fasta/", mode: 'copy'

  input:
    file fasta from fasta_file
    file bed from bed_files

  output:
    file "*.fasta" into fasta_files_extracted

  script:
"""
bedtools getfasta -name \
-fi ${fasta} -bed ${bed} -fo ${fasta.baseName}_S.fasta
"""
}

// Build a bowtie2 index for each extracted fasta. The task fails when the
// bowtie2-build report contains the word "Error".
process index_fasta {
  tag "$fasta.baseName"
  cpus 4
  publishDir "results/training/mapping/index/", mode: 'copy'

  input:
    file fasta from fasta_files_extracted

  output:
    file "*.index*" into index_files
    file "*_report.txt" into indexing_report

  script:
"""
bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt

if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
  exit 1
fi
"""
}

// The index is needed by both the paired-end and the single-end mapping
// processes. A queue channel can only be consumed once, so it is forked here
// to let both branches run in the same execution.
index_files.into { index_files_paired; index_files_single }

if ( params.fastq_paired != "" ) {
  Channel
    .fromFilePairs( params.fastq_paired )
    .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq_paired}" }
    .set { fastq_files_paired }

  // Map each read pair on the extracted region and store the alignments as BAM.
  process mapping_fastq_paired {
    tag "$pair_id"
    cpus 4

    input:
      set pair_id, file(reads) from fastq_files_paired
      file index from index_files_paired.collect()

    output:
      set pair_id, "*.bam" into bam_files_paired
      file "*_report.txt" into mapping_report_paired

    script:
    // Derive the bowtie2 index prefix from the forward ".1.bt2" file;
    // the ".rev.1.bt2" file also ends in ".1.bt2" and must be skipped.
    index_id = index[0]
    for (index_file in index) {
      if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
          index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
      }
    }
"""
bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
-1 ${reads[0]} -2 ${reads[1]} 2> \
${pair_id}_bowtie2_report.txt | \
samtools view -Sb - > ${pair_id}.bam

if grep -q "Error" ${pair_id}_bowtie2_report.txt; then
  exit 1
fi
"""
  }

  bam_files_paired.into{ bam_files_paired_fa; bam_files_paired_ba}

  // Convert the properly paired alignments (flag 0x2) back to fastq files.
  process bam_2_fastq_paired {
    tag "$file_id"
    publishDir "results/training/fastq/", mode: 'copy'

    input:
      set file_id, file(bam) from bam_files_paired_fa

    output:
      set file_id, "*.fastq" into fastq_files_extracted_paired

    script:
"""
samtools fastq -1 ${file_id}_SR1.fastq -2 ${file_id}_SR2.fastq -f 0x2 ${bam}
"""
  }

  // Keep only the properly paired alignments (flag 0x2) in the published BAM.
  // NOTE(review): the bed input is staged but not used by the command.
  process filter_bam_paired {
    tag "$file_id"
    publishDir "results/training/bams/", mode: 'copy'
    cpus 4

    input:
      set file_id, file(bam) from bam_files_paired_ba
      file bed from bed_files

    output:
      set file_id, "*_S.bam" into filtered_bam_files_paired

    script:
"""
samtools view -@ ${task.cpus} -hb ${bam} -f 0x2 > ${file_id}_S.bam
"""
  }
}

if ( params.fastq_single != "" ) {
  Channel
    .fromPath( params.fastq_single )
    .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq_single}" }
    .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
    .set { fastq_files_single }

  // Map each single-end fastq on the extracted region and store the
  // alignments as BAM.
  process mapping_fastq_single {
    tag "$file_id"
    cpus 4

    input:
      set file_id, file(reads) from fastq_files_single
      file index from index_files_single.collect()

    output:
      set file_id, "*.bam" into bam_files_single
      file "*_report.txt" into mapping_report_single

    script:
    // Derive the bowtie2 index prefix from the forward ".1.bt2" file;
    // the ".rev.1.bt2" file also ends in ".1.bt2" and must be skipped.
    index_id = index[0]
    for (index_file in index) {
      if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
          index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
      }
    }
"""
bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
-U ${reads} 2> \
${file_id}_bowtie2_report.txt | \
samtools view -Sb - > ${file_id}.bam

if grep -q "Error" ${file_id}_bowtie2_report.txt; then
  exit 1
fi
"""
  }

  bam_files_single.into{ bam_files_single_fa; bam_files_single_ba}

  // Convert the mapped single-end reads back to fastq.
  // Single-end alignments never carry the "properly paired" flag (0x2), so
  // the selection excludes unmapped reads (0x4) instead. Reads without the
  // READ1/READ2 flags are written by samtools to the -0 target.
  process bam_2_fastq_single {
    tag "$file_id"
    publishDir "results/training/fastq/", mode: 'copy'

    input:
      set file_id, file(bam) from bam_files_single_fa

    output:
      set file_id, "*.fastq" into fastq_files_extracted_single

    script:
"""
samtools fastq -0 ${file_id}_S.fastq -F 0x4 ${bam}
"""
  }

  // Keep only the mapped reads in the published BAM (unmapped flag 0x4
  // excluded; the paired-only flag 0x2 would discard every single-end read).
  // NOTE(review): the bed input is staged but not used by the command.
  process filter_bam_single {
    tag "$file_id"
    publishDir "results/training/bams/", mode: 'copy'
    cpus 4

    input:
      set file_id, file(bam) from bam_files_single_ba
      file bed from bed_files

    output:
      set file_id, "*_S.bam" into filtered_bam_files_single

    script:
"""
samtools view -@ ${task.cpus} -hb ${bam} -F 0x4 > ${file_id}_S.bam
"""
  }
}