Skip to content
Snippets Groups Projects
Commit bf2956d2 authored by vvanoost's avatar vvanoost
Browse files

pipeline bowtie2

parent 8a0dd906
No related branches found
No related tags found
No related merge requests found
/*
* cutadapt :
* Inputs : fastq files
* Output : fastq files
*/
/* Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3 */
/*
* for paired-end data
*/
/* Glob selecting the paired-end fastq files (R1/R2); can be overridden through params. */
params.fastq = "$baseDir/data/fastq/*_R{1,2}.fastq.gz"

log.info "fastq files : ${params.fastq}"

/* Group the matched files into pairs; stop with an error when nothing matches. */
fastq_files = Channel
    .fromFilePairs(params.fastq)
    .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }

/* One copy feeds adaptor removal, the other feeds FastQC on the raw reads. */
fastq_files.into { fastq_files_adaptor; fastq_files_fastq }
/*
 * fastqc_fastq: runs FastQC on both mates of a raw read pair.
 * Input  : (pair_id, [R1, R2]) from fastq_files_fastq
 * Output : every *.zip / *.html file left in the work directory, copied to
 *          results/fastq/fastqc/raw
 */
process fastqc_fastq {
    tag "$pair_id"
    publishDir "results/fastq/fastqc/raw", mode: 'copy'

    input:
    set val(pair_id), file(fq) from fastq_files_fastq

    output:
    file "*.{zip,html}" into fastqc_repport

    script:
    def mate1 = fq[0]
    def mate2 = fq[1]
    """
    fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
      ${mate1} ${mate2}
    """
}
/*
 * adaptor_removal: removes adaptor sequences with cutadapt, one run per mate
 * (each run is given one -a and one -g adaptor sequence).
 * Input  : (pair_id, [R1, R2]) from fastq_files_adaptor
 * Output : (pair_id, [<pair_id>_cut_R1.fastq.gz, <pair_id>_cut_R2.fastq.gz])
 *          into fastq_files_cut
 */
process adaptor_removal {
    tag "$pair_id"

    input:
    set pair_id, file(reads) from fastq_files_adaptor

    output:
    set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut

    script:
    /* Each mate writes its own report. Both runs used to redirect to the same
       <pair_id>_report.txt, so the R2 report silently overwrote the R1 one. */
    """
    cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
      -o ${pair_id}_cut_R1.fastq.gz \
      ${reads[0]} > ${pair_id}_R1_report.txt
    cutadapt -a GATCGTCGGACTGTAGAACTCTGAAC -g GTTCAGAGTTCTACAGTCCGACGATC \
      -o ${pair_id}_cut_R2.fastq.gz \
      ${reads[1]} > ${pair_id}_R2_report.txt
    """
}
/* Adaptor-trimmed reads go both to random-base trimming and to FastQC. */
fastq_files_cut
    .into { fastq_files_cut_randombp; fastq_files_cut_fastq }
/*
 * fastqc_fastq_cutadapt: runs FastQC on the adaptor-trimmed read pair.
 * Input  : (pair_id, [R1, R2]) from fastq_files_cut_fastq
 * Output : *.zip / *.html reports, copied to
 *          results/fastq/fastqc/adaptor_removal/
 */
process fastqc_fastq_cutadapt {
    tag "$pair_id"
    publishDir "results/fastq/fastqc/adaptor_removal/", mode: 'copy'

    input:
    set val(pair_id), file(trimmed) from fastq_files_cut_fastq

    output:
    file "*.{zip,html}" into cutadapt_fastqc_repport

    script:
    def mate1 = trimmed[0]
    def mate2 = trimmed[1]
    """
    fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
      ${mate1} ${mate2}
    """
}
/*
 * random_bases_4_trimming: removes 4 bases from the start and 4 bases from the
 * end of every read of the pair with cutadapt.
 * Input  : (pair_id, [R1, R2]) from fastq_files_cut_randombp
 * Output : (pair_id, [<pair_id>_cut4_R1.fastq.gz, <pair_id>_cut4_R2.fastq.gz])
 *          into fastq_files_cut4, copied to results/fastq/adaptor_removal/
 */
process random_bases_4_trimming {
    tag "$pair_id"
    publishDir "results/fastq/adaptor_removal/", mode: 'copy'

    input:
    set pair_id, file(reads) from fastq_files_cut_randombp

    output:
    set pair_id, "*_cut4_R{1,2}.fastq.gz" into fastq_files_cut4

    script:
    /* In paired-end mode cutadapt applies -u to the first read only; the
       second read needs the upper-case -U counterpart, otherwise R2 is written
       out untrimmed. */
    """
    cutadapt -u 4 -u -4 -U 4 -U -4 \
      -o ${pair_id}_cut4_R1.fastq.gz -p ${pair_id}_cut4_R2.fastq.gz \
      ${reads[0]} ${reads[1]} > ${pair_id}_report.txt
    """
}
/* Random-base-trimmed reads go both to quality trimming and to FastQC. */
fastq_files_cut4
    .into { fastq_files_trim; fastq_files_cut4_fastq }
/*
 * fastqc_fastq_randombp: runs FastQC on the read pair after the 4-base trimming.
 * Input  : (pair_id, [R1, R2]) from fastq_files_cut4_fastq
 * Output : *.zip / *.html reports, copied to
 *          results/fastq/fastqc/random_bases_4_trimming/
 */
process fastqc_fastq_randombp {
    tag "$pair_id"
    publishDir "results/fastq/fastqc/random_bases_4_trimming/", mode: 'copy'

    input:
    set val(pair_id), file(cut4) from fastq_files_cut4_fastq

    output:
    file "*.{zip,html}" into randombp_fastqc_repport

    script:
    def mate1 = cut4[0]
    def mate2 = cut4[1]
    """
    fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
      ${mate1} ${mate2}
    """
}
/*
* urqt :
* Inputs : fastq files
* Output : fastq files
*/
/* quality trimming */
/*
* for paired-end data
*/
/*
 * trimming: runs UrQt on a read pair (threshold option --t 20, gzipped output).
 * Input  : (pair_id, [R1, R2]) from fastq_files_trim
 * Output : (pair_id, [<pair_id>_trim_R1.fastq.gz, <pair_id>_trim_R2.fastq.gz])
 *          into fastq_files_urqt, copied to results/fastq/trimming/
 * UrQt's standard output is kept in <pair_id>_trimming_report.txt inside the
 * task work directory (it is not declared as an output).
 */
process trimming {
    /* Tagged by pair_id like every other per-sample process in this file
       (the tag used to be the raw list of input files). */
    tag "$pair_id"
    cpus 4
    publishDir "results/fastq/trimming/", mode: 'copy'

    input:
    set pair_id, file(reads) from fastq_files_trim

    output:
    set pair_id, "*_trim_R{1,2}.fastq.gz" into fastq_files_urqt

    script:
    """
    UrQt --t 20 --m ${task.cpus} --gz \
      --in ${reads[0]} --inpair ${reads[1]} \
      --out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
      > ${pair_id}_trimming_report.txt
    """
}
/* Quality-trimmed reads go both to the bowtie2 mapping and to FastQC. */
fastq_files_urqt
    .into { fastq_files_align; fastq_files_urqt_fastq }
/*
 * fastqc_fastq_urqt: runs FastQC on the UrQt-trimmed read pair.
 * Input  : (pair_id, [R1, R2]) from fastq_files_urqt_fastq
 * Output : *.zip / *.html reports, copied to results/fastq/fastqc/urqt/
 */
process fastqc_fastq_urqt {
    tag "$pair_id"
    publishDir "results/fastq/fastqc/urqt/", mode: 'copy'

    input:
    set val(pair_id), file(trimmed) from fastq_files_urqt_fastq

    output:
    file "*.{zip,html}" into urqt_fastqc_repport

    script:
    def mate1 = trimmed[0]
    def mate2 = trimmed[1]
    """
    fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
      ${mate1} ${mate2}
    """
}
/*
* Bowtie2 :
* Inputs : fastq files
* Inputs : fasta files
* Output : bam files
*/
/* fasta indexing */
/* Glob selecting the reference fasta file(s); can be overridden through params. */
params.fasta = "$baseDir/data/bam/*.fasta"
log.info "fasta files : ${params.fasta}"

/* Stop with an error when the glob matches nothing. The message now names
   fasta files (it used to say "bam files", which is not what is searched). */
Channel
    .fromPath( params.fasta )
    .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
    .set { fasta_file }
/*
 * index_fasta: builds a bowtie2 index named <fasta basename>.index.
 * Input  : one fasta file from fasta_file
 * Output : every file matching *.index*, copied to results/mapping/index/
 * The task fails when the bowtie2-build log contains the word "Error".
 */
process index_fasta {
    tag "$fasta.baseName"
    cpus 4
    publishDir "results/mapping/index/", mode: 'copy'

    input:
    file fasta from fasta_file

    output:
    file "*.index*" into index_files

    script:
    def prefix = fasta.baseName
    """
    bowtie2-build --threads ${task.cpus} ${fasta} ${prefix}.index &> ${prefix}_bowtie2_report.txt
    if grep -q "Error" ${prefix}_bowtie2_report.txt; then
      exit 1
    fi
    """
}
/*
* for paired-end data
*/
/*
 * mapping_fastq: aligns a read pair with bowtie2 and converts the output to
 * BAM with samtools.
 * Input  : (pair_id, [R1, R2]) from fastq_files_align
 *          the collected index files from index_files
 * Output : (pair_id, <pair_id>.bam) into bam_files
 *          <pair_id>_bowtie2_report.txt (bowtie2 stderr) into mapping_fastq_report
 *          both copied to results/mapping/bams/
 * The task fails when the bowtie2 report contains the word "Error".
 */
process mapping_fastq {
    tag "$pair_id"
    cpus 4
    publishDir "results/mapping/bams/", mode: 'copy'

    input:
    set pair_id, file(reads) from fastq_files_align
    file index from index_files.toList()

    output:
    set pair_id, "*.bam" into bam_files
    file "*_bowtie2_report.txt" into mapping_fastq_report

    script:
    /* Declared with `def` so the variable is local to this task; without it
       the name is script-global and shared by tasks evaluated concurrently. */
    def index_id = index[0]
    /* Derive the index prefix from the "<prefix>.1.bt2" file, skipping the
       "<prefix>.rev.1.bt2" file, which also matches the first pattern. */
    for (index_file in index) {
        if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
            index_id = (index_file =~ /(.*)\.1\.bt2/)[0][1]
        }
    }
    """
    bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
      -1 ${reads[0]} -2 ${reads[1]} 2> \
      ${pair_id}_bowtie2_report.txt | \
      samtools view -Sb - > ${pair_id}.bam
    if grep -q "Error" ${pair_id}_bowtie2_report.txt; then
      exit 1
    fi
    """
}
/*
 * MultiQC: aggregates the FastQC reports of every stage (raw, adaptor removal,
 * random-base trimming, UrQt) into one report.
 * Input  : the collected contents of the four *_repport channels
 * Output : files matching *multiqc_*, copied to results/fastq/multiqc/
 */
process multiqc {
    tag "$repport"
    publishDir "results/fastq/multiqc/", mode: 'copy'
    /* Directive form, as in every other process of this file (was `cpus = 1`,
       an assignment rather than a directive call). */
    cpus 1

    input:
    file repport from fastqc_repport.collect()
    file repport_urqt from urqt_fastqc_repport.collect()
    file repport_cutadapt from cutadapt_fastqc_repport.collect()
    file repport_randombp from randombp_fastqc_repport.collect()

    output:
    file "*multiqc_*" into multiqc_report

    script:
    """
    multiqc -f .
    """
}
/* bams splitting */
/*
 * split_bam: splits a BAM file on SAM flag 0x10. Reads without the flag are
 * written to <pair_id>_forward.bam, reads with it to <pair_id>_reverse.bam.
 * Input  : (pair_id, bam) from bam_files
 * Output : (pair_id, *_forward.bam) into forward_bam_files
 *          (pair_id, *_reverse.bam) into reverse_bam_files
 */
process split_bam {
    tag "$pair_id"
    cpus 2

    input:
    set pair_id, file(bam) from bam_files

    output:
    set pair_id, "*_forward.bam" into forward_bam_files
    set pair_id, "*_reverse.bam" into reverse_bam_files

    script:
    /* The forward extraction runs in the background while the reverse one runs
       in the foreground. The script must wait on the background job: without
       it the task can end while the forward BAM is still being written, and a
       failure of that job goes unnoticed. `wait <pid>` returns its exit status. */
    """
    samtools view -hb -F 0x10 ${bam} > ${pair_id}_forward.bam &
    forward_pid=\$!
    samtools view -hb -f 0x10 ${bam} > ${pair_id}_reverse.bam
    wait \$forward_pid
    """
}
/* bams sorting */
/*
 * sort_bam_forward: sorts the forward BAM with samtools sort.
 * Input  : (pair_id, bam) from forward_bam_files
 * Output : (pair_id, *_sorted.bam) into forward_sorted_bam_files,
 *          copied to results/mapping/bams/
 */
process sort_bam_forward {
    tag "$pair_id"
    cpus 4
    publishDir "results/mapping/bams/", mode: 'copy'

    input:
    set val(pair_id), file(unsorted) from forward_bam_files

    output:
    set val(pair_id), file("*_sorted.bam") into forward_sorted_bam_files

    script:
    def sorted_name = "${pair_id}_forward_sorted.bam"
    """
    samtools sort -@ ${task.cpus} -O BAM -o ${sorted_name} ${unsorted}
    """
}
/*
 * sort_bam_reverse: sorts the reverse BAM with samtools sort.
 * Input  : (pair_id, bam) from reverse_bam_files
 * Output : (pair_id, *_sorted.bam) into reverse_sorted_bam_files,
 *          copied to results/mapping/bams/
 */
process sort_bam_reverse {
    tag "$pair_id"
    cpus 4
    publishDir "results/mapping/bams/", mode: 'copy'

    input:
    set val(pair_id), file(unsorted) from reverse_bam_files

    output:
    set val(pair_id), file("*_sorted.bam") into reverse_sorted_bam_files

    script:
    def sorted_name = "${pair_id}_reverse_sorted.bam"
    """
    samtools sort -@ ${task.cpus} -O BAM -o ${sorted_name} ${unsorted}
    """
}
/* bams indexing */
/*
 * index_bam_forward: runs samtools index on the sorted forward BAM.
 * Input  : (pair_id, bam) from forward_sorted_bam_files
 * Output : (pair_id, files matching *bam*) into forward_indexed_bam_file,
 *          copied to results/mapping/bams/
 */
process index_bam_forward {
    tag "$pair_id"
    publishDir "results/mapping/bams/", mode: 'copy'

    input:
    set val(pair_id), file(sorted_bam) from forward_sorted_bam_files

    output:
    set val(pair_id), file("*bam*") into forward_indexed_bam_file

    script:
    """
    samtools index ${sorted_bam}
    """
}
/*
 * index_bam_reverse: runs samtools index on the sorted reverse BAM.
 * Input  : (pair_id, bam) from reverse_sorted_bam_files
 * Output : (pair_id, files matching *bam*) into reverse_indexed_bam_file,
 *          copied to results/mapping/bams/
 */
process index_bam_reverse {
    tag "$pair_id"
    publishDir "results/mapping/bams/", mode: 'copy'

    input:
    set val(pair_id), file(sorted_bam) from reverse_sorted_bam_files

    output:
    set val(pair_id), file("*bam*") into reverse_indexed_bam_file

    script:
    """
    samtools index ${sorted_bam}
    """
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment