Skip to content
Snippets Groups Projects
Commit 133a4f03 authored by vvanoost's avatar vvanoost
Browse files

new pipeline_bowtie2_nosplit bam

parent 1d9c4ba6
No related branches found
No related tags found
No related merge requests found
/*
* cutadapt :
* Inputs : fastq files
* Output : fastq files
*/
/* Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3 */
/*
* for paired-end data
*/
/* Default glob for the paired-end reads (R1/R2 mates); can be overridden with --fastq. */
params.fastq = "$baseDir/data/fastq/*_R{1,2}.fastq.gz"
log.info "fastq files : ${params.fastq}"

/* Pair the mates by their common prefix and abort when the glob matches nothing. */
fastq_files = Channel
    .fromFilePairs( params.fastq )
    .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }

/* Fork the raw reads: one copy for adaptor removal, one for the raw FastQC report. */
fastq_files.into { fastq_files_adaptor; fastq_files_fastq }
/*
* FastQC on the raw read pairs.
* Inputs : (pair_id, [R1, R2]) from fastq_files_fastq
* Output : FastQC zip/html reports, copied to results/fastq/fastqc/raw
*/
process fastqc_fastq {
    publishDir "results/fastq/fastqc/raw", mode: 'copy'
    tag "$pair_id"

    input:
    set val(pair_id), file(reads) from fastq_files_fastq

    output:
    file("*.{zip,html}") into fastqc_repport

    script:
    /* Both mates are analysed in one fastqc call; reports are written to the task directory. */
    """
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
/*
* Adaptor removal with cutadapt, run separately on each mate.
* Inputs : (pair_id, [R1, R2]) from fastq_files_adaptor
* Output : (pair_id, [<pair_id>_cut_R1.fastq.gz, <pair_id>_cut_R2.fastq.gz])
*
* Each cutadapt call writes its own report (<pair_id>_R1_report.txt and
* <pair_id>_R2_report.txt) so that the R2 run does not overwrite the R1 report.
* The reports are not declared as outputs; they stay in the task work directory.
*/
process adaptor_removal {
    tag "$pair_id"

    input:
    set pair_id, file(reads) from fastq_files_adaptor

    output:
    set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut

    script:
    /* -a is a 3' adaptor and -g a 5' adaptor; each mate uses its own sequences. */
    """
cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
-o ${pair_id}_cut_R1.fastq.gz \
${reads[0]} > ${pair_id}_R1_report.txt
cutadapt -a GATCGTCGGACTGTAGAACTCTGAAC -g GTTCAGAGTTCTACAGTCCGACGATC \
-o ${pair_id}_cut_R2.fastq.gz \
${reads[1]} > ${pair_id}_R2_report.txt
"""
}
/* Fork the adaptor-trimmed reads: one copy for random-base trimming, one for FastQC. */
fastq_files_cut.into { fastq_files_cut_randombp; fastq_files_cut_fastq }
/*
* FastQC on the adaptor-trimmed read pairs.
* Inputs : (pair_id, [R1, R2]) from fastq_files_cut_fastq
* Output : FastQC zip/html reports, copied to results/fastq/fastqc/adaptor_removal/
*/
process fastqc_fastq_cutadapt {
    publishDir "results/fastq/fastqc/adaptor_removal/", mode: 'copy'
    tag "$pair_id"

    input:
    set val(pair_id), file(reads) from fastq_files_cut_fastq

    output:
    file("*.{zip,html}") into cutadapt_fastqc_repport

    script:
    /* Both mates are analysed in one fastqc call; reports are written to the task directory. */
    """
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
/*
* Removal of the 4 random bases at both ends of each read with cutadapt.
* Inputs : (pair_id, [R1, R2]) from fastq_files_cut_randombp
* Output : (pair_id, [<pair_id>_cut4_R1.fastq.gz, <pair_id>_cut4_R2.fastq.gz]),
*          copied to results/fastq/adaptor_removal/
*
* In paired-end mode cutadapt applies -u to the first read only; the second
* read is trimmed with -U. Both are given so that R1 and R2 each lose 4 bases
* at the 5' end (positive length) and 4 bases at the 3' end (negative length).
*/
process random_bases_4_trimming {
    tag "$pair_id"
    publishDir "results/fastq/adaptor_removal/", mode: 'copy'

    input:
    set pair_id, file(reads) from fastq_files_cut_randombp

    output:
    set pair_id, "*_cut4_R{1,2}.fastq.gz" into fastq_files_cut4

    script:
    """
cutadapt -u 4 -u -4 -U 4 -U -4 \
-o ${pair_id}_cut4_R1.fastq.gz -p ${pair_id}_cut4_R2.fastq.gz \
${reads[0]} ${reads[1]} > ${pair_id}_report.txt
"""
}
/* Fork the random-base-trimmed reads: one copy for quality trimming, one for FastQC. */
fastq_files_cut4.into { fastq_files_trim; fastq_files_cut4_fastq }
/*
* FastQC on the read pairs after random-base trimming.
* Inputs : (pair_id, [R1, R2]) from fastq_files_cut4_fastq
* Output : FastQC zip/html reports, copied to results/fastq/fastqc/random_bases_4_trimming/
*/
process fastqc_fastq_randombp {
    publishDir "results/fastq/fastqc/random_bases_4_trimming/", mode: 'copy'
    tag "$pair_id"

    input:
    set val(pair_id), file(reads) from fastq_files_cut4_fastq

    output:
    file("*.{zip,html}") into randombp_fastqc_repport

    script:
    /* Both mates are analysed in one fastqc call; reports are written to the task directory. */
    """
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
/*
* urqt :
* Inputs : fastq files
* Output : fastq files
*/
/* quality trimming */
/*
* for paired-end data
*/
/*
* Quality trimming of the read pairs with UrQt.
* Inputs : (pair_id, [R1, R2]) from fastq_files_trim
* Output : (pair_id, [<pair_id>_trim_R1.fastq.gz, <pair_id>_trim_R2.fastq.gz]),
*          copied to results/fastq/trimming/
*/
process trimming {
    publishDir "results/fastq/trimming/", mode: 'copy'
    cpus 4
    tag "${reads}"

    input:
    set val(pair_id), file(reads) from fastq_files_trim

    output:
    set val(pair_id), file("*_trim_R{1,2}.fastq.gz") into fastq_files_urqt

    script:
    /* UrQt's standard output is captured in a per-pair report inside the task directory. */
    """
UrQt --t 20 --m ${task.cpus} --gz \
--in ${reads[0]} --inpair ${reads[1]} \
--out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
> ${pair_id}_trimming_report.txt
"""
}
/* Fork the quality-trimmed reads: one copy for the alignment, one for FastQC. */
fastq_files_urqt.into { fastq_files_align; fastq_files_urqt_fastq }
/*
* FastQC on the quality-trimmed read pairs.
* Inputs : (pair_id, [R1, R2]) from fastq_files_urqt_fastq
* Output : FastQC zip/html reports, copied to results/fastq/fastqc/urqt/
*/
process fastqc_fastq_urqt {
    publishDir "results/fastq/fastqc/urqt/", mode: 'copy'
    tag "$pair_id"

    input:
    set val(pair_id), file(reads) from fastq_files_urqt_fastq

    output:
    file("*.{zip,html}") into urqt_fastqc_repport

    script:
    /* Both mates are analysed in one fastqc call; reports are written to the task directory. */
    """
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
/*
* Bowtie2 :
* Inputs : fastq files
* Inputs : fasta files
* Output : bam files
*/
/* fasta indexing */
/* Default glob for the reference fasta file(s); can be overridden with --fasta. */
params.fasta = "$baseDir/data/bam/*.fasta"
log.info "fasta files : ${params.fasta}"

/*
* Emit every file matching the glob and abort when nothing matches.
* The error message names fasta files, which is what the glob searches for.
*/
Channel
    .fromPath( params.fasta )
    .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
    .set { fasta_file }
/*
* Bowtie2 index construction for a reference fasta.
* Inputs : one fasta file from fasta_file
* Output : every file matching *.index*, copied to results/mapping/index/
*/
process index_fasta {
    publishDir "results/mapping/index/", mode: 'copy'
    cpus 4
    tag "$fasta.baseName"

    input:
    file(fasta) from fasta_file

    output:
    file("*.index*") into index_files

    script:
    /*
    * stdout and stderr of bowtie2-build are captured in a report file; the
    * task exits with status 1 when that report contains the word "Error".
    */
    """
bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
exit 1
fi
"""
}
/*
* for paired-end data
*/
/*
* Paired-end alignment with bowtie2, converted to BAM with samtools.
* Inputs : (pair_id, [R1, R2]) from fastq_files_align
* Inputs : all index files, collected into a single list
* Output : (pair_id, <pair_id>.bam) and the bowtie2 report,
*          copied to results/mapping/bams/
*/
process mapping_fastq {
    publishDir "results/mapping/bams/", mode: 'copy'
    cpus 4
    tag "$pair_id"

    input:
    set val(pair_id), file(reads) from fastq_files_align
    file(index) from index_files.collect()

    output:
    set val(pair_id), file("*.bam") into bam_files
    file("*_bowtie2_report.txt") into mapping_fastq_report

    script:
    /*
    * Derive the index prefix passed to "bowtie2 -x": take the name of the
    * forward ".1.bt2" file (skipping the ".rev.1.bt2" one) without that suffix.
    * Falls back to the first staged file when no such file is found.
    */
    index_id = index[0]
    for (candidate in index) {
        if ( !(candidate =~ /.*\.1\.bt2/) || candidate =~ /.*\.rev\.1\.bt2/ ) {
            continue
        }
        index_id = ( candidate =~ /(.*)\.1\.bt2/)[0][1]
    }
    /*
    * bowtie2's stderr goes to the report file while its stdout is piped to
    * samtools; the task exits with status 1 when the report contains "Error".
    */
    """
bowtie2 --very-sensitive --fr -p ${task.cpus} -x ${index_id} \
-1 ${reads[0]} -2 ${reads[1]} 2> \
${pair_id}_bowtie2_report.txt | \
samtools view -Sb - > ${pair_id}.bam
if grep -q "Error" ${pair_id}_bowtie2_report.txt; then
exit 1
fi
"""
}
/* MultiQC */
/*
* MultiQC aggregation of all the FastQC reports.
* Inputs : the collected FastQC reports of the raw, adaptor-trimmed,
*          random-base-trimmed and quality-trimmed reads
* Output : every file or directory matching *multiqc_*,
*          copied to results/fastq/multiqc/
*/
process multiqc {
    tag "$repport"
    publishDir "results/fastq/multiqc/", mode: 'copy'
    /* Directives are written without "=", like the cpus directive of the other processes. */
    cpus 1

    input:
    file repport from fastqc_repport.collect()
    file repport_urqt from urqt_fastqc_repport.collect()
    file repport_cutadapt from cutadapt_fastqc_repport.collect()
    file repport_randombp from randombp_fastqc_repport.collect()

    output:
    file "*multiqc_*" into multiqc_report

    script:
    /* multiqc scans the task directory, where all collected reports are staged. */
    """
multiqc -f .
"""
}
/* bams sorting */
/*
* Sorting of the alignments with samtools sort.
* Inputs : (pair_id, bam) from bam_files
* Output : (pair_id, <pair_id>_sorted.bam), copied to results/mapping/bams/
*/
process sort_bam {
    publishDir "results/mapping/bams/", mode: 'copy'
    cpus 4
    tag "$pair_id"

    input:
    set val(pair_id), file(bam) from bam_files

    output:
    set val(pair_id), file("*_sorted.bam") into sorted_bam_files

    script:
    """
samtools sort -@ ${task.cpus} -O BAM -o ${pair_id}_sorted.bam ${bam}
"""
}
/* bams indexing */
/*
* Indexing of the sorted alignments with samtools index.
* Inputs : (pair_id, sorted bam) from sorted_bam_files
* Output : (pair_id, every file matching *bam*), copied to results/mapping/bams/
*/
process index_bam {
    publishDir "results/mapping/bams/", mode: 'copy'
    tag "$pair_id"

    input:
    set val(pair_id), file(bam) from sorted_bam_files

    output:
    set val(pair_id), file("*bam*") into indexed_bam_file

    script:
    """
samtools index ${bam}
"""
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment