Skip to content
Snippets Groups Projects
Commit 7e73ae00 authored by vvanoost's avatar vvanoost
Browse files

CCA_local

parent 6bc557f4
No related branches found
No related tags found
No related merge requests found
/*
* cutadapt :
* Inputs : fastq files
* Output : fastq files
*/
/* Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3 */
/*
* for single-end data
*/
// Glob selecting the single-end fastq files to process.
params.fastq = "$baseDir/data/fastq_SE/*.fastq.gz"

log.info("fastq files : ${params.fastq}")

// Build the channel of raw reads; stop with an error when the glob matches nothing.
Channel
    .fromPath(params.fastq)
    .ifEmpty { error("Cannot find any fastq files matching: ${params.fastq}") }
    .set { fastq_files }

// Fork the raw reads: one copy goes to adaptor removal, the other to FastQC.
fastq_files.into { fastq_files_adaptor; fastq_files_fastq }
// FastQC on the raw reads; the zip/html reports are copied to the publish
// directory and also sent to `fastqc_repport`.
process fastqc_fastq {
    publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/fastqc/raw", mode: 'copy'
    tag "${reads.baseName}"

    input:
    file(reads) from fastq_files_fastq

    output:
    file("*.{zip,html}") into fastqc_repport

    script:
    """
    fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
    """
}
// Adaptor trimming with cutadapt (sequences given through -a and -g).
// Only the trimmed fastq is declared as output; cutadapt's stdout is
// redirected to a report file that stays in the task work directory.
process adaptor_removal {
    tag "${reads.baseName}"

    input:
    file(reads) from fastq_files_adaptor

    output:
    file("*_cut.fastq.gz") into fastq_files_cut

    script:
    """
    cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
    -o ${reads.baseName}_cut.fastq.gz \
    ${reads} > ${reads.baseName}_report.txt
    """
}
// Fork the adaptor-trimmed reads: one copy for random-base trimming, one for FastQC.
fastq_files_cut.into { fastq_files_cut_randombp; fastq_files_cut_fastq }
// FastQC on the adaptor-trimmed reads; reports are published and sent to
// `cutadapt_fastqc_repport`.
process fastqc_fastq_cutadapt {
    publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/fastqc/adaptor_removal/", mode: 'copy'
    tag "${reads.baseName}"

    input:
    file(reads) from fastq_files_cut_fastq

    output:
    file("*.{zip,html}") into cutadapt_fastqc_repport

    script:
    """
    fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
    """
}
// Remove 4 bases from each end of every read with cutadapt (-u 4 -u -4).
// Input : one adaptor-trimmed fastq file per task.
// Output: the "*_cut4.fastq.gz" file, published and sent to `fastq_files_cut4`.
// cutadapt's stdout goes to a report file that is not declared as output.
process random_bases_4_trimming {
    tag "$reads.baseName"
    publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/adaptor_removal/", mode: 'copy'

    input:
    file reads from fastq_files_cut_randombp

    output:
    file "*_cut4.fastq.gz" into fastq_files_cut4

    script:
    // Single-end data: `reads` is a single file, so it is passed as-is
    // (no list indexing), like in the other single-end processes.
    """
    cutadapt -u 4 -u -4 \
    -o ${reads.baseName}_cut4.fastq.gz \
    ${reads} > ${reads.baseName}_report.txt
    """
}
// Fork the 4-base-trimmed reads: one copy for quality trimming, one for FastQC.
fastq_files_cut4.into { fastq_files_trim; fastq_files_cut4_fastq }
// FastQC on the reads after random-base trimming; reports are published and
// sent to `randombp_fastqc_repport`.
process fastqc_fastq_randombp {
    publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/fastqc/random_bases_4_trimming/", mode: 'copy'
    tag "${reads.baseName}"

    input:
    file(reads) from fastq_files_cut4_fastq

    output:
    file("*.{zip,html}") into randombp_fastqc_repport

    script:
    """
    fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
    """
}
/*
* urqt :
* Inputs : fastq files
* Output : fastq files
*/
/* quality trimming */
/*
* for single-end data
*/
// Quality trimming with UrQt (--t 20), run with the task's cpu count via --m.
// The "*_trim.fastq.gz" file is published and sent to `fastq_files_urqt`;
// UrQt's stdout is redirected to a report file that is not declared as output.
process trimming {
    cpus 4
    publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/trimming/", mode: 'copy'
    tag "${reads}"

    input:
    file(reads) from fastq_files_trim

    output:
    file("*_trim.fastq.gz") into fastq_files_urqt

    script:
    """
    UrQt --t 20 --m ${task.cpus} --gz \
    --in ${reads} \
    --out ${reads.baseName}_trim.fastq.gz \
    > ${reads.baseName}_trimming_report.txt
    """
}
// Fork the quality-trimmed reads: one copy for CCA removal, one for FastQC.
fastq_files_urqt.into { fastq_files_CCA; fastq_files_urqt_fastq }
// FastQC on the quality-trimmed reads; reports are published and sent to
// `urqt_fastqc_repport`.
process fastqc_fastq_urqt {
    publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/fastqc/urqt/", mode: 'copy'
    tag "${reads.baseName}"

    input:
    file(reads) from fastq_files_urqt_fastq

    output:
    file("*.{zip,html}") into urqt_fastqc_repport

    script:
    """
    fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
    """
}
// Trim the CCA / TGG sequences with cutadapt (-a CCA -g TGG).
// Only the "*_cut_CCA.fastq.gz" file is declared as output; cutadapt's stdout
// is redirected to a report file that stays in the task work directory.
process CCA_removal {
    tag "${reads.baseName}"

    input:
    file(reads) from fastq_files_CCA

    output:
    file("*_cut_CCA.fastq.gz") into fastq_files_cut_CCA

    script:
    """
    cutadapt -a CCA -g TGG \
    -o ${reads.baseName}_cut_CCA.fastq.gz \
    ${reads} > ${reads.baseName}_report.txt
    """
}
/*
* Bowtie2 :
* Inputs : fastq files
* Inputs : fasta files
* Output : bam files
*/
/* fasta indexing */
// Glob selecting the reference fasta file(s) to index.
params.fasta = "$baseDir/data/bam/*.fasta"
log.info "fasta files : ${params.fasta}"

// Build the channel of fasta files; stop with an error when the glob matches
// nothing. The message names fasta files, which is what the glob looks for.
Channel
    .fromPath( params.fasta )
    .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
    .set { fasta_file }
// Build the bowtie2 index for a fasta file.
// bowtie2-build output (stdout and stderr) is captured in a report file; the
// task fails explicitly when that report contains "Error".
process index_fasta {
    cpus 4
    publishDir "results/mapping_CCA_local/index/", mode: 'copy'
    tag "${fasta.baseName}"

    input:
    file(fasta) from fasta_file

    output:
    file("*.index*") into index_files

    script:
    """
    bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
    if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
      exit 1
    fi
    """
}
/*
* for single-end data
*/
// Map the processed reads with bowtie2 (--very-sensitive-local) and convert
// the alignments to BAM with samtools.
// Outputs: the BAM file (`bam_files`) and the bowtie2 stderr report
// (`mapping_fastq_report`); both are published.
process mapping_fastq {
    cpus 4
    publishDir "results/SE_CCA_sequencing_local/mapping_SE_CCA/bams/", mode: 'copy'
    tag "${reads.baseName}"

    input:
    file(reads) from fastq_files_cut_CCA
    file(index) from index_files.collect()

    output:
    file("*.bam") into bam_files
    file("*_bowtie2_report.txt") into mapping_fastq_report

    script:
    // Derive the index prefix passed to `-x`: take the name of the forward
    // ".1.bt2" file (skipping ".rev.1.bt2") with that suffix removed.
    // Falls back to the first index file when no such file is found.
    index_id = index[0]
    for (candidate in index) {
        if (candidate =~ /.*\.rev\.1\.bt2/) {
            continue
        }
        if (candidate =~ /.*\.1\.bt2/) {
            index_id = (candidate =~ /(.*)\.1\.bt2/)[0][1]
        }
    }
    // bowtie2 is the first command of a pipe, so the report is also scanned
    // for "Error" to fail the task explicitly.
    """
    bowtie2 --very-sensitive-local -p ${task.cpus} -x ${index_id} \
    -U ${reads} 2> \
    ${reads.baseName}_bowtie2_report.txt | \
    samtools view -Sb - > ${reads.baseName}.bam
    if grep -q "Error" ${reads.baseName}_bowtie2_report.txt; then
      exit 1
    fi
    """
}
/*
* filtering mapping quality >= 2
*/
// Filter alignments with samtools view -q 2 and write them as BAM (with header).
// NOTE(review): the output is named from the full input file name, so it reads
// "<name>.bam_filtered.bam" — confirm whether `${bam.baseName}` was intended.
process quality_filtered_bam {
    cpus 2
    tag "${bam.baseName}"

    input:
    file(bam) from bam_files

    output:
    file("*_filtered.bam*") into filtered_bam_files

    script:
    """
    samtools view -hb -q 2 ${bam} > ${bam}_filtered.bam
    """
}
/* MultiQC */
// Aggregate all FastQC reports (raw, cutadapt, random-base trimming, UrQt)
// into a single MultiQC report.
// Inputs : the four FastQC report channels, each collected into one list so
//          the process runs once with every report staged in its work dir.
// Output : files matching "*multiqc_*", published and sent to `multiqc_report`.
process multiqc {
    tag "$repport"
    publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/multiqc/CCA_removal", mode: 'copy'
    // Directive form (no `=`), consistent with the other processes of this file.
    cpus 1

    input:
    file repport from fastqc_repport.collect()
    file repport_urqt from urqt_fastqc_repport.collect()
    file repport_cutadapt from cutadapt_fastqc_repport.collect()
    file repport_randombp from randombp_fastqc_repport.collect()

    output:
    file "*multiqc_*" into multiqc_report

    script:
    // `-f` overwrites existing reports; `.` scans the task work directory.
    """
    multiqc -f .
    """
}
/* bams sorting */
// Sort the filtered BAM with samtools sort, using the task's cpu count for -@.
// The "*_sorted.bam" file is published and sent to `sorted_bam_files`.
process sort_bam {
    cpus 4
    publishDir "results/SE_CCA_sequencing_local/mapping_SE_CCA/bams/", mode: 'copy'
    tag "${bam.baseName}"

    input:
    file(bam) from filtered_bam_files

    output:
    file("*_sorted.bam") into sorted_bam_files

    script:
    """
    samtools sort -@ ${task.cpus} -O BAM -o ${bam.baseName}_sorted.bam ${bam}
    """
}
/* bams indexing */
// Index the sorted BAM with samtools index.
// Files matching "*bam*" are published and sent to `indexed_bam_file`.
process index_bam {
    publishDir "results/SE_CCA_sequencing_local/mapping_SE_CCA/bams/", mode: 'copy'
    tag "${bam.baseName}"

    input:
    file(bam) from sorted_bam_files

    output:
    file("*bam*") into indexed_bam_file

    script:
    """
    samtools index ${bam}
    """
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment