Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • LBMC/regards/nextflow
  • elabaron/nextflow
  • lanani/nextflow
  • mlepetit/nextflow
  • mdjaffar/nextflow
  • LBMC/RMI2/rmi2_pipelines
  • lpicard/nextflow
  • rseraphi/nextflow
  • hregue/nextflow
  • letien02/nextflow
  • mshamjal/nextflow
  • z483801/nextflow
  • fduveau/nextflow
  • cginevra/nextflow
  • dtorresc/nextflow
  • fmortreu/nextflow
  • jshapiro/nextflow
  • carpin/nextflow
  • LBMC/Delattre/JU28_59vs17_SNP
  • jclaud01/nextflow
  • dchalopi/nextflow
  • mvilcot/nextflow
  • mherbett/nextflow
  • lestrada/nextflow
  • nfontrod/nextflow
  • gbenoit/nextflow
  • gyvert/nextflow
  • aguill09/nextflow
  • alapendr/nextflow
  • jprobin/nextflow
  • vvanoost/nextflow
  • jblin/nextflow
  • mparis/nextflow
  • ogandril/nextflow
  • cbourgeo/nextflow
  • ggirau03/nextflow
  • ecombe01/nextflow
  • acorbin/nextflow
  • pberna01/nextflow
  • pmarie01/nextflow
  • rhoury/nextflow
  • lgely/nextflow
  • jvalat/nextflow
  • cfournea/nextflow
  • mprieux/nextflow
  • hpolvech/nextflow
  • LBMC/nextflow
  • mcariou/nextflow
  • z483800/nextflow
  • yjia01/nextflow
  • jkleine/nextflow
  • LBMC/Palladino/RNAseq_nextflow
  • jseimand/nextflow
  • nlecouvr/nextflow-nathan
54 results
Show changes
Commits on Source (61)
Showing
with 3315 additions and 0 deletions
......@@ -2,3 +2,5 @@ nextflow
.nextflow.log*
.nextflow/
work/
.DS_Store
.Rhistory
profiles {
docker {
docker.temp = 'auto'
docker.enabled = true
process {
$fastqc_fastq {
container = "fastqc:0.11.5"
}
$fastqc_fastq_cutadapt {
container = "fastqc:0.11.5"
}
$fastqc_fastq_randombp {
container = "fastqc:0.11.5"
}
$fastqc_fastq_urqt {
container = "fastqc:0.11.5"
}
$adaptor_removal {
container = "cutadapt:1.14"
}
$multiqc {
container = "multiqc:1.0"
}
$random_bases_4_trimming {
container = "cutadapt:1.14"
}
$trimming {
container = "urqt:d62c1f8"
}
$index_fasta {
container = "bowtie:1.2.2"
}
$mapping_fastq {
container = "bowtie:1.2.2"
}
$sort_bam_forward {
container = "samtools:1.7"
}
$index_bam_forward {
container = "samtools:1.7"
}
$sort_bam_reverse {
container = "samtools:1.7"
}
$index_bam_reverse {
container = "samtools:1.7"
}
$split_bam {
container = "samtools:1.7"
}
}
}
sge {
process{
$fastqc_fastq {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$fastqc_fastq_cutadapt {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$fastqc_fastq_randombp {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$fastqc_fastq_urqt {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$multiqc {
    // This process runs `multiqc` (the docker profile uses the "multiqc:1.0"
    // image for it), so the MultiQC environment module must be loaded here,
    // not FastQC.
    beforeScript = "module purge; module load MultiQC/1.0"
    executor = "sge"
    cpus = 1
    memory = "5GB"
    time = "6h"
    // NOTE(review): queueSize and pollInterval are documented by Nextflow as
    // executor-scope settings; confirm they have any effect in process scope.
    queueSize = 1000
    pollInterval = '60sec'
    queue = 'monointeldeb128'
}
$adaptor_removal {
beforeScript = "module purge; module load cutadapt/1.14"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$random_bases_4_trimming {
beforeScript = "module purge; module load cutadapt/1.14"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$trimming {
beforeScript = "module purge; module load UrQt/d62c1f8"
executor = "sge"
cpus = 4
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$index_fasta {
beforeScript = "module purge; module load Bowtie/1.2.2"
}
$mapping_fastq {
beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie/1.2.2"
}
$sort_bam_forward {
beforeScript = "module purge; module load SAMtools/1.7"
}
$index_bam_forward {
beforeScript = "module purge; module load SAMtools/1.7"
}
$sort_bam_reverse {
beforeScript = "module purge; module load SAMtools/1.7"
}
$index_bam_reverse {
beforeScript = "module purge; module load SAMtools/1.7"
}
$split_bam {
beforeScript = "module purge; module load SAMtools/1.7"
}
}
}
}
/*
* cutadapt :
* Inputs : fastq files
* Output : fastq files
*/
/* Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3 */
/*
* for paired-end data
*/
params.fastq = "$baseDir/data/fastq/*_R{1,2}.fastq.gz"
log.info "fastq files : ${params.fastq}"
Channel
.fromFilePairs( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
fastq_files.into{fastq_files_adaptor; fastq_files_fastq}
process fastqc_fastq {
tag "$pair_id"
publishDir "results/fastq/fastqc/raw", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_fastq
output:
file "*.{zip,html}" into fastqc_repport
script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
// Removes the 3' (-a) and 5' (-g) adaptor sequences from each mate of a pair
// with cutadapt. The two mates are processed by two independent cutadapt
// calls, each with its own adaptor sequences.
process adaptor_removal {
  tag "$pair_id"

  input:
  set pair_id, file(reads) from fastq_files_adaptor

  output:
  set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut

  script:
  // Each call writes its own report file; a shared report name would let the
  // R2 run overwrite the R1 report.
  """
  cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
  -o ${pair_id}_cut_R1.fastq.gz \
  ${reads[0]} > ${pair_id}_R1_report.txt
  cutadapt -a GATCGTCGGACTGTAGAACTCTGAAC -g GTTCAGAGTTCTACAGTCCGACGATC \
  -o ${pair_id}_cut_R2.fastq.gz \
  ${reads[1]} > ${pair_id}_R2_report.txt
  """
}
fastq_files_cut.into{fastq_files_cut_randombp; fastq_files_cut_fastq}
process fastqc_fastq_cutadapt {
tag "$pair_id"
publishDir "results/fastq/fastqc/adaptor_removal/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_cut_fastq
output:
file "*.{zip,html}" into cutadapt_fastqc_repport
script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
// Clips 4 bases from both ends of every read of a pair with cutadapt and
// publishes the clipped fastq files.
process random_bases_4_trimming {
  tag "$pair_id"
  publishDir "results/fastq/adaptor_removal/", mode: 'copy'

  input:
  set pair_id, file(reads) from fastq_files_cut_randombp

  output:
  set pair_id, "*_cut4_R{1,2}.fastq.gz" into fastq_files_cut4

  script:
  // In paired-end mode `-u` only applies to the first read; `-U` is the
  // matching option for the second read, so both are needed to clip R1 and R2.
  """
  cutadapt -u 4 -u -4 -U 4 -U -4 \
  -o ${pair_id}_cut4_R1.fastq.gz -p ${pair_id}_cut4_R2.fastq.gz \
  ${reads[0]} ${reads[1]} > ${pair_id}_report.txt
  """
}
fastq_files_cut4.into{fastq_files_trim; fastq_files_cut4_fastq}
process fastqc_fastq_randombp {
tag "$pair_id"
publishDir "results/fastq/fastqc/random_bases_4_trimming/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_cut4_fastq
output:
file "*.{zip,html}" into randombp_fastqc_repport
script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
/*
* urqt :
* Inputs : fastq files
* Output : fastq files
*/
/* quality trimming */
/*
* for paired-end data
*/
process trimming {
tag "${reads}"
cpus 4
publishDir "results/fastq/trimming/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_trim
output:
set pair_id, "*_trim_R{1,2}.fastq.gz" into fastq_files_urqt
script:
"""
UrQt --t 20 --m ${task.cpus} --gz \
--in ${reads[0]} --inpair ${reads[1]} \
--out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
> ${pair_id}_trimming_report.txt
"""
}
fastq_files_urqt.into{fastq_files_align; fastq_files_urqt_fastq}
process fastqc_fastq_urqt {
tag "$pair_id"
publishDir "results/fastq/fastqc/urqt/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_urqt_fastq
output:
file "*.{zip,html}" into urqt_fastqc_repport
script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
/*
* Bowtie1 :
* Inputs : fastq files
* Inputs : fasta files
* Output : bam files
*/
/* fasta indexing */
params.fasta = "$baseDir/data/bam/*.fasta"
log.info "fasta files : ${params.fasta}"
// Channel of reference fasta file(s) matched by params.fasta; the pipeline
// aborts with an explicit message when the glob matches nothing.
Channel
  .fromPath( params.fasta )
  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
  .set { fasta_file }
process index_fasta {
tag "$fasta.baseName"
cpus 4
publishDir "results/mapping_bowtie1/index/", mode: 'copy'
input:
file fasta from fasta_file
output:
file "*.index*" into index_files
file "*_report.txt" into indexing_report
script:
"""
bowtie-build --threads ${task.cpus} -f ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie_report.txt
if grep -q "Error" ${fasta.baseName}_bowtie_report.txt; then
exit 1
fi
"""
}
/*
* for paired-end data
*/
// Aligns one pair of quality-trimmed fastq files with bowtie and pipes the SAM
// output through samtools to produce one BAM file per pair, plus a text report
// holding bowtie's stderr.
process mapping_fastq {
tag "$pair_id"
cpus 4
publishDir "results/mapping_bowtie1/bams/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_align
// toList() gathers every index file into one list handed to the task
file index from index_files.toList()
output:
set pair_id, "*.bam" into bam_files
file "*_bowtie_report.txt" into mapping_fastq_report
script:
// Work out the index prefix to give bowtie: take the "<prefix>.1.ebwt" file
// (skipping "<prefix>.rev.1.ebwt") and strip the ".1.ebwt" suffix. When no
// such file is present, index_id stays at the first staged index file.
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.ebwt/ && !(index_file =~ /.*\.rev\.1\.ebwt/)) {
index_id = ( index_file =~ /(.*)\.1\.ebwt/)[0][1]
}
}
// bowtie's stderr is redirected to the report file; the script then fails the
// task (exit 1) if that report contains the string "Error".
"""
# -v specify the max number of missmatch, -k the number of match reported per
# reads
bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \
-1 ${reads[0]} -2 ${reads[1]} 2> \
${pair_id}_bowtie_report.txt | \
samtools view -Sb - > ${pair_id}.bam
if grep -q "Error" ${pair_id}_bowtie_report.txt; then
exit 1
fi
"""
}
/* MultiQC */
// Aggregates the FastQC reports of every stage (raw, adaptor removal, random
// base clipping, UrQt trimming) into a single MultiQC report.
process multiqc {
  tag "$repport"
  publishDir "results/fastq/multiqc/", mode: 'copy'
  // Directive form (no `=`), as used by every other process in this script;
  // the assignment form is config-file syntax.
  cpus 1

  input:
  // collect() makes each input a single list, so the process runs once with
  // all reports staged in the task directory.
  file repport from fastqc_repport.collect()
  file repport_urqt from urqt_fastqc_repport.collect()
  file repport_cutadapt from cutadapt_fastqc_repport.collect()
  file repport_randombp from randombp_fastqc_repport.collect()

  output:
  file "*multiqc_*" into multiqc_report

  script:
  """
  multiqc -f .
  """
}
/* bams splitting */
// Splits each BAM on SAM flag 0x10: records without the flag go to
// <pair_id>_forward.bam (-F 0x10), records with it go to
// <pair_id>_reverse.bam (-f 0x10). The two extractions run concurrently.
process split_bam {
  tag "$pair_id"
  cpus 2

  input:
  set pair_id, file(bam) from bam_files

  output:
  set pair_id, "*_forward.bam" into forward_bam_files
  set pair_id, "*_reverse.bam" into reverse_bam_files

  script:
  // The forward extraction runs in the background; without an explicit wait
  // the script could end while it is still writing, and its exit status would
  // be ignored. Waiting on its PID fixes both.
  """
  samtools view -hb -F 0x10 ${bam} > ${pair_id}_forward.bam &
  forward_pid=\$!
  samtools view -hb -f 0x10 ${bam} > ${pair_id}_reverse.bam
  wait \$forward_pid
  """
}
/* bams sorting */
process sort_bam_forward {
tag "$pair_id"
cpus 4
publishDir "results/mapping/bams/", mode: 'copy'
input:
set pair_id, file(bam) from forward_bam_files
output:
set pair_id, "*_sorted.bam" into forward_sorted_bam_files
script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${pair_id}_forward_sorted.bam ${bam}
"""
}
process sort_bam_reverse {
tag "$pair_id"
cpus 4
publishDir "results/mapping/bams/", mode: 'copy'
input:
set pair_id, file(bam) from reverse_bam_files
output:
set pair_id, "*_sorted.bam" into reverse_sorted_bam_files
script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${pair_id}_reverse_sorted.bam ${bam}
"""
}
/* bams indexing */
process index_bam_forward {
tag "$pair_id"
publishDir "results/mapping/bams/", mode: 'copy'
input:
set pair_id, file(bam) from forward_sorted_bam_files
output:
set pair_id, "*bam*" into forward_indexed_bam_file
script:
"""
samtools index ${bam}
"""
}
process index_bam_reverse {
tag "$pair_id"
publishDir "results/mapping/bams/", mode: 'copy'
input:
set pair_id, file(bam) from reverse_sorted_bam_files
output:
set pair_id, "*bam*" into reverse_indexed_bam_file
script:
"""
samtools index ${bam}
"""
}
./nextflow src/RNASeq_sen1D.nf -c src/RNASeq_sen1D.config -profile docker -resume -with-dag results/RNASeq_dag.pdf -with-timeline results/RNASeq_timeline --fasta data/bam/S_pombe_full_genome_ncbi_2017_04_27.fasta --fastq "data/fastq/*_R{1,2}.fastq.gz"
profiles {
docker {
docker.temp = 'auto'
docker.enabled = true
process {
$fastqc_fastq {
container = "fastqc:0.11.5"
}
$fastqc_fastq_cutadapt {
container = "fastqc:0.11.5"
}
$fastqc_fastq_randombp {
container = "fastqc:0.11.5"
}
$fastqc_fastq_urqt {
container = "fastqc:0.11.5"
}
$adaptor_removal {
container = "cutadapt:1.14"
}
$multiqc {
container = "multiqc:1.0"
}
$random_bases_4_trimming {
container = "cutadapt:1.14"
}
$trimming {
container = "urqt:d62c1f8"
}
$index_fasta {
container = "bowtie2:2.3.4.1"
}
$mapping_fastq {
container = "bowtie2:2.3.4.1"
}
$sort_bam_forward {
container = "samtools:1.7"
}
$index_bam_forward {
container = "samtools:1.7"
}
$sort_bam_reverse {
container = "samtools:1.7"
}
$index_bam_reverse {
container = "samtools:1.7"
}
$split_bam {
container = "samtools:1.7"
}
}
}
sge {
process{
$fastqc_fastq {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$fastqc_fastq_cutadapt {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$fastqc_fastq_randombp {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$fastqc_fastq_urqt {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$multiqc {
    // This process runs `multiqc` (the docker profile uses the "multiqc:1.0"
    // image for it), so the MultiQC environment module must be loaded here,
    // not FastQC.
    beforeScript = "module purge; module load MultiQC/1.0"
    executor = "sge"
    cpus = 1
    memory = "5GB"
    time = "6h"
    // NOTE(review): queueSize and pollInterval are documented by Nextflow as
    // executor-scope settings; confirm they have any effect in process scope.
    queueSize = 1000
    pollInterval = '60sec'
    queue = 'monointeldeb128'
}
$adaptor_removal {
beforeScript = "module purge; module load cutadapt/1.14"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$random_bases_4_trimming {
beforeScript = "module purge; module load cutadapt/1.14"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$trimming {
beforeScript = "module purge; module load UrQt/d62c1f8"
executor = "sge"
cpus = 4
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$index_fasta {
beforeScript = "module purge; module load Bowtie2/2.3.4.1"
}
$mapping_fastq {
beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie2/2.3.4.1"
}
$sort_bam_forward {
beforeScript = "module purge; module load SAMtools/1.7"
}
$index_bam_forward {
beforeScript = "module purge; module load SAMtools/1.7"
}
$sort_bam_reverse {
beforeScript = "module purge; module load SAMtools/1.7"
}
$index_bam_reverse {
beforeScript = "module purge; module load SAMtools/1.7"
}
$split_bam {
beforeScript = "module purge; module load SAMtools/1.7"
}
}
}
}
/*
* cutadapt :
* Inputs : fastq files
* Output : fastq files
*/
/* Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3 */
/*
* for paired-end data
*/
params.fastq = "$baseDir/data/fastq/*_R{1,2}.fastq.gz"
log.info "fastq files : ${params.fastq}"
Channel
.fromFilePairs( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
fastq_files.into{fastq_files_adaptor; fastq_files_fastq}
process fastqc_fastq {
tag "$pair_id"
publishDir "results/fastq/fastqc/raw", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_fastq
output:
file "*.{zip,html}" into fastqc_repport
script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
// Removes the 3' (-a) and 5' (-g) adaptor sequences from each mate of a pair
// with cutadapt. The two mates are processed by two independent cutadapt
// calls, each with its own adaptor sequences.
process adaptor_removal {
  tag "$pair_id"

  input:
  set pair_id, file(reads) from fastq_files_adaptor

  output:
  set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut

  script:
  // Each call writes its own report file; a shared report name would let the
  // R2 run overwrite the R1 report.
  """
  cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
  -o ${pair_id}_cut_R1.fastq.gz \
  ${reads[0]} > ${pair_id}_R1_report.txt
  cutadapt -a GATCGTCGGACTGTAGAACTCTGAAC -g GTTCAGAGTTCTACAGTCCGACGATC \
  -o ${pair_id}_cut_R2.fastq.gz \
  ${reads[1]} > ${pair_id}_R2_report.txt
  """
}
fastq_files_cut.into{fastq_files_cut_randombp; fastq_files_cut_fastq}
process fastqc_fastq_cutadapt {
tag "$pair_id"
publishDir "results/fastq/fastqc/adaptor_removal/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_cut_fastq
output:
file "*.{zip,html}" into cutadapt_fastqc_repport
script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
// Clips 4 bases from both ends of every read of a pair with cutadapt and
// publishes the clipped fastq files.
process random_bases_4_trimming {
  tag "$pair_id"
  publishDir "results/fastq/adaptor_removal/", mode: 'copy'

  input:
  set pair_id, file(reads) from fastq_files_cut_randombp

  output:
  set pair_id, "*_cut4_R{1,2}.fastq.gz" into fastq_files_cut4

  script:
  // In paired-end mode `-u` only applies to the first read; `-U` is the
  // matching option for the second read, so both are needed to clip R1 and R2.
  """
  cutadapt -u 4 -u -4 -U 4 -U -4 \
  -o ${pair_id}_cut4_R1.fastq.gz -p ${pair_id}_cut4_R2.fastq.gz \
  ${reads[0]} ${reads[1]} > ${pair_id}_report.txt
  """
}
fastq_files_cut4.into{fastq_files_trim; fastq_files_cut4_fastq}
process fastqc_fastq_randombp {
tag "$pair_id"
publishDir "results/fastq/fastqc/random_bases_4_trimming/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_cut4_fastq
output:
file "*.{zip,html}" into randombp_fastqc_repport
script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
/*
* urqt :
* Inputs : fastq files
* Output : fastq files
*/
/* quality trimming */
/*
* for paired-end data
*/
process trimming {
tag "${reads}"
cpus 4
publishDir "results/fastq/trimming/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_trim
output:
set pair_id, "*_trim_R{1,2}.fastq.gz" into fastq_files_urqt
script:
"""
UrQt --t 20 --m ${task.cpus} --gz \
--in ${reads[0]} --inpair ${reads[1]} \
--out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
> ${pair_id}_trimming_report.txt
"""
}
fastq_files_urqt.into{fastq_files_align; fastq_files_urqt_fastq}
process fastqc_fastq_urqt {
tag "$pair_id"
publishDir "results/fastq/fastqc/urqt/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_urqt_fastq
output:
file "*.{zip,html}" into urqt_fastqc_repport
script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
/*
* Bowtie2 :
* Inputs : fastq files
* Inputs : fasta files
* Output : bam files
*/
/* fasta indexing */
params.fasta = "$baseDir/data/bam/*.fasta"
log.info "fasta files : ${params.fasta}"
// Channel of reference fasta file(s) matched by params.fasta; the pipeline
// aborts with an explicit message when the glob matches nothing.
Channel
  .fromPath( params.fasta )
  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
  .set { fasta_file }
process index_fasta {
tag "$fasta.baseName"
cpus 4
publishDir "results/mapping/index/", mode: 'copy'
input:
file fasta from fasta_file
output:
file "*.index*" into index_files
script:
"""
bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
exit 1
fi
"""
}
/*
* for paired-end data
*/
// Aligns one pair of quality-trimmed fastq files with bowtie2 and pipes the
// SAM output through samtools to produce one BAM file per pair, plus a text
// report holding bowtie2's stderr.
process mapping_fastq {
tag "$pair_id"
cpus 4
publishDir "results/mapping/bams/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files_align
// collect() gathers every index file into one list handed to the task
file index from index_files.collect()
output:
set pair_id, "*.bam" into bam_files
file "*_bowtie2_report.txt" into mapping_fastq_report
script:
// Work out the index prefix to give bowtie2 (-x): take the "<prefix>.1.bt2"
// file (skipping "<prefix>.rev.1.bt2") and strip the ".1.bt2" suffix. When no
// such file is present, index_id stays at the first staged index file.
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/) ) {
index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
}
}
// bowtie2's stderr is redirected to the report file; the script then fails the
// task (exit 1) if that report contains the string "Error".
"""
bowtie2 --very-sensitive --fr -p ${task.cpus} -x ${index_id} \
-1 ${reads[0]} -2 ${reads[1]} 2> \
${pair_id}_bowtie2_report.txt | \
samtools view -Sb - > ${pair_id}.bam
if grep -q "Error" ${pair_id}_bowtie2_report.txt; then
exit 1
fi
"""
}
/* MultiQC */
// Aggregates the FastQC reports of every stage (raw, adaptor removal, random
// base clipping, UrQt trimming) into a single MultiQC report.
process multiqc {
  tag "$repport"
  publishDir "results/fastq/multiqc/", mode: 'copy'
  // Directive form (no `=`), as used by every other process in this script;
  // the assignment form is config-file syntax.
  cpus 1

  input:
  // collect() makes each input a single list, so the process runs once with
  // all reports staged in the task directory.
  file repport from fastqc_repport.collect()
  file repport_urqt from urqt_fastqc_repport.collect()
  file repport_cutadapt from cutadapt_fastqc_repport.collect()
  file repport_randombp from randombp_fastqc_repport.collect()

  output:
  file "*multiqc_*" into multiqc_report

  script:
  """
  multiqc -f .
  """
}
/* bams splitting */
// Splits each BAM on SAM flag 0x10: records without the flag go to
// <pair_id>_forward.bam (-F 0x10), records with it go to
// <pair_id>_reverse.bam (-f 0x10). The two extractions run concurrently.
process split_bam {
  tag "$pair_id"
  cpus 2

  input:
  set pair_id, file(bam) from bam_files

  output:
  set pair_id, "*_forward.bam" into forward_bam_files
  set pair_id, "*_reverse.bam" into reverse_bam_files

  script:
  // The forward extraction runs in the background; without an explicit wait
  // the script could end while it is still writing, and its exit status would
  // be ignored. Waiting on its PID fixes both.
  """
  samtools view -hb -F 0x10 ${bam} > ${pair_id}_forward.bam &
  forward_pid=\$!
  samtools view -hb -f 0x10 ${bam} > ${pair_id}_reverse.bam
  wait \$forward_pid
  """
}
/* bams sorting */
process sort_bam_forward {
tag "$pair_id"
cpus 4
publishDir "results/mapping/bams/", mode: 'copy'
input:
set pair_id, file(bam) from forward_bam_files
output:
set pair_id, "*_sorted.bam" into forward_sorted_bam_files
script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${pair_id}_forward_sorted.bam ${bam}
"""
}
process sort_bam_reverse {
tag "$pair_id"
cpus 4
publishDir "results/mapping/bams/", mode: 'copy'
input:
set pair_id, file(bam) from reverse_bam_files
output:
set pair_id, "*_sorted.bam" into reverse_sorted_bam_files
script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${pair_id}_reverse_sorted.bam ${bam}
"""
}
/* bams indexing */
process index_bam_forward {
tag "$pair_id"
publishDir "results/mapping/bams/", mode: 'copy'
input:
set pair_id, file(bam) from forward_sorted_bam_files
output:
set pair_id, "*bam*" into forward_indexed_bam_file
script:
"""
samtools index ${bam}
"""
}
process index_bam_reverse {
tag "$pair_id"
publishDir "results/mapping/bams/", mode: 'copy'
input:
set pair_id, file(bam) from reverse_sorted_bam_files
output:
set pair_id, "*bam*" into reverse_indexed_bam_file
script:
"""
samtools index ${bam}
"""
}
profiles {
docker {
docker.temp = 'auto'
docker.enabled = true
process {
$fastqc_fastq {
container = "fastqc:0.11.5"
}
$fastqc_fastq_cutadapt {
container = "fastqc:0.11.5"
}
$fastqc_fastq_randombp {
container = "fastqc:0.11.5"
}
$fastqc_fastq_urqt {
container = "fastqc:0.11.5"
}
$adaptor_removal {
container = "cutadapt:1.14"
}
$multiqc {
container = "multiqc:1.0"
}
$random_bases_4_trimming {
container = "cutadapt:1.14"
}
$trimming {
container = "urqt:d62c1f8"
}
$index_fasta {
container = "bowtie2:2.3.4.1"
}
$mapping_fastq {
container = "bowtie2:2.3.4.1"
}
$quality_filtered_bam {
container = "samtools:1.7"
}
$sort_bam {
container = "samtools:1.7"
}
$index_bam {
container = "samtools:1.7"
}
}
}
sge {
process{
$fastqc_fastq {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$fastqc_fastq_cutadapt {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$fastqc_fastq_randombp {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$fastqc_fastq_urqt {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$multiqc {
    // This process runs `multiqc` (the docker profile uses the "multiqc:1.0"
    // image for it), so the MultiQC environment module must be loaded here,
    // not FastQC.
    beforeScript = "module purge; module load MultiQC/1.0"
    executor = "sge"
    cpus = 1
    memory = "5GB"
    time = "6h"
    // NOTE(review): queueSize and pollInterval are documented by Nextflow as
    // executor-scope settings; confirm they have any effect in process scope.
    queueSize = 1000
    pollInterval = '60sec'
    queue = 'monointeldeb128'
}
$adaptor_removal {
beforeScript = "module purge; module load cutadapt/1.14"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$random_bases_4_trimming {
beforeScript = "module purge; module load cutadapt/1.14"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$trimming {
beforeScript = "module purge; module load UrQt/d62c1f8"
executor = "sge"
cpus = 4
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$index_fasta {
beforeScript = "module purge; module load Bowtie2/2.3.4.1"
}
$mapping_fastq {
beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie2/2.3.4.1"
}
$quality_filtered_bam {
beforeScript = "module purge; module load SAMtools/1.7"
}
$sort_bam {
beforeScript = "module purge; module load SAMtools/1.7"
}
$index_bam {
beforeScript = "module purge; module load SAMtools/1.7"
}
}
}
}
/*
* cutadapt :
* Inputs : fastq files
* Output : fastq files
*/
/* Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3 */
/*
* for single-end data
*/
params.fastq = "$baseDir/data/fastq_SE/*.fastq.gz"
log.info "fastq files : ${params.fastq}"
Channel
.fromPath( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
fastq_files.into{fastq_files_adaptor; fastq_files_fastq}
process fastqc_fastq {
tag "$reads.baseName"
publishDir "results/fastq_SE/fastqc/raw", mode: 'copy'
input:
file reads from fastq_files_fastq
output:
file "*.{zip,html}" into fastqc_repport
script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
process adaptor_removal {
tag "$reads.baseName"
input:
file reads from fastq_files_adaptor
output:
file "*_cut.fastq.gz" into fastq_files_cut
script:
"""
cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
-o ${reads.baseName}_cut.fastq.gz \
${reads} > ${reads.baseName}_report.txt
"""
}
fastq_files_cut.into{fastq_files_cut_randombp; fastq_files_cut_fastq}
process fastqc_fastq_cutadapt {
tag "$reads.baseName"
publishDir "results/fastq_SE/fastqc/adaptor_removal/", mode: 'copy'
input:
file (reads) from fastq_files_cut_fastq
output:
file "*.{zip,html}" into cutadapt_fastqc_repport
script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
// Clips 4 bases from both ends of every read of a single-end fastq file with
// cutadapt and publishes the clipped file.
process random_bases_4_trimming {
  tag "$reads.baseName"
  publishDir "results/fastq_SE/adaptor_removal/", mode: 'copy'

  input:
  file reads from fastq_files_cut_randombp

  output:
  file "*_cut4.fastq.gz" into fastq_files_cut4

  script:
  // `reads` is a single file here (not a pair), so it is passed as-is, the
  // same way the other single-end processes of this script do.
  """
  cutadapt -u 4 -u -4 \
  -o ${reads.baseName}_cut4.fastq.gz \
  ${reads} > ${reads.baseName}_report.txt
  """
}
fastq_files_cut4.into{fastq_files_trim; fastq_files_cut4_fastq}
process fastqc_fastq_randombp {
tag "$reads.baseName"
publishDir "results/fastq_SE/fastqc/random_bases_4_trimming/", mode: 'copy'
input:
file reads from fastq_files_cut4_fastq
output:
file "*.{zip,html}" into randombp_fastqc_repport
script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
/*
* urqt :
* Inputs : fastq files
* Output : fastq files
*/
/* quality trimming */
/*
* for single-end data
*/
process trimming {
tag "${reads}"
cpus 4
publishDir "results/fastq_SE/trimming/", mode: 'copy'
input:
file reads from fastq_files_trim
output:
file "*_trim.fastq.gz" into fastq_files_urqt
script:
"""
UrQt --t 20 --m ${task.cpus} --gz \
--in ${reads} \
--out ${reads.baseName}_trim.fastq.gz \
> ${reads.baseName}_trimming_report.txt
"""
}
fastq_files_urqt.into{fastq_files_align; fastq_files_urqt_fastq}
process fastqc_fastq_urqt {
tag "$reads.baseName"
publishDir "results/fastq_SE/fastqc/urqt/", mode: 'copy'
input:
file reads from fastq_files_urqt_fastq
output:
file "*.{zip,html}" into urqt_fastqc_repport
script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
/*
* Bowtie2 :
* Inputs : fastq files
* Inputs : fasta files
* Output : bam files
*/
/* fasta indexing */
params.fasta = "$baseDir/data/bam/*.fasta"
log.info "fasta files : ${params.fasta}"
// Channel of reference fasta file(s) matched by params.fasta; the pipeline
// aborts with an explicit message when the glob matches nothing.
Channel
  .fromPath( params.fasta )
  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
  .set { fasta_file }
process index_fasta {
tag "$fasta.baseName"
cpus 4
publishDir "results/mapping/index/", mode: 'copy'
input:
file fasta from fasta_file
output:
file "*.index*" into index_files
script:
"""
bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
exit 1
fi
"""
}
/*
* for single-end data
*/
// Aligns one quality-trimmed single-end fastq file with bowtie2 and pipes the
// SAM output through samtools to produce one BAM file per input, plus a text
// report holding bowtie2's stderr.
process mapping_fastq {
tag "$reads.baseName"
cpus 4
publishDir "results/mapping_SE/bams/", mode: 'copy'
input:
file reads from fastq_files_align
// collect() gathers every index file into one list handed to the task
file index from index_files.collect()
output:
file "*.bam" into bam_files
file "*_bowtie2_report.txt" into mapping_fastq_report
script:
// Work out the index prefix to give bowtie2 (-x): take the "<prefix>.1.bt2"
// file (skipping "<prefix>.rev.1.bt2") and strip the ".1.bt2" suffix. When no
// such file is present, index_id stays at the first staged index file.
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/) ) {
index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
}
}
// bowtie2's stderr is redirected to the report file; the script then fails the
// task (exit 1) if that report contains the string "Error".
"""
bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
-U ${reads} 2> \
${reads.baseName}_bowtie2_report.txt | \
samtools view -Sb - > ${reads.baseName}.bam
if grep -q "Error" ${reads.baseName}_bowtie2_report.txt; then
exit 1
fi
"""
}
/*
* filtering mapping quality >= 2
*/
// Keeps only the alignments with a mapping quality of at least 2
// (samtools view -q 2) and writes them to <name>_filtered.bam.
process quality_filtered_bam {
  tag "$bam.baseName"
  cpus 2

  input:
  file bam from bam_files

  output:
  file "*_filtered.bam*" into filtered_bam_files

  script:
  // Build the output name from baseName so the input's ".bam" extension is
  // not embedded in the middle of it (x.bam -> x_filtered.bam rather than
  // x.bam_filtered.bam), matching how the sibling processes name outputs.
  """
  samtools view -hb -q 2 ${bam} > ${bam.baseName}_filtered.bam
  """
}
/* MultiQC */
// Aggregates the FastQC reports of every stage (raw, adaptor removal, random
// base clipping, UrQt trimming) into a single MultiQC report.
process multiqc {
  tag "$repport"
  publishDir "results/fastq_SE/multiqc/", mode: 'copy'
  // Directive form (no `=`), as used by every other process in this script;
  // the assignment form is config-file syntax.
  cpus 1

  input:
  // collect() makes each input a single list, so the process runs once with
  // all reports staged in the task directory.
  file repport from fastqc_repport.collect()
  file repport_urqt from urqt_fastqc_repport.collect()
  file repport_cutadapt from cutadapt_fastqc_repport.collect()
  file repport_randombp from randombp_fastqc_repport.collect()

  output:
  file "*multiqc_*" into multiqc_report

  script:
  """
  multiqc -f .
  """
}
/* bams sorting */
process sort_bam {
tag "$bam.baseName"
cpus 4
publishDir "results/mapping_SE/bams/", mode: 'copy'
input:
file bam from filtered_bam_files
output:
file "*_sorted.bam" into sorted_bam_files
script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${bam.baseName}_sorted.bam ${bam}
"""
}
/* bams indexing */
process index_bam {
tag "$bam.baseName"
publishDir "results/mapping_SE/bams/", mode: 'copy'
input:
file bam from sorted_bam_files
output:
file "*bam*" into indexed_bam_file
script:
"""
samtools index ${bam}
"""
}
profiles {
docker {
docker.temp = 'auto'
docker.enabled = true
process {
$fastqc_fastq {
container = "fastqc:0.11.5"
}
$fastqc_fastq_cutadapt {
container = "fastqc:0.11.5"
}
$fastqc_fastq_randombp {
container = "fastqc:0.11.5"
}
$fastqc_fastq_urqt {
container = "fastqc:0.11.5"
}
$adaptor_removal {
container = "cutadapt:1.14"
}
$multiqc {
container = "multiqc:1.0"
}
$random_bases_4_trimming {
container = "cutadapt:1.14"
}
$trimming {
container = "urqt:d62c1f8"
}
$CCA_removal {
container = "cutadapt:1.14"
}
$index_fasta {
container = "bowtie2:2.3.4.1"
}
$mapping_fastq {
container = "bowtie2:2.3.4.1"
}
$quality_filtered_bam {
container = "samtools:1.7"
}
$sort_bam {
container = "samtools:1.7"
}
$index_bam {
container = "samtools:1.7"
}
}
}
sge {
process{
$fastqc_fastq {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$fastqc_fastq_cutadapt {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$fastqc_fastq_randombp {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$fastqc_fastq_urqt {
beforeScript = "module purge; module load FastQC/0.11.5"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'monointeldeb128'
}
$multiqc {
    // This process runs `multiqc` (the docker profile uses the "multiqc:1.0"
    // image for it), so the MultiQC environment module must be loaded here,
    // not FastQC.
    beforeScript = "module purge; module load MultiQC/1.0"
    executor = "sge"
    cpus = 1
    memory = "5GB"
    time = "6h"
    // NOTE(review): queueSize and pollInterval are documented by Nextflow as
    // executor-scope settings; confirm they have any effect in process scope.
    queueSize = 1000
    pollInterval = '60sec'
    queue = 'monointeldeb128'
}
$adaptor_removal {
beforeScript = "module purge; module load cutadapt/1.14"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$random_bases_4_trimming {
beforeScript = "module purge; module load cutadapt/1.14"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$trimming {
beforeScript = "module purge; module load UrQt/d62c1f8"
executor = "sge"
cpus = 4
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$CCA_removal {
beforeScript = "module purge; module load cutadapt/1.14"
executor = "sge"
cpus = 1
memory = "5GB"
time = "6h"
queueSize = 1000
pollInterval = '60sec'
queue = 'h6-E5-2667v4deb128'
penv = 'openmp8'
}
$index_fasta {
beforeScript = "module purge; module load Bowtie2/2.3.4.1"
}
$mapping_fastq {
beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie2/2.3.4.1"
}
$quality_filtered_bam {
beforeScript = "module purge; module load SAMtools/1.7"
}
$sort_bam {
beforeScript = "module purge; module load SAMtools/1.7"
}
$index_bam {
beforeScript = "module purge; module load SAMtools/1.7"
}
}
}
}
/*
* cutadapt :
* Inputs : fastq files
* Output : fastq files
*/
/* Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3 */
/*
* for single-end data
*/
// Glob selecting the single-end reads to process.
params.fastq = "$baseDir/data/fastq_SE/*.fastq.gz"

log.info("fastq files : ${params.fastq}")

// Emit one item per matching file; abort when the glob matches nothing.
Channel
  .fromPath(params.fastq)
  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
  .set { fastq_files }

// One copy goes to adaptor removal, the other to the raw-read FastQC.
fastq_files.into { fastq_files_adaptor; fastq_files_fastq }
process fastqc_fastq {
  // FastQC on the raw reads; the zip/html reports are copied to the results folder.
  publishDir "results/SE_CCA_sequencing/fastq_SE_filtered/fastqc/raw", mode: 'copy'
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_fastq

  output:
    file("*.{zip,html}") into fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
process adaptor_removal {
  // Strip the 3' (-a) and 5' (-g) adaptor sequences with cutadapt.
  // cutadapt's stdout is redirected to a per-sample report file.
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_adaptor

  output:
    file("*_cut.fastq.gz") into fastq_files_cut

  script:
"""
cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
-o ${reads.baseName}_cut.fastq.gz \
${reads} > ${reads.baseName}_report.txt
"""
}

// Adaptor-free reads feed both the random-base trimming and a FastQC run.
fastq_files_cut.into { fastq_files_cut_randombp; fastq_files_cut_fastq }
process fastqc_fastq_cutadapt {
  // FastQC on the reads produced by adaptor_removal.
  publishDir "results/SE_CCA_sequencing/fastq_SE_filtered/fastqc/adaptor_removal/", mode: 'copy'
  tag "${reads.baseName}"

  input:
    file reads from fastq_files_cut_fastq

  output:
    file("*.{zip,html}") into cutadapt_fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
process random_bases_4_trimming {
  // Remove 4 bases from the start (-u 4) and 4 from the end (-u -4) of every read.
  tag "$reads.baseName"
  publishDir "results/SE_CCA_sequencing/fastq_SE_filtered/adaptor_removal/", mode: 'copy'

  input:
    file reads from fastq_files_cut_randombp

  output:
    file "*_cut4.fastq.gz" into fastq_files_cut4

  script:
  // `reads` is a single staged file here, so it is passed as-is (it was
  // `reads[0]`), the same way every other single-end process in this script does.
"""
cutadapt -u 4 -u -4 \
-o ${reads.baseName}_cut4.fastq.gz \
${reads} > ${reads.baseName}_report.txt
"""
}

// Trimmed reads feed both the quality trimming and a FastQC run.
fastq_files_cut4.into{fastq_files_trim; fastq_files_cut4_fastq}
process fastqc_fastq_randombp {
  // FastQC on the reads produced by random_bases_4_trimming.
  publishDir "results/SE_CCA_sequencing/fastq_SE_filtered/fastqc/random_bases_4_trimming/", mode: 'copy'
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_cut4_fastq

  output:
    file("*.{zip,html}") into randombp_fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
/*
* urqt :
* Inputs : fastq files
* Output : fastq files
*/
/* quality trimming */
/*
* for single-end data
*/
process trimming {
  // Quality trimming with UrQt (--t 20), using the CPUs allotted to the task.
  publishDir "results/SE_CCA_sequencing/fastq_SE_filtered/trimming/", mode: 'copy'
  cpus 4
  tag "$reads"

  input:
    file(reads) from fastq_files_trim

  output:
    file("*_trim.fastq.gz") into fastq_files_urqt

  script:
"""
UrQt --t 20 --m ${task.cpus} --gz \
--in ${reads} \
--out ${reads.baseName}_trim.fastq.gz \
> ${reads.baseName}_trimming_report.txt
"""
}

// Quality-trimmed reads feed both the CCA removal and a FastQC run.
fastq_files_urqt.into { fastq_files_CCA; fastq_files_urqt_fastq }
process fastqc_fastq_urqt {
  // FastQC on the reads produced by the UrQt trimming step.
  publishDir "results/SE_CCA_sequencing/fastq_SE_filtered/fastqc/urqt/", mode: 'copy'
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_urqt_fastq

  output:
    file("*.{zip,html}") into urqt_fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
process CCA_removal {
  // Second cutadapt pass: trims CCA as a 3' adaptor (-a) and TGG as a 5' adaptor (-g).
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_CCA

  output:
    file("*_cut_CCA.fastq.gz") into fastq_files_cut_CCA

  script:
"""
cutadapt -a CCA -g TGG \
-o ${reads.baseName}_cut_CCA.fastq.gz \
${reads} > ${reads.baseName}_report.txt
"""
}
/*
* Bowtie2 :
* Inputs : fastq files
* Inputs : fasta files
* Output : bam files
*/
/* fasta indexing */
// Glob selecting the reference fasta file(s) to index.
params.fasta = "$baseDir/data/bam/*.fasta"
log.info "fasta files : ${params.fasta}"
// Abort early when the glob matches nothing; the message now names the
// file type actually searched for (it used to say "bam files").
Channel
  .fromPath( params.fasta )
  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
  .set { fasta_file }
process index_fasta {
  // Build the bowtie2 index of the reference; index files are copied to the results folder.
  publishDir "results/mapping/index/", mode: 'copy'
  cpus 4
  tag "${fasta.baseName}"

  input:
    file(fasta) from fasta_file

  output:
    file("*.index*") into index_files

  script:
  // The build log is scanned for "Error" so that a reported failure stops the task.
"""
bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
exit 1
fi
"""
}
/*
* for single-end data
*/
process mapping_fastq {
  // Align the reads with bowtie2 and convert the SAM stream to BAM with samtools.
  publishDir "results/SE_CCA_sequencing/mapping_SE_CCA/bams/", mode: 'copy'
  cpus 4
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_cut_CCA
    file(index) from index_files.collect()

  output:
    file("*.bam") into bam_files
    file("*_bowtie2_report.txt") into mapping_fastq_report

  script:
  // The bowtie2 index prefix is the name of the forward ".1.bt2" file without
  // that suffix; the first index file is the fallback when none matches.
  index_id = index[0]
  for (candidate in index) {
    if (candidate =~ /.*\.rev\.1\.bt2/) {
      continue
    }
    if (candidate =~ /.*\.1\.bt2/) {
      index_id = (candidate =~ /(.*)\.1\.bt2/)[0][1]
    }
  }
"""
bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
-U ${reads} 2> \
${reads.baseName}_bowtie2_report.txt | \
samtools view -Sb - > ${reads.baseName}.bam
if grep -q "Error" ${reads.baseName}_bowtie2_report.txt; then
exit 1
fi
"""
}
/*
* filtering mapping quality >= 2
*/
process quality_filtered_bam {
  // Keep only alignments with mapping quality >= 2 (samtools view -q 2).
  // The output name is the full input file name followed by "_filtered.bam".
  cpus 2
  tag "${bam.baseName}"

  input:
    file(bam) from bam_files

  output:
    file("*_filtered.bam*") into filtered_bam_files

  script:
"""
samtools view -hb -q 2 ${bam} > ${bam}_filtered.bam
"""
}
/* MultiQC */
process multiqc {
  // Aggregate all FastQC reports of the run into a single MultiQC report.
  tag "$repport"
  publishDir "results/SE_CCA_sequencing/fastq_SE_filtered/multiqc/CCA_removal", mode: 'copy'
  // Directive syntax (no '='), as used by every other process in this script.
  cpus 1

  input:
    // collect() turns each report channel into one emission holding all its
    // files, so every report is staged in the task directory.
    file repport from fastqc_repport.collect()
    file repport_urqt from urqt_fastqc_repport.collect()
    file repport_cutadapt from cutadapt_fastqc_repport.collect()
    file repport_randombp from randombp_fastqc_repport.collect()

  output:
    file "*multiqc_*" into multiqc_report

  script:
"""
multiqc -f .
"""
}
/* bams sorting */
process sort_bam {
  // Sort the filtered BAM with samtools, using the CPUs allotted to the task.
  publishDir "results/SE_CCA_sequencing/mapping_SE_CCA/bams/", mode: 'copy'
  cpus 4
  tag "${bam.baseName}"

  input:
    file(bam) from filtered_bam_files

  output:
    file("*_sorted.bam") into sorted_bam_files

  script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${bam.baseName}_sorted.bam ${bam}
"""
}
/* bams indexing */
process index_bam {
  // Index the sorted BAM; the output glob matches every file whose name contains "bam".
  publishDir "results/SE_CCA_sequencing/mapping_SE_CCA/bams/", mode: 'copy'
  tag "${bam.baseName}"

  input:
    file(bam) from sorted_bam_files

  output:
    file("*bam*") into indexed_bam_file

  script:
"""
samtools index ${bam}
"""
}
profiles {
docker {
  // Docker profile: enable Docker and map each process to its tool image.
  docker.temp = 'auto'
  docker.enabled = true
  process {
    $fastqc_fastq            { container = "fastqc:0.11.5" }
    $fastqc_fastq_cutadapt   { container = "fastqc:0.11.5" }
    $fastqc_fastq_randombp   { container = "fastqc:0.11.5" }
    $fastqc_fastq_urqt       { container = "fastqc:0.11.5" }
    $adaptor_removal         { container = "cutadapt:1.14" }
    $multiqc                 { container = "multiqc:1.0" }
    $random_bases_4_trimming { container = "cutadapt:1.14" }
    $trimming                { container = "urqt:d62c1f8" }
    $CCA_removal             { container = "cutadapt:1.14" }
    $index_fasta             { container = "bowtie2:2.3.4.1" }
    $mapping_fastq           { container = "bowtie2:2.3.4.1" }
    $quality_filtered_bam    { container = "samtools:1.7" }
    $sort_bam                { container = "samtools:1.7" }
    $index_bam               { container = "samtools:1.7" }
  }
}
sge {
  // Per-process settings for the SGE profile: environment module to load
  // before the task script, plus scheduler resources where they are set.
  process {
    // --- FastQC steps ---
    $fastqc_fastq {
      beforeScript = "module purge; module load FastQC/0.11.5"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    $fastqc_fastq_cutadapt {
      beforeScript = "module purge; module load FastQC/0.11.5"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    $fastqc_fastq_randombp {
      beforeScript = "module purge; module load FastQC/0.11.5"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    $fastqc_fastq_urqt {
      beforeScript = "module purge; module load FastQC/0.11.5"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    // --- MultiQC ---
    $multiqc {
      // The process runs `multiqc`, so load the MultiQC module (was FastQC/1.0).
      // NOTE(review): module name assumed to follow the cluster's Tool/version scheme - confirm.
      beforeScript = "module purge; module load MultiQC/1.0"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    // --- cutadapt / UrQt steps ---
    $adaptor_removal {
      beforeScript = "module purge; module load cutadapt/1.14"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'h6-E5-2667v4deb128'
      penv = 'openmp8'
    }
    $random_bases_4_trimming {
      beforeScript = "module purge; module load cutadapt/1.14"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'h6-E5-2667v4deb128'
      penv = 'openmp8'
    }
    $trimming {
      beforeScript = "module purge; module load UrQt/d62c1f8"
      executor = "sge"
      cpus = 4
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'h6-E5-2667v4deb128'
      penv = 'openmp8'
    }
    $CCA_removal {
      beforeScript = "module purge; module load cutadapt/1.14"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'h6-E5-2667v4deb128'
      penv = 'openmp8'
    }
    // --- mapping steps ---
    // NOTE(review): the scopes below only load modules; unlike the scopes above
    // they set no executor, queue or resources - confirm this is intended.
    $index_fasta {
      beforeScript = "module purge; module load Bowtie2/2.3.4.1"
    }
    $mapping_fastq {
      beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie2/2.3.4.1"
    }
    $quality_filtered_bam {
      beforeScript = "module purge; module load SAMtools/1.7"
    }
    $sort_bam {
      beforeScript = "module purge; module load SAMtools/1.7"
    }
    $index_bam {
      beforeScript = "module purge; module load SAMtools/1.7"
    }
  }
}
}
/*
* cutadapt :
* Inputs : fastq files
* Output : fastq files
*/
/* Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3 */
/*
* for single-end data
*/
// Glob selecting the single-end reads to process.
params.fastq = "$baseDir/data/fastq_SE/*.fastq.gz"

log.info("fastq files : ${params.fastq}")

// Emit one item per matching file; abort when the glob matches nothing.
Channel
  .fromPath(params.fastq)
  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
  .set { fastq_files }

// One copy goes to adaptor removal, the other to the raw-read FastQC.
fastq_files.into { fastq_files_adaptor; fastq_files_fastq }
process fastqc_fastq {
  // FastQC on the raw reads; the zip/html reports are copied to the results folder.
  publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/fastqc/raw", mode: 'copy'
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_fastq

  output:
    file("*.{zip,html}") into fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
process adaptor_removal {
  // Strip the 3' (-a) and 5' (-g) adaptor sequences with cutadapt.
  // cutadapt's stdout is redirected to a per-sample report file.
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_adaptor

  output:
    file("*_cut.fastq.gz") into fastq_files_cut

  script:
"""
cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
-o ${reads.baseName}_cut.fastq.gz \
${reads} > ${reads.baseName}_report.txt
"""
}

// Adaptor-free reads feed both the random-base trimming and a FastQC run.
fastq_files_cut.into { fastq_files_cut_randombp; fastq_files_cut_fastq }
process fastqc_fastq_cutadapt {
  // FastQC on the reads produced by adaptor_removal.
  publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/fastqc/adaptor_removal/", mode: 'copy'
  tag "${reads.baseName}"

  input:
    file reads from fastq_files_cut_fastq

  output:
    file("*.{zip,html}") into cutadapt_fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
process random_bases_4_trimming {
  // Remove 4 bases from the start (-u 4) and 4 from the end (-u -4) of every read.
  tag "$reads.baseName"
  publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/adaptor_removal/", mode: 'copy'

  input:
    file reads from fastq_files_cut_randombp

  output:
    file "*_cut4.fastq.gz" into fastq_files_cut4

  script:
  // `reads` is a single staged file here, so it is passed as-is (it was
  // `reads[0]`), the same way every other single-end process in this script does.
"""
cutadapt -u 4 -u -4 \
-o ${reads.baseName}_cut4.fastq.gz \
${reads} > ${reads.baseName}_report.txt
"""
}

// Trimmed reads feed both the quality trimming and a FastQC run.
fastq_files_cut4.into{fastq_files_trim; fastq_files_cut4_fastq}
process fastqc_fastq_randombp {
  // FastQC on the reads produced by random_bases_4_trimming.
  publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/fastqc/random_bases_4_trimming/", mode: 'copy'
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_cut4_fastq

  output:
    file("*.{zip,html}") into randombp_fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
/*
* urqt :
* Inputs : fastq files
* Output : fastq files
*/
/* quality trimming */
/*
* for single-end data
*/
process trimming {
  // Quality trimming with UrQt (--t 20), using the CPUs allotted to the task.
  publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/trimming/", mode: 'copy'
  cpus 4
  tag "$reads"

  input:
    file(reads) from fastq_files_trim

  output:
    file("*_trim.fastq.gz") into fastq_files_urqt

  script:
"""
UrQt --t 20 --m ${task.cpus} --gz \
--in ${reads} \
--out ${reads.baseName}_trim.fastq.gz \
> ${reads.baseName}_trimming_report.txt
"""
}

// Quality-trimmed reads feed both the CCA removal and a FastQC run.
fastq_files_urqt.into { fastq_files_CCA; fastq_files_urqt_fastq }
process fastqc_fastq_urqt {
  // FastQC on the reads produced by the UrQt trimming step.
  publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/fastqc/urqt/", mode: 'copy'
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_urqt_fastq

  output:
    file("*.{zip,html}") into urqt_fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
process CCA_removal {
  // Second cutadapt pass: trims CCA as a 3' adaptor (-a) and TGG as a 5' adaptor (-g).
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_CCA

  output:
    file("*_cut_CCA.fastq.gz") into fastq_files_cut_CCA

  script:
"""
cutadapt -a CCA -g TGG \
-o ${reads.baseName}_cut_CCA.fastq.gz \
${reads} > ${reads.baseName}_report.txt
"""
}
/*
* Bowtie2 :
* Inputs : fastq files
* Inputs : fasta files
* Output : bam files
*/
/* fasta indexing */
// Glob selecting the reference fasta file(s) to index.
params.fasta = "$baseDir/data/bam/*.fasta"
log.info "fasta files : ${params.fasta}"
// Abort early when the glob matches nothing; the message now names the
// file type actually searched for (it used to say "bam files").
Channel
  .fromPath( params.fasta )
  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
  .set { fasta_file }
process index_fasta {
  // Build the bowtie2 index of the reference; index files are copied to the results folder.
  publishDir "results/SE_CCA_sequencing_local/mapping_CCA_local/index/", mode: 'copy'
  cpus 4
  tag "${fasta.baseName}"

  input:
    file(fasta) from fasta_file

  output:
    file("*.index*") into index_files

  script:
  // The build log is scanned for "Error" so that a reported failure stops the task.
"""
bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
exit 1
fi
"""
}
/*
* for single-end data
*/
process mapping_fastq {
  // Align the reads with bowtie2 (local mode) and convert the SAM stream to BAM.
  publishDir "results/SE_CCA_sequencing_local/mapping_SE_CCA/bams/", mode: 'copy'
  cpus 4
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_cut_CCA
    file(index) from index_files.collect()

  output:
    file("*.bam") into bam_files
    file("*_bowtie2_report.txt") into mapping_fastq_report

  script:
  // The bowtie2 index prefix is the name of the forward ".1.bt2" file without
  // that suffix; the first index file is the fallback when none matches.
  index_id = index[0]
  for (candidate in index) {
    if (candidate =~ /.*\.rev\.1\.bt2/) {
      continue
    }
    if (candidate =~ /.*\.1\.bt2/) {
      index_id = (candidate =~ /(.*)\.1\.bt2/)[0][1]
    }
  }
"""
bowtie2 --very-sensitive-local -p ${task.cpus} -x ${index_id} \
-U ${reads} 2> \
${reads.baseName}_bowtie2_report.txt | \
samtools view -Sb - > ${reads.baseName}.bam
if grep -q "Error" ${reads.baseName}_bowtie2_report.txt; then
exit 1
fi
"""
}
/*
* filtering mapping quality >= 2
*/
process quality_filtered_bam {
  // Keep only alignments with mapping quality >= 2 (samtools view -q 2).
  // The output name is the full input file name followed by "_filtered.bam".
  cpus 2
  tag "${bam.baseName}"

  input:
    file(bam) from bam_files

  output:
    file("*_filtered.bam*") into filtered_bam_files

  script:
"""
samtools view -hb -q 2 ${bam} > ${bam}_filtered.bam
"""
}
/* MultiQC */
process multiqc {
  // Aggregate all FastQC reports of the run into a single MultiQC report.
  tag "$repport"
  publishDir "results/SE_CCA_sequencing_local/fastq_SE_filtered/multiqc/CCA_removal", mode: 'copy'
  // Directive syntax (no '='), as used by every other process in this script.
  cpus 1

  input:
    // collect() turns each report channel into one emission holding all its
    // files, so every report is staged in the task directory.
    file repport from fastqc_repport.collect()
    file repport_urqt from urqt_fastqc_repport.collect()
    file repport_cutadapt from cutadapt_fastqc_repport.collect()
    file repport_randombp from randombp_fastqc_repport.collect()

  output:
    file "*multiqc_*" into multiqc_report

  script:
"""
multiqc -f .
"""
}
/* bams sorting */
process sort_bam {
  // Sort the filtered BAM with samtools, using the CPUs allotted to the task.
  publishDir "results/SE_CCA_sequencing_local/mapping_SE_CCA/bams/", mode: 'copy'
  cpus 4
  tag "${bam.baseName}"

  input:
    file(bam) from filtered_bam_files

  output:
    file("*_sorted.bam") into sorted_bam_files

  script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${bam.baseName}_sorted.bam ${bam}
"""
}
/* bams indexing */
process index_bam {
  // Index the sorted BAM; the output glob matches every file whose name contains "bam".
  publishDir "results/SE_CCA_sequencing_local/mapping_SE_CCA/bams/", mode: 'copy'
  tag "${bam.baseName}"

  input:
    file(bam) from sorted_bam_files

  output:
    file("*bam*") into indexed_bam_file

  script:
"""
samtools index ${bam}
"""
}
profiles {
docker {
  // Docker profile: enable Docker and map each process to its tool image.
  docker.temp = 'auto'
  docker.enabled = true
  process {
    $fastqc_fastq            { container = "fastqc:0.11.5" }
    $fastqc_fastq_cutadapt   { container = "fastqc:0.11.5" }
    $fastqc_fastq_randombp   { container = "fastqc:0.11.5" }
    $fastqc_fastq_urqt       { container = "fastqc:0.11.5" }
    $adaptor_removal         { container = "cutadapt:1.14" }
    $multiqc                 { container = "multiqc:1.0" }
    $random_bases_4_trimming { container = "cutadapt:1.14" }
    $trimming                { container = "urqt:d62c1f8" }
    $index_fasta             { container = "bowtie2:2.3.4.1" }
    $mapping_fastq           { container = "bowtie2:2.3.4.1" }
    $quality_filtered_bam    { container = "samtools:1.7" }
    $sort_bam                { container = "samtools:1.7" }
    $index_bam               { container = "samtools:1.7" }
  }
}
sge {
  // Per-process settings for the SGE profile: environment module to load
  // before the task script, plus scheduler resources where they are set.
  process {
    // --- FastQC steps ---
    $fastqc_fastq {
      beforeScript = "module purge; module load FastQC/0.11.5"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    $fastqc_fastq_cutadapt {
      beforeScript = "module purge; module load FastQC/0.11.5"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    $fastqc_fastq_randombp {
      beforeScript = "module purge; module load FastQC/0.11.5"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    $fastqc_fastq_urqt {
      beforeScript = "module purge; module load FastQC/0.11.5"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    // --- MultiQC ---
    $multiqc {
      // The process runs `multiqc`, so load the MultiQC module (was FastQC/1.0).
      // NOTE(review): module name assumed to follow the cluster's Tool/version scheme - confirm.
      beforeScript = "module purge; module load MultiQC/1.0"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    // --- cutadapt / UrQt steps ---
    $adaptor_removal {
      beforeScript = "module purge; module load cutadapt/1.14"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'h6-E5-2667v4deb128'
      penv = 'openmp8'
    }
    $random_bases_4_trimming {
      beforeScript = "module purge; module load cutadapt/1.14"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'h6-E5-2667v4deb128'
      penv = 'openmp8'
    }
    $trimming {
      beforeScript = "module purge; module load UrQt/d62c1f8"
      executor = "sge"
      cpus = 4
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'h6-E5-2667v4deb128'
      penv = 'openmp8'
    }
    // --- mapping steps ---
    // NOTE(review): the scopes below only load modules; unlike the scopes above
    // they set no executor, queue or resources - confirm this is intended.
    $index_fasta {
      beforeScript = "module purge; module load Bowtie2/2.3.4.1"
    }
    $mapping_fastq {
      beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie2/2.3.4.1"
    }
    $quality_filtered_bam {
      beforeScript = "module purge; module load SAMtools/1.7"
    }
    $sort_bam {
      beforeScript = "module purge; module load SAMtools/1.7"
    }
    $index_bam {
      beforeScript = "module purge; module load SAMtools/1.7"
    }
  }
}
}
/*
* cutadapt :
* Inputs : fastq files
* Output : fastq files
*/
/* Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3 */
/*
* for single-end data
*/
// Glob selecting the single-end reads to process.
params.fastq = "$baseDir/data/fastq_SE/*.fastq.gz"

log.info("fastq files : ${params.fastq}")

// Emit one item per matching file; abort when the glob matches nothing.
Channel
  .fromPath(params.fastq)
  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
  .set { fastq_files }

// One copy goes to adaptor removal, the other to the raw-read FastQC.
fastq_files.into { fastq_files_adaptor; fastq_files_fastq }
process fastqc_fastq {
  // FastQC on the raw reads; the zip/html reports are copied to the results folder.
  publishDir "results/fastq_SE/fastqc/raw", mode: 'copy'
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_fastq

  output:
    file("*.{zip,html}") into fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
process adaptor_removal {
  // Strip the 3' (-a) and 5' (-g) adaptor sequences with cutadapt.
  // cutadapt's stdout is redirected to a per-sample report file.
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_adaptor

  output:
    file("*_cut.fastq.gz") into fastq_files_cut

  script:
"""
cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
-o ${reads.baseName}_cut.fastq.gz \
${reads} > ${reads.baseName}_report.txt
"""
}

// Adaptor-free reads feed both the random-base trimming and a FastQC run.
fastq_files_cut.into { fastq_files_cut_randombp; fastq_files_cut_fastq }
process fastqc_fastq_cutadapt {
  // FastQC on the reads produced by adaptor_removal.
  publishDir "results/fastq_SE/fastqc/adaptor_removal/", mode: 'copy'
  tag "${reads.baseName}"

  input:
    file reads from fastq_files_cut_fastq

  output:
    file("*.{zip,html}") into cutadapt_fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
process random_bases_4_trimming {
  // Remove 4 bases from the start (-u 4) and 4 from the end (-u -4) of every read.
  tag "$reads.baseName"
  publishDir "results/fastq_SE/adaptor_removal/", mode: 'copy'

  input:
    file reads from fastq_files_cut_randombp

  output:
    file "*_cut4.fastq.gz" into fastq_files_cut4

  script:
  // `reads` is a single staged file here, so it is passed as-is (it was
  // `reads[0]`), the same way every other single-end process in this script does.
"""
cutadapt -u 4 -u -4 \
-o ${reads.baseName}_cut4.fastq.gz \
${reads} > ${reads.baseName}_report.txt
"""
}

// Trimmed reads feed both the quality trimming and a FastQC run.
fastq_files_cut4.into{fastq_files_trim; fastq_files_cut4_fastq}
process fastqc_fastq_randombp {
  // FastQC on the reads produced by random_bases_4_trimming.
  publishDir "results/fastq_SE/fastqc/random_bases_4_trimming/", mode: 'copy'
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_cut4_fastq

  output:
    file("*.{zip,html}") into randombp_fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
/*
* urqt :
* Inputs : fastq files
* Output : fastq files
*/
/* quality trimming */
/*
* for single-end data
*/
process trimming {
  // Quality trimming with UrQt (--t 20), using the CPUs allotted to the task.
  publishDir "results/fastq_SE/trimming/", mode: 'copy'
  cpus 4
  tag "$reads"

  input:
    file(reads) from fastq_files_trim

  output:
    file("*_trim.fastq.gz") into fastq_files_urqt

  script:
"""
UrQt --t 20 --m ${task.cpus} --gz \
--in ${reads} \
--out ${reads.baseName}_trim.fastq.gz \
> ${reads.baseName}_trimming_report.txt
"""
}

// Quality-trimmed reads feed both the alignment and a FastQC run.
fastq_files_urqt.into { fastq_files_align; fastq_files_urqt_fastq }
process fastqc_fastq_urqt {
  // FastQC on the reads produced by the UrQt trimming step.
  publishDir "results/fastq_SE/fastqc/urqt/", mode: 'copy'
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_urqt_fastq

  output:
    file("*.{zip,html}") into urqt_fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads}
"""
}
/*
* Bowtie2 :
* Inputs : fastq files
* Inputs : fasta files
* Output : bam files
*/
/* fasta indexing */
// Glob selecting the reference fasta file(s) to index.
params.fasta = "$baseDir/data/bam/*.fasta"
log.info "fasta files : ${params.fasta}"
// Abort early when the glob matches nothing; the message now names the
// file type actually searched for (it used to say "bam files").
Channel
  .fromPath( params.fasta )
  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
  .set { fasta_file }
process index_fasta {
  // Build the bowtie2 index of the reference; index files are copied to the results folder.
  publishDir "results/mapping/index/", mode: 'copy'
  cpus 4
  tag "${fasta.baseName}"

  input:
    file(fasta) from fasta_file

  output:
    file("*.index*") into index_files

  script:
  // The build log is scanned for "Error" so that a reported failure stops the task.
"""
bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
exit 1
fi
"""
}
/*
* for single-end data
*/
process mapping_fastq {
  // Align the reads with bowtie2 (local mode) and convert the SAM stream to BAM.
  publishDir "results/mapping_SE/bams/", mode: 'copy'
  cpus 4
  tag "${reads.baseName}"

  input:
    file(reads) from fastq_files_align
    file(index) from index_files.collect()

  output:
    file("*.bam") into bam_files
    file("*_bowtie2_report.txt") into mapping_fastq_report

  script:
  // The bowtie2 index prefix is the name of the forward ".1.bt2" file without
  // that suffix; the first index file is the fallback when none matches.
  index_id = index[0]
  for (candidate in index) {
    if (candidate =~ /.*\.rev\.1\.bt2/) {
      continue
    }
    if (candidate =~ /.*\.1\.bt2/) {
      index_id = (candidate =~ /(.*)\.1\.bt2/)[0][1]
    }
  }
"""
bowtie2 --very-sensitive-local -p ${task.cpus} -x ${index_id} \
-U ${reads} 2> \
${reads.baseName}_bowtie2_report.txt | \
samtools view -Sb - > ${reads.baseName}.bam
if grep -q "Error" ${reads.baseName}_bowtie2_report.txt; then
exit 1
fi
"""
}
/*
* filtering mapping quality >= 2
*/
process quality_filtered_bam {
  // Keep only alignments with mapping quality >= 2 (samtools view -q 2).
  // The output name is the full input file name followed by "_filtered.bam".
  cpus 2
  tag "${bam.baseName}"

  input:
    file(bam) from bam_files

  output:
    file("*_filtered.bam*") into filtered_bam_files

  script:
"""
samtools view -hb -q 2 ${bam} > ${bam}_filtered.bam
"""
}
/* MultiQC */
process multiqc {
  // Aggregate all FastQC reports of the run into a single MultiQC report.
  tag "$repport"
  publishDir "results/fastq_SE/multiqc/", mode: 'copy'
  // Directive syntax (no '='), as used by every other process in this script.
  cpus 1

  input:
    // collect() turns each report channel into one emission holding all its
    // files, so every report is staged in the task directory.
    file repport from fastqc_repport.collect()
    file repport_urqt from urqt_fastqc_repport.collect()
    file repport_cutadapt from cutadapt_fastqc_repport.collect()
    file repport_randombp from randombp_fastqc_repport.collect()

  output:
    file "*multiqc_*" into multiqc_report

  script:
"""
multiqc -f .
"""
}
/* bams sorting */
process sort_bam {
  // Sort the filtered BAM with samtools, using the CPUs allotted to the task.
  publishDir "results/mapping_SE/bams/", mode: 'copy'
  cpus 4
  tag "${bam.baseName}"

  input:
    file(bam) from filtered_bam_files

  output:
    file("*_sorted.bam") into sorted_bam_files

  script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${bam.baseName}_sorted.bam ${bam}
"""
}
/* bams indexing */
process index_bam {
  // Index the sorted BAM; the output glob matches every file whose name contains "bam".
  publishDir "results/mapping_SE/bams/", mode: 'copy'
  tag "${bam.baseName}"

  input:
    file(bam) from sorted_bam_files

  output:
    file("*bam*") into indexed_bam_file

  script:
"""
samtools index ${bam}
"""
}
profiles {
docker {
  // Docker profile: enable Docker and map each process to its tool image.
  docker.temp = 'auto'
  docker.enabled = true
  process {
    $fastqc_fastq            { container = "fastqc:0.11.5" }
    $fastqc_fastq_cutadapt   { container = "fastqc:0.11.5" }
    $fastqc_fastq_randombp   { container = "fastqc:0.11.5" }
    $fastqc_fastq_urqt       { container = "fastqc:0.11.5" }
    $adaptor_removal         { container = "cutadapt:1.14" }
    $multiqc                 { container = "multiqc:1.0" }
    $random_bases_4_trimming { container = "cutadapt:1.14" }
    $trimming                { container = "urqt:d62c1f8" }
    $index_fasta             { container = "bowtie2:2.3.4.1" }
    $mapping_fastq           { container = "bowtie2:2.3.4.1" }
    $sort_bam                { container = "samtools:1.7" }
    $index_bam               { container = "samtools:1.7" }
  }
}
sge {
  // Per-process settings for the SGE profile: environment module to load
  // before the task script, plus scheduler resources where they are set.
  process {
    // --- FastQC steps ---
    $fastqc_fastq {
      beforeScript = "module purge; module load FastQC/0.11.5"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    $fastqc_fastq_cutadapt {
      beforeScript = "module purge; module load FastQC/0.11.5"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    $fastqc_fastq_randombp {
      beforeScript = "module purge; module load FastQC/0.11.5"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    $fastqc_fastq_urqt {
      beforeScript = "module purge; module load FastQC/0.11.5"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    // --- MultiQC ---
    $multiqc {
      // The process runs `multiqc`, so load the MultiQC module (was FastQC/1.0).
      // NOTE(review): module name assumed to follow the cluster's Tool/version scheme - confirm.
      beforeScript = "module purge; module load MultiQC/1.0"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'monointeldeb128'
    }
    // --- cutadapt / UrQt steps ---
    $adaptor_removal {
      beforeScript = "module purge; module load cutadapt/1.14"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'h6-E5-2667v4deb128'
      penv = 'openmp8'
    }
    $random_bases_4_trimming {
      beforeScript = "module purge; module load cutadapt/1.14"
      executor = "sge"
      cpus = 1
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'h6-E5-2667v4deb128'
      penv = 'openmp8'
    }
    $trimming {
      beforeScript = "module purge; module load UrQt/d62c1f8"
      executor = "sge"
      cpus = 4
      memory = "5GB"
      time = "6h"
      queueSize = 1000
      pollInterval = '60sec'
      queue = 'h6-E5-2667v4deb128'
      penv = 'openmp8'
    }
    // --- mapping steps ---
    // NOTE(review): the scopes below only load modules; unlike the scopes above
    // they set no executor, queue or resources - confirm this is intended.
    $index_fasta {
      beforeScript = "module purge; module load Bowtie2/2.3.4.1"
    }
    $mapping_fastq {
      beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie2/2.3.4.1"
    }
    $sort_bam {
      beforeScript = "module purge; module load SAMtools/1.7"
    }
    $index_bam {
      beforeScript = "module purge; module load SAMtools/1.7"
    }
  }
}
}
/*
* cutadapt :
* Inputs : fastq files
* Output : fastq files
*/
/* Small RNA-seq Illumina adaptor removal NEXTflex Small RNA Seq Kit v3 */
/*
* for paired-end data
*/
// Glob selecting the paired-end reads; R1/R2 files are grouped per sample.
params.fastq = "$baseDir/data/fastq/*_R{1,2}.fastq.gz"

log.info("fastq files : ${params.fastq}")

// Emit one (pair id, files) item per pair; abort when the glob matches nothing.
Channel
  .fromFilePairs(params.fastq)
  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
  .set { fastq_files }

// One copy goes to adaptor removal, the other to the raw-read FastQC.
fastq_files.into { fastq_files_adaptor; fastq_files_fastq }
process fastqc_fastq {
  // FastQC on both raw mates; the zip/html reports are copied to the results folder.
  publishDir "results/fastq/fastqc/raw", mode: 'copy'
  tag "${pair_id}"

  input:
    set pair_id, file(reads) from fastq_files_fastq

  output:
    file("*.{zip,html}") into fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
process adaptor_removal {
  // Remove the adaptors from each mate with its own cutadapt call
  // (each mate uses its own adaptor sequences).
  tag "$pair_id"

  input:
    set pair_id, file(reads) from fastq_files_adaptor

  output:
    set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut

  script:
  // Each call writes its own report file: both used to redirect to
  // ${pair_id}_report.txt, so the R2 report overwrote the R1 report.
"""
cutadapt -a TGGAATTCTCGGGTGCCAAGG -g CCTTGGCACCCGAGAATTCCA \
-o ${pair_id}_cut_R1.fastq.gz \
${reads[0]} > ${pair_id}_R1_report.txt
cutadapt -a GATCGTCGGACTGTAGAACTCTGAAC -g GTTCAGAGTTCTACAGTCCGACGATC \
-o ${pair_id}_cut_R2.fastq.gz \
${reads[1]} > ${pair_id}_R2_report.txt
"""
}

// Adaptor-free pairs feed both the random-base trimming and a FastQC run.
fastq_files_cut.into{fastq_files_cut_randombp; fastq_files_cut_fastq}
process fastqc_fastq_cutadapt {
  // FastQC on both mates produced by adaptor_removal.
  publishDir "results/fastq/fastqc/adaptor_removal/", mode: 'copy'
  tag "${pair_id}"

  input:
    set pair_id, file(reads) from fastq_files_cut_fastq

  output:
    file("*.{zip,html}") into cutadapt_fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
process random_bases_4_trimming {
  // Remove 4 bases from the start and 4 from the end of both mates of each pair.
  tag "$pair_id"
  publishDir "results/fastq/adaptor_removal/", mode: 'copy'

  input:
    set pair_id, file(reads) from fastq_files_cut_randombp

  output:
    set pair_id, "*_cut4_R{1,2}.fastq.gz" into fastq_files_cut4

  script:
  // In paired-end mode cutadapt applies -u to the first read only; -U applies
  // the same trimming to the second read, which was previously left untrimmed.
"""
cutadapt -u 4 -u -4 -U 4 -U -4 \
-o ${pair_id}_cut4_R1.fastq.gz -p ${pair_id}_cut4_R2.fastq.gz \
${reads[0]} ${reads[1]} > ${pair_id}_report.txt
"""
}

// Trimmed pairs feed both the quality trimming and a FastQC run.
fastq_files_cut4.into{fastq_files_trim; fastq_files_cut4_fastq}
process fastqc_fastq_randombp {
  // FastQC on both mates produced by random_bases_4_trimming.
  publishDir "results/fastq/fastqc/random_bases_4_trimming/", mode: 'copy'
  tag "${pair_id}"

  input:
    set pair_id, file(reads) from fastq_files_cut4_fastq

  output:
    file("*.{zip,html}") into randombp_fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
/*
* urqt :
* Inputs : fastq files
* Output : fastq files
*/
/* quality trimming */
/*
* for paired-end data
*/
process trimming {
  // Paired quality trimming with UrQt (--t 20), using the CPUs allotted to the task.
  publishDir "results/fastq/trimming/", mode: 'copy'
  cpus 4
  tag "$reads"

  input:
    set pair_id, file(reads) from fastq_files_trim

  output:
    set pair_id, "*_trim_R{1,2}.fastq.gz" into fastq_files_urqt

  script:
"""
UrQt --t 20 --m ${task.cpus} --gz \
--in ${reads[0]} --inpair ${reads[1]} \
--out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
> ${pair_id}_trimming_report.txt
"""
}

// Quality-trimmed pairs feed both the alignment and a FastQC run.
fastq_files_urqt.into { fastq_files_align; fastq_files_urqt_fastq }
process fastqc_fastq_urqt {
  // FastQC on both mates produced by the UrQt trimming step.
  publishDir "results/fastq/fastqc/urqt/", mode: 'copy'
  tag "${pair_id}"

  input:
    set pair_id, file(reads) from fastq_files_urqt_fastq

  output:
    file("*.{zip,html}") into urqt_fastqc_repport

  script:
"""
fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
${reads[0]} ${reads[1]}
"""
}
/*
* Bowtie2 :
* Inputs : fastq files
* Inputs : fasta files
* Output : bam files
*/
/* fasta indexing */
// Glob selecting the reference fasta file(s) to index.
params.fasta = "$baseDir/data/bam/*.fasta"
log.info "fasta files : ${params.fasta}"
// Abort early when the glob matches nothing; the message now names the
// file type actually searched for (it used to say "bam files").
Channel
  .fromPath( params.fasta )
  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
  .set { fasta_file }
process index_fasta {
  // Build the bowtie2 index of the reference; index files are copied to the results folder.
  publishDir "results/mapping/index/", mode: 'copy'
  cpus 4
  tag "${fasta.baseName}"

  input:
    file(fasta) from fasta_file

  output:
    file("*.index*") into index_files

  script:
  // The build log is scanned for "Error" so that a reported failure stops the task.
"""
bowtie2-build --threads ${task.cpus} ${fasta} ${fasta.baseName}.index &> ${fasta.baseName}_bowtie2_report.txt
if grep -q "Error" ${fasta.baseName}_bowtie2_report.txt; then
exit 1
fi
"""
}
/*
* for paired-end data
*/
process mapping_fastq {
  // Align each read pair with bowtie2 (--fr) and convert the SAM stream to BAM.
  publishDir "results/mapping/bams/", mode: 'copy'
  cpus 4
  tag "${pair_id}"

  input:
    set pair_id, file(reads) from fastq_files_align
    file(index) from index_files.collect()

  output:
    set pair_id, "*.bam" into bam_files
    file("*_bowtie2_report.txt") into mapping_fastq_report

  script:
  // The bowtie2 index prefix is the name of the forward ".1.bt2" file without
  // that suffix; the first index file is the fallback when none matches.
  index_id = index[0]
  for (candidate in index) {
    if (candidate =~ /.*\.rev\.1\.bt2/) {
      continue
    }
    if (candidate =~ /.*\.1\.bt2/) {
      index_id = (candidate =~ /(.*)\.1\.bt2/)[0][1]
    }
  }
"""
bowtie2 --very-sensitive --fr -p ${task.cpus} -x ${index_id} \
-1 ${reads[0]} -2 ${reads[1]} 2> \
${pair_id}_bowtie2_report.txt | \
samtools view -Sb - > ${pair_id}.bam
if grep -q "Error" ${pair_id}_bowtie2_report.txt; then
exit 1
fi
"""
}
/* MultiQC */
process multiqc {
  // Aggregate all FastQC reports of the run into a single MultiQC report.
  tag "$repport"
  publishDir "results/fastq/multiqc/", mode: 'copy'
  // Directive syntax (no '='), as used by every other process in this script.
  cpus 1

  input:
    // collect() turns each report channel into one emission holding all its
    // files, so every report is staged in the task directory.
    file repport from fastqc_repport.collect()
    file repport_urqt from urqt_fastqc_repport.collect()
    file repport_cutadapt from cutadapt_fastqc_repport.collect()
    file repport_randombp from randombp_fastqc_repport.collect()

  output:
    file "*multiqc_*" into multiqc_report

  script:
"""
multiqc -f .
"""
}
/* bams sorting */
process sort_bam {
  // Sort the BAM of each pair with samtools, using the CPUs allotted to the task.
  publishDir "results/mapping/bams/", mode: 'copy'
  cpus 4
  tag "${pair_id}"

  input:
    set pair_id, file(bam) from bam_files

  output:
    set pair_id, "*_sorted.bam" into sorted_bam_files

  script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${pair_id}_sorted.bam ${bam}
"""
}
/* bams indexing */
process index_bam {
  // Index the sorted BAM; the output glob matches every file whose name contains "bam".
  publishDir "results/mapping/bams/", mode: 'copy'
  tag "${pair_id}"

  input:
    set pair_id, file(bam) from sorted_bam_files

  output:
    set pair_id, "*bam*" into indexed_bam_file

  script:
"""
samtools index ${bam}
"""
}