Skip to content
Snippets Groups Projects
Commit ac3dbc46 authored by elabaron's avatar elabaron
Browse files

Add PASeq pipeline

parent f99e684b
Branches
No related tags found
No related merge requests found
profiles {
sge {
process{
withName: trimming {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/cutadapt_2.1"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
}
withName: rRNA_removal {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 16
memory = "30GB"
time = "24h"
queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
penv = 'openmp16'
}
withName: hisat2_human {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "hisat2/2.1.0:samtools/1.7"
executor = "sge"
clusterOptions = "-cwd -V"
memory = "20GB"
cpus = 16
time = "12h"
queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
penv = 'openmp16'
}
withName: sort_bam {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/samtools_1.7"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
}
withName: index_bam {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/samtools_1.7"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
}
withName: dedup {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "umi_tools/1.0.0"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
}
withName: counting {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "htseq/0.11.2"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
}
}
}
docker {
docker.temp = 'auto'
docker.enabled = true
process {
withName: adaptor_removal {
container = "lbmc/cutadapt:2.1"
cpus = 1
}
withName: rRNA_removal {
container = "lbmc/bowtie2:2.3.4.1"
cpus = 4
}
withName: hisat2_human {
cpus = 4
container = "lbmc/hisat2:2.1.0"
}
withName: sort_bam {
container = "lbmc/samtools:1.7"
cpus = 1
}
withName: index_bam {
container = "lbmc/samtools:1.7"
cpus = 1
}
withName: dedup {
container = "lbmc/umi_tools:1.0.0"
cpus = 1
}
withName: counting {
container = "lbmc/htseq:0.11.2"
cpus = 1
}
}
}
}
/*
* RibosomeProfiling Analysis pipeline
*/
/* Trimming */
params.output = "results"
params.fastq_raw = "${params.output}/00_demultiplexing/*.fastq.gz"
Channel
.fromPath( params.fastq_raw )
.ifEmpty { error "Cannot find any files matching: ${params.fastq_raw}" }
.map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
.set { fastq_raw_flow }
process trimming {
tag "$file_id"
publishDir "${params.output}/01_trimming/", mode: 'copy'
input:
set file_id, file(fastq_raw) from fastq_raw_flow
output:
set file_id, "*_cut.fastq.gz" into fastq_trim_filt
file "*.txt" into log_trim
script:
"""
cutadapt -a AAAAAAAAAAAAAAAAAAAAAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG -m 15\
-u 2 -o ${file_id}_cut.fastq.gz \
${fastq_raw} > ${file_id}_report.txt
"""
}
/* rRNA and tRNA filtering */
params.indexrRNA = "results/human_rRNA_tRNA/*.bt2"
log.info "index files rRNA : ${params.indexrRNA}"
Channel
.fromPath( params.indexrRNA )
.ifEmpty { error "Cannot find any index files matching: ${params.indexrRNA}" }
.set { rRNA_index_files }
process rRNA_removal {
tag "$file_id"
publishDir "${params.output}/02_rRNA_depletion/", mode: 'copy'
input:
set file_id, file(reads) from fastq_trim_filt
file index from rRNA_index_files.toList()
output:
set file_id, "*.fastq.gz" into rRNA_removed_reads
file "*.txt" into bowtie_report
script:
"""
zcat ${reads} | bowtie2 --sensitive -p ${task.cpus} -x human_rRNA_tRNA \
-U - --un-gz ${file_id}_mRNA.fastq.gz 2> \
${file_id}_bowtie2_report.txt > /dev/null
if grep -q "Error " ${file_id}_bowtie2_report.txt; then
exit 1
fi
"""
}
/* mapping against human genome with hisat2 */
params.index_hg38 = "/media/adminmanu/Stockage/HISAT2_index_hg38_tran/*.ht2"
log.info "index : ${params.index_hg38}"
Channel
.fromPath ( params.index_hg38 )
.ifEmpty { error "Cannot find any hg38 index files matching: ${params.index_hg38}" }
.set { index_file_hg38 }
process hisat2_human {
tag "$file_id"
input:
set file_id, file(fastq_filtred) from rRNA_removed_reads
file index from index_file_hg38.toList()
output:
set file_id, "*.fastq.gz" into reads_non_aligned_hg38
set file_id, "*.bam" into reads_aligned_hg38
file "*.txt" into hisat_report
script:
"""
hisat2 -x genome_tran -p ${task.cpus} \
-U ${fastq_filtred} --un-gz ${file_id}_notaligned_hg38.fastq.gz \
--end-to-end --rna-strandness 'F' \
2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam
"""
}
/* sorting */
process index_bam {
tag "$file_id"
publishDir "${params.output}/03_hisat2_hg38/", mode: 'copy'
input:
set file_id, file(bam) from reads_aligned_hg38
output:
set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files
script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
samtools index ${file_id}_sorted.bam
"""
}
sorted_bam_files.into{for_dedup;for_htseq}
/* deduplicating reads */
params.dedup_options = ""
process dedup {
tag "$file_id"
input:
set file_id, file(bam) from for_dedup
output:
set file_id, "*dedup.bam" into dedup_bam
file "*.txt" into dedup_report
script:
"""
umi_tools dedup -I ${bam[0]} \
${params.dedup_options} \
-S ${file_id}_dedup.bam > report.txt
"""
}
process sort_bam {
tag "$file_id"
publishDir "${params.output}/03_hisat2_hg38_dedup/", mode: 'copy'
input:
set file_id, file(bam) from dedup_bam
file dedup from dedup_report
output:
set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files_2
file "*.txt" into report_dedup
script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
samtools index ${file_id}_sorted.bam
cat ${dedup} > ${file_id}_dedup_report.txt
"""
}
/* HTseq */
params.gtf = "$baseDir/data/annotation/*.gtf"
log.info "gtf files : ${params.gtf}"
Channel
.fromPath( params.gtf )
.ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
.set { gtf_file }
process counting {
tag "$file_id"
publishDir "${params.output}/04_HTseq/", mode: 'copy'
input:
set file_id, file(bam) from for_htseq
file gtf from gtf_file.toList()
output:
file "*.count" into count_files
script:
"""
htseq-count ${bam[0]} ${gtf} \
--mode=intersection-nonempty \
-a 10 \
-s yes \
-t CDS \
-i gene_id \
-r pos \
-f bam \
> ${file_id}_CDS.count
htseq-count ${bam[0]} ${gtf} \
--mode=intersection-nonempty \
-a 10 \
-s yes \
-t exon \
-i gene_id \
-r pos \
-f bam \
> ${file_id}_exon.count
"""
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment