Skip to content
Snippets Groups Projects
Commit c2bf2271 authored by elabaron's avatar elabaron
Browse files

add RNAseq pipeline

parent 4d9aa8e9
No related branches found
No related tags found
No related merge requests found
profiles {
sge {
process{
withName: trimming {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/cutadapt_2.1"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
}
withName: rRNA_removal {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 16
memory = "30GB"
time = "24h"
queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
penv = 'openmp16'
}
withName: hisat2_human {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "hisat2/2.1.0:samtools/1.7"
executor = "sge"
clusterOptions = "-cwd -V"
memory = "20GB"
cpus = 16
time = "12h"
queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
penv = 'openmp16'
}
withName: sort_bam {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/samtools_1.7"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
}
withName: index_bam {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/samtools_1.7"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
}
withName: dedup {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "umi_tools_1.0.0"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
}
withName: counting {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "htseq/0.11.2"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
}
}
}
docker {
docker.temp = 'auto'
docker.enabled = true
process {
withName: adaptor_removal {
container = "lbmc/cutadapt:2.1"
cpus = 1
}
withName: rRNA_removal {
container = "lbmc/bowtie2:2.3.4.1"
cpus = 4
}
withName: hisat2_human {
cpus = 4
container = "lbmc/hisat2:2.1.0"
}
withName: sort_bam {
container = "lbmc/samtools:1.7"
cpus = 1
}
withName: index_bam {
container = "lbmc/samtools:1.7"
cpus = 1
}
withName: dedup {
container = "lbmc/umi_tools:1.0.0"
cpus = 1
}
withName: counting {
container = "lbmc/htseq:0.11.2"
cpus = 1
}
}
}
}
/*
* RNAseq Analysis pipeline
*/
params.input = "data/demultiplexed/*{_R1,_R2}.fastq.gz"
Channel
.fromFilePairs(params.input)
.ifEmpty { error "Cannot find any file matching: ${params.input}" }
.set {input_channel}
/* Trimming by quality */
process trimming {
tag "$file_id"
cpus 4
publishDir "results/RNAseq/01_cutadapt/", mode: 'copy'
echo true
input:
set file_id, file(reads) from input_channel
output:
set file_id, "*cut_{R1,R2}.fastq.gz" into fastq_files_cut
file "*.txt" into rapport_UrQt
script:
"""
cutadapt -a AGATCGGAAGAGC -A AGATCGGAAGAGC \
-o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_tmp_R2.fastq.gz \
${reads[0]} ${reads[1]} > ${pair_id}_report.txt
cutadapt -u -14 -o ${pair_id}_cut_R2.fastq.gz ${pair_id}_tmp_R2.fastq.gz \
> ${pair_id}_cut_report.txt
"""
}
/* rRNA and tRNA filtering */
params.indexrRNA = "/Xnfs/lbmcdb/Ricci_team/shared_data/genomes/human_rRNA_tRNA/*.bt2"
log.info "index files : ${params.indexrRNA}"
Channel
.fromPath( params.indexrRNA )
.ifEmpty { error "Cannot find any index files matching: ${params.indexrRNA}" }
.set { rRNA_index_files }
process rRNA_removal {
tag "$file_id"
cpus 8
publishDir "results/RNAseq/U937/02_rRNA_depletion/", mode: 'copy'
input:
set file_id, file(reads) from fastq_files_cut
file index from rRNA_index_files.toList()
output:
set file_id, "*.fastq.gz" into rRNA_removed_files
file "*.txt" into bowtie_report
script:
"""
bowtie2 --sensitive -p ${task.cpus} -x human_rRNA_tRNA \
-1 ${reads[0]} -2 ${reads[1]} --un-conc-gz ${file_id}_R%.fastq.gz 2> \
${file_id}_bowtie2_report.txt > /dev/null
if grep -q "Error " ${file_id}_bowtie2_report.txt; then
exit 1
fi
"""
}
/* mapping against human genome with hisat2 */
params.index_hg38 = "/media/adminmanu/Stockage/HISAT2_index_hg38_tran/*.ht2"
log.info "index : ${params.index_hg38}"
Channel
.fromPath ( params.index_hg38 )
.ifEmpty { error "Cannot find any hg38 index files matching: ${params.index_hg38}" }
.set { index_file_hg38 }--paired
process hisat2_human {
tag "$file_id"
input:
set file_id, file(fastq_filtred) from rRNA_removed_reads
file index from index_file_hg38.toList()
output:
set file_id, "*.fastq.gz" into reads_non_aligned_hg38
set file_id, "*.bam" into reads_aligned_hg38
file "*.txt" into hisat_report
script:
"""
hisat2 -x genome_tran -p ${task.cpus} \
-1 ${fastq_filtred[0]} -2 ${fastq_filtred[1]} \
--un-conc-gz ${file_id}_notaligned_hg38_R%.fastq.gz \
--rna-strandness 'F' \
2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam
"""
}
/* sorting */
process index_bam {
tag "$file_id"
publishDir "${params.output}/03_hisat2_hg38/", mode: 'copy'
input:
set file_id, file(bam) from reads_aligned_hg38
output:
set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files
script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
samtools index ${file_id}_sorted.bam
"""
}
sorted_bam_files.into{for_dedup;for_htseq}
/* deduplicating reads
params.dedup_options = "--paired"
process dedup {
tag "$file_id"
input:
set file_id, file(bam) from for_dedup
output:
set file_id, "*dedup.bam" into dedup_bam
file "*.txt" into dedup_report
script:
"""
umi_tools dedup -I ${bam[0]} \
${params.dedup_options} \
-S ${file_id}_dedup.bam > report.txt
"""
}
process sort_bam {
tag "$file_id"
publishDir "${params.output}/03_hisat2_hg38_dedup/", mode: 'copy'
input:
set file_id, file(bam) from dedup_bam
file dedup from dedup_report
output:
set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files_2
file "*.txt" into report_dedup
script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
samtools index ${file_id}_sorted.bam
cat ${dedup} > ${file_id}_dedup_report.txt
"""
} */
/* HTseq */
params.gtf = "$baseDir/data/annotation/*.gtf"
log.info "gtf files : ${params.gtf}"
Channel
.fromPath( params.gtf )
.ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
.set { gtf_file }
process counting {
tag "$file_id"
publishDir "${params.output}/04_HTseq/", mode: 'copy'
input:
set file_id, file(bam) from for_htseq
file gtf from gtf_file.toList()
output:
file "*.count" into count_files
script:
"""
htseq-count ${bam[0]} ${gtf} \
--mode=intersection-nonempty \
-a 10 \
-s yes \
-t CDS \
-i gene_id \
-r pos \
-f bam \
> ${file_id}_CDS.count
htseq-count ${bam[0]} ${gtf} \
--mode=intersection-nonempty \
-a 10 \
-s yes \
-t exon \
-i gene_id \
-r pos \
-f bam \
> ${file_id}_exon.count
"""
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment