Skip to content
Snippets Groups Projects
Commit ba76a948 authored by elabaron's avatar elabaron
Browse files

add RNAseq worflow for illumina libraries

parent 12b1d9fb
No related branches found
No related tags found
No related merge requests found
/*
* RNAseq Analysis pipeline
*/
params.fastq_raw = "data/demultiplexed/*{_R1,_R2}.fastq.gz"
params.output = "results"
Channel
.fromFilePairs(params.fastq_raw)
.ifEmpty { error "Cannot find any file matching: ${params.fastq_raw}" }
.set {fastq_raw_channel}
/* Trimming by quality */
process trimming {
tag "$file_id"
cpus 4
publishDir "${params.output}/01_cutadapt/", mode: 'copy'
echo true
input:
set file_id, file(reads) from fastq_raw_channel
output:
set file_id, "*cut_{R1,R2}.fastq.gz" into fastq_files_cut
file "*.txt" into rapport_UrQt
script:
"""
cutadapt -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCA -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \
--minimum-length 50 \
-o ${file_id}_cut_R1.fastq.gz -p ${file_id}_cut_R2.fastq.gz \
${reads[0]} ${reads[1]} > ${file_id}_report.txt
"""
}
/* rRNA and tRNA filtering */
params.indexrRNA = "/Xnfs/lbmcdb/Ricci_team/shared_data/genomes/human_rRNA_tRNA/*.bt2"
log.info "index files : ${params.indexrRNA}"
Channel
.fromPath( params.indexrRNA )
.ifEmpty { error "Cannot find any index files matching: ${params.indexrRNA}" }
.set { rRNA_index_files }
process rRNA_removal {
tag "$file_id"
cpus 8
publishDir "${params.output}/02_rRNA_depletion/", mode: 'copy'
input:
set file_id, file(reads) from fastq_files_cut
file index from rRNA_index_files.toList()
output:
set file_id, "*.fastq.gz" into rRNA_removed_files
file "*.txt" into bowtie_report
script:
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
}
}
"""
bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \
-1 ${reads[0]} -2 ${reads[1]} --un-conc-gz ${file_id}_R%.fastq.gz 2> \
${file_id}_bowtie2_report.txt > /dev/null
if grep -q "Error " ${file_id}_bowtie2_report.txt; then
exit 1
fi
"""
}
/* mapping against human genome with hisat2 */
params.index_hg38 = "/media/adminmanu/Stockage/HISAT2_index_hg38_tran/*.ht2"
log.info "index : ${params.index_hg38}"
Channel
.fromPath ( params.index_hg38 )
.ifEmpty { error "Cannot find any index files matching: ${params.index_hg38}" }
.set { index_file_hg38 }
process hisat2_human {
tag "$file_id"
publishDir "${params.output}/03_hisat2/", mode: 'copy'
errorStrategy 'finish'
input:
set file_id, file(fastq_filtred) from rRNA_removed_files
file index from index_file_hg38.toList()
output:
file "*.fastq.gz" into reads_non_aligned_hg38
set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files
file "*.txt" into hisat_report
script:
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
}
}
"""
hisat2 -x ${index_id} -p ${task.cpus} \
-1 ${fastq_filtred[0]} -2 ${fastq_filtred[1]} \
--un-conc-gz ${file_id}_notaligned_R%.fastq.gz \
--rna-strandness 'F' 2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam
if grep -q "ERR" ${file_id}_hisat2_hg38.txt; then
exit 1
fi
samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${file_id}.bam
samtools index ${file_id}_sorted.bam
"""
}
/* HTseq */
process sort_bam {
tag "$file_id"
input:
set file_id, file(bam) from sorted_bam_files
output:
set file_id, "*_htseq.bam" into sorted_bam_files_2
script:
"""
samtools sort -@ ${task.cpus} -n -O BAM -o ${file_id}_htseq.bam ${bam[0]}
"""
}
params.gtf = "$baseDir/data/annotation/*.gtf"
log.info "gtf files : ${params.gtf}"
Channel
.fromPath( params.gtf )
.ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
.set { gtf_file }
process counting {
tag "$file_id"
publishDir "${params.output}/04_HTseq/", mode: 'copy'
input:
set file_id, file(bam) from sorted_bam_files_2
file gtf from gtf_file.toList()
output:
file "*.count" into count_files
script:
"""
htseq-count ${bam[0]} ${gtf} \
--mode=intersection-nonempty \
-a 10 \
-s yes \
-t CDS \
-i gene_id \
-f bam \
> ${file_id}_CDS.count
htseq-count ${bam[0]} ${gtf} \
--mode=intersection-nonempty \
-a 10 \
-s yes \
-t exon \
-i gene_id \
-f bam \
> ${file_id}_exon.count
"""
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment