Skip to content
Snippets Groups Projects
Commit bd9fc9ee authored by elabaron's avatar elabaron
Browse files

fix beug in RNAseq pipeline

parent 65202d59
Branches
Tags
No related merge requests found
...@@ -41,16 +41,6 @@ profiles { ...@@ -41,16 +41,6 @@ profiles {
time = "12h" time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
} }
withName: index_bam {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/hisat2_2.1.0"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
}
withName: counting { withName: counting {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "htseq/0.11.2" module = "htseq/0.11.2"
...@@ -83,14 +73,6 @@ profiles { ...@@ -83,14 +73,6 @@ profiles {
container = "lbmc/samtools:1.7" container = "lbmc/samtools:1.7"
cpus = 1 cpus = 1
} }
withName: index_bam {
container = "lbmc/samtools:1.7"
cpus = 1
}
withName: dedup {
container = "lbmc/umi_tools:1.0.0"
cpus = 1
}
withName: counting { withName: counting {
container = "lbmc/htseq:0.11.2" container = "lbmc/htseq:0.11.2"
cpus = 1 cpus = 1
......
...@@ -94,14 +94,16 @@ Channel ...@@ -94,14 +94,16 @@ Channel
process hisat2_human { process hisat2_human {
tag "$file_id" tag "$file_id"
publishDir "${params.output}/03_hisat2/", mode: 'copy'
errorStrategy 'finish'
input: input:
set file_id, file(fastq_filtred) from rRNA_removed_files set file_id, file(fastq_filtred) from rRNA_removed_files
file index from index_file_hg38.toList() file index from index_file_hg38.toList()
output: output:
set file_id, "*.fastq.gz" into reads_non_aligned_hg38 file "*.fastq.gz" into reads_non_aligned_hg38
set file_id, "*.bam" into reads_aligned_hg38 set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files
file "*.txt" into hisat_report file "*.txt" into hisat_report
script: script:
...@@ -115,51 +117,33 @@ process hisat2_human { ...@@ -115,51 +117,33 @@ process hisat2_human {
hisat2 -x ${index_id} -p ${task.cpus} \ hisat2 -x ${index_id} -p ${task.cpus} \
-1 ${fastq_filtred[0]} -2 ${fastq_filtred[1]} \ -1 ${fastq_filtred[0]} -2 ${fastq_filtred[1]} \
--un-conc-gz ${file_id}_notaligned_R%.fastq.gz \ --un-conc-gz ${file_id}_notaligned_R%.fastq.gz \
--rna-strandness 'F' \ --rna-strandness 'F' 2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam
2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam
"""
}
reads_aligned_hg38.into{for_mapping;for_htseq}
/* sorting */
process index_bam {
tag "$file_id"
publishDir "${params.output}/03_hisat2/", mode: 'copy'
input: if grep -q "ERR" ${file_id}_hisat2_hg38.txt; then
set file_id, file(bam) from for_mapping exit 1
file report from hisat_report fi
output:
set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files
file "*.txt" into report_hisat
script: samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${file_id}.bam
"""
samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
samtools index ${file_id}_sorted.bam samtools index ${file_id}_sorted.bam
cat ${report} > ${file_id}_hisat_hg38.txt
""" """
} }
/* HTseq */ /* HTseq */
process sort_bam { process sort_bam {
tag "$file_id" tag "$file_id"
input: input:
set file_id, file(bam) from for_htseq set file_id, file(bam) from sorted_bam_files
output: output:
set file_id, "*_sorted.bam" into sorted_bam_files_2 set file_id, "*_htseq.bam" into sorted_bam_files_2
script: script:
""" """
samtools sort -@ ${task.cpus} -n -O BAM -o ${file_id}_sorted.bam ${bam} samtools sort -@ ${task.cpus} -n -O BAM -o ${file_id}_htseq.bam ${bam[0]}
""" """
} }
......
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
params.fasta = "data/genome/NC001802.1.fa" params.fasta = "data/genome/NC001802.1.fa"
log.info "fasta files : ${params.fasta}" log.info "fasta files : ${params.fasta}"
params.output = "results"
log.info "saving file into ${params.output}"
Channel Channel
.fromPath( params.fasta ) .fromPath( params.fasta )
...@@ -122,15 +124,15 @@ for (index_file in index) { ...@@ -122,15 +124,15 @@ for (index_file in index) {
} }
} }
""" """
bowtie2 --best --fr -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ bowtie2 --sensitive --fr -p ${task.cpus} -x ${index_id} \
-1 ${reads[0]} -2 ${reads[1]} 2> \ -1 ${reads[0]} -2 ${reads[1]} 2> \
${file_id}_bowtie2_report_tmp.txt | \ ${file_id}_bowtie2_report_tmp.txt | \
samtools view -F 4 -F 16 -Sb - > ${file_id}_bowtie.bam samtools view -F 4 -F 16 -Sb - > ${file_id}_bowtie.bam
if grep -q "Error" ${file_id}_bowtie_report_tmp.txt; then if grep -q "Error" ${file_id}_bowtie2_report_tmp.txt; then
exit 1 exit 1
fi fi
tail -n 19 ${file_id}_bowtie_report_tmp.txt > ${file_id}_bowtie_report.txt tail -n 19 ${file_id}_bowtie2_report_tmp.txt > ${file_id}_bowtie2_report.txt
""" """
} }
...@@ -144,7 +146,7 @@ bam_bowtie.join(bam_hisat) ...@@ -144,7 +146,7 @@ bam_bowtie.join(bam_hisat)
//merged_bam.println() //merged_bam.println()
process merge_bam{ process merge_bam{
publishDir "results/05_${index_id}_mergedBAM/", mode: 'copy' publishDir "${params.output}/05_${index_id}_mergedBAM/", mode: 'copy'
input: input:
set file_id, index_id, file(bam_bowtie), file(bam_hisat) from merged_bam set file_id, index_id, file(bam_bowtie), file(bam_hisat) from merged_bam
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment