Skip to content
Snippets Groups Projects
Commit 5c3896e2 authored by elabaron's avatar elabaron
Browse files

src/RibosomeProfiling.nf : genome mapping with star

parent a6974060
Branches
Tags v0.3.0
No related merge requests found
......@@ -211,9 +211,13 @@ profiles {
container = "lbmc/bowtie2:2.3.4.1"
cpus = 8
}
withName: index_genome {
container = "lbmc/star:2.7.3a"
cpus = 16
}
withName: hisat2_genome {
container = "lbmc/star:2.7.3a"
cpus = 8
container = "lbmc/hisat2:2.1.0"
}
withName: fastqc_genome {
container = "lbmc/fastqc:0.11.5"
......
......@@ -14,6 +14,7 @@ params.fastq_raw = "data/fastq/*.fastq.gz"
params.output = "results"
params.filter = "data/filter/human_rRNA_tRNA/*.bt2"
params.index_genome = "data/genome/*.ht2"
params.fasta_genome = "data/genome/*.fasta"
params.gtf = "data/annotation/*.gtf"
params.gtf_collapse = "data/annotation/*.gtf"
params.index_postgenome = "data/post_genome/*.ht2"
......@@ -41,7 +42,7 @@ params.strand = "F"
log.info "input raw : ${params.fastq_raw}"
log.info "outut directory : ${params.output}"
log.info "filter index files : ${params.filter}"
log.info "genome index : ${params.index_genome}"
log.info "genome fasta : ${params.fasta_genome}"
log.info "gtf file : ${params.gtf}"
log.info "collapsed gtf file for rnaseqc: ${params.gtf_collapse}"
log.info "post-genome index : ${params.index_postgenome}"
......@@ -72,13 +73,14 @@ Channel
Channel
.fromPath ( params.index_genome )
.ifEmpty { error "Cannot find any index files matching: ${params.index_genome}" }
.set { GENOME_INDEX }
.ifEmpty { error "Cannot find any index files matching: ${params.fasta_genome}" }
.set { INDEX_GENOME }
Channel
.fromPath( params.gtf )
.ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
.set { GTF_FILE }
.into { GTF_FILE ;
GTF_INDEXING}
Channel
.fromPath( params.gtf_collapse )
......@@ -248,46 +250,58 @@ process fastqc_filter {
///////////////////////////////////////////////
/* mapping against human genome with hisat2 */
///////////////////////////////////////////////
/*process index_genome {
tag "$fasta.baseName"
publishDir "results/star_index/", mode: 'copy'
input:
file fasta from FASTA_GENOME
file annotation from GTF_INDEXING
output:
file "*" into INDEX_FILES
script:
"""
STAR --runThreadN ${task.cpus} --runMode genomeGenerate \
--genomeDir ./ \
--genomeFastaFiles ${fasta} \
--sjdbGTFfile ${annotation} \
--genomeSAindexNbases 14 # min(14, log2(GenomeLength)/2 - 1)
"""
}*/
process hisat2_genome {
tag "$file_id"
publishDir "${params.output}/03_hisat2/", mode: 'copy'
input:
set file_id, file(fastq_filtred) from FILTER_FASTQ_HISAT
file index from GENOME_INDEX.toList()
set file_id, file(reads) from FILTER_FASTQ_HISAT
file index from INDEX_GENOME.collect()
output:
set file_id, "*_notaligned.fastq.gz" into HISAT_UNALIGNED
set file_id, "*.{bam,bam.bai}" into HISAT_ALIGNED
file "*.txt" into HISAT_LOG
set file_id, "*Aligned.sortedByCoord.out.bam" into HISAT_ALIGNED
file "*.out" into HISAT_LOG
set file_id, "*Aligned.toTranscriptome.out.bam" into ALIGNED_TRANSCRIPTOME
script:
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
}
}
"""
hisat2 -x ${index_id} \
-p ${task.cpus} \
-U ${fastq_filtred[0]} \
--un-gz ${file_id}_notaligned.fastq.gz \
--rna-strandness ${params.strand} \
--dta \
--no-softclip \
--trim5 1\
--trim3 1\
2> ${file_id}_genome.txt \
| samtools view -bS -F 4 - \
| samtools sort -@ ${task.cpus} -o ${file_id}_genome.bam \
&& samtools index ${file_id}_genome.bam
if grep -q "ERR" ${file_id}.txt; then
exit 1
fi
"""
"""
mkdir -p index
mv ${index} index/
STAR --outFilterType BySJout \
--runThreadN ${task.cpus} \
--outFilterMismatchNmax 2 \
--genomeDir index\
--readFilesIn ${reads} \
--readFilesCommand zcat \
--outFileNamePrefix ${file_id} \
--outSAMtype BAM SortedByCoordinate \
--quantMode TranscriptomeSAM GeneCounts \
--outFilterMultimapNmax 1 \
--outFilterMatchNmin 16 \
--alignEndsType EndToEnd
"""
}
HISAT_ALIGNED.into{HISAT_ALIGNED_FASTQC;
......@@ -504,7 +518,7 @@ if (params.do_dedup){
when:
params.do_dedup
shell:
'''
samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | samtools view -bS -o !{file_id}_dedup.bam
......@@ -594,7 +608,7 @@ if (params.do_postgenome){
echo "total counts : $total" >> !{file_id}.txt
echo "scaling factor : !{factor}" >> !{file_id}.txt
if [ !{genome} -gt 0 ]
then
then
bamCoverage -p !{task.cpus} --binSize 1 --scaleFactor !{factor} -b !{genome_bam[0]} -o !{file_id}_genome.bigwig
fi
if [ !{postgenome} -gt 0 ]
......@@ -642,7 +656,7 @@ if (params.do_postgenome){
echo "genome aligment : !{genome} counts" >> !{file_id}.txt
echo "scaling factor : !{factor}" >> !{file_id}.txt
if [ !{genome} -gt 0 ]
then
then
bamCoverage -p !{task.cpus} --binSize 1 --scaleFactor !{factor} -b !{genome_bam[0]} -o !{file_id}_genome.bigwig
fi
'''
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment