Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/*
* RSEM :
* Imputs : fastq files
* Imputs : fasta files
* Output : bam files
*/
/* fasta indexing */
params.fasta = "$baseDir/data/bam/*.fasta"
params.annotation = "$baseDir/data/bam/*.gff3"
log.info "fasta files : ${params.fasta}"
Channel
.fromPath( params.fasta )
.ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
.set { fasta_file }
Channel
.fromPath( params.annotation )
.ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" }
.set { annotation_file }
process index_fasta {
tag "$fasta.baseName"
cpus 4
publishDir "results/mapping/index/", mode: 'copy'
input:
file fasta from fasta_file
file annotation from annotation_file
output:
file "*.index*" into index_files
script:
def cmd_annotation = "--gff3 ${annotation}"
if(annotation ==~ /.*\.gtf$/){
cmd_annotation = "--gtf ${annotation}"
}
"""
rsem-prepare-reference -p ${task.cpus} --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
${cmd_annotation} ${fasta} ${fasta.baseName}.index > \
${fasta.baseName}_rsem_bowtie2_report.txt
"""
}
/*
* for paired-end data
*/
params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
params.index = "$baseDir/data/index/*.index.*"
log.info "fastq files : ${params.fastq}"
log.info "index files : ${params.index}"
Channel
.fromFilePairs( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
Channel
.fromPath( params.index )
.ifEmpty { error "Cannot find any index files matching: ${params.index}" }
.set { index_files }
process mapping_fastq {
tag "$pair_id"
cpus 4
publishDir "results/mapping/quantification/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
output:
file "*" into counts_files
script:
index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
"""
rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
-output-genome-bam -p ${task.cpus} \
--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
> ${pair_id}_rsem_bowtie2_report.txt
"""
}
/*
* for single-end data
*/
params.fastq = "$baseDir/data/fastq/*.fastq"
params.index = "$baseDir/data/index/*.index*"
params.mean = 300
params.sd = 100
log.info "fastq files : ${params.fastq}"
log.info "index files : ${params.index}"
log.info "mean read size: ${params.mean}"
log.info "sd read size: ${params.sd}"
Channel
.fromPath( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
Channel
.fromPath( params.index )
.ifEmpty { error "Cannot find any index files matching: ${params.index}" }
.set { index_files }
process mapping_fastq {
tag "$reads.baseName"
cpus 4
publishDir "results/mapping/quantification/", mode: 'copy'
input:
file reads from fastq_files
output:
file "*" into count_files
script:
index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
"""
rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
--fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
--output-genome-bam -p ${task.cpus} \
${reads} ${index_name} ${tagname} > ${tagname}_rsem_bowtie2_report.txt
"""
}