Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
/*
* RSEM :
* Imputs : fastq files
* Imputs : fasta files
* Output : bam files
*/
/* fasta indexing */
params.fasta = "$baseDir/data/bam/*.fasta"
params.annotation = "$baseDir/data/bam/*.gff3"
log.info "fasta files : ${params.fasta}"
Channel
.fromPath( params.fasta )
.ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
.set { fasta_file }
Channel
.fromPath( params.annotation )
.ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" }
.set { annotation_file }
process index_fasta {
tag "$fasta.baseName"
cpus 4
publishDir "results/mapping/index/", mode: 'copy'
input:
file fasta from fasta_file
file annotation from annotation_file
output:
file "*.index*" into index_files
script:
def cmd_annotation = "--gff3 ${annotation}"
if(annotation ==~ /.*\.gtf$/){
cmd_annotation = "--gtf ${annotation}"
}
"""
rsem-prepare-reference -p ${task.cpus} --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
${cmd_annotation} ${fasta} ${fasta.baseName}.index > \
${fasta.baseName}_rsem_bowtie2_report.txt
"""
}
/*
* for paired-end data
*/
params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
params.index = "$baseDir/data/index/*.index.*"
log.info "fastq files : ${params.fastq}"
log.info "index files : ${params.index}"
Channel
.fromFilePairs( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
Channel
.fromPath( params.index )
.ifEmpty { error "Cannot find any index files matching: ${params.index}" }
.set { index_files }
process mapping_fastq {
tag "$pair_id"
cpus 4
publishDir "results/mapping/quantification/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files
file index from index_files.toList()
output:
file "*" into counts_files
script:
index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
"""
rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
-output-genome-bam -p ${task.cpus} \
--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
> ${pair_id}_rsem_bowtie2_report.txt
"""
}
/*
* for single-end data
*/
params.fastq = "$baseDir/data/fastq/*.fastq"
params.index = "$baseDir/data/index/*.index*"
params.mean = 300
params.sd = 100
log.info "fastq files : ${params.fastq}"
log.info "index files : ${params.index}"
log.info "mean read size: ${params.mean}"
log.info "sd read size: ${params.sd}"
Channel
.fromPath( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
Channel
.fromPath( params.index )
.ifEmpty { error "Cannot find any index files matching: ${params.index}" }
.set { index_files }
process mapping_fastq {
tag "$reads.baseName"
cpus 4
publishDir "results/mapping/quantification/", mode: 'copy'
input:
file reads from fastq_files
file index from index_files.toList()
output:
file "*" into count_files
script:
index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
"""
rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
--fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
--output-genome-bam -p ${task.cpus} \
${reads} ${index_name} ${tagname} > ${tagname}_rsem_bowtie2_report.txt
"""
}