Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/*
* cutadapt :
* Imputs : fastq files
* Output : fastq files
*/
/* Illumina adaptor removal */
/*
* for paired-end data
*/
params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
log.info "fastq files : ${params.fastq}"
Channel
.fromFilePairs( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
process adaptor_removal {
tag "$pair_id"
publishDir "results/fastq/adaptor_removal/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files
output:
file "*_cut_R{1,2}.fastq.gz" into fastq_files_cut
script:
"""
cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT -A AGATCGGAAGAG -G CTCTTCCGATCT \
-o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \
${reads[0]} ${reads[1]} > ${pair_id}_report.txt
"""
}
/*
* urqt :
* Imputs : fastq files
* Output : fastq files
*/
/* quality trimming */
/*
* for paired-end data
*/
process trimming {
tag "${reads}"
cpus 4
publishDir "results/fastq/trimming/", mode: 'copy'
input:
output:
file "*_trim_R{1,2}.fastq.gz" into fastq_files_trim
script:
"""
UrQt --t 20 --m ${task.cpus} --gz \
--in ${reads[0]} --inpair ${reads[1]} \
--out ${reads[0].baseName}_trim_R1.fastq.gz --outpair ${reads[1].baseName}_trim_R2.fastq.gz \
> ${reads[0].baseName}_trimming_report.txt
"""
}
/*
* bedtools :
* Imputs : fastq files
* Output : fastq files
*/
/* fasta extraction */
params.fasta = "$baseDir/data/fasta/*.fasta"
params.bed = "$baseDir/data/annot/*.bed"
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
log.info "fasta file : ${params.fasta}"
log.info "bed file : ${params.bed}"
Channel
.fromPath( params.fasta )
.ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
.set { fasta_files }
Channel
.fromPath( params.bed )
.ifEmpty { error "Cannot find any bed files matching: ${params.bed}" }
.set { bed_files }
process fasta_from_bed {
tag "${bed.baseName}"
cpus 4
publishDir "results/fasta/", mode: 'copy'
input:
file fasta from fasta_files
file bed from bed_files
output:
file "*_extracted.fasta" into fasta_files_extracted
script:
"""
bedtools getfasta -name \
-fi ${fasta} -bed ${bed} -fo ${bed.baseName}_extracted.fasta
"""
}
/*
* Kallisto :
* Imputs : fastq files
* Imputs : fasta files
* Output : bam files
*/
/* fasta indexing */
process index_fasta {
tag "$fasta.baseName"
publishDir "results/mapping/index/", mode: 'copy'
input:
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
output:
file "*.index*" into index_files
script:
"""
kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \
> ${fasta.baseName}_kallisto_report.txt
"""
}
/*
* for paired-end data
*/
process mapping_fastq {
tag "$reads"
cpus 4
publishDir "results/mapping/quantification/", mode: 'copy'
input:
file reads from fastq_files_trim
file index from index_files.toList()
output:
file "*" into counts_files
script:
"""
mkdir ${reads[0].baseName}
kallisto quant -i ${index} -t ${task.cpus} \
--bias --bootstrap-samples 100 -o ${reads[0].baseName} \
${reads[0]} ${reads[1]} &> ${reads[0].baseName}_kallisto_report.txt
"""
}