diff --git a/src/1_JU28_59vs17_SNP_calling.sh b/src/1_JU28_59vs17_SNP_calling.sh index 5937da14f2e2884dd3ab9789abc23698eca3bb73..be184a5d28a761426da572ccad00f2a05a7b6634 100644 --- a/src/1_JU28_59vs17_SNP_calling.sh +++ b/src/1_JU28_59vs17_SNP_calling.sh @@ -11,7 +11,7 @@ cd ~/projects/JU28_59vs17_SNP/ # training set analysis -./nextflow src/SNP_calling.nf -c src/SNP_calling.config -profile docker --fasta "data/fasta/DBG2OLC-output2.fasta" --fastq "data/samples/*_{1,2}.fastq.gz" -resume -w ~/data/work/ --tumor "[\"NG-10944_JU2859_bis_lib169352_5217_1\"]" --normal "[\"MR_550_clean\", \"MR_350_clean\"]" +./nextflow src/SNP_calling.nf -c src/SNP_calling.config -profile docker --fasta "data/fasta/DBG2OLC-output2.fasta" --fastq "data/samples/*_{1,2}.fastq.gz" -resume -w ~/data/work_s/ --tumor "[\"s_NG-10944_JU2859_bis_lib169352_5217_1\"]" --normal "[\"s_MR_550_clean\", \"s_MR_350_clean\"]" ~/scripts/sms.sh "SNP done" # real set analysis diff --git a/src/SNP_calling.config b/src/SNP_calling.config index 4b693893951517f76dd13ab6bc51448a8575c488..24d288442ac8b9c32b4b06bdca044a2bf80623d5 100644 --- a/src/SNP_calling.config +++ b/src/SNP_calling.config @@ -10,10 +10,10 @@ profiles { container = "urqt:d62c1f8" } withName: index_fasta { - container = "bwa:0.7.17" + container = "bowtie2:2.3.4.1" } withName: mapping_fastq { - container = "bwa:0.7.17" + container = "bowtie2:2.3.4.1" } withName: merge_bam { container = "sambamba:0.6.7" @@ -21,9 +21,6 @@ profiles { withName: sort_bam { container = "sambamba:0.6.7" } - withName: name_fasta { - container = "samtools:1.7" - } withName: index_bam { container = "sambamba:0.6.7" } diff --git a/src/SNP_calling.nf b/src/SNP_calling.nf index 56f0568b08dda9aa69343b7c98a2dc170bcf5f1a..3d040bfced133f1ef6c794a20e4e805e369d3604 100644 --- a/src/SNP_calling.nf +++ b/src/SNP_calling.nf @@ -62,62 +62,56 @@ UrQt --t 20 --m ${task.cpus} --gz \ } process index_fasta { - tag "$fasta_id" + tag "$file_id" cpus 4 publishDir "results/mapping/index/", mode: 'copy' input: - set fasta_id, file(fasta) from fasta_file + set file_id, file(fasta) from fasta_file output: - set fasta_id, "${fasta.baseName}.*" into index_files - file "*_bwa_report.txt" into index_files_report + file "*.index*" into index_files + file "*_report.txt" into indexing_report script: """ -bwa index -p ${fasta_id} ${fasta} \ -&> ${fasta.baseName}_bwa_report.txt -""" -} - +bowtie2-build --threads ${task.cpus} ${fasta} ${file_id}.index &> ${file_id}_bowtie2_report.txt -fastq_files_trim.into { - fastq_files_trim_norm; - fastq_files_trim_tumor +if grep -q "Error" ${file_id}_bowtie2_report.txt; then + exit 1 +fi +""" } -collect_fastq_files_trim_norm = fastq_files_trim_norm - .filter{ normal_sample.contains(it[0]) } - .map { it -> ["normal_sample", it[0], it[1]]} - -collect_fastq_files_trim_tumor = fastq_files_trim_tumor - .filter{ tumor_sample.contains(it[0]) } - .map { it -> ["tumor_sample", it[0], it[1]]} - -collect_fastq_files_trim = Channel.create() - .mix(collect_fastq_files_trim_norm, collect_fastq_files_trim_tumor) - process mapping_fastq { tag "$pair_id" - cpus 6 - publishDir "results/mapping/bam/", mode: 'copy' + cpus 4 + publishDir "results/mapping/bams/", mode: 'copy' input: - set sample_name, pair_id, file(reads) from collect_fastq_files_trim - set index_id, file(index) from index_files.collect() + set pair_id, file(reads) from fastq_files_trim + file index from index_files.collect() output: - set pair_id, "${pair_id}.bam" into bam_files - file "${pair_id}_bwa_report.txt" into mapping_repport_files + set pair_id, "*.bam" into bam_files + file "*_report.txt" into mapping_report script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } """ -bwa mem -t ${task.cpus} -M \ --R '@RG\\tID:${sample_name}\\tSM:${sample_name}\\tPL:Illumina' \ -${index_id} ${reads[0]} ${reads[1]} | \ -samblaster --addMateTags -M -i /dev/stdin | \ -sambamba view -t ${task.cpus} --valid -S -f bam -l 0 /dev/stdin -o ${pair_id}.bam \ -2> ${pair_id}_bwa_report.txt +bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \ +-1 ${reads[0]} -2 ${reads[1]} 2> \ +${pair_id}_bowtie2_report.txt | \ +samtools view -Sb - > ${pair_id}.bam + +if grep -q "Error" ${pair_id}_bowtie2_report.txt; then + exit 1 +fi """ } @@ -192,7 +186,7 @@ process index_bam { set file_id, file(bam) from index_merged_bam_files output: - set file_id, "*.bam*" into index_bam_files + set file_id, "*.bam.bai" into index_bam_files script: """