From 315a6d3a0b9a2bc363c5912cd7fa9d5e903abe80 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent@modolo.fr>
Date: Tue, 16 Oct 2018 15:19:06 +0200
Subject: [PATCH] rm SNP_calling scripts

---
 src/1_JU28_59vs17_SNP_calling.sh |  58 -----
 src/SNP_calling.config           | 187 --------------
 src/SNP_calling.nf               | 417 -------------------------------
 src/intersect_SNP.R              | 100 --------
 4 files changed, 762 deletions(-)
 delete mode 100644 src/1_JU28_59vs17_SNP_calling.sh
 delete mode 100644 src/SNP_calling.config
 delete mode 100644 src/SNP_calling.nf
 delete mode 100755 src/intersect_SNP.R

diff --git a/src/1_JU28_59vs17_SNP_calling.sh b/src/1_JU28_59vs17_SNP_calling.sh
deleted file mode 100644
index b31f6c51..00000000
--- a/src/1_JU28_59vs17_SNP_calling.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/sh
-
-# generate training set
-ll *.gz | sed 's/.gz//g' | awk '{system("gunzip -c "$9".gz > ~/data/JU28_59vs17_SNP/data/samples/"$9)}'
-cd ~/data/JU28_59vs17_SNP/data/samples/
-~/scripts/fastq_sampler/fastq_sampler.py -i 100000 -f MR_350_clean_1.fastq -g MR_350_clean_2.fastq
-~/scripts/fastq_sampler/fastq_sampler.py -i 100000 -f MR_550_clean_1.fastq -g MR_550_clean_2.fastq
-~/scripts/fastq_sampler/fastq_sampler.py -i 100000 -f NG-10944_JU2859_bis_lib169352_5217_1_1.fastq -g NG-10944_JU2859_bis_lib169352_5217_1_2.fastq
-ll s_*.fastq | awk '{system("gzip < "$9" > "$9".gz")}'
-cd ~/projects/JU28_59vs17_SNP/
-
-# training set analysis
-
-mkdir tests
-cd tests
-../nextflow ../src/SNP_calling.nf -c ../src/SNP_calling.config -profile docker --fasta "../data/fasta/DBG2OLC_output2.fasta" --fastq "../data/samples/*_{1,2}.fastq.gz" -resume -w ~/data/work_s/ --tumor "[\"s_NG-10944_JU2859_bis_lib169352_5217_1\"]" --normal "[\"s_MR_550_clean\", \"s_MR_350_clean\"]" --seq_number 800000
-~/scripts/sms.sh "SNP done"
-
-# real set analysis
-
-./nextflow src/SNP_calling.nf -c src/SNP_calling.config -profile docker --fasta "data/fasta/DBG2OLC_output2.fasta" --fastq "data/fastq/*_{1,2}.fastq.gz" -resume -w ~/data/work/ --tumor "[\"NG-10944_JU2859_bis_lib169352_5217_1\"]" --normal "[\"MR_550_clean\", \"MR_350_clean\"]"
-~/scripts/sms.sh "SNP done"
-src/intersect_SNP.R \
-  results/SNP/vcf_samtools/normal_sample_filtered.csv \
-  results/SNP/vcf_samtools/tumor_sample_filtered.csv \
-  results/fasta/DBG2OLC_output2_filtered.fasta \
-  data/list_of_enzymes.csv
-~/scripts/sms.sh "SNP analysis done"
-
-./nextflow src/SNP_calling.nf -c src/SNP_calling.config -profile docker --fasta "data/fasta/final_assembly.fasta" --fastq "data/fastq/*_{1,2}.fastq.gz" -resume -w ~/data/work/ --tumor "[\"NG-10944_JU2859_bis_lib169352_5217_1\"]" --normal "[\"MR_550_clean\", \"MR_350_clean\"]"
-~/scripts/sms.sh "SNP done"
-src/intersect_SNP.R \
-  results/SNP/vcf_samtools/normal_sample_filtered.csv \
-  results/SNP/vcf_samtools/tumor_sample_filtered.csv \
-  results/fasta/final_assembly_filtered.fasta \
-  data/list_of_enzymes.csv
-~/scripts/sms.sh "SNP analysis done"
-
-
-# on the PSMN
-find ~/data/ -name "MR_350_clean*"
-find ~/data/ -name "MR_550_clean*"
-find ~/data/ -name "10944_JU2859_bis_lib169352_5217_1*"
-
-./nextflow src/SNP_calling.nf -c src/SNP_calling.config -profile sge --fasta "data/fasta/final_assembly.fasta" --fastq "/Xnfs/lbmcdb/Delattre_team/Request/Clean_data_for_assembly/*_{1,2}.fastq.gz" -resume -w /scratch/lmodolo/work/ --tumor "[\"NG-10944_JU2859_bis_lib169352_5217_1\"]" --normal "[\"MR_550_clean\", \"MR_350_clean\"]"
-
-mkdir -p results/blastall/
-makeblastdb -in data/fasta/DBG2OLC_output2.fasta -parse_seqids -dbtype nucl
-blastn -query data/RNA5S_belari.fasta -db data/fasta/DBG2OLC_output2.fasta -out results/blastall/RNA5S_2.out
-less results/blastall/RNA5S_2.out
-
-makeblastdb -in data/fasta/DBG2OLC_output1.fasta -parse_seqids -dbtype nucl
-blastn -query data/RNA5S_belari.fasta -db data/fasta/DBG2OLC_output1.fasta -out results/blastall/RNA5S_1.out
-less results/blastall/RNA5S_1.out
-
-makeblastdb -in data/fasta/nanoport_denovo.fasta -parse_seqids -dbtype nucl
-blastn -query data/RNA5S_belari.fasta -db data/fasta/nanoport_denovo.fasta -out results/blastall/RNA5S_nanoport.out
-less results/blastall/RNA5S_nanoport.out
diff --git a/src/SNP_calling.config b/src/SNP_calling.config
deleted file mode 100644
index 1ca9ac2c..00000000
--- a/src/SNP_calling.config
+++ /dev/null
@@ -1,187 +0,0 @@
-profiles {
-  docker {
-    docker.temp = 'auto'
-    docker.enabled = true
-    process {
-      withName: adaptor_removal {
-        container = "cutadapt:1.14"
-      }
-      withName: trimming {
-        container = "urqt:d62c1f8"
-      }
-      withName: filter_fasta {
-        container = "bioawk:1.0"
-      }
-      withName: index_fasta {
-        container = "bowtie2:2.3.4.1"
-      }
-      withName: mapping_fastq {
-        container = "bowtie2:2.3.4.1"
-      }
-      withName: merge_bam {
-        container = "sambamba:0.6.7"
-      }
-      withName: sort_bam {
-        container = "sambamba:0.6.7"
-      }
-      withName: index_bam {
-        container = "sambamba:0.6.7"
-      }
-      withName: index2_fasta {
-        container = "gatk:4.0.8.1"
-      }
-      withName: index3_fasta {
-        container = "samtools:1.7"
-      }
-      withName: samtools_SNP_tumor {
-        container = "bcftools:1.7"
-      }
-      withName: samtools_SNP_norm {
-        container = "bcftools:1.7"
-      }
-      withName: vcf_to_csv_tumor {
-        container = "gatk:4.0.8.1"
-      }
-      withName: vcf_to_csv_norm {
-        container = "gatk:4.0.8.1"
-      }
-    }
-  }
-  sge {
-    process{
-      queueSize = 1000
-      pollInterval = '60sec'
-      withName: adaptor_removal {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "cutadapt/1.14"
-        executor = "sge"
-        cpus = 1
-        memory = "20GB"
-        time = "12h"
-        queue = 'monointeldeb128'
-      }
-      withName: trimming {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "UrQt/d62c1f8"
-        executor = "sge"
-        cpus = 16
-        memory = "5GB"
-        time = "12h"
-        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
-        penv = 'openmp16'
-      }
-      withName: filter_fasta {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "bioawk/1.0"
-        executor = "sge"
-        cpus = 1
-        memory = "20GB"
-        time = "12h"
-        queue = 'monointeldeb128'
-      }
-      withName: index_fasta {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "Bowtie2/2.3.4.1:sambamba/0.6.7:samblaster/0.1.24"
-        executor = "sge"
-        cpus = 1
-        memory = "30GB"
-        time = "6h"
-        queue = 'monointeldeb128'
-      }
-      withName: mapping_fastq {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "Bowtie2/2.3.4.1:sambamba/0.6.7:samblaster/0.1.24"
-        executor = "sge"
-        cpus = 16
-        memory = "30GB"
-        time = "12h"
-        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
-        penv = 'openmp16'
-      }
-      withName: merge_bam {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "sambamba/0.6.7"
-        executor = "sge"
-        cpus = 16
-        memory = "30GB"
-        time = "12h"
-        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
-        penv = 'openmp16'
-      }
-      withName: sort_bam {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "sambamba/0.6.7"
-        executor = "sge"
-        cpus = 16
-        memory = "30GB"
-        time = "12h"
-        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
-        penv = 'openmp16'
-      }
-      withName: index_bam {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "sambamba/0.6.7"
-        executor = "sge"
-        cpus = 16
-        memory = "30GB"
-        time = "12h"
-        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
-        penv = 'openmp16'
-      }
-      withName: index2_fasta {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "GATK/4.0.10.1"
-        executor = "sge"
-        cpus = 1
-        memory = "20GB"
-        time = "12h"
-        queue = 'monointeldeb128'
-      }
-      withName: index3_fasta {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "SAMtools/1.7"
-        executor = "sge"
-        cpus = 1
-        memory = "20GB"
-        time = "12h"
-        queue = 'monointeldeb128'
-      }
-      withName: samtools_SNP_tumor {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "BCFtools/1.7"
-        executor = "sge"
-        cpus = 1
-        memory = "20GB"
-        time = "12h"
-        queue = 'monointeldeb128'
-      }
-      withName: samtools_SNP_norm {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "BCFtools/1.7"
-        executor = "sge"
-        cpus = 1
-        memory = "20GB"
-        time = "12h"
-        queue = 'monointeldeb128'
-      }
-      withName: vcf_to_csv_tumor {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "GATK/4.0.10.1"
-        executor = "sge"
-        cpus = 1
-        memory = "20GB"
-        time = "12h"
-        queue = 'monointeldeb128'
-      }
-      withName: vcf_to_csv_norm {
-        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
-        module = "GATK/4.0.10.1"
-        executor = "sge"
-        cpus = 1
-        memory = "20GB"
-        time = "12h"
-        queue = 'monointeldeb128'
-      }
-    }
-  }
-}
diff --git a/src/SNP_calling.nf b/src/SNP_calling.nf
deleted file mode 100644
index 096afb9f..00000000
--- a/src/SNP_calling.nf
+++ /dev/null
@@ -1,417 +0,0 @@
-params.fastq = "$baseDir/data/*.fastq"
-params.fasta = "$baseDir/data/*.fasta"
-params.seq_length = 800000
-log.info "fastq files : ${params.fastq}"
-log.info "fasta files : ${params.fasta}"
-log.info "fasta length to retain : ${params.seq_length}"
-def normal_sample = Eval.me(params.normal)
-def tumor_sample = Eval.me(params.tumor)
-log.info "normal : ${normal_sample}"
-log.info "tumor : ${tumor_sample}"
-
-Channel
-  .fromPath( params.fasta )
-  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
-  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
-  .set { fasta_file  }
-Channel
-  .fromFilePairs( params.fastq )
-  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
-  .set { fastq_files }
-
-process adaptor_removal {
-  tag "$pair_id"
-  publishDir "results/fastq/adaptor_removal/", mode: 'copy'
-
-  input:
-  set pair_id, file(reads) from fastq_files
-
-  output:
-  set pair_id, file("*.fastq.gz") into fastq_files_cut
-  file "*_cutadapt_report.txt" into cut_files_report
-
-  script:
-"""
-cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT -A AGATCGGAAGAG -G CTCTTCCGATCT \
--o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \
-${reads[0]} ${reads[1]} > ${pair_id}_cutadapt_report.txt
-"""
-}
-
-process trimming {
-  tag "${pair_id}"
-  cpus 4
-  publishDir "results/fastq/trimming/", mode: 'copy'
-
-  input:
-  set pair_id, file(reads) from fastq_files_cut
-
-  output:
-  set pair_id, file("*.fastq.gz") into fastq_files_trim
-  file "*_trimming_report.txt" into trimming_files_report
-
-  script:
-"""
-UrQt --t 20 --m ${task.cpus} --gz \
---in ${reads[0]} --inpair ${reads[1]} \
---out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
-> ${pair_id}_trimming_report.txt
-"""
-}
-
-process filter_fasta {
-  tag "$fasta_id"
-  cpus 4
-  publishDir "results/fasta/", mode: 'copy'
-
-  input:
-    set fasta_id, file(fasta) from fasta_file
-
-  output:
-    set fasta_idf, "*_filtered.fasta" into filter_fasta_files
-
-  script:
-    fasta_idf = "${fasta_id}_filtered"
-"""
-bioawk -c fastx '{ if(length(\$seq) > $params.seq_length) { print ">"\$name; print \$seq }}' ${fasta} > \
-${fasta_id}_filtered.fasta
-"""
-}
-
-filter_fasta_files.into{
-  filtered_fasta_files;
-  indel_fasta_file;
-  recalibration_fasta_file;
-  haplotypecaller_fasta_file
-}
-
-process index_fasta {
-  tag "$file_id"
-  cpus 12
-  publishDir "results/mapping/index/", mode: 'copy'
-
-  input:
-    set file_id, file(fasta) from filtered_fasta_files
-
-  output:
-    file "*.index*" into index_files
-    file "*_report.txt" into indexing_report
-
-  script:
-"""
-bowtie2-build --threads ${task.cpus} ${fasta} ${file_id}.index &> ${file_id}_bowtie2_report.txt
-
-if grep -q "Error" ${file_id}_bowtie2_report.txt; then
-  exit 1
-fi
-"""
-}
-
-fastq_files_trim.into{
-  fastq_files_trim_norm;
-  fastq_files_trim_tumor
-}
-
-collect_fastq_files_trim_norm = fastq_files_trim_norm
-  .filter{ normal_sample.contains(it[0]) }
-  .map { it -> ["normal_sample", it[0], it[1]]}
-
-collect_fastq_files_trim_tumor = fastq_files_trim_tumor
-  .filter{ tumor_sample.contains(it[0]) }
-  .map { it -> ["tumor_sample", it[0], it[1]]}
-
-collect_fastq_files_trim = Channel.create()
-  .mix(collect_fastq_files_trim_norm, collect_fastq_files_trim_tumor)
-
-process mapping_fastq {
-  tag "$pair_id"
-  cpus 12
-  publishDir "results/mapping/bam/", mode: 'copy'
-
-  input:
-  set sample_name, pair_id, file(reads) from collect_fastq_files_trim
-  file index from index_files.collect()
-
-  output:
-  set pair_id, "*.bam" into bam_files
-  file "*_report.txt" into mapping_report
-
-  script:
-  index_id = index[0]
-  for (index_file in index) {
-    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
-        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
-    }
-  }
-"""
-bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
---rg-id ${sample_name} \
---rg PL:Illumina \
---rg SM:${sample_name} \
--1 ${reads[0]} -2 ${reads[1]} 2> \
-${pair_id}_bowtie2_report.txt | \
-samblaster --addMateTags -M -i /dev/stdin | \
-sambamba view -t ${task.cpus} --valid -S -f bam -l 0 /dev/stdin \
--o ${pair_id}.bam
-
-if grep -q "Error" ${pair_id}_bowtie2_report.txt; then
-  exit 1
-fi
-"""
-}
-
-process sort_bam {
-  tag "$file_id"
-  cpus 4
-
-  input:
-    set file_id, file(bam) from bam_files
-
-  output:
-    set file_id, "*_sorted.bam" into sorted_bam_files
-
-  script:
-"""
-sambamba sort -t ${task.cpus} --tmpdir=./tmp -o ${file_id}_sorted.bam ${bam}
-"""
-}
-
-sorted_bam_files.into {
-  sorted_bam_file_norm;
-  sorted_bam_file_tumor;
-}
-
-collect_sorted_bam_file_norm = sorted_bam_file_norm
-  .filter{ normal_sample.contains(it[0]) }
-  .map { it -> it[1]}
-  .buffer( size: normal_sample.size())
-  .map { it -> ["normal_sample", it]}
-collect_sorted_bam_file_tumor = sorted_bam_file_tumor
-  .filter{ tumor_sample.contains(it[0]) }
-  .map { it -> it[1]}
-  .buffer( size: tumor_sample.size())
-  .map { it -> ["tumor_sample", it]}
-
-collect_sorted_bam_file = Channel.create()
-  .mix(collect_sorted_bam_file_norm, collect_sorted_bam_file_tumor)
-
-process merge_bam {
-  tag "$file_id"
-  cpus 4
-  publishDir "results/mapping/bam/", mode: 'copy'
-
-  input:
-    set file_id, file(bam) from collect_sorted_bam_file
-
-  output:
-    set file_id, "*.bam" into merged_bam_files
-
-  script:
-"""
-if ((\$(ls -l *.bam | wc -l) > 1)); then
-sambamba merge -t ${task.cpus} ${file_id}.bam ${bam}
-else
-cp ${bam} ${file_id}.bam
-fi
-"""
-}
-
-merged_bam_files.into{
-  index_merged_bam_files;
-  haplo_bam_files_norm;
-  haplo_bam_files_tumor
-}
-
-process index_bam {
-  tag "$file_id"
-  cpus 4
-  publishDir "results/mapping/bam/", mode: 'copy'
-
-  input:
-    set file_id, file(bam) from index_merged_bam_files
-
-  output:
-    set file_id, "*.bam.bai" into index_bam_files
-
-  script:
-"""
-sambamba index -t ${task.cpus} ${bam}
-"""
-}
-
-index_bam_files.into{
-  named_index_bam_files;
-  indexed_bam_files
-}
-
-haplotypecaller_fasta_file.into{
-    final_fasta_file;
-    index2_fasta_file
-    index3_fasta_file
-  }
-
-process index2_fasta {
-  tag "$genome_id"
-  publishDir "results/fasta/", mode: 'copy'
-
-  input:
-    set genome_id, file(fasta) from index2_fasta_file
-
-  output:
-    set genome_id, "*.dict" into indexed2_fasta_file
-
-  script:
-"""
-gatk CreateSequenceDictionary -R ${fasta} &> gatk_output.txt
-"""
-}
-
-process index3_fasta {
-  tag "$genome_id"
-  publishDir "results/fasta/", mode: 'copy'
-
-  input:
-    set genome_id, file(fasta) from index3_fasta_file
-
-  output:
-    set genome_id, "*.fai" into indexed3_fasta_file
-
-  script:
-"""
-samtools faidx ${fasta}
-"""
-}
-
-final_bam_files_norm = haplo_bam_files_norm
-  .filter{ "normal_sample" == it[0] }
-final_bam_files_tumor = haplo_bam_files_tumor
-  .filter{ "tumor_sample" == it[0] }
-
-indexed_bam_files.into {
-  index_bam_files_norm;
-  index_bam_files_tumor
-}
-final_indexed_bam_files_norm = index_bam_files_norm
-  .filter{ "normal_sample" == it[0] }
-final_indexed_bam_files_tumor = index_bam_files_tumor
-   .filter{ "tumor_sample" == it[0] }
-
-final_bam_files_norm.set{
-  samtools_SNP_bam_files_norm
-}
-final_bam_files_tumor.set{
-  samtools_SNP_bam_files_tumor;
-}
-final_indexed_bam_files_norm.set{
-  samtools_SNP_index_bam_files_norm
-}
-final_indexed_bam_files_tumor.set{
-  samtools_SNP_index_bam_files_tumor;
-}
-final_fasta_file.into{
-  samtools_SNP_fasta_file_tumor;
-  samtools_SNP_fasta_file_norm;
-}
-indexed2_fasta_file.into{
-  samtools_SNP_indexed2_fasta_file_tumor;
-  samtools_SNP_indexed2_fasta_file_norm;
-}
-indexed3_fasta_file.into{
-  samtools_SNP_indexed3_fasta_file_tumor;
-  samtools_SNP_indexed3_fasta_file_norm;
-}
-
-process samtools_SNP_tumor {
-  tag "$file_id_tumor"
-  cpus 1
-  publishDir "results/SNP/vcf_samtools/", mode: 'copy'
-
-  input:
-    set file_id_tumor, file(bam_tumor) from samtools_SNP_bam_files_tumor
-    set file_ididx_tumor, file(bamidx_tumor) from samtools_SNP_index_bam_files_tumor
-    set genome_id, file(fasta) from samtools_SNP_fasta_file_tumor
-    set genome2_idx, file(fasta2idx) from samtools_SNP_indexed2_fasta_file_tumor
-    set genome3_idx, file(fasta3idx) from samtools_SNP_indexed3_fasta_file_tumor
-
-  output:
-    set file_id_tumor, "*.vcf" into vcf_files_tumor
-
-  script:
-"""
-bcftools mpileup -AE -f ${fasta} ${bam_tumor} --output-type v \
--a FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/DP,FORMAT/SP,INFO/AD,INFO/ADF,INFO/ADR | \
-bcftools call -mv --output-type v > ${file_id_tumor}_raw.vcf
-bcftools filter -s LowQual -e '%QUAL<20 || DP>100' ${file_id_tumor}_raw.vcf \
-> ${file_id_tumor}_filtered.vcf
-"""
-}
-
-process samtools_SNP_norm {
-  tag "$file_id_norm"
-  cpus 1
-  publishDir "results/SNP/vcf_samtools/", mode: 'copy'
-
-  input:
-    set file_id_norm, file(bam_norm) from samtools_SNP_bam_files_norm
-    set file_ididx_norm, file(bamidx_norm) from samtools_SNP_index_bam_files_norm
-    set genome_id, file(fasta) from samtools_SNP_fasta_file_norm
-    set genome2_idx, file(fasta2idx) from samtools_SNP_indexed2_fasta_file_norm
-    set genome3_idx, file(fasta3idx) from samtools_SNP_indexed3_fasta_file_norm
-
-  output:
-    set file_id_norm, "*.vcf" into vcf_files_norm
-
-  script:
-"""
-bcftools mpileup -AE -f ${fasta} ${bam_norm} --output-type v \
--a FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/DP,FORMAT/SP,INFO/AD,INFO/ADF,INFO/ADR | \
-bcftools call -mv --output-type v  > ${file_id_norm}_raw.vcf
-bcftools filter -s LowQual -e '%QUAL<20 || DP>100' ${file_id_norm}_raw.vcf \
-> ${file_id_norm}_filtered.vcf
-"""
-}
-
-process vcf_to_csv_tumor {
-  tag "$file_id_tumor"
-  cpus 1
-  publishDir "results/SNP/vcf_samtools/", mode: 'copy'
-
-  input:
-    set file_id_tumor, file(vcf) from vcf_files_tumor
-
-  output:
-    set file_id_tumor, "*.csv" into csv_files_tumor
-
-  script:
-"""
-gatk VariantsToTable -V ${file_id_tumor}_raw.vcf \
--F CHROM -F POS -F TYPE -GF GT -GF AD -F AD -F DP \
--O ${file_id_tumor}_raw.csv
-gatk VariantsToTable -V ${file_id_tumor}_filtered.vcf \
--F CHROM -F POS -F TYPE -GF GT -GF AD -F AD -F DP \
--O ${file_id_tumor}_filtered.csv
-"""
-}
-
-process vcf_to_csv_norm {
-  tag "$file_id_norm"
-  cpus 1
-  publishDir "results/SNP/vcf_samtools/", mode: 'copy'
-
-  input:
-    set file_id_norm, file(vcf) from vcf_files_norm
-
-  output:
-    set file_id_norm, "*.csv" into csv_files_norm
-
-  script:
-"""
-gatk VariantsToTable -V ${file_id_norm}_raw.vcf \
--F CHROM -F POS -F TYPE -GF GT -GF AD -F AD -F DP \
--O ${file_id_norm}_raw.csv
-gatk VariantsToTable -V ${file_id_norm}_filtered.vcf \
--F CHROM -F POS -F TYPE -GF GT -GF AD -F AD -F DP \
--O ${file_id_norm}_filtered.csv
-"""
-}
-
diff --git a/src/intersect_SNP.R b/src/intersect_SNP.R
deleted file mode 100755
index 50fa2e8c..00000000
--- a/src/intersect_SNP.R
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/Rscript
-rm(list = ls())
-library("tidyverse")
-library("seqinr")
-
-args <- c(
-  "results/SNP/vcf_samtools/normal_sample_filtered.csv",
-  "results/SNP/vcf_samtools/tumor_sample_filtered.csv",
-  "results/fasta/DBG2OLC_output2_filtered.fasta",
-  "data/list_of_enzymes.csv"
-  )
-seq_restric_size <- 21
-
-args <- commandArgs(trailingOnly = TRUE)
-snp_a <- read_delim(args[1], delim = "\t") %>%
-  mutate(cords = paste0(CHROM, POS))
-snp_b <- read_delim(args[2], delim = "\t") %>%
-  mutate(cords = paste0(CHROM, POS))
-
-only_b <- snp_b %>%
-  select(cords) %>%
-  setdiff(snp_a %>% select(cords)) %>%
-  pull(cords)
-
-snp <- snp_b %>%
-  filter(cords %in% only_b) %>%
-  filter(tumor_sample.GT %in% c("A/A", "T/T", "G/G", "C/C")) %>%
-  mutate(REF = do.call(rbind, strsplit(AD, split = ",", fixed = TRUE))[,1],
-         VAR = do.call(rbind, strsplit(AD, split = ",", fixed = TRUE))[,2],
-         REF = as.integer(REF),
-         VAR = as.integer(VAR),
-         tumor_sample.AD = NULL,
-         cords = NULL
-  ) %>%
-  filter(REF == 0) %>%
-  filter(VAR >= 10) %>%
-  arrange(CHROM, desc(VAR))
-
-fastafile <- read.fasta(file = args[3],
-                        as.string = TRUE)
-
-snp$seq_list <- snp %>%
-  apply(1, FUN = function(x, fastafile, POS, CHROM, seq_restric_size){
-      begin <- as.integer(x[ POS ]) - ((seq_restric_size - 1) / 2)
-      end <- as.integer(x[ POS ]) + ((seq_restric_size - 1) / 2)
-      chrom <- x[ CHROM ]
-      seq_restric <- fastafile[[ chrom ]] %>%
-        c2s() %>%
-        substr(begin, end) %>%
-        s2c()
-      seq_restric[((seq_restric_size - 1) / 2) + 1] <-
-        toupper(seq_restric[((seq_restric_size - 1) / 2) + 1])
-      print(paste0(chrom, ":", begin, "-", end, " ", seq_restric %>% c2s()))
-      return(seq_restric %>% c2s())
-    },
-    fastafile = fastafile,
-    POS = which(colnames(snp) %in% "POS"),
-    CHROM = which(colnames(snp) %in% "CHROM"),
-    seq_restric_size = seq_restric_size
-  )
-
-snp %>%
-  write_csv(paste0(args[2], "only.csv" ))
-
-
-snp <- read_csv(paste0(args[2], "only.csv" ))
-enzyme_list <- read_csv(args[4]) %>%
-  mutate(size = nchar(seq))
-
-snp <- snp %>%
-  mutate(enzyme = NA,
-         enzyme_pos = NA,
-         enzyme_seq = NA)
-for (i in seq_len(nrow(enzyme_list))) {
-  enzyme <- enzyme_list[i, ]
-  enzyme_search <- gregexpr(toupper(enzyme$seq),
-                            toupper(snp$seq_list),
-                            fixed = TRUE) %>%
-    lapply(FUN = function(x, enzyme, seq_restric_size) {
-      if (x[1] < 0) {
-        return(c(NA, NA, NA))
-      } else {
-        if (x[1] <= (seq_restric_size - 1) / 2 + 1 &
-            x[1] + enzyme$size >=  (seq_restric_size - 1) / 2 + 1) {
-        return(c(enzyme$enzyme, x[1], enzyme$seq))
-        }
-        return(c(NA, NA, NA))
-      }
-    },
-    enzyme = enzyme,
-    seq_restric_size = seq_restric_size)
-  enzyme_search <- do.call(rbind, enzyme_search)
-  snp[is.na(snp$enzyme), c("enzyme", "enzyme_pos", "enzyme_seq")] <-
-    enzyme_search[is.na(snp$enzyme), ]
-}
-
-snp %>%
-  filter(!is.na(enzyme)) %>%
-  write_csv(paste0(args[2], "only_enzyme.csv" ))
-
-- 
GitLab