diff --git a/src/fasta_sampler.nf b/src/fasta_sampler.nf
new file mode 100644
index 0000000000000000000000000000000000000000..d1200ed496c77756cde525835f581b71b2528990
--- /dev/null
+++ b/src/fasta_sampler.nf
@@ -0,0 +1,20 @@
+// Take a quick sample of every FASTA file found in the tiny dataset.
+Channel
+  .fromPath( "data/tiny_dataset/fasta/*.fasta" )
+  .set { fasta_file }
+
+// Save the default `head` output of each FASTA as <baseName>_sample.fasta.
+process sample_fasta {
+  publishDir "results/sampling/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+
+  output:
+    file "*_sample.fasta" into fasta_sample
+
+  script:
+"""
+head ${fasta} > ${fasta.baseName}_sample.fasta
+"""
+}
diff --git a/src/solution_RNASeq.config b/src/solution_RNASeq.config
new file mode 100644
index 0000000000000000000000000000000000000000..3e8ba47bdb12f98c134ca4ebf96a2593d6ba6c2c
--- /dev/null
+++ b/src/solution_RNASeq.config
@@ -0,0 +1,174 @@
+// Per-environment settings for the processes of src/solution_RNASeq.nf.
+profiles {
+  // docker: run every process inside its Docker container.
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: adaptor_removal {
+        container = "lbmc/cutadapt:2.1"
+        cpus = 1
+      }
+      withName: trimming {
+        cpus = 4
+        container = "lbmc/urqt:d62c1f8"
+      }
+      withName: fasta_from_bed {
+        container = "lbmc/bedtools:2.25.0"
+        cpus = 1
+      }
+      withName: index_fasta {
+        container = "lbmc/kallisto:0.44.0"
+        cpus = 4
+      }
+      withName: mapping_fastq {
+        container = "lbmc/kallisto:0.44.0"
+        cpus = 4
+      }
+    }
+  }
+  // singularity: same images as the docker profile, cached under ./bin/.
+  singularity {
+    singularity.enabled = true
+    singularity.cacheDir = "./bin/"
+    process {
+      withName: adaptor_removal {
+        container = "lbmc/cutadapt:2.1"
+        cpus = 1
+      }
+      withName: trimming {
+        cpus = 4
+        container = "lbmc/urqt:d62c1f8"
+      }
+      withName: fasta_from_bed {
+        container = "lbmc/bedtools:2.25.0"
+        cpus = 1
+      }
+      withName: index_fasta {
+        container = "lbmc/kallisto:0.44.0"
+        cpus = 4
+      }
+      withName: mapping_fastq {
+        container = "lbmc/kallisto:0.44.0"
+        cpus = 4
+      }
+    }
+  }
+  // psmn: SGE executor with per-tool conda environments from $baseDir/.conda_envs.
+  psmn {
+    process {
+      withName: adaptor_removal {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/cutadapt_2.1"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 1
+        memory = "20GB"
+        time = "12h"
+        queue = 'monointeldeb128'
+      }
+      withName: trimming {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/urqt_d62c1f8"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 32
+        memory = "30GB"
+        time = "24h"
+        queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
+        penv = 'openmp32'
+      }
+      withName: fasta_from_bed {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/bedtools_2.25.0"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 1
+        memory = "20GB"
+        time = "12h"
+        queue = 'monointeldeb128'
+      }
+      withName: index_fasta {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/kallisto_0.44.0"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 32
+        memory = "30GB"
+        time = "24h"
+        queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
+        penv = 'openmp32'
+      }
+      withName: mapping_fastq {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/kallisto_0.44.0"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 32
+        memory = "30GB"
+        time = "24h"
+        queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
+        penv = 'openmp32'
+      }
+    }
+  }
+  // ccin2p3: SGE executor with Singularity containers; scratch is enabled,
+  // inputs are staged in by copy and outputs are staged out with rsync.
+  ccin2p3 {
+    singularity.enabled = true
+    singularity.cacheDir = "$baseDir/.singularity_in2p3/"
+    singularity.runOptions = "--bind /pbs,/sps,/scratch"
+    process {
+      withName: adaptor_removal {
+        container = "lbmc/cutadapt:2.1"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
+      withName: trimming {
+        container = "lbmc/urqt:d62c1f8"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
+      withName: fasta_from_bed {
+        container = "lbmc/bedtools:2.25.0"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
+      withName: index_fasta {
+        container = "lbmc/kallisto:0.44.0"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
+      withName: mapping_fastq {
+        container = "lbmc/kallisto:0.44.0"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
+    }
+  }
+}
diff --git a/src/solution_RNASeq.nf b/src/solution_RNASeq.nf
new file mode 100644
index 0000000000000000000000000000000000000000..73940d6595aa3828629292cff067fcf20d3603f0
--- /dev/null
+++ b/src/solution_RNASeq.nf
@@ -0,0 +1,119 @@
+// Paired-end RNA-Seq pipeline: adaptor removal, quality trimming, sequence
+// extraction from a BED annotation, kallisto indexing and quantification.
+log.info "fastq files : ${params.fastq}"
+log.info "fasta file : ${params.fasta}"
+log.info "bed file : ${params.bed}"
+
+// Input channels; each one raises an error when its pattern matches no file.
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_files }
+Channel
+  .fromPath( params.bed )
+  .ifEmpty { error "Cannot find any bed files matching: ${params.bed}" }
+  .set { bed_files }
+Channel
+  .fromFilePairs( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+
+// Trim the adaptor sequences from both reads of each pair with cutadapt.
+process adaptor_removal {
+  tag "$pair_id"
+  publishDir "results/fastq/adaptor_removal/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files
+
+  output:
+  set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut
+
+  script:
+  """
+
+  cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT -A AGATCGGAAGAG -G CTCTTCCGATCT \
+  -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \
+  ${reads[0]} ${reads[1]} > ${pair_id}_report.txt
+  """
+}
+
+// Quality-trim the adaptor-free read pairs with UrQt (--t 20).
+process trimming {
+  tag "${reads}"
+  publishDir "results/fastq/trimming/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_cut
+
+  output:
+  set pair_id, "*_trim_R{1,2}.fastq.gz" into fastq_files_trim
+
+  script:
+"""
+UrQt --t 20 --m ${task.cpus} --gz \
+--in ${reads[0]} --inpair ${reads[1]} \
+--out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
+> ${pair_id}_trimming_report.txt
+"""
+}
+
+// Extract the sequences of the BED features from the FASTA (bedtools getfasta).
+process fasta_from_bed {
+  tag "${bed.baseName}"
+  publishDir "results/fasta/", mode: 'copy'
+
+  input:
+  file fasta from fasta_files
+  file bed from bed_files
+
+  output:
+  file "*_extracted.fasta" into fasta_files_extracted
+
+  script:
+"""
+bedtools getfasta -name \
+-fi ${fasta} -bed ${bed} -fo ${bed.baseName}_extracted.fasta
+"""
+}
+
+// Build the kallisto index (-k 31) from the extracted sequences.
+process index_fasta {
+  tag "$fasta.baseName"
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+  file fasta from fasta_files_extracted
+
+  output:
+  file "*.index*" into index_files
+  file "*_kallisto_report.txt" into index_files_report
+
+  script:
+"""
+kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \
+2> ${fasta.baseName}_kallisto_report.txt
+"""
+}
+
+// Quantify each trimmed read pair against the index with kallisto quant.
+process mapping_fastq {
+  tag "$reads"
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_trim
+  file index from index_files.collect()
+
+  output:
+  file "*" into counts_files
+
+  script:
+"""
+mkdir ${pair_id}
+
+kallisto quant -i ${index} -t ${task.cpus} \
+--bias --bootstrap-samples 100 -o ${pair_id} \
+${reads[0]} ${reads[1]} &> ${pair_id}/kallisto_report.txt
+"""
+}