diff --git a/doc/available_tools.md b/doc/available_tools.md
index 10b97ea32253586727954a4a4019b23a055aa67f..297cc27e9211f15023d415a8e20b13b54c8aac18 100644
--- a/doc/available_tools.md
+++ b/doc/available_tools.md
@@ -34,6 +34,7 @@ sambamba | ok | ok | ok | ok
 samblaster | ok | ok | ok | ok
 SAMtools | ok | ok | ok | ok
 SRAtoolkit | ok | ok | ok | ok
+STAR | ok | ok | ok | ok
 subread | **no** | ok | ok | ok
 TopHat | **no** | ok | ok | ok
 Trimmomatic | **no** | ok | ok | ok
diff --git a/src/nf_modules/star/indexing.config b/src/nf_modules/star/indexing.config
new file mode 100644
index 0000000000000000000000000000000000000000..3e4017b92dc87fa553c890055f1769beef836468
--- /dev/null
+++ b/src/nf_modules/star/indexing.config
@@ -0,0 +1,54 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: index_fasta {
+        container = "lbmc/star:2.7.3a"
+        cpus = 4
+      }
+    }
+  }
+  singularity {
+    singularity.enabled = true
+    singularity.cacheDir = "./bin/"
+    process {
+      withName: index_fasta {
+        container = "lbmc/star:2.7.3a"
+        cpus = 4
+      }
+    }
+  }
+  psmn{
+    process{
+      withName: index_fasta {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/star_2.7.3a"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16
+        memory = "20GB"
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+    }
+  }
+  ccin2p3 {
+    singularity.enabled = true
+    singularity.cacheDir = "/sps/lbmc/common/singularity/"
+    singularity.runOptions = "--bind /pbs,/sps,/scratch"
+    process{
+      withName: index_fasta {
+        container = "lbmc/star:2.7.3a"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/star/indexing.nf b/src/nf_modules/star/indexing.nf
new file mode 100644
index 0000000000000000000000000000000000000000..0f340b2d3d11ff5fd79d6b1e5e3f8c56d35f2154
--- /dev/null
+++ b/src/nf_modules/star/indexing.nf
@@ -0,0 +1,42 @@
+// Build a STAR genome index from a fasta file and a GTF annotation.
+params.fasta = "$baseDir/data/bam/*.fasta"
+params.annotation = "$baseDir/data/bam/*.gtf"
+// Value passed to STAR --genomeSAindexNbases; the formula used below is
+// min(14, log2(GenomeLength)/2 - 1). The default of 3 matches the previous
+// hard-coded value; override with --sa_index_nbases for larger genomes.
+params.sa_index_nbases = 3
+
+log.info "fasta files : ${params.fasta}"
+log.info "annotation files : ${params.annotation}"
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_file }
+Channel
+  .fromPath( params.annotation )
+  .ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" }
+  .set { annotation_file }
+
+process index_fasta {
+  tag "$fasta.baseName"
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+    file annotation from annotation_file
+
+  output:
+    file "*" into index_files
+
+  script:
+"""
+STAR --runThreadN ${task.cpus} --runMode genomeGenerate \
+--genomeDir ./ \
+--genomeFastaFiles ${fasta} \
+--sjdbGTFfile ${annotation} \
+--genomeSAindexNbases ${params.sa_index_nbases} # min(14, log2(GenomeLength)/2 - 1)
+"""
+}
+
+
diff --git a/src/nf_modules/star/mapping_paired.config b/src/nf_modules/star/mapping_paired.config
new file mode 100644
index 0000000000000000000000000000000000000000..c2de5c50997fa856cf9923522ca2cd54f6313a29
--- /dev/null
+++ b/src/nf_modules/star/mapping_paired.config
@@ -0,0 +1,54 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: mapping_fastq {
+        container = "lbmc/star:2.7.3a"
+        cpus = 4
+      }
+    }
+  }
+  singularity {
+    singularity.enabled = true
+    singularity.cacheDir = "./bin/"
+    process {
+      withName: mapping_fastq {
+        container = "lbmc/star:2.7.3a"
+        cpus = 4
+      }
+    }
+  }
+  psmn{
+    process{
+      withName: mapping_fastq {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/star_2.7.3a"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16
+        memory = "30GB"
+        time = "24h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+    }
+  }
+  ccin2p3 {
+    singularity.enabled = true
+    singularity.cacheDir = "/sps/lbmc/common/singularity/"
+    singularity.runOptions = "--bind /pbs,/sps,/scratch"
+    process{
+      withName: mapping_fastq {
+        container = "lbmc/star:2.7.3a"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/star/mapping_paired.nf b/src/nf_modules/star/mapping_paired.nf
new file mode 100644
index 0000000000000000000000000000000000000000..9ea901751da50944d23327f47bc3813ff3a49859
--- /dev/null
+++ b/src/nf_modules/star/mapping_paired.nf
@@ -0,0 +1,40 @@
+params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
+params.index = "$baseDir/data/index/*.index.*"
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromFilePairs( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$pair_id"
+  publishDir "results/mapping/bams/", mode: 'copy'
+
+  input:
+    set pair_id, file(reads) from fastq_files
+    file index from index_files.collect()
+
+  output:
+    set pair_id, "*.bam" into bam_files
+    file "*.out" into mapping_report
+
+  script:
+"""
+mkdir -p index
+mv ${index} index/
+STAR --runThreadN ${task.cpus} \
+--genomeDir index/ \
+--readFilesIn ${reads[0]} ${reads[1]} \
+--outFileNamePrefix ${pair_id} \
+--outSAMmapqUnique 0 \
+--outSAMtype BAM SortedByCoordinate
+"""
+}
+
diff --git a/src/nf_modules/star/mapping_single.config b/src/nf_modules/star/mapping_single.config
new file mode 100644
index 0000000000000000000000000000000000000000..c2de5c50997fa856cf9923522ca2cd54f6313a29
--- /dev/null
+++ b/src/nf_modules/star/mapping_single.config
@@ -0,0 +1,54 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: mapping_fastq {
+        container = "lbmc/star:2.7.3a"
+        cpus = 4
+      }
+    }
+  }
+  singularity {
+    singularity.enabled = true
+    singularity.cacheDir = "./bin/"
+    process {
+      withName: mapping_fastq {
+        container = "lbmc/star:2.7.3a"
+        cpus = 4
+      }
+    }
+  }
+  psmn{
+    process{
+      withName: mapping_fastq {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/star_2.7.3a"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16
+        memory = "30GB"
+        time = "24h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+    }
+  }
+  ccin2p3 {
+    singularity.enabled = true
+    singularity.cacheDir = "/sps/lbmc/common/singularity/"
+    singularity.runOptions = "--bind /pbs,/sps,/scratch"
+    process{
+      withName: mapping_fastq {
+        container = "lbmc/star:2.7.3a"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/star/mapping_single.nf b/src/nf_modules/star/mapping_single.nf
new file mode 100644
index 0000000000000000000000000000000000000000..9d3d51b38da6cc7cf7dff985e5c0052b2924168a
--- /dev/null
+++ b/src/nf_modules/star/mapping_single.nf
@@ -0,0 +1,42 @@
+// Map single-end fastq files with STAR against a previously built index.
+params.fastq = "$baseDir/data/fastq/*.fastq"
+// Default index glob (same value as mapping_paired.nf) so params.index is
+// never null when --index is omitted.
+params.index = "$baseDir/data/index/*.index.*"
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$file_id"
+  publishDir "results/mapping/bams/", mode: 'copy'
+
+  input:
+    set file_id, file(reads) from fastq_files
+    file index from index_files.collect()
+
+  output:
+    set file_id, "*.bam" into bam_files
+    file "*.out" into mapping_report
+
+  script:
+"""
+mkdir -p index
+mv ${index} index/
+STAR --runThreadN ${task.cpus} \
+--genomeDir index/ \
+--readFilesIn ${reads} \
+--outFileNamePrefix ${file_id} \
+--outSAMmapqUnique 0 \
+--outSAMtype BAM SortedByCoordinate
+"""
+}
diff --git a/src/nf_modules/star/tests.sh b/src/nf_modules/star/tests.sh
new file mode 100755
index 0000000000000000000000000000000000000000..046ffe4f32bc78467a61440774553335f888b288
--- /dev/null
+++ b/src/nf_modules/star/tests.sh
@@ -0,0 +1,43 @@
+./nextflow src/nf_modules/star/indexing.nf \
+  -c src/nf_modules/star/indexing.config \
+  -profile docker \
+  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
+  --annotation "data/tiny_dataset/annot/tiny.gtf" \
+  -resume
+
+./nextflow src/nf_modules/star/mapping_single.nf \
+  -c src/nf_modules/star/mapping_single.config \
+  -profile docker \
+  --index "results/mapping/index/*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" \
+  -resume
+
+./nextflow src/nf_modules/star/mapping_paired.nf \
+  -c src/nf_modules/star/mapping_paired.config \
+  -profile docker \
+  --index "results/mapping/index/*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" \
+  -resume
+
+if [ -x "$(command -v singularity)" ]; then
+./nextflow src/nf_modules/star/indexing.nf \
+  -c src/nf_modules/star/indexing.config \
+  -profile singularity \
+  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
+  --annotation "data/tiny_dataset/annot/tiny.gtf" \
+  -resume
+
+./nextflow src/nf_modules/star/mapping_single.nf \
+  -c src/nf_modules/star/mapping_single.config \
+  -profile singularity \
+  --index "results/mapping/index/*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" \
+  -resume
+
+./nextflow src/nf_modules/star/mapping_paired.nf \
+  -c src/nf_modules/star/mapping_paired.config \
+  -profile singularity \
+  --index "results/mapping/index/*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" \
+  -resume
+fi