Patch summary (free text before the first file header is ignored by patch tools)

Adds a STAR 2.7.8a container recipe and Nextflow module, rewires
src/arriba_fusion.nf to build its input channels from --bam / --fastq,
fixes the usage line of src/RNAseq_XGR.nf and lowers the default process
memory in src/nextflow.config from 16GB to 15GB.

Review notes on this revision of the patch:
  * arriba_fusion.nf tests params.bam everywhere (the channel, the help text
    and the workflow guard already use it) and logs params.bam rather than
    the undefined variable "bam".
  * arriba is fed gtf.first() / genome.first() so that the single-item
    reference channels do not limit the run to a single BAM file.
  * The STAR mapping processes use the computed file_prefix, and chimeric
    detection is actually enabled (--chimSegmentMin > 0) in
    mapping_fastq_withChimeric.
  * The "index" lines are carried over from the original patch; the
    post-image hashes do not reflect the edited lines.
  * Indentation and blank context lines were reconstructed from a
    whitespace-collapsed copy; re-generate the patch from the working tree
    before applying it.

diff --git a/src/.docker_modules/star/2.7.8a/Dockerfile b/src/.docker_modules/star/2.7.8a/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..32d712d67e2905fc28b9c7114d5acfcc3560b60f
--- /dev/null
+++ b/src/.docker_modules/star/2.7.8a/Dockerfile
@@ -0,0 +1,2 @@
+FROM quay.io/biocontainers/star:2.7.8a--0
+MAINTAINER Xavier Grand
diff --git a/src/.docker_modules/star/2.7.8a/docker_init.sh b/src/.docker_modules/star/2.7.8a/docker_init.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f39c19203c5cbba0f64b91c6cab13f4936814a5d
--- /dev/null
+++ b/src/.docker_modules/star/2.7.8a/docker_init.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+docker pull xgrand/star:2.7.8a
+# docker build src/.docker_modules/star/2.7.8a/ -t 'xgrand/star:2.7.8a'
+# docker push xgrand/star:2.7.8a
+docker buildx build --platform linux/amd64,linux/arm64 -t "xgrand/star:2.7.8a" --push src/.docker_modules/star/2.7.8a
\ No newline at end of file
diff --git a/src/RNAseq_XGR.nf b/src/RNAseq_XGR.nf
index a5bc73ee3644efe3acbb4ea32732ae7257b9b3c0..84e426aa452751a4b71eb432fab46ae301f745af 100644
--- a/src/RNAseq_XGR.nf
+++ b/src/RNAseq_XGR.nf
@@ -22,7 +22,7 @@ def helpMessage() {
     Usage:
 
     The typical command for running the pipeline is as follows:
-      nextflow ./src/star_fusion.nf -c ./src/nextflow.config -profile singularity
+      nextflow run ./src/RNAseq_XGR.nf -c ./src/nextflow.config -profile singularity
 
     Mandatory arguments:
       --project [path]                Path to the project folder. Results are saved in this folder.
diff --git a/src/arriba_fusion.nf b/src/arriba_fusion.nf
index c46b936689428c78e7abe878d294fb44a5ba9133..7c1b29aa2e0a0b53d8ab9e5738a2e7abb37894d6 100644
--- a/src/arriba_fusion.nf
+++ b/src/arriba_fusion.nf
@@ -30,10 +30,10 @@ def helpMessage() {
                                       Available: docker, singularity, podman, psmn, ccin2p3
 
     Input:
-      --fastq [path]                  Path to fastq folder.
-      --bam [path]                    Path to the bam-containing folder.
+      --fastq [path]                  Path to fastq files.
+      --bam [path]                    Path to the bam files.
 
-    References:
+    References:                       Can be downloaded with download_references.sh (not implemented in pipeline).
       --genome [path]                 Path to genome reference fasta file.
       --gtf [path]                    Path to genome annotation gtf file.
 
@@ -61,10 +61,11 @@ if (params.help || params.h) {
 */
 
 params.project = ""
+params.bam_folder = ""
+params.genome = ""
+params.gtf = ""
 params.bam = ""
 params.fastq = ""
-if (params.genome) { params.genome = path(params.genome, checkIfExists: true) } else { exit 1, "No genome specified." }
-if (params.gtf) { params.gtf = path(params.gtf, checkIfExists: true) } else { exit 1, "No annotation specified." }
 
 /* Params out */
 params.fastp_out = "$params.project/fastp/"
@@ -80,6 +81,12 @@ params.index_bam_out = "$params.project/Bam_filt_sort_indexed/"
 
 log.info "Reference genome : ${params.genome}"
 log.info "Genome annotation : ${params.gtf}"
+if(params.bam != "") {
+  log.info "bam files (--bam): ${params.bam}"
+}
+else {
+  log.info "fastq files (--fastq): ${params.fastq}"
+}
 
 /*
  ****************************************************************
@@ -87,23 +94,28 @@ log.info "Genome annotation : ${params.gtf}"
  ****************************************************************
 */
 
 if(params.bam != "") {
   Channel
     .fromPath( params.bam )
+    .ifEmpty { error "Cannot find any bam files in: ${params.bam}" }
+    .map { it -> [it.simpleName, it]}
     .set { bam_files }
 }
 else {
   Channel
-    .fromFilePairs( params.fastq, size = -1 )
-    .set(fastq_files)
+    .fromFilePairs( params.fastq, size: -1)
+    .set { fastq_files }
 }
 
 Channel
   .fromPath( params.genome )
+  .ifEmpty { error "Cannot find any fasta files in: ${params.genome}" }
+  .map { it -> [it.simpleName, it]}
   .set { genome }
 
 Channel
   .fromPath( params.gtf )
+  .ifEmpty { error "Cannot find any annotation files in: ${params.gtf}" }
   .set { gtf }
 
 /*
@@ -113,9 +125,11 @@ Channel
 */
 
 include { fastp } from './nf_modules/fastp/main.nf'
-include { fastqc_fastq as fastqc_raw } from fastqc_mod addParams(fastqc_fastq_out: "$params.project/01_fastqc_raw/")
-include { fastqc_fastq as fastqc_preprocessed } from fastqc_mod addParams(fastqc_fastq_out: "$params.project/02_fastqc_preprocessed/")
+include { fastqc_fastq as fastqc_raw } from './nf_modules/fastqc/main.nf' addParams(fastqc_fastq_out: "$params.project/01_fastqc_raw/")
+include { fastqc_fastq as fastqc_preprocessed } from './nf_modules/fastqc/main.nf' addParams(fastqc_fastq_out: "$params.project/02_fastqc_preprocessed/")
 include { multiqc } from './nf_modules/multiqc/main.nf' addParams(multiqc_out: "$params.project/QC/")
+include { index_with_gtf } from './nf_modules/star/main_2.7.8a.nf' addParams(star_mapping_fastq_out: "$params.project/STAR_index/")
+include { mapping_fastq_withChimeric } from './nf_modules/star/main_2.7.8a.nf' addParams(star_mapping_fastq_out: "$params.project/STAR/")
 include { arriba } from "./nf_modules/arriba/main.nf"
 
 /*
@@ -127,26 +141,20 @@ include { arriba } from "./nf_modules/arriba/main.nf"
 
 workflow {
   if(params.bam == ""){
-    fastp()
-    fastqc_raw()
-    fastqc_preprocessed()
-    multiqc()
-      .mix(
-        fastqc_preprocessed.out.report
-      ).collect()
-    index_fasta()
-    mapping_fastq()
-    filter_bam_quality()
-    sort_bam()
-    index_bam()
+    fastp(fastq_files)
+    // fastqc_raw(fastq_files.collect())
+    // fastqc_preprocessed(fastp_out.fastq.collect())
+    // multiqc(fastqc_raw_out.report)
+    //   .mix(
+    //     fastqc_preprocessed.out.report
+    //   ).collect()
+    index_with_gtf(genome, gtf)
+    // mapping_fastq_withChimeric(index_fasta_out.index, fastp_out.fastq)
+    // filter_bam_quality(mapping_fastq_withChimeric_out.bam)
+    // arriba()
+  }
+  else {
+    arriba(bam_files, gtf.first(), genome.first()) // value channels: references are reused for every bam file
   }
 
-
-
-
-  //###################### ARRIBA FUSION ########################
-
-  arriba(fastq_files, gtf, genome)
-
-  //################ GRAPHICAL REPRESENTATIONS ##################
 }
\ No newline at end of file
diff --git a/src/nextflow.config b/src/nextflow.config
index d954a5318b14d575366ce5a3e87963138f0549b8..5758b7d165b14e2970d85c7bf53b33a80d86d865 100644
--- a/src/nextflow.config
+++ b/src/nextflow.config
@@ -18,7 +18,7 @@ profiles {
     docker.enabled = true
     process {
       errorStrategy = 'finish'
-      memory = '16GB'
+      memory = '15GB'
       withLabel: big_mem_mono_cpus {
         cpus = 1
       }
@@ -47,7 +47,7 @@ profiles {
     podman.enabled = true
     process {
       errorStrategy = 'finish'
-      memory = '16GB'
+      memory = '15GB'
       withLabel: big_mem_mono_cpus {
         cpus = 1
       }
@@ -77,7 +77,7 @@ profiles {
     singularity.cacheDir = "./bin/"
     process {
       errorStrategy = 'finish'
-      memory = '16GB'
+      memory = '15GB'
       withLabel: big_mem_mono_cpus {
         cpus = 1
       }
diff --git a/src/nf_modules/star/main_2.7.8a.nf b/src/nf_modules/star/main_2.7.8a.nf
new file mode 100644
index 0000000000000000000000000000000000000000..a711426d86db32b46866e40b75daaadf6ef3049e
--- /dev/null
+++ b/src/nf_modules/star/main_2.7.8a.nf
@@ -0,0 +1,183 @@
+version = "2.7.8a"
+container_url = "xgrand/star:${version}"
+
+params.star_mapping_fastq_out = ""
+
+
+process gff3_2_gtf {
+  container = "dceoy/cufflinks"
+  label "small_mem_mono_cpus"
+
+  input:
+    tuple val(genome_id), path(gff3_file)
+  output:
+    path "${genome_id}.gtf", emit: gtf
+  script:
+"""
+gffread ${gff3_file} -T -o ${genome_id}.gtf
+"""
+}
+
+
+process index_with_gtf {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+
+  input:
+    tuple val(genome_id), path(genome_fasta)
+    path gtf_file
+
+  output:
+    tuple val(genome_id), path ("*"), emit: index
+
+  script:
+"""
+STAR --runThreadN ${task.cpus} --runMode genomeGenerate \
+--genomeDir ./ \
+--genomeFastaFiles ${genome_fasta} \
+--sjdbGTFfile ${gtf_file} \
+--genomeSAindexNbases 13 # min(14, log2(GenomeLength)/2 - 1)
+"""
+}
+
+workflow index_with_gff {
+  take:
+    genome_fasta
+    gff_file
+  main:
+    gff3_2_gtf(gff_file)
+    index_with_gtf(genome_fasta,gff3_2_gtf.out.gtf)
+  emit:
+    report = index_with_gtf.out.index
+}
+
+
+process index_without_gff {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+
+  input:
+    tuple val(genome_id), path(genome_fasta)
+
+  output:
+    tuple val(genome_id), path ("*"), emit: index
+
+  script:
+"""
+STAR --runThreadN ${task.cpus} --runMode genomeGenerate \
+--genomeDir ./ \
+--genomeFastaFiles ${genome_fasta} \
+--genomeSAindexNbases 13 # min(14, log2(GenomeLength)/2 - 1)
+"""
+}
+
+
+process mapping_fastq {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  if (params.star_mapping_fastq_out != "") {
+    publishDir "results/${params.star_mapping_fastq_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(index_id), path(index)
+    tuple val(reads_id), path(reads)
+
+  output:
+    path "*.Log.final.out", emit: report
+    tuple val(reads_id), path("*.bam"), emit: bam
+
+  script:
+if (reads_id instanceof List){
+    file_prefix = reads_id[0]
+  } else {
+    file_prefix = reads_id
+  }
+
+if (reads.size() == 2)
+"""
+mkdir -p index
+mv ${index} index/
+STAR --runThreadN ${task.cpus} \
+--genomeDir index/ \
+--readFilesCommand zcat \
+--readFilesIn ${reads[0]} ${reads[1]} \
+--outFileNamePrefix ${file_prefix}. \
+--alignIntronMax 10000 \
+--outSAMtype BAM SortedByCoordinate \
+--outSAMstrandField intronMotif
+
+mv ${file_prefix}.Aligned.sortedByCoord.out.bam ${file_prefix}.bam
+"""
+else
+"""
+mkdir -p index
+mv ${index} index/
+STAR --runThreadN ${task.cpus} \
+--genomeDir index/ \
+--readFilesCommand zcat \
+--readFilesIn ${reads} \
+--outFileNamePrefix ${file_prefix}. \
+--alignIntronMax 10000 \
+--outSAMtype BAM SortedByCoordinate \
+--outSAMstrandField intronMotif
+
+mv ${file_prefix}.Aligned.sortedByCoord.out.bam ${file_prefix}.bam
+"""
+}
+
+process mapping_fastq_withChimeric {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  if (params.star_mapping_fastq_out != "") {
+    publishDir "results/${params.star_mapping_fastq_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(index_id), path(index)
+    tuple val(reads_id), path(reads)
+
+  output:
+    path "*.Log.final.out", emit: report
+    tuple val(reads_id), path("*.bam"), emit: bam
+
+  script:
+if (reads_id instanceof List){
+    file_prefix = reads_id[0]
+  } else {
+    file_prefix = reads_id
+  }
+
+if (reads.size() == 2)
+"""
+mkdir -p index
+mv ${index} index/
+STAR --runThreadN ${task.cpus} \
+--genomeDir index/ \
+--readFilesCommand zcat \
+--readFilesIn ${reads[0]} ${reads[1]} \
+--outFileNamePrefix ${file_prefix}. \
+--alignIntronMax 10000 \
+--outSAMtype BAM SortedByCoordinate \
+--outSAMstrandField intronMotif \
+--chimSegmentMin 10 --chimOutType WithinBAM
+
+mv ${file_prefix}.Aligned.sortedByCoord.out.bam ${file_prefix}.bam
+"""
+else
+"""
+mkdir -p index
+mv ${index} index/
+STAR --runThreadN ${task.cpus} \
+--genomeDir index/ \
+--readFilesCommand zcat \
+--readFilesIn ${reads} \
+--outFileNamePrefix ${file_prefix}. \
+--alignIntronMax 10000 \
+--outSAMtype BAM SortedByCoordinate \
+--outSAMstrandField intronMotif \
+--chimSegmentMin 10 --chimOutType WithinBAM
+
+mv ${file_prefix}.Aligned.sortedByCoord.out.bam ${file_prefix}.bam
+"""
+}
\ No newline at end of file