Skip to content
Snippets Groups Projects
Commit fd4214ae authored by Xavier Grand's avatar Xavier Grand
Browse files

Add STAR:2.7.8 modif RNAseq_XGR.nf & arriba_fusion.nf

parent 82f27265
No related branches found
No related tags found
No related merge requests found
# STAR 2.7.8a image built on top of the Biocontainers STAR build.
FROM quay.io/biocontainers/star:2.7.8a--0
# MAINTAINER is deprecated; the maintainer is recorded as an image label instead.
LABEL maintainer="Xavier Grand"
#!/bin/sh
# Fetch the published STAR image, then rebuild it for amd64 and arm64 and push
# the result to the registry.
IMAGE="xgrand/star:2.7.8a"
CONTEXT="src/.docker_modules/star/2.7.8a"

docker pull "${IMAGE}"

# Single-platform alternative, kept for reference:
# docker build src/.docker_modules/star/2.7.8a/ -t 'xgrand/star:2.7.8a'
# docker push xgrand/star:2.7.8a
docker buildx build --platform linux/amd64,linux/arm64 -t "${IMAGE}" --push "${CONTEXT}"
\ No newline at end of file
...@@ -22,7 +22,7 @@ def helpMessage() { ...@@ -22,7 +22,7 @@ def helpMessage() {
Usage: Usage:
The typical command for running the pipeline is as follows: The typical command for running the pipeline is as follows:
nextflow ./src/star_fusion.nf -c ./src/nextflow.config -profile singularity nextflow ./src/RNAseq_XGR.nf -c ./src/nextflow.config -profile singularity
Mandatory arguments: Mandatory arguments:
--project [path] Path to the project folder. Results are saved in this folder. --project [path] Path to the project folder. Results are saved in this folder.
......
...@@ -30,10 +30,10 @@ def helpMessage() { ...@@ -30,10 +30,10 @@ def helpMessage() {
Available: docker, singularity, podman, psmn, ccin2p3 Available: docker, singularity, podman, psmn, ccin2p3
Input: Input:
--fastq [path] Path to fastq folder. --fastq [path] Path to fastq files.
--bam [path] Path to the bam-containing folder. --bam [path] Path to the bam files.
References: References: Can be downloaded with download_references.sh (not implemented in pipeline).
--genome [path] Path to genome reference fasta file. --genome [path] Path to genome reference fasta file.
--gtf [path] Path to genome annotation gtf file. --gtf [path] Path to genome annotation gtf file.
...@@ -61,10 +61,11 @@ if (params.help || params.h) { ...@@ -61,10 +61,11 @@ if (params.help || params.h) {
*/ */
params.project = "" params.project = ""
params.bam_folder = ""
params.genome = ""
params.gtf = ""
params.bam = "" params.bam = ""
params.fastq = "" params.fastq = ""
// Both references are mandatory: stop immediately when one is missing or does not exist.
// `path` is only a process input/output qualifier; `file()` is the script-level
// helper that resolves a path and, with checkIfExists, fails if it is absent.
if (params.genome) { params.genome = file(params.genome, checkIfExists: true) } else { exit 1, "No genome specified." }
if (params.gtf) { params.gtf = file(params.gtf, checkIfExists: true) } else { exit 1, "No annotation specified." }
/* Params out */ /* Params out */
params.fastp_out = "$params.project/fastp/" params.fastp_out = "$params.project/fastp/"
...@@ -80,6 +81,12 @@ params.index_bam_out = "$params.project/Bam_filt_sort_indexed/" ...@@ -80,6 +81,12 @@ params.index_bam_out = "$params.project/Bam_filt_sort_indexed/"
log.info "Reference genome : ${params.genome}" log.info "Reference genome : ${params.genome}"
log.info "Genome annotation : ${params.gtf}" log.info "Genome annotation : ${params.gtf}"
// Report which input set is used. The test is on params.bam (the --bam option),
// the same parameter the main workflow branches on, and the message reads the
// parameter itself rather than an undefined `bam` variable.
if (params.bam != "") {
    log.info "bam files (--bam): ${params.bam}"
}
else {
    log.info "fastq files (--fastq): ${params.fastq}"
}
/* /*
**************************************************************** ****************************************************************
...@@ -87,23 +94,28 @@ log.info "Genome annotation : ${params.gtf}" ...@@ -87,23 +94,28 @@ log.info "Genome annotation : ${params.gtf}"
**************************************************************** ****************************************************************
*/ */
if(params.bam != "") { if(params.bam_folder != "") {
Channel Channel
.fromPath( params.bam ) .fromPath( params.bam )
.ifEmpty { error "Cannot find any bam files in: ${params.bam}" }
.map { it -> [it.simpleName, it]}
.set { bam_files } .set { bam_files }
} }
else { else {
Channel Channel
.fromFilePairs( params.fastq, size = -1 ) .fromFilePairs( params.fastq, size: -1)
.set(fastq_files) .set { fastq_files }
} }
Channel Channel
.fromPath( params.genome ) .fromPath( params.genome )
.ifEmpty { error "Cannot find any fasta files in: ${params.genome}" }
.map { it -> [it.simpleName, it]}
.set { genome } .set { genome }
Channel Channel
.fromPath( params.gtf ) .fromPath( params.gtf )
.ifEmpty { error "Cannot find any annotation files in: ${params.gtf}" }
.set { gtf } .set { gtf }
/* /*
...@@ -113,9 +125,11 @@ Channel ...@@ -113,9 +125,11 @@ Channel
*/ */
include { fastp } from './nf_modules/fastp/main.nf' include { fastp } from './nf_modules/fastp/main.nf'
include { fastqc_fastq as fastqc_raw } from fastqc_mod addParams(fastqc_fastq_out: "$params.project/01_fastqc_raw/") include { fastqc_fastq as fastqc_raw } from './nf_modules/fastqc/main.nf' addParams(fastqc_fastq_out: "$params.project/01_fastqc_raw/")
include { fastqc_fastq as fastqc_preprocessed } from fastqc_mod addParams(fastqc_fastq_out: "$params.project/02_fastqc_preprocessed/") include { fastqc_fastq as fastqc_preprocessed } from './nf_modules/fastqc/main.nf' addParams(fastqc_fastq_out: "$params.project/02_fastqc_preprocessed/")
include { multiqc } from './nf_modules/multiqc/main.nf' addParams(multiqc_out: "$params.project/QC/") include { multiqc } from './nf_modules/multiqc/main.nf' addParams(multiqc_out: "$params.project/QC/")
include { index_with_gtf } from './nf_modules/star/main_2.7.8a.nf' addParams(star_mapping_fastq_out: "$params.project/STAR_index/")
include { mapping_fastq_withChimeric } from './nf_modules/star/main_2.7.8a.nf' addParams(star_mapping_fastq_out: "$params.project/STAR/")
include { arriba } from "./nf_modules/arriba/main.nf" include { arriba } from "./nf_modules/arriba/main.nf"
/* /*
...@@ -127,26 +141,20 @@ include { arriba } from "./nf_modules/arriba/main.nf" ...@@ -127,26 +141,20 @@ include { arriba } from "./nf_modules/arriba/main.nf"
workflow { workflow {
if(params.bam == ""){ if(params.bam == ""){
fastp() fastp(fastq_files)
fastqc_raw() // fastqc_raw(fastq_files.collect())
fastqc_preprocessed() // fastqc_preprocessed(fastp_out.fastq.collect())
multiqc() // multiqc(fastqc_raw_out.report)
.mix( // .mix(
fastqc_preprocessed.out.report // fastqc_preprocessed.out.report
).collect() // ).collect()
index_fasta() index_with_gtf(genome, gtf)
mapping_fastq() // mapping_fastq_withChimeric(index_fasta_out.index, fastp_out.fastq)
filter_bam_quality() // filter_bam_quality(mapping_fastq_withChimeric_out.bam)
sort_bam() // arriba()
index_bam() }
else {
arriba(bam_files, gtf, genome)
} }
//###################### ARRIBA FUSION ########################
arriba(fastq_files, gtf, genome)
//################ GRAPHICAL REPRESENTATIONS ##################
} }
\ No newline at end of file
...@@ -18,7 +18,7 @@ profiles { ...@@ -18,7 +18,7 @@ profiles {
docker.enabled = true docker.enabled = true
process { process {
errorStrategy = 'finish' errorStrategy = 'finish'
memory = '16GB' memory = '15GB'
withLabel: big_mem_mono_cpus { withLabel: big_mem_mono_cpus {
cpus = 1 cpus = 1
} }
...@@ -47,7 +47,7 @@ profiles { ...@@ -47,7 +47,7 @@ profiles {
podman.enabled = true podman.enabled = true
process { process {
errorStrategy = 'finish' errorStrategy = 'finish'
memory = '16GB' memory = '15GB'
withLabel: big_mem_mono_cpus { withLabel: big_mem_mono_cpus {
cpus = 1 cpus = 1
} }
...@@ -77,7 +77,7 @@ profiles { ...@@ -77,7 +77,7 @@ profiles {
singularity.cacheDir = "./bin/" singularity.cacheDir = "./bin/"
process { process {
errorStrategy = 'finish' errorStrategy = 'finish'
memory = '16GB' memory = '15GB'
withLabel: big_mem_mono_cpus { withLabel: big_mem_mono_cpus {
cpus = 1 cpus = 1
} }
......
// STAR release targeted by this module; interpolated into the container tag below.
version = "2.7.8a"
// Container image used by the STAR processes of this module.
container_url = "xgrand/star:${version}"
// Sub-folder (under results/) where the mapping processes publish their outputs;
// the mapping processes only declare a publishDir when this is not empty.
params.star_mapping_fastq_out = ""
// Converts an annotation file into a GTF file with gffread.
// Input : tuple (genome_id, gff3_file).
// Output: "${genome_id}.gtf", emitted as `gtf`.
process gff3_2_gtf {
// NOTE(review): the image is referenced without a tag — consider pinning a version.
container = "dceoy/cufflinks"
label "small_mem_mono_cpus"
input:
tuple val(genome_id), path(gff3_file)
output:
path "${genome_id}.gtf", emit: gtf
script:
// The output file is named after genome_id so it matches the declared output.
"""
gffread ${gff3_file} -T -o ${genome_id}.gtf
"""
}
// Builds a STAR genome index from a fasta file and a GTF annotation.
// Input : tuple (genome_id, genome_fasta) and a GTF file.
// Output: tuple (genome_id, every file matching "*" in the task directory), emitted as `index`.
process index_with_gtf {
container = "${container_url}"
label "big_mem_multi_cpus"
input:
tuple val(genome_id), path(genome_fasta)
path gtf_file
output:
tuple val(genome_id), path ("*"), emit: index
script:
// The index is written directly into the task directory (--genomeDir ./).
// --genomeSAindexNbases is hard-coded to 13; the inline shell comment records the
// formula it is meant to follow, so it may need adjusting for other genome sizes.
"""
STAR --runThreadN ${task.cpus} --runMode genomeGenerate \
--genomeDir ./ \
--genomeFastaFiles ${genome_fasta} \
--sjdbGTFfile ${gtf_file} \
--genomeSAindexNbases 13 # min(14, log2(GenomeLength)/2 - 1)
"""
}
// Sub-workflow: converts the supplied annotation with gff3_2_gtf, then builds the
// STAR index from the genome and the converted GTF.
// The index channel is exposed under the name `report`.
workflow index_with_gff {
    take:
        genome_fasta
        gff_file

    main:
        gff3_2_gtf(gff_file)
        converted_annotation = gff3_2_gtf.out.gtf
        index_with_gtf(genome_fasta, converted_annotation)

    emit:
        report = index_with_gtf.out.index
}
// Builds a STAR genome index from a fasta file only (no annotation is passed to STAR).
// Input : tuple (genome_id, genome_fasta).
// Output: tuple (genome_id, every file matching "*" in the task directory), emitted as `index`.
process index_without_gff {
container = "${container_url}"
label "big_mem_multi_cpus"
input:
tuple val(genome_id), path(genome_fasta)
output:
tuple val(genome_id), path ("*"), emit: index
script:
// The index is written directly into the task directory (--genomeDir ./).
// --genomeSAindexNbases is hard-coded to 13; the inline shell comment records the
// formula it is meant to follow, so it may need adjusting for other genome sizes.
"""
STAR --runThreadN ${task.cpus} --runMode genomeGenerate \
--genomeDir ./ \
--genomeFastaFiles ${genome_fasta} \
--genomeSAindexNbases 13 # min(14, log2(GenomeLength)/2 - 1)
"""
}
// Aligns single-end or paired-end reads against a STAR index and produces a
// coordinate-sorted BAM.
// Input : tuple (index_id, index files) and tuple (reads_id, reads).
// Output: the STAR "*.Log.final.out" report (emit: report) and
//         tuple (reads_id, "*.bam") (emit: bam).
// Results are published under results/${params.star_mapping_fastq_out} when that
// parameter is not empty.
process mapping_fastq {
    container = "${container_url}"
    label "big_mem_multi_cpus"
    if (params.star_mapping_fastq_out != "") {
        publishDir "results/${params.star_mapping_fastq_out}", mode: 'copy'
    }

    input:
        tuple val(index_id), path(index)
        tuple val(reads_id), path(reads)

    output:
        path "*.Log.final.out", emit: report
        tuple val(reads_id), path("*.bam"), emit: bam

    script:
    // reads_id may be a List; only its first element is used to name files, so
    // the prefix is always a plain string.
    def file_prefix = (reads_id instanceof List) ? reads_id[0] : reads_id
    // Two staged files are passed as mate 1 / mate 2, anything else is passed as is.
    def read_files = (reads.size() == 2) ? "${reads[0]} ${reads[1]}" : "${reads}"
    """
    mkdir -p index
    mv ${index} index/
    STAR --runThreadN ${task.cpus} \
    --genomeDir index/ \
    --readFilesCommand zcat \
    --readFilesIn ${read_files} \
    --outFileNamePrefix ${file_prefix}. \
    --alignIntronMax 10000 \
    --outSAMtype BAM SortedByCoordinate \
    --outSAMstrandField intronMotif
    mv ${file_prefix}.Aligned.sortedByCoord.out.bam ${file_prefix}.bam
    """
}
// Aligns single-end or paired-end reads against a STAR index with chimeric
// alignment detection switched on, and produces a coordinate-sorted BAM in which
// the chimeric alignments are reported (--chimOutType WithinBAM).
// Input : tuple (index_id, index files) and tuple (reads_id, reads).
// Output: the STAR "*.Log.final.out" report (emit: report) and
//         tuple (reads_id, "*.bam") (emit: bam).
// Results are published under results/${params.star_mapping_fastq_out} when that
// parameter is not empty.
process mapping_fastq_withChimeric {
    container = "${container_url}"
    label "big_mem_multi_cpus"
    if (params.star_mapping_fastq_out != "") {
        publishDir "results/${params.star_mapping_fastq_out}", mode: 'copy'
    }

    input:
        tuple val(index_id), path(index)
        tuple val(reads_id), path(reads)

    output:
        path "*.Log.final.out", emit: report
        tuple val(reads_id), path("*.bam"), emit: bam

    script:
    // reads_id may be a List; only its first element is used to name files, so
    // the prefix is always a plain string.
    def file_prefix = (reads_id instanceof List) ? reads_id[0] : reads_id
    // Two staged files are passed as mate 1 / mate 2, anything else is passed as is.
    def read_files = (reads.size() == 2) ? "${reads[0]} ${reads[1]}" : "${reads}"
    // STAR only searches for chimeric alignments when --chimSegmentMin is positive
    // (its default of 0 disables chimeric output), so it is set explicitly here;
    // without it --chimOutType has no effect.
    """
    mkdir -p index
    mv ${index} index/
    STAR --runThreadN ${task.cpus} \
    --genomeDir index/ \
    --readFilesCommand zcat \
    --readFilesIn ${read_files} \
    --outFileNamePrefix ${file_prefix}. \
    --alignIntronMax 10000 \
    --outSAMtype BAM SortedByCoordinate \
    --outSAMstrandField intronMotif \
    --chimSegmentMin 10 \
    --chimOutType WithinBAM
    mv ${file_prefix}.Aligned.sortedByCoord.out.bam ${file_prefix}.bam
    """
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment