Verified Commit 7571ac5a authored by Laurent Modolo

Replace the original chip_quant_analysis.nf with the new one

parent b287e302
profiles {
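// Each profile below maps the process labels declared in the pipeline (via the
// `label` directive, e.g. a process labelled "bowtie2") to a container image and
// resource requests through `withLabel:` selectors; the profile itself is chosen
// at run time with `-profile <name>`.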
docker {
docker.temp = "auto"
docker.enabled = true
process {
withLabel: check_input {
container = "lbmc/csv_checkdesign_python:0.0.1"
cpus = 1
}
withLabel: bowtie2 {
container = "lbmc/bowtie2:2.3.4.1"
cpus = 4
}
withLabel: bowtie {
container = "lbmc/bowtie:1.2.2"
cpus = 4
}
withLabel: macs2 {
container = "lbmc/macs2:2.1.2"
cpus = 4
}
withLabel: fastqc {
container = "lbmc/fastqc:0.11.5"
cpus = 1
}
withLabel: multiQC {
container = "lbmc/multiqc:1.7"
cpus = 1
}
withLabel: cutadapt {
container = "lbmc/cutadapt:2.1"
cpus = 1
}
withLabel: picardtools {
container = "lbmc/picard:2.18.11"
cpus = 1
}
withLabel: bedtools {
container = "lbmc/bedtools:2.25.0"
cpus = 1
}
withLabel: samtools {
container = "lbmc/samtools:1.7"
cpus = 4
}
withLabel: sambamba {
container = "lbmc/sambamba:0.6.7"
cpus = 4
}
withLabel: deeptools {
container = "lbmc/deeptools:3.0.2"
cpus = 4
}
withLabel: R {
container = "lbmc/chip_quant_r:0.0.1"
cpus = 1
}
}
}
singularity {
singularity.enabled = true
singularity.cacheDir = "./bin/"
process {
withLabel: check_input {
container = "lbmc/csv_checkdesign_python:0.0.1"
cpus = 1
}
withLabel: bowtie2 {
container = "lbmc/bowtie2:2.3.4.1"
cpus = 4
}
withLabel: bowtie {
cpus = 4
container = "lbmc/bowtie:1.2.2"
}
withLabel: macs2 {
container = "lbmc/macs2:2.1.2"
cpus = 4
}
withLabel: fastqc {
cpus = 1
container = "lbmc/fastqc:0.11.5"
}
withLabel: multiQC {
container = "lbmc/multiqc:1.7"
cpus = 1
}
withLabel: cutadapt {
container = "lbmc/cutadapt:2.1"
cpus = 1
}
withLabel: picardtools {
container = "lbmc/picard:2.18.11"
cpus = 1
}
withLabel: bedtools {
container = "lbmc/bedtools:2.25.0"
cpus = 1
}
withLabel: samtools {
container = "lbmc/samtools:1.7"
cpus = 4
}
withLabel: sambamba {
container = "lbmc/sambamba:0.6.7"
cpus = 4
}
withLabel: deeptools {
container = "lbmc/deeptools:3.0.2"
cpus = 4
}
withLabel: R {
container = "lbmc/chip_quant_r:0.0.1"
cpus = 1
}
}
}
psmn {
singularity.enabled = true
singularity.cacheDir = "$baseDir/.singularity_psmn/"
singularity.runOptions = "--bind /Xnfs,/scratch"
process {
withLabel: check_input {
container = "lbmc/csv_checkdesign_python:0.0.1"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = "monointeldeb128"
}
withLabel: bowtie2 {
container = "lbmc/bowtie2:2.3.4.1"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 32
memory = "30GB"
time = "24h"
queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D"
penv = "openmp32"
}
withLabel: bowtie {
container = "lbmc/bowtie:1.2.2"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 32
memory = "30GB"
time = "24h"
queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D"
penv = "openmp32"
}
withLabel: macs2 {
container = "lbmc/macs2:2.1.2"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 32
memory = "30GB"
time = "24h"
queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D"
penv = "openmp32"
}
withLabel: fastqc {
container = "lbmc/fastqc:0.11.5"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = "monointeldeb128"
}
withLabel: multiQC {
container = "lbmc/multiqc:1.7"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "5GB"
time = "6h"
queue = "monointeldeb128"
}
withLabel: cutadapt {
container = "lbmc/cutadapt:2.1"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = "monointeldeb128"
}
withLabel: picardtools {
container = "lbmc/picard:2.18.11"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = "monointeldeb128"
}
withLabel: bedtools {
container = "lbmc/bedtools:2.25.0"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = "monointeldeb128"
}
withLabel: samtools {
container = "lbmc/samtools:1.7"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 32
memory = "30GB"
time = "24h"
queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D"
penv = "openmp32"
}
withLabel: deeptools {
container = "lbmc/deeptools:3.0.2"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 32
memory = "30GB"
time = "24h"
queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D"
penv = "openmp32"
}
withLabel: sambamba {
container = "lbmc/sambamba:0.6.7"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 32
memory = "30GB"
time = "24h"
queue = "CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D"
penv = "openmp32"
}
withLabel: R {
container = "lbmc/chip_quant_r:0.0.1"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "50GB"
time = "12h"
queue = "monointeldeb128"
}
}
}
ccin2p3 {
singularity.enabled = true
singularity.cacheDir = "$baseDir/.singularity_in2p3/"
singularity.runOptions = "--bind /pbs,/sps,/scratch"
process {
withLabel: check_input {
container = "lbmc/csv_checkdesign_python:0.0.1"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
withLabel: bowtie2 {
container = "lbmc/bowtie2:2.3.4.1"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
withLabel: bowtie {
container = "lbmc/bowtie:1.2.2"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
withLabel: fastqc {
container = "lbmc/fastqc:0.11.5"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
withLabel: multiQC {
container = "lbmc/multiqc:1.7"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
withLabel: cutadapt {
container = "lbmc/cutadapt:2.1"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
withLabel: macs2 {
container = "lbmc/macs2:2.1.2"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
withLabel: picardtools {
container = "lbmc/picard:2.18.11"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
withLabel: bedtools {
container = "lbmc/bedtools:2.25.0"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
withLabel: samtools {
container = "lbmc/samtools:1.7"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
withLabel: sambamba {
container = "lbmc/sambamba:0.6.7"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
withLabel: deeptools {
container = "lbmc/deeptools:3.0.2"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
withLabel: R {
container = "lbmc/chip_quant_r:0.0.1"
scratch = true
stageInMode = "copy"
stageOutMode = "rsync"
executor = "sge"
clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
cpus = 1
queue = "huge"
}
}
}
}
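// Example invocation selecting one of the profiles above (illustrative sketch
// only, every file name below is a placeholder):
//
//   nextflow run src/chip_quant_analysis.nf \
//     -c src/chip_quant_analysis.config \
//     -profile psmn \
//     --csv_path design.csv \
//     --fasta genome.fasta \
//     --fasta_calib calibration.fasta
//
// The docker and singularity profiles run the same container images locally,
// while psmn and ccin2p3 submit each process to an SGE scheduler.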
#!/usr/bin/env nextflow
/*
========================================================================================
QUANTITATIVE chip_analysis
========================================================================================
*/
nextflow.enable.dsl=2
def nfcoreHeader() {
// Log colors ANSI codes
c_reset = "\033[0m";
c_dim = "\033[2m";
c_black = "\033[0;30m";
c_green = "\033[0;32m";
c_yellow = "\033[0;33m";
c_blue = "\033[0;34m";
c_purple = "\033[0;35m";
c_cyan = "\033[0;36m";
c_white = "\033[0;37m";
return """ -${c_dim}--------------------------------------------------${c_reset}-
${c_green},--.${c_black}/${c_green},-.${c_reset}
${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset}
${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset}
${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset}
${c_green}`._,._,\'${c_reset}
${c_purple} QUANTITATIVE CHIP-SEQ Pipeline ${c_reset}
-${c_dim}--------------------------------------------------${c_reset}-
""".stripIndent()
}
def helpMessage() {
log.info nfcoreHeader()
log.info"""
Usage:
The typical command for running the pipeline is as follows:
nextflow run src/chip_quant_analysis.nf --csv_sra datafromSRA.csv --csv_path datafromPATH.csv --fasta <genome file> --fasta_calib <calibration genome file> -profile singularity
Required arguments:
--csv_path Full path to the CSV file listing the full paths of the input fastq files
--csv_sra Full path to the CSV file listing the SRA accessions of the input samples
Reference genome:
--fasta Full path to the genome fasta file
--fasta_calib Full path to the genome fasta file used for calibration
Mapping option:
--shortReads Specifies that the input reads are short, so the better suited mapper bowtie is used (default: bowtie2)
QC option:
--skipFastqc Skip the fastq quality control step (default: activated)
--skipMultiqc Skip merging the tool reports with multiqc (default: activated)
Statistics option:
--skipMapping_stats Skip building the Venn diagram of read origin proportions (default: activated)
--skipNorm_stats Skip building the occupancy plot used to check the normalization behaviour (default: activated)
Trimming option:
--trimming Activate the trimming step (default: deactivated)
--adapter_removal Activate the adapter removal step (default: deactivated)
Duplicate removal:
--duplicate_removal Activate the read duplicate removal step (default: deactivated)
Peak calling:
--macs_gsize Effective genome size (if not specified macs2 will not run)
Nextflow config:
-c Path to the config file: src/chip_analysis.config
-profile Profile used by nextflow to run the pipeline (choose between singularity, docker, psmn or ccin2p3)
For local use, choose singularity or docker
Save option:
--outdir Specify where to save the output of the nextflow run (default: "./results/")
Help message:
--help Print this help message
"""
.stripIndent()
}
////////////////////////////////////////////////////
/* -- DEFAULT PARAMETER VALUES -- */
////////////////////////////////////////////////////
params.help = false
params.adapter_removal = false
params.trimming = false
params.skipFastqc = false
params.skipMultiqc = false
params.shortReads = false
params.duplicate_removal = false
params.skipNorm_stats = false
params.skipMapping_stats = false
params.csv_path = ""
params.csv_sra = ""
params.fasta = false
params.fasta_calib = false
params.macs_gsize = false
// Show help message
if (params.help) {
helpMessage()
exit 0
}
// Header log info
log.info nfcoreHeader()
def summary = [:]
summary['Genome'] = params.fasta ?: 'Not supplied'
summary['Genome calibration'] = params.fasta_calib ?: 'Not supplied'
summary['CSV_Path'] = params.csv_path.size() > 0 ? params.csv_path : 'Not supplied'
summary['CSV_SRA'] = params.csv_sra.size() > 0 ? params.csv_sra : 'Not supplied'
summary['Trimming Step'] = params.trimming ? 'Yes' :'Skipped'
summary['Remove adapter'] = params.adapter_removal ? 'Yes' : 'Skipped'
summary['Reads QC'] = params.skipFastqc ? 'Skipped' : 'Yes'
summary['Merging Reports'] = params.skipMultiqc ? 'Skipped' : 'Yes'
summary['Mapper'] = params.shortReads ? 'Bowtie1' : 'Bowtie2'
summary['Mapping statistics'] = params.skipMapping_stats ? 'Skipped' : 'Yes'
summary['Remove Duplicate'] = params.duplicate_removal ? 'Yes': 'Skipped'
summary['MACS2 Genome Size'] = params.macs_gsize ?: 'Not supplied'
summary['Normalization stats'] = params.skipNorm_stats ? 'Skipped' : 'Yes'
summary['Config Profile'] = workflow.profile
log.info summary.collect { k,v -> "${k.padRight(20)}: $v" }.join("\n")
log.info "-\033[2m--------------------------------------------------\033[0m-"
// Show a big warning message if we're not running MACS
if (!params.macs_gsize) {
log.error """
=============================================================
WARNING! MACS genome size parameter not specified.
Peak calling analysis will be skipped.
Please specify a value for '--macs_gsize' to run these steps.
=============================================================
"""
}
Channel
.value(params.macs_gsize)
.set{ macs_gsize }
Channel
.fromPath( params.fasta, checkIfExists: true )
.ifEmpty { error """
=============================================================
WARNING! No genome fasta file specified.
Use '--fasta <my genome>'
Or '--help' for more information
=============================================================
""" }
.set { fasta }
Channel
.fromPath( params.fasta_calib, checkIfExists: true )
.ifEmpty { error """
=============================================================
WARNING! No calibration genome fasta file specified.
Use '--fasta_calib'
Or '--help' for more information
=============================================================
""" }
.set { fasta_calib }
def file_name_to_path (file_name) {
Channel
.fromFilePairs(file_name, size: -1)
.ifEmpty { error """
=============================================================
ERROR! Problem with the csv file format for ${file_name}
=============================================================
""" }
.first()
.getVal()
}
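// Usage sketch (hypothetical path): file_name_to_path("data/sample_R{1,2}.fastq.gz")
// lets fromFilePairs group the matching files under a common id (size: -1 accepts
// any number of files per group), and .first().getVal() returns that single
// [id, [files]] pair as a plain value rather than a channel.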
/*
Transform the csv table into a channel of tuples such that:
for paired_end data we have
[WCE_m_id, label, pair_id, [pair_id, [fastq_R1, fastq_R2]]]
for single_end data we have
[WCE_m_id, label, file_id, [file_id, fastq]]
where label is one of "IP_w", "WCE_w", "IP_m" or "WCE_m"
*/
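/*
Worked example (hypothetical, paired-end file names): a design row with
IP_w = "ip_wt_R{1,2}.fastq.gz" and WCE_m = "wce_mut_R{1,2}.fastq.gz" is first
expanded by flatMap into one entry per sample, keyed by the WCE_m file(s), e.g.
[[wce_mut_R1.fastq.gz, wce_mut_R2.fastq.gz], "IP_w", [ip_wt_R1.fastq.gz, ip_wt_R2.fastq.gz]]
and then mapped to
["wce_mut", "IP_w", "ip_wt", ["ip_wt", [ip_wt_R1.fastq.gz, ip_wt_R2.fastq.gz]]]
where the "_R1" suffix is stripped from the simpleName to build the ids.
*/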
if (params.csv_path.size() > 0) {
log.info "loading local csv files"
Channel
.fromPath(params.csv_path, checkIfExists: true)
.ifEmpty { error """
=============================================================
WARNING! No csv input file specified.
Use '--csv_path <file.csv>'
Or '--help' for more information
=============================================================
""" }
.splitCsv(header: true, sep: "\t")
.flatMap{
it -> [
[file(it.WCE_m), "IP_w", file(it.IP_w)],
[file(it.WCE_m), "WCE_w", file(it.WCE_w)],
[file(it.WCE_m), "IP_m", file(it.IP_m)],
[file(it.WCE_m), "WCE_m", file(it.WCE_m)]
]
}
.map{
it ->
if (it[1].size() >= 2){ // if data are paired_end
[
it[0][0].simpleName[0..-4], it[1], it[2][0].simpleName[0..-4],
[it[2][0].simpleName[0..-4], it[2]]
]
} else {
[
it[0].simpleName, it[1], it[2].simpleName,
[it[2].simpleName, it[2]]
]
}
}
.set{input_csv}
} else {
log.info "loading remotes SRA csv files"
Channel
.fromPath(params.csv_sra, checkIfExists: true)
.ifEmpty { error
log.error """
=============================================================
WARNING! No csv input file precised.
Use '--csv_path <file.csv>' or
Use '--csv_SRA <file.csv>'
Or '--help' for more informations
=============================================================
"""
}
.splitCsv(header: true, sep: "\t")
.flatMap{
it -> [
[it.WCE_m, "IP_w", it.IP_w],
[it.WCE_m, "WCE_w", it.WCE_w],
[it.WCE_m, "IP_m", it.IP_m],
[it.WCE_m, "WCE_m", it.WCE_m]
]
}
.map{
it ->
if (it[1].size() >= 2){ // if data are paired_end
[
it[0][0].simpleName[0..-4], it[1], it[2][0].simpleName[0..-4],
[it[2][0].simpleName[0..-4], it[2]]
]
} else {
[