diff --git a/src/chipster.nf b/src/chipster.nf index d753bffb4f707e74c70afdd0f237fd92be9c0bd5..5df367d8e8d733bb81e7b4cdc80df48323733f3b 100755 --- a/src/chipster.nf +++ b/src/chipster.nf @@ -7,51 +7,6 @@ nextflow.enable.dsl=2 * Preprocessing, filtration, alignment, peak calling... */ -/* - **************************************************************** - parameters - **************************************************************** -*/ - -params.paired_end = false -/* false for single end data, true for paired-end data - -@type: Boolean -*/ - -params.fastq = "./data/tiny-delta-te-dataset/fastq_rnaseq/*.gz" -/* Fastq files - -@type: Files -*/ - -params.genome = "./data/tiny-delta-te-dataset/synth.fasta" -/* A genome file - -@type: File -*/ - -params.chrom_sizes = "./data/tiny-delta-te-dataset/chrom.sizes" -/* samtools generated genome.sizes file: samtools faidx synth.fasta and cut -f 1,2 synth.fasta.fai > chrom.sizes - -@type: File -*/ - -// params.idx = "" -/* already indexed reference genome ? enter path... 
- -@Type: String -*/ - -/* Parametres ctrl names & IP names -utiliser l'oppérateur .join ou .filter - -/* Params project Name */ -params.project = "" - -/* Params from config file yaml: */ - -data = params.input.collect {k , v -> "${params.fastq}/${v.fastq.gz}"} /* Params out */ params.fastp_out = "$params.project/fastp/" @@ -59,11 +14,11 @@ params.index_fasta_out = "$params.project/Indexed_genome/" params.sort_bam_out = "$params.project/Bam_filtered_sorted/" params.index_bam_out = "$params.project/Bam_filt_sort_indexed/" params.bam_to_bigwig_out = "$params.project/BigWig/" -params.peak_calling_bg_out = "$params.project/Peak_calling/" +params.peak_calling_out = "$params.project/Peak_calling/" params.bam_to_bed_out = "$params.project/Bed/" params.bed_slop_out = "$params.project/Bed_sloped/" params.bedGraph_out = "$params.project/BedGraph/" -params.chipseq_bam2BG_out = "$params.project/chipseq_BigGig" +params.chipseq_bam2BW_out = "$params.project/chipseq_BigGig" /* **************************************************************** @@ -71,7 +26,7 @@ params.chipseq_bam2BG_out = "$params.project/chipseq_BigGig" **************************************************************** */ -log.info "fastq files : ${params.fastq}" +log.info "fastq folder : ${params.fastq_folder}" log.info "genome file : ${params.genome}" log.info "genome sizes : ${params.chrom_sizes}" /* log.info "indexed genome file : ${params.idxgenome}" */ @@ -89,15 +44,35 @@ log.info "output folder results/${params.folder}" /* Raw paired-end reads fastq */ if (params.paired_end) { + error "Not Implemented" + /* Channel .fromFilePairs( params.fastq, size: 2 ) //def une error .set { fastq_files } + */ } else { + + Channel + .fromPath( params.input.collect { k, v -> "${params.fastq_folder}/${v.fastq}" }) + .ifEmpty { error "No fastq file defined" } + .set { fastq_files } + + + Channel + .from( params.input.collect { k, v -> v.sample }) + .ifEmpty { error "No sample names given" } + .set { sample_names } + + Channel 
+ .from( params.input.collect { k, v -> v.condition }) + .ifEmpty { error "No condition defined" } + .set { condition_names } + Channel - .fromPath( params.fastq ) - .ifEmpty { error "Cannot find any files matching: ${params.fastq}" } - .map{it -> [(it.baseName =~ /([^\.]*)/)[0][1], [it] ]} - .set{ fastq_files } + .from( params.input.collect { k, v -> v.type }) + .ifEmpty { error "No sample types defined" } + .set { sample_types } + } /* @@ -144,18 +119,18 @@ Channel */ fastqc_mod = "./nf_modules/fastqc/main.nf" -include { fastp_default } from "./nf_modules/fastp/main.nf" +include { fastp_chipster } from "./nf_modules/fastp/main.nf" include { fastqc_fastq as fastqc_raw } from fastqc_mod addParams(fastqc_fastq_out: "$params.project/01_fastqc_raw/") include { fastqc_fastq as fastqc_preprocessed } from fastqc_mod addParams(fastqc_fastq_out: "$params.project/02_fastqc_preprocessed/") include { multiqc } from './nf_modules/multiqc/main.nf' addParams(multiqc_out: "$params.project/QC/") include { index_fasta } from "./nf_modules/bowtie2/main.nf" -include { mapping_fastq } from "./nf_modules/bowtie2/main.nf" -include { filter_bam_quality } from "./nf_modules/samtools/main.nf" -include { sort_bam } from "./nf_modules/samtools/main.nf" -include { index_bam } from "./nf_modules/samtools/main.nf" +include { mapping_fastq_chipster } from "./nf_modules/bowtie2/main.nf" +include { filter_bam_quality_chipster } from "./nf_modules/samtools/main.nf" +include { sort_bam_chipster } from "./nf_modules/samtools/main.nf" +include { index_bam_chipster } from "./nf_modules/samtools/main.nf" include { bam_to_bigwig } from "./nf_modules/deeptools/main.nf" -include { chipseq_bam2BG } from "./nf_modules/deeptools/main.nf" -include { peak_calling_bg } from "./nf_modules/macs3/main.nf" +include { chipseq_bam2BW_chipster } from "./nf_modules/deeptools/main.nf" +include { peak_calling } from "./nf_modules/macs3/main.nf" /* **************************************************************** @@ -166,12 
+141,12 @@ include { peak_calling_bg } from "./nf_modules/macs3/main.nf" workflow { // fastp - fastp_default(fastq_files) + fastp_chipster(fastq_files, sample_names, condition_names, sample_types) // fastqc_rawdata - fastqc_raw(fastq_files) + fastqc_raw(fastq_files.map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it] }) // fastqc_processed - fastqc_preprocessed(fastp_default.out.fastq) + fastqc_preprocessed(fastp_chipster.out.fastq.map { it -> [it [0], it[1]]}) // multiqc multiqc( fastqc_raw.out.report @@ -183,7 +158,7 @@ workflow { // index reference genome index_fasta(genome_file) // mapping preprocessed reads - mapping_fastq(index_fasta.out.index.collect(), fastp_default.out.fastq) + mapping_fastq_chipster(index_fasta.out.index.collect(), fastp_chipster.out.fastq) /*if (params.idxgenome == "") { index_fasta(genome_file) @@ -194,20 +169,26 @@ workflow { */ // filter bam - remove reads with quality <30 - filter_bam_quality(mapping_fastq.out.bam) + filter_bam_quality_chipster(mapping_fastq_chipster.out.bam) // samtools_sort - sort_bam(filter_bam_quality.out.bam) + sort_bam_chipster(filter_bam_quality_chipster.out.bam) // samtools_index - index_bam(sort_bam.out.bam) + index_bam_chipster(sort_bam_chipster.out.bam) // Create a bigwig file // bam_to_bigwig(index_bam.out.bam_idx) // Chipseq Bam 2 bigwig file with reads extends - chipseq_bam2BG(index_bam.out.bam_idx) + chipseq_bam2BW_chipster(index_bam_chipster.out.bam_idx) + + index_bam_chipster.out.bam_idx.groupTuple(by: 3).set { combined_bams } + combined_bams.map { it -> if(it[4][0] == 'IP') { [it[3], it[1][0], it[1][1]] } else {[ it[3], it[1][1], it[1][0]]} }.set { peak_calling_channel_in } // peak calling using MACS3 Prend des bed ou des bam en entrée... 
- // peak_calling_bg() -} \ No newline at end of file + peak_calling(peak_calling_channel_in) +} + +/* input: + tuple val(file_id), path(bam_ip), path(bam_control) */ \ No newline at end of file diff --git a/src/config.yml b/src/config.yml index 2135abcdd0b58757e9df19aa98aa0807ff3a2d28..59759bfe1ea928b91112d126c3d27332dccbb4b5 100644 --- a/src/config.yml +++ b/src/config.yml @@ -1,16 +1,16 @@ -input: - # A row defines some features to describe a sample to analyse. - # You can add as many row as you want below each other. Be sure that - # the name of the row is the same as the file name witout extension. - # project name, +# A row defines some features to describe a sample to analyse. +# You can add as many rows as you want below each other. Be sure that +# the name of the row is the same as the file name without extension. - # boolean value to setup sequencing type (paired-end or single-end) - paired-end: FALSE +# project name, - # directory containing fastq files (rawdata) - fastq_folder: "" +# boolean value to set up sequencing type (paired-end or single-end) +paired-end: FALSE +# directory containing fastq files (rawdata) +fastq_folder: "" +samples: row1: # sample must be a string. It corresponds to the name of the sample sample: "5Y_siDDX_CTCF" @@ -28,6 +28,6 @@ input: type: "Input" - # Under construction: - # Organism (hg19, GRCH38, HBV...)
default hg19 for FasterDB compatibility +# organism: "" \ No newline at end of file diff --git a/src/nf_modules/deeptools/main.nf b/src/nf_modules/deeptools/main.nf index 8a4b793293f486133d3019c928b46ef8665b465b..aebc47a32aa277e3d600ac86de7138b793c38469 100644 --- a/src/nf_modules/deeptools/main.nf +++ b/src/nf_modules/deeptools/main.nf @@ -105,21 +105,25 @@ plotProfile -m ${matrix} \ """ } -params.chipseq_bam2BG = "" -params.chipseq_bam2BG_out = "" -process chipseq_bam2BG { +// Implemented by Xavier Grand for the ChIP-seq pipeline named chipster +// Genome size has a hard-coded default in params.genome_size; set that parameter to change +// the value passed to --effectiveGenomeSize in the command line +params.chipseq_bam2BW = "" +params.chipseq_bam2BW_out = "" +params.genome_size = 2913022398 +process chipseq_bam2BW_chipster { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" - if (params.chipseq_bam2BG_out != "") { - publishDir "results/${params.chipseq_bam2BG_out}", mode: 'copy' + if (params.chipseq_bam2BW_out != "") { + publishDir "results/${params.chipseq_bam2BW_out}", mode: 'copy' } input: - tuple val(file_id), path(bam), path(idx) + tuple val(file_id), path(bam), path(idx), val(condition), val(type) output: - tuple val(file_id), path("*.bw"), emit: bw + tuple val(file_id), path("*.bw"), val(condition), val(type), emit: bw script: """ @@ -127,7 +131,7 @@ bamCoverage -p ${task.cpus} -b ${bam} \ --binSize 10 \ --ignoreDuplicates \ --extendReads 200 \ - --effectiveGenomeSize 2913022398 \ + --effectiveGenomeSize ${params.genome_size} \ -o ${bam.simpleName}.bw \ """ } \ No newline at end of file diff --git a/src/nf_modules/fastp/main.nf b/src/nf_modules/fastp/main.nf index 2593eefecab24037db7d80f213b83db29e0092b4..6f0f0c4df1ab59923248c0763e89b78ffe3ed3e0 100644 --- a/src/nf_modules/fastp/main.nf +++ b/src/nf_modules/fastp/main.nf @@ -79,6 +79,62 @@ process fastp_default { """ } + +process fastp_chipster { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_prefix" + if (params.fastp_out
!= "") { + publishDir "results/${params.fastp_out}", mode: 'copy' + } + + input: + path reads + val file_id + val condition + val type + + output: + tuple val(file_id), path("*_trim.fastq.gz"), val(condition), val(type), emit: fastq + tuple val(file_id), path("${file_prefix}.html"), emit: html + tuple val(file_id), path("${file_prefix}_fastp.json"), emit: report + + script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } + if (reads.size() == 2) + """ + fastp --thread ${task.cpus} \ + --qualified_quality_phred 20 \ + --disable_length_filtering \ + --detect_adapter_for_pe \ + ${params.fastp} \ + --in1 ${reads[0]} \ + --in2 ${reads[1]} \ + --out1 ${file_prefix}_R1_trim.fastq.gz \ + --out2 ${file_prefix}_R2_trim.fastq.gz \ + --html ${file_prefix}.html \ + --json ${file_prefix}_fastp.json \ + --report_title ${file_prefix} + """ + else + """ + fastp --thread ${task.cpus} \ + --qualified_quality_phred 20 \ + --disable_length_filtering \ + --detect_adapter_for_pe \ + ${params.fastp} \ + --in1 ${reads[0]} \ + --out1 ${file_prefix}_trim.fastq.gz \ + --html ${file_prefix}.html \ + --json ${file_prefix}_fastp.json \ + --report_title ${file_prefix} + """ +} + process fastp_accel_1splus { container = "${container_url}" label "big_mem_multi_cpus" diff --git a/src/nf_modules/samtools/main.nf b/src/nf_modules/samtools/main.nf index 4228dc5f272a7a118ef36efe1f2562fdb5b4ce94..b5191fc4557651892a5c1939fd3eed1a6c679469 100644 --- a/src/nf_modules/samtools/main.nf +++ b/src/nf_modules/samtools/main.nf @@ -45,6 +45,27 @@ samtools view -@ ${task.cpus} -hb ${bam} ${params.filter_bam_quality} > \ """ } + +process filter_bam_quality_chipster { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.filter_bam_quality_out != "") { + publishDir "results/${params.filter_bam_quality_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam), val(condition), val(type) + + output: + tuple val(file_id), 
path("*_filtered.bam"), val(condition), val(type), emit: bam + script: +""" +samtools view -@ ${task.cpus} -hb ${bam} ${params.filter_bam_quality} > \ + ${bam.simpleName}_filtered.bam +""" +} + params.filter_bam = "" params.filter_bam_out = "" process filter_bam { @@ -133,6 +154,28 @@ samtools index ${params.index_bam} ${bam} """ } + +process index_bam_chipster { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + if (params.index_bam_out != "") { + publishDir "results/${params.index_bam_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam), val(condition), val(type) + + output: + tuple val(file_id), path("${bam}"), path("*.bam.bai"), val(condition), val(type), emit: bam_idx + + script: +""" +samtools index ${params.index_bam} ${bam} +""" +} + + params.sort_bam = "" params.sort_bam_out = "" process sort_bam { @@ -155,6 +198,27 @@ samtools sort -@ ${task.cpus} ${params.sort_bam} -O BAM -o ${bam.simpleName}_sor """ } + +process sort_bam_chipster { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$file_id" + if (params.sort_bam_out != "") { + publishDir "results/${params.sort_bam_out}", mode: 'copy' + } + + input: + tuple val(file_id), path(bam), val(condition), val(type) + + output: + tuple val(file_id), path("*.bam*"), val(condition), val(type), emit: bam + + script: +""" +samtools sort -@ ${task.cpus} ${params.sort_bam} -O BAM -o ${bam.simpleName}_sorted.bam ${bam} +""" +} + params.split_bam = "" params.split_bam_out = "" process split_bam { diff --git a/src/test.nf b/src/test.nf index be6c7083286ee4bcd8d048a875ff52b83131780b..34298508b98569fe986bd931154d16432244baf7 100644 --- a/src/test.nf +++ b/src/test.nf @@ -1,7 +1,7 @@ nextflow.enable.dsl=2 -Channel +/* Channel .from([[1, "fastq1.fq"], [2, "fastq2.fq"], [3, "fastq3.fq"], [4, "fastq4.fq"]]) .set{ fastq_files } @@ -13,4 +13,7 @@ Channel // fastq_files.join(sample_names).map{it -> [file(it[1]).baseName, it[1], it[2]]}.view() 
fastq_files.join(sample_names).set{ vals } -vals.combine(vals).filter { it -> (it[2] != it[5]) && (it[2] == "test") }.view() \ No newline at end of file +vals.combine(vals).filter { it -> (it[2] != it[5]) && (it[2] == "test") }.view() */ + + +println(params.genome) \ No newline at end of file