Commit 50014602 authored by Xavier Grand's avatar Xavier Grand
Browse files

modification complete avec Nicolas, intégration de tous les paramètre du...

modification complete avec Nicolas, intégration de tous les paramètre du fichier de config dans les channels
parent dbe9d679
......@@ -7,51 +7,6 @@ nextflow.enable.dsl=2
* Preprocessing, filtration, alignment, peak calling...
*/
/*
****************************************************************
parameters
****************************************************************
*/
params.paired_end = false
/* false for single end data, true for paired-end data
@type: Boolean
*/
params.fastq = "./data/tiny-delta-te-dataset/fastq_rnaseq/*.gz"
/* Fastq files
@type: Files
*/
params.genome = "./data/tiny-delta-te-dataset/synth.fasta"
/* A genome file
@type: File
*/
params.chrom_sizes = "./data/tiny-delta-te-dataset/chrom.sizes"
/* samtools generated genome.sizes file: samtools faidx synth.fasta and cut -f 1,2 synth.fasta.fai > chrom.sizes
@type: File
*/
// params.idx = ""
/* already indexed reference genome ? enter path...
@Type: String
*/
/* Parameters for control names & IP names:
use the .join or .filter operator
/* Params project Name */
params.project = ""
/* Params from config file yaml: */
data = params.input.collect {k , v -> "${params.fastq}/${v.fastq.gz}"}
/* Params out */
params.fastp_out = "$params.project/fastp/"
......@@ -59,11 +14,11 @@ params.index_fasta_out = "$params.project/Indexed_genome/"
params.sort_bam_out = "$params.project/Bam_filtered_sorted/"
params.index_bam_out = "$params.project/Bam_filt_sort_indexed/"
params.bam_to_bigwig_out = "$params.project/BigWig/"
params.peak_calling_bg_out = "$params.project/Peak_calling/"
params.peak_calling_out = "$params.project/Peak_calling/"
params.bam_to_bed_out = "$params.project/Bed/"
params.bed_slop_out = "$params.project/Bed_sloped/"
params.bedGraph_out = "$params.project/BedGraph/"
params.chipseq_bam2BG_out = "$params.project/chipseq_BigGig"
params.chipseq_bam2BW_out = "$params.project/chipseq_BigGig"
/*
****************************************************************
......@@ -71,7 +26,7 @@ params.chipseq_bam2BG_out = "$params.project/chipseq_BigGig"
****************************************************************
*/
log.info "fastq files : ${params.fastq}"
log.info "fastq folder : ${params.fastq_folder}"
log.info "genome file : ${params.genome}"
log.info "genome sizes : ${params.chrom_sizes}"
/* log.info "indexed genome file : ${params.idxgenome}" */
......@@ -89,15 +44,35 @@ log.info "output folder results/${params.folder}"
/* Raw paired-end reads fastq */
if (params.paired_end) {
error "Not Implemented"
/*
Channel
.fromFilePairs( params.fastq, size: 2 ) //def une error
.set { fastq_files }
*/
} else {
Channel
.fromPath( params.input.collect { k, v -> "${params.fastq_folder}/${v.fastq}" })
.ifEmpty { error "No fastq file defined" }
.set { fastq_files }
Channel
.from( params.input.collect { k, v -> v.sample })
.ifEmpty { error "No sample names given" }
.set { sample_names }
Channel
.from( params.input.collect { k, v -> v.condition })
.ifEmpty { error "No condition defined" }
.set { condition_names }
Channel
.fromPath( params.fastq )
.ifEmpty { error "Cannot find any files matching: ${params.fastq}" }
.map{it -> [(it.baseName =~ /([^\.]*)/)[0][1], [it] ]}
.set{ fastq_files }
.from( params.input.collect { k, v -> v.type })
.ifEmpty { error "No sample types defined" }
.set { sample_types }
}
/*
......@@ -144,18 +119,18 @@ Channel
*/
fastqc_mod = "./nf_modules/fastqc/main.nf"
include { fastp_default } from "./nf_modules/fastp/main.nf"
include { fastp_chipster } from "./nf_modules/fastp/main.nf"
include { fastqc_fastq as fastqc_raw } from fastqc_mod addParams(fastqc_fastq_out: "$params.project/01_fastqc_raw/")
include { fastqc_fastq as fastqc_preprocessed } from fastqc_mod addParams(fastqc_fastq_out: "$params.project/02_fastqc_preprocessed/")
include { multiqc } from './nf_modules/multiqc/main.nf' addParams(multiqc_out: "$params.project/QC/")
include { index_fasta } from "./nf_modules/bowtie2/main.nf"
include { mapping_fastq } from "./nf_modules/bowtie2/main.nf"
include { filter_bam_quality } from "./nf_modules/samtools/main.nf"
include { sort_bam } from "./nf_modules/samtools/main.nf"
include { index_bam } from "./nf_modules/samtools/main.nf"
include { mapping_fastq_chipster } from "./nf_modules/bowtie2/main.nf"
include { filter_bam_quality_chipster } from "./nf_modules/samtools/main.nf"
include { sort_bam_chipster } from "./nf_modules/samtools/main.nf"
include { index_bam_chipster } from "./nf_modules/samtools/main.nf"
include { bam_to_bigwig } from "./nf_modules/deeptools/main.nf"
include { chipseq_bam2BG } from "./nf_modules/deeptools/main.nf"
include { peak_calling_bg } from "./nf_modules/macs3/main.nf"
include { chipseq_bam2BW_chipster } from "./nf_modules/deeptools/main.nf"
include { peak_calling } from "./nf_modules/macs3/main.nf"
/*
****************************************************************
......@@ -166,12 +141,12 @@ include { peak_calling_bg } from "./nf_modules/macs3/main.nf"
workflow {
// fastp
fastp_default(fastq_files)
fastp_chipster(fastq_files, sample_names, condition_names, sample_types)
// fastqc_rawdata
fastqc_raw(fastq_files)
fastqc_raw(fastq_files.map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it] })
// fastqc_processed
fastqc_preprocessed(fastp_default.out.fastq)
fastqc_preprocessed(fastp_chipster.out.fastq.map { it -> [it [0], it[1]]})
// multiqc
multiqc(
fastqc_raw.out.report
......@@ -183,7 +158,7 @@ workflow {
// index reference genome
index_fasta(genome_file)
// mapping preprocessed reads
mapping_fastq(index_fasta.out.index.collect(), fastp_default.out.fastq)
mapping_fastq_chipster(index_fasta.out.index.collect(), fastp_chipster.out.fastq)
/*if (params.idxgenome == "") {
index_fasta(genome_file)
......@@ -194,20 +169,26 @@ workflow {
*/
// filter bam - remove reads with quality <30
filter_bam_quality(mapping_fastq.out.bam)
filter_bam_quality_chipster(mapping_fastq_chipster.out.bam)
// samtools_sort
sort_bam(filter_bam_quality.out.bam)
sort_bam_chipster(filter_bam_quality_chipster.out.bam)
// samtools_index
index_bam(sort_bam.out.bam)
index_bam_chipster(sort_bam_chipster.out.bam)
// Create a bigwig file
// bam_to_bigwig(index_bam.out.bam_idx)
// Chipseq Bam 2 bigwig file with reads extends
chipseq_bam2BG(index_bam.out.bam_idx)
chipseq_bam2BW_chipster(index_bam_chipster.out.bam_idx)
index_bam_chipster.out.bam_idx.groupTuple(by: 3).set { combined_bams }
combined_bams.map { it -> if(it[4][0] == 'IP') { [it[3], it[1][0], it[1][1]] } else {[ it[3], it[1][1], it[1][0]]} }.set { peak_calling_channel_in }
// peak calling using MACS3; takes bed or bam files as input...
// peak_calling_bg()
}
\ No newline at end of file
peak_calling(peak_calling_channel_in)
}
/* input:
tuple val(file_id), path(bam_ip), path(bam_control) */
\ No newline at end of file
input:
# A row defines the features that describe one sample to analyse.
# You can add as many rows as you want, one below the other. Be sure that
# the name of the row is the same as the file name without extension.
# project name,
# A row defines the features that describe one sample to analyse.
# You can add as many rows as you want, one below the other. Be sure that
# the name of the row is the same as the file name without extension.
# boolean value to setup sequencing type (paired-end or single-end)
paired-end: FALSE
# project name,
# directory containing fastq files (rawdata)
fastq_folder: ""
# boolean value to setup sequencing type (paired-end or single-end)
paired-end: FALSE
# directory containing fastq files (rawdata)
fastq_folder: ""
samples:
row1:
# sample must be a string. It corresponds to the name of the sample
sample: "5Y_siDDX_CTCF"
......@@ -28,6 +28,6 @@ input:
type: "Input"
# Under construction:
# Organism (hg19, GRCH38, HBV...) default hg19 for FasterDB compatibility
# organism: ""
\ No newline at end of file
# Under construction:
# Organism (hg19, GRCH38, HBV...) default hg19 for FasterDB compatibility
# organism: ""
\ No newline at end of file
......@@ -105,21 +105,25 @@ plotProfile -m ${matrix} \
"""
}
params.chipseq_bam2BG = ""
params.chipseq_bam2BG_out = ""
process chipseq_bam2BG {
// Implemented by Xavier Grand for the ChIP-seq pipeline named chipster.
// The genome size is hard-coded; it should be turned into a parameter
// passed to --effectiveGenomeSize on the command line.
params.chipseq_bam2BW = ""
params.chipseq_bam2BW_out = ""
params.genome_size = 2913022398
process chipseq_bam2BW_chipster {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
if (params.chipseq_bam2BG_out != "") {
publishDir "results/${params.chipseq_bam2BG_out}", mode: 'copy'
if (params.chipseq_bam2BW_out != "") {
publishDir "results/${params.chipseq_bam2BW_out}", mode: 'copy'
}
input:
tuple val(file_id), path(bam), path(idx)
tuple val(file_id), path(bam), path(idx), val(condition), val(type)
output:
tuple val(file_id), path("*.bw"), emit: bw
tuple val(file_id), path("*.bw"), val(condition), val(type), emit: bw
script:
"""
......@@ -127,7 +131,7 @@ bamCoverage -p ${task.cpus} -b ${bam} \
--binSize 10 \
--ignoreDuplicates \
--extendReads 200 \
--effectiveGenomeSize 2913022398 \
--effectiveGenomeSize ${params.genome_size} \
-o ${bam.simpleName}.bw \
"""
}
\ No newline at end of file
......@@ -79,6 +79,62 @@ process fastp_default {
"""
}
// Runs fastp on the reads of one sample and forwards the sample's
// `condition` and `type` values together with the trimmed reads.
process fastp_chipster {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_prefix"
// Results are published (copied) under results/<params.fastp_out> only
// when params.fastp_out is a non-empty string.
if (params.fastp_out != "") {
publishDir "results/${params.fastp_out}", mode: 'copy'
}

// Four separate inputs (not a tuple): the read file(s), then the sample
// identifier, condition and type as plain values.
// NOTE(review): presumably the four upstream channels must emit in the
// same order for the values to stay paired -- confirm against the caller.
input:
path reads
val file_id
val condition
val type

// fastq : (file_id, trimmed fastq file(s), condition, type)
// html  : (file_id, fastp HTML report)
// report: (file_id, fastp JSON report)
output:
tuple val(file_id), path("*_trim.fastq.gz"), val(condition), val(type), emit: fastq
tuple val(file_id), path("${file_prefix}.html"), emit: html
tuple val(file_id), path("${file_prefix}_fastp.json"), emit: report

script:
// file_prefix names every output file: the first element of file_id when
// file_id is a List, file_id itself otherwise.
if (file_id instanceof List){
file_prefix = file_id[0]
} else {
file_prefix = file_id
}

// Two reads -> command with --in1/--in2 and _R1/_R2 outputs; anything
// else -> single --in1/--out1 command. ${params.fastp} is interpolated
// verbatim into both command lines (it is not defined in this block).
// NOTE(review): when `reads` is a single path rather than a list, size()
// may not be an element count -- confirm this test behaves as intended.
if (reads.size() == 2)
"""
fastp --thread ${task.cpus} \
--qualified_quality_phred 20 \
--disable_length_filtering \
--detect_adapter_for_pe \
${params.fastp} \
--in1 ${reads[0]} \
--in2 ${reads[1]} \
--out1 ${file_prefix}_R1_trim.fastq.gz \
--out2 ${file_prefix}_R2_trim.fastq.gz \
--html ${file_prefix}.html \
--json ${file_prefix}_fastp.json \
--report_title ${file_prefix}
"""
else
"""
fastp --thread ${task.cpus} \
--qualified_quality_phred 20 \
--disable_length_filtering \
--detect_adapter_for_pe \
${params.fastp} \
--in1 ${reads[0]} \
--out1 ${file_prefix}_trim.fastq.gz \
--html ${file_prefix}.html \
--json ${file_prefix}_fastp.json \
--report_title ${file_prefix}
"""
}
process fastp_accel_1splus {
container = "${container_url}"
label "big_mem_multi_cpus"
......
......@@ -45,6 +45,27 @@ samtools view -@ ${task.cpus} -hb ${bam} ${params.filter_bam_quality} > \
"""
}
// Filters a BAM file with `samtools view` and forwards the sample's
// `condition` and `type` values unchanged with the filtered file.
process filter_bam_quality_chipster {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
// Published (copied) under results/<params.filter_bam_quality_out> only
// when that parameter is a non-empty string.
if (params.filter_bam_quality_out != "") {
publishDir "results/${params.filter_bam_quality_out}", mode: 'copy'
}

// Input tuple: (file_id, BAM file, condition, type).
input:
tuple val(file_id), path(bam), val(condition), val(type)

// Output tuple: (file_id, "*_filtered.bam", condition, type), emitted as `bam`.
output:
tuple val(file_id), path("*_filtered.bam"), val(condition), val(type), emit: bam

// The filtering criteria come entirely from ${params.filter_bam_quality},
// which is interpolated verbatim into the samtools command line; the
// result is written to <bam.simpleName>_filtered.bam.
script:
"""
samtools view -@ ${task.cpus} -hb ${bam} ${params.filter_bam_quality} > \
${bam.simpleName}_filtered.bam
"""
}
params.filter_bam = ""
params.filter_bam_out = ""
process filter_bam {
......@@ -133,6 +154,28 @@ samtools index ${params.index_bam} ${bam}
"""
}
// Indexes a BAM file with `samtools index` and forwards the sample's
// `condition` and `type` values together with the BAM and its index.
process index_bam_chipster {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "$file_id"
// Published (copied) under results/<params.index_bam_out> only when that
// parameter is a non-empty string.
if (params.index_bam_out != "") {
publishDir "results/${params.index_bam_out}", mode: 'copy'
}

// Input tuple: (file_id, BAM file, condition, type).
input:
tuple val(file_id), path(bam), val(condition), val(type)

// Output tuple: (file_id, the input BAM itself, "*.bam.bai", condition,
// type), emitted as `bam_idx`.
output:
tuple val(file_id), path("${bam}"), path("*.bam.bai"), val(condition), val(type), emit: bam_idx

// ${params.index_bam} is interpolated verbatim as extra samtools options.
script:
"""
samtools index ${params.index_bam} ${bam}
"""
}
params.sort_bam = ""
params.sort_bam_out = ""
process sort_bam {
......@@ -155,6 +198,27 @@ samtools sort -@ ${task.cpus} ${params.sort_bam} -O BAM -o ${bam.simpleName}_sor
"""
}
// Sorts a BAM file with `samtools sort` and forwards the sample's
// `condition` and `type` values unchanged with the sorted file.
process sort_bam_chipster {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
// Published (copied) under results/<params.sort_bam_out> only when that
// parameter is a non-empty string.
if (params.sort_bam_out != "") {
publishDir "results/${params.sort_bam_out}", mode: 'copy'
}

// Input tuple: (file_id, BAM file, condition, type).
input:
tuple val(file_id), path(bam), val(condition), val(type)

// Output tuple: (file_id, files matching "*.bam*", condition, type),
// emitted as `bam`.
// NOTE(review): the glob is broader than the single
// <bam.simpleName>_sorted.bam file the script writes -- confirm that only
// the sorted file is expected to be captured here.
output:
tuple val(file_id), path("*.bam*"), val(condition), val(type), emit: bam

// ${params.sort_bam} is interpolated verbatim as extra samtools options.
script:
"""
samtools sort -@ ${task.cpus} ${params.sort_bam} -O BAM -o ${bam.simpleName}_sorted.bam ${bam}
"""
}
params.split_bam = ""
params.split_bam_out = ""
process split_bam {
......
nextflow.enable.dsl=2
Channel
/* Channel
.from([[1, "fastq1.fq"], [2, "fastq2.fq"], [3, "fastq3.fq"], [4, "fastq4.fq"]])
.set{ fastq_files }
......@@ -13,4 +13,7 @@ Channel
// fastq_files.join(sample_names).map{it -> [file(it[1]).baseName, it[1], it[2]]}.view()
fastq_files.join(sample_names).set{ vals }
vals.combine(vals).filter { it -> (it[2] != it[5]) && (it[2] == "test") }.view()
\ No newline at end of file
vals.combine(vals).filter { it -> (it[2] != it[5]) && (it[2] == "test") }.view() */
println(params.genome)
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment