diff --git a/src/chipster.nf b/src/chipster.nf index 5df367d8e8d733bb81e7b4cdc80df48323733f3b..26a77d6fbdfb631eb1d2bcee236c78f0f5953294 100755 --- a/src/chipster.nf +++ b/src/chipster.nf @@ -4,10 +4,9 @@ nextflow.enable.dsl=2 /* ChIPseq pipeline : * Pipeline that analyses ChIPseq data - * Preprocessing, filtration, alignment, peak calling... + * Preprocessing, filtration, alignment, peak calling. */ - /* Params out */ params.fastp_out = "$params.project/fastp/" params.index_fasta_out = "$params.project/Indexed_genome/" @@ -29,12 +28,6 @@ params.chipseq_bam2BW_out = "$params.project/chipseq_BigGig" log.info "fastq folder : ${params.fastq_folder}" log.info "genome file : ${params.genome}" log.info "genome sizes : ${params.chrom_sizes}" -/* log.info "indexed genome file : ${params.idxgenome}" */ - -/* -log.info "paired-end data: ${params.paired_end}" -log.info "output folder results/${params.folder}" -*/ /* **************************************************************** @@ -75,43 +68,38 @@ if (params.paired_end) { } -/* -if (params.idx != "") { - Channel - .fromPath( params.idx ) - .ifEmpty { error "Cannot find idexed genome reference files matching: ${params.idx}" } - .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], [it] ]} - .set { index_fasta.out } -} -*/ - Channel .fromPath( params.genome ) .ifEmpty { error "Cannot find any files matching: ${params.genome}" } .map{it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} .set { genome_file } -/* -Channel // IP & CTRL names - .from( params.ctrl ) - -Channel - .from( params.ip ) -*/ - -/* -if... -Channel - .fromPath( params.idxgenome ) - .set { genome_idx } -*/ - Channel .fromPath( params.chrom_sizes ) .ifEmpty { error "Cannot find any files matching: ${params.chrom_sizes}" } .map{it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} .set{ genome_sizes } +/* +if (params.idx != "") { + Channel + .fromPath( "${params.genome}/*.index.*" ) + .ifEmpty { error "Cannot find idexed genome reference files matching: ${params.idx}" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], [it] ]} + .set { index_fasta.out.index } + + + tuple val(file_id), path("*.index*"), emit: index +} +else { + Channel + .fromPath( params.genome ) + .ifEmpty { error "Cannot find any files matching: ${params.genome}" } + .map{it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} + .set { genome_file } +} +*/ + /* **************************************************************** Imports @@ -140,9 +128,14 @@ include { peak_calling } from "./nf_modules/macs3/main.nf" workflow { + //########################## PREPROCESSING #################### + // fastp fastp_chipster(fastq_files, sample_names, condition_names, sample_types) + //########################## QUALITY CHECKS ################### + /* + // fastqc_rawdata fastqc_raw(fastq_files.map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it] }) // fastqc_processed @@ -155,18 +148,35 @@ workflow { ).collect() ) + */ + + //############ GENOME INDEXATION AND MAPPING ################### + + // Genome indexation and Mapping in two steps, genome is indexed every run... + // index reference genome - index_fasta(genome_file) + // index_fasta(genome_file) + // mapping preprocessed reads - mapping_fastq_chipster(index_fasta.out.index.collect(), fastp_chipster.out.fastq) - - /*if (params.idxgenome == "") { + // mapping_fastq_chipster(index_fasta.out.index.collect(), fastp_chipster.out.fastq) + + // Implementation of indexed genome providing: + + if (! params.idx) { index_fasta(genome_file) - mapping_fastq(index_fasta.out.index.collect(), fastp_default.out.fastq) - } else { - mapping_fastq(genome_idx.collect(), fastp_default.out.fastq) + mapping_fastq_chipster(index_fasta.out.index.collect(), fastp_chipster.out.fastq) } - */ + else { + idx_genome = "${params.idx}*.bt2" + Channel + .fromPath( idx_genome ) + .ifEmpty { error "Cannot find idexed genome reference files" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it ]}.groupTuple().view() + .set { genome_indexed_input } + mapping_fastq_chipster(genome_indexed_input.collect(), fastp_chipster.out.fastq) + } + + //############ MAPPING FILTERING, INDEXING, SORTING ############## // filter bam - remove reads with quality <30 filter_bam_quality_chipster(mapping_fastq_chipster.out.bam) @@ -177,18 +187,17 @@ workflow { // samtools_index index_bam_chipster(sort_bam_chipster.out.bam) - // Create a bigwig file - // bam_to_bigwig(index_bam.out.bam_idx) + //########################### BIGWIGS ########################### // Chipseq Bam 2 bigwig file with reads extends chipseq_bam2BW_chipster(index_bam_chipster.out.bam_idx) + //######################## PEAK CALLING ######################### + // IP and Input samples identification and Channel creation index_bam_chipster.out.bam_idx.groupTuple(by: 3).set { combined_bams } combined_bams.map { it -> if(it[4][0] == 'IP') { [it[3], it[1][0], it[1][1]] } else {[ it[3], it[1][1], it[1][0]]} }.set { peak_calling_channel_in } + // peak calling using MACS3 Prend des bed ou des bam en entrée... peak_calling(peak_calling_channel_in) -} - -/* input: - tuple val(file_id), path(bam_ip), path(bam_control) */ \ No newline at end of file +} \ No newline at end of file diff --git a/src/test.nf b/src/test.nf index 34298508b98569fe986bd931154d16432244baf7..12d96adddd072df24ad5823c6abfc9342f0cdc32 100644 --- a/src/test.nf +++ b/src/test.nf @@ -1,5 +1,14 @@ nextflow.enable.dsl=2 +idx_genome = "${params.idx}*.bt2" + +Channel + .fromPath( idx_genome ) + .ifEmpty { error "Cannot find idexed genome reference files" } + .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it ]}.groupTuple() + .set { genome_indexed_input } + +genome_indexed_input.view() /* Channel .from([[1, "fastq1.fq"], [2, "fastq2.fq"], [3, "fastq3.fq"], [4, "fastq4.fq"]]) @@ -16,4 +25,4 @@ fastq_files.join(sample_names).set{ vals } vals.combine(vals).filter { it -> (it[2] != it[5]) && (it[2] == "test") }.view() */ -println(params.genome) \ No newline at end of file +// println(params.genome) \ No newline at end of file