// Use the DSL2 syntax (modules, `include` statements, `workflow` blocks).
nextflow.enable.dsl=2
// Opt in to the preview recursion feature for processes/workflows.
// NOTE(review): no `.recurse()` / `.scan()` call is visible in this file — confirm the flag is still needed.
nextflow.preview.recursion=true

/*
Testing pipeline for marseq scRNASeq analysis
*/
// Default parameter values; each can be overridden on the command line
// (--kmer_size <n>, --bootstrap <n>) or from a config/params file.
// kmer_size is not referenced anywhere in this file — presumably it is read
// by one of the included modules (TODO confirm).
params.kmer_size = 12
// bootstrap is only referenced by the commented-out KMERCLUST_BOOT call in the workflow.
params.bootstrap = 10

include { SUBSAMPLE_READ } from "./modules/sample_reads"
include { SPLIT } from "./modules/split"
include { FASTKMERS } from "./modules/fastkmers"
include { MERGEKMER } from "./modules/mergekmer"
include { COLLATEKMER } from "./modules/mergekmer"
include { KMERCLUST_LOAD; KMERCLUST; KMERCLUST_PLOT; KMERCLUST_MERGE } from "./modules/kmerclust"


/*
 * Build the `fastq` multi-channel from the sample sheet given by params.csv.
 *
 * The CSV is read with a header row; the columns used here are
 * r1, r2, specie and sex. For every row two tuples are emitted:
 *   fastq.r1 : [meta, <r1 file>]   with meta.read == 'r1'
 *   fastq.r2 : [meta, <r2 file>]   with meta.read == 'r2'
 * where meta = [id: <simpleName of the file>, specie: ..., sex: ..., read: ...].
 */
Channel.fromPath( file(params.csv) )
                    .splitCsv(header: true)
                    .multiMap{row ->
                        // `def` keeps these variables local to the closure. Without it
                        // they are written to the script-wide binding, i.e. they become
                        // shared global state that any other closure can read or clobber.
                        def fastq_r1 = file("${row.r1}")
                        def fastq_r2 = file("${row.r2}")
                        r1: [['id': fastq_r1.simpleName, 'specie': row.specie, 'sex': row.sex, 'read': 'r1'], fastq_r1]
                        r2: [['id': fastq_r2.simpleName, 'specie': row.specie, 'sex': row.sex, 'read': 'r2'], fastq_r2]
                    }
                    .set{ fastq }

// Separate channel emitting the sample sheet file itself (not its rows);
// it is handed to KMERCLUST_LOAD in the workflow.
params_csv = Channel.fromPath(params.csv)

/*
 * Entry workflow.
 *
 * Chain of processes, in order:
 *   SUBSAMPLE_READ -> SPLIT -> FASTKMERS -> MERGEKMER -> COLLATEKMER -> KMERCLUST_LOAD
 * The shapes of the intermediate tuples are defined by the included modules,
 * which are not visible from this file.
 */
workflow {
  // R1 and R2 files travel as independent items; meta.read tells them apart.
  all_reads = fastq.r1.mix(fastq.r2)
  SUBSAMPLE_READ(all_reads)
  SPLIT(SUBSAMPLE_READ.out.fastq)

  // transpose() un-nests list elements of each tuple so FASTKMERS gets one
  // emission per element (presumably one per split chunk — confirm against SPLIT).
  split_items = SPLIT.out.fastq.transpose()
  FASTKMERS(split_items)

  // Regroup the per-item results by the first tuple element.
  kmers_grouped = FASTKMERS.out.csv.groupTuple()
  MERGEKMER(kmers_grouped)

  // Re-key each result by the `specie` field of its first element, then
  // gather everything that shares the same specie.
  kmers_by_specie = MERGEKMER.out.csv
    .map{ rec -> [rec[0].specie, rec[1]] }
    .groupTuple()
  COLLATEKMER(kmers_by_specie)

  // collect() bundles the sample sheet into a single list emission that
  // accompanies the COLLATEKMER output.
  sample_sheet = params_csv.collect()
  KMERCLUST_LOAD(COLLATEKMER.out.csv, sample_sheet)

// Disabled downstream steps, kept for reference.
// NOTE: KMERCLUST_BOOT is not in the include list at the top of this file;
// it must be included before this section is re-enabled.
/*
  KMERCLUST(KMERCLUST_LOAD.out.rdata)
  KMERCLUST_BOOT(Channel.of(1..params.bootstrap).combine(KMERCLUST_LOAD.out.rdata))
  KMERCLUST_MERGE(KMERCLUST.out.rdata.groupTuple())
  KMERCLUST_PLOT(KMERCLUST_MERGE.out.rdata.mix(KMERCLUST_LOAD.out.rdata).groupTuple())
*/
}