// Tag of the xgrand/seqkit container image; interpolated into container_url.
version = "2.1.0"
// Full container image reference used by the `container` directive of every
// process in this file.
container_url = "xgrand/seqkit:${version}"

// Sub-directory of `results/` where outputs are published; an empty string
// disables publishing.
params.reversecomp_out = ""

/*
 * Write an adapter sequence to a FASTA file and compute its reverse
 * complement with `seqkit seq`.
 *
 * input:
 *   adapt        - adapter sequence given as a plain string value
 * output:
 *   adapt_fst    - adapt.fasta, a single record named "adapt"
 *   adaptRC_fst  - adaptRC.fasta, the reverse complement of that record
 */
process reversecomp {
  container = "${container_url}"
  label "small_mem_mono_cpus"
  tag "rev-comp"
  if (params.reversecomp_out != "") {
    publishDir "results/${params.reversecomp_out}", mode: 'copy'
  }

  input:
    val(adapt)

  output:
    path("adapt.fasta"), emit: adapt_fst
    path("adaptRC.fasta"), emit: adaptRC_fst

  script:
    // The header is written with `>` so the FASTA always starts empty, and
    // the sequence is quoted so the shell passes it through verbatim.
    // -r / -p: reverse and complement; -t DNA: sequence type; -v: validate.
    """
    echo ">adapt" > adapt.fasta
    echo "${adapt}" >> adapt.fasta
    seqkit seq adapt.fasta -r -p -t DNA -v > adaptRC.fasta
    """
}

// Sub-directory of `results/` where outputs are published; an empty string
// disables publishing.
params.seqkit_grep_out = ""

/*
 * Keep the reads that contain the 5'RACE adapter, then, among those, the
 * reads that also contain the gene specific primer (GSP).
 *
 * The number of mismatches tolerated for each pattern is one tenth of the
 * pattern length, rounded to the nearest integer.
 *
 * input:
 *   fastq  - reads to filter
 *   adapt  - 5'RACE adapter sequence (string)
 *   gsp    - gene specific primer sequence (string)
 * output:
 *   filtered_fastq        - filtered_5RACE_GSP.fastq, reads matching both patterns
 *   seq_stats.csv         - `seqkit stats -T` table: input, adapter-filtered and
 *                           adapter+GSP-filtered reads (one row each)
 *   *.txt                 - mismatch.txt, adapt.txt and gsp.txt
 *   filtered_5RACE.fastq  - reads matching the adapter only
 */
process seqkit_grep {
  container = "${container_url}"
  label "small_mem_multi_cpus"
  tag "Filter_reads"
  if (params.seqkit_grep_out != "") {
    publishDir "results/${params.seqkit_grep_out}", mode: 'copy'
  }

  input:
    path(fastq)
    val(adapt)
    val(gsp)

  output:
    path("filtered_5RACE_GSP.fastq"), emit: filtered_fastq
    path("seq_stats.csv")
    path("*.txt")
    path("filtered_5RACE.fastq")

  script:
    // Allowed mismatches: 10% of the pattern length, rounded.
    def lgadapt = Math.round(adapt.size().div(10))
    def lggsp = Math.round(gsp.size().div(10))
    // `-s` forces matching against the read sequence. Without it seqkit grep
    // matches read IDs whenever the mismatch count is 0 (patterns shorter
    // than 5 nt), which would silently filter on names instead of sequences.
    // `tail -n1` drops the header line of the appended `seqkit stats` tables.
    """
    echo "mismatch allowed to 5'RACE adapter:  ${lgadapt}" > mismatch.txt
    echo "mismatch allowed to Gene Specific primer:  ${lggsp}" >> mismatch.txt
    echo "${adapt}" > adapt.txt
    echo "${gsp}" > gsp.txt
    seqkit grep -s -i -f adapt.txt -m ${lgadapt} ${fastq} -o filtered_5RACE.fastq -j ${task.cpus}
    seqkit grep -s -i -f gsp.txt -m ${lggsp} filtered_5RACE.fastq -o filtered_5RACE_GSP.fastq -j ${task.cpus}
    seqkit stats ${fastq} -T -j ${task.cpus} > seq_stats.csv
    seqkit stats filtered_5RACE.fastq -T -j ${task.cpus} | tail -n1 >> seq_stats.csv
    seqkit stats filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> seq_stats.csv
    """
}

// Sub-directory of `results/` where outputs are published; an empty string
// disables publishing.
params.fastq_out = ""

/*
 * Concatenate the FASTQ files found in a directory into a single
 * gzip-compressed FASTQ file using `seqkit scat`.
 *
 * input:
 *   fastq_dir     - directory holding the FASTQ files to merge
 * output:
 *   merged_fastq  - merged.fastq.gz
 */
process concatenate {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "Concatenate_reads"
  if (params.fastq_out != "") {
    publishDir "results/${params.fastq_out}", mode: 'copy'
  }

  input:
    path(fastq_dir)

  output:
    path "merged.fastq.gz", emit: merged_fastq

  script:
    // The CPU count comes from the implicit `task` object (task.cpus).
    // -f (--find-only): concatenate the files already present, then quit.
    // --gz-only: only pick up gzipped inputs.
    // seqkit writes plain FASTQ to stdout, which gzip then compresses into
    // the declared merged.fastq.gz output.
    """
    seqkit scat -j ${task.cpus} -f ${fastq_dir} --gz-only > merged.fastq
    gzip merged.fastq
    """
}