Skip to content
Snippets Groups Projects
Select Git revision
  • 231fd9e6a361a8095f07f32ce8c42b69f9409f8f
  • master default protected
  • dev
  • v2.0.0
  • v0.4.0
  • v0.3.0
  • v0.2.9
  • v0.2.8
  • v0.2.7
  • v0.2.6
  • v0.1.0
  • v0.2.5
  • v0.2.4
  • v0.2.3
  • v0.2.2
  • v0.2.1
  • v0.2.0
  • v0.1.2
18 results

indexing.config

Blame
  • Forked from LBMC / nextflow
    Source project has a limited visibility.
    RNAseq.nf 4.81 KiB
    /*
    *	RNAseq Analysis pipeline
    */
    
    params.input = "data/demultiplexed/*{_R1,_R2}.fastq.gz"
    
    Channel
       .fromFilePairs(params.input)
       .ifEmpty { error "Cannot find any file matching: ${params.input}" }
       .set {input_channel}
    
    /* Trimming by quality */
    
    process trimming {
      tag "$file_id"
      cpus 4
      publishDir "results/RNAseq/01_cutadapt/", mode: 'copy'
      echo true
    
      input:
      set file_id, file(reads) from input_channel
    
      output:
      set file_id, "*cut_{R1,R2}.fastq.gz" into fastq_files_cut
      file "*.txt" into rapport_UrQt
    
      script:
      """
      cutadapt -a AGATCGGAAGAGC -A AGATCGGAAGAGC \
      -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_tmp_R2.fastq.gz \
      ${reads[0]} ${reads[1]} > ${pair_id}_report.txt
    
      cutadapt -u -14 -o ${pair_id}_cut_R2.fastq.gz ${pair_id}_tmp_R2.fastq.gz \
      > ${pair_id}_cut_report.txt
      """
    }
    
    
    /* rRNA and tRNA filtering */
    
    params.indexrRNA = "/Xnfs/lbmcdb/Ricci_team/shared_data/genomes/human_rRNA_tRNA/*.bt2"
    
    log.info "index files : ${params.indexrRNA}"
    
    Channel
      .fromPath( params.indexrRNA )
      .ifEmpty { error "Cannot find any index files matching: ${params.indexrRNA}" }
      .set { rRNA_index_files }
    
    process rRNA_removal {
      tag "$file_id"
      cpus 8
      publishDir "results/RNAseq/U937/02_rRNA_depletion/", mode: 'copy'
    
      input:
      set file_id, file(reads) from fastq_files_cut
      file index from rRNA_index_files.toList()
    
      output:
      set file_id, "*.fastq.gz" into rRNA_removed_files
      file "*.txt" into bowtie_report
    
      script:
    """
    bowtie2 --sensitive -p ${task.cpus} -x human_rRNA_tRNA \
    -1 ${reads[0]} -2 ${reads[1]} --un-conc-gz ${file_id}_R%.fastq.gz 2> \
    ${file_id}_bowtie2_report.txt > /dev/null
    
    if grep -q "Error " ${file_id}_bowtie2_report.txt; then
      exit 1
    fi
    """
    }
    
    /*	mapping against human genome with hisat2 */
    
    params.index_hg38 = "/media/adminmanu/Stockage/HISAT2_index_hg38_tran/*.ht2"
    
    log.info "index : ${params.index_hg38}"
    
    
    Channel
      .fromPath ( params.index_hg38 )
      .ifEmpty { error "Cannot find any hg38 index files matching: ${params.index_hg38}" }
      .set { index_file_hg38 }--paired
    
    process hisat2_human {
      tag "$file_id"
    
      input:
        set file_id, file(fastq_filtred) from rRNA_removed_reads
        file index from index_file_hg38.toList()
    
      output:
        set file_id, "*.fastq.gz" into reads_non_aligned_hg38
        set file_id, "*.bam" into reads_aligned_hg38
        file "*.txt" into hisat_report
    
      script:
    """
    hisat2 -x genome_tran -p ${task.cpus} \
    -1 ${fastq_filtred[0]} -2 ${fastq_filtred[1]} \
    --un-conc-gz ${file_id}_notaligned_hg38_R%.fastq.gz \
    --rna-strandness 'F' \
    2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam
    
    """
    }
    
    /*                   sorting                             */
    
    process index_bam {
      tag "$file_id"
      publishDir "${params.output}/03_hisat2_hg38/", mode: 'copy'
    
      input:
        set file_id, file(bam) from reads_aligned_hg38
    
      output:
        set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files
    
      script:
    """
    samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
    samtools index ${file_id}_sorted.bam
    """
    }
    
    sorted_bam_files.into{for_dedup;for_htseq}
    
    /*                   deduplicating reads
    
    params.dedup_options = "--paired"
    
    process dedup {
      tag "$file_id"
    
      input:
      set file_id, file(bam) from for_dedup
    
      output:
      set file_id, "*dedup.bam" into dedup_bam
      file "*.txt" into dedup_report
    
      script:
    """
    umi_tools dedup -I ${bam[0]} \
                    ${params.dedup_options} \
                    -S ${file_id}_dedup.bam > report.txt
    """
    }
    
    process sort_bam {
      tag "$file_id"
      publishDir "${params.output}/03_hisat2_hg38_dedup/", mode: 'copy'
    
      input:
        set file_id, file(bam) from dedup_bam
        file dedup from dedup_report
    
      output:
        set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files_2
        file "*.txt" into report_dedup
    
      script:
    """
    samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
    samtools index ${file_id}_sorted.bam
    cat ${dedup} > ${file_id}_dedup_report.txt
    """
    } */
    
    /*                   HTseq                            */
    
    params.gtf = "$baseDir/data/annotation/*.gtf"
    log.info "gtf files : ${params.gtf}"
    
    Channel
      .fromPath( params.gtf )
      .ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
      .set { gtf_file }
    
    process counting {
      tag "$file_id"
      publishDir "${params.output}/04_HTseq/", mode: 'copy'
    
      input:
      set file_id, file(bam) from for_htseq
      file gtf from gtf_file.toList()
    
      output:
      file "*.count" into count_files
    
      script:
    """
    htseq-count ${bam[0]} ${gtf} \
                --mode=intersection-nonempty \
                -a 10 \
                -s yes \
                -t CDS \
                -i gene_id \
                -r pos \
                -f bam \
    > ${file_id}_CDS.count
    
    htseq-count ${bam[0]} ${gtf} \
                --mode=intersection-nonempty \
                -a 10 \
                -s yes \
                -t exon \
                -i gene_id \
                -r pos \
                -f bam \
    > ${file_id}_exon.count
    
    """
    }