Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (96)
Showing with 2907 additions and 1 deletion
*.class
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
*.jar
*.war
*.ear
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
# NextFlow Specific
.nextflow.log*
.nextflow
# Docker Specific
*apt.list
*conda.list
nextflow
.nextflow.log*
.nextflow/
Riboflow License
MIT License
Copyright (c) 2019 Ribosome Profiling
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Nextflow License:
CeCILL FREE SOFTWARE LICENSE AGREEMENT
@@ -516,3 +541,4 @@ that may arise during the performance of the Agreement.
occurrence, and unless emergency proceedings are necessary, the
disagreements or disputes shall be referred to the Paris Courts having
jurisdiction, by the more diligent Party.
version = '0.0.0'
#!/bin/bash
#
### SGE variables
### job shell
#$ -S /bin/bash
### job name
#$ -N FLOSS_U937_all_merge
### queue:
#$ -q M6142deb*
### parallel environment & number of CPUs (NSLOTS)
#$ -pe openmp32 32
### load the user environment for SGE
#$ -cwd
### export environment variables to the execution nodes
#$ -V
### mail at the start and end of the run
#$ -m be
# init env (should be in ~/.profile)
#source /usr/share/lmod/lmod/init/bash
#source ~/.bashrc
### set up the environment
#cd ${SGE_O_WORKDIR}
# number of processors
#CPU=${NSLOTS}
### Merge BAM files from the NY5 and hg38 alignments
#samtools view -H /Xnfs/lbmcdb/Ricci_team/HIV_project/results/20180326_RP_U937CHX/Results/03_hisat2_hg38/U937_CHX_all_merged_hg38_sorted.mapped.bam > tmp.sam
#samtools view -H /Xnfs/lbmcdb/Ricci_team/HIV_project/results/20180326_RP_U937CHX/Results/06_hisat2_NY5/U937_CHX_all.bam >> tmp.sam
#samtools view /Xnfs/lbmcdb/Ricci_team/HIV_project/results/20180326_RP_U937CHX/Results/03_hisat2_hg38/U937_CHX_all_merged_hg38_sorted.mapped.bam >> tmp.sam
#samtools view /Xnfs/lbmcdb/Ricci_team/HIV_project/results/20180326_RP_U937CHX/Results/06_hisat2_NY5/U937_CHX_all.bam >> tmp.sam
#samtools view -Sb tmp.sam > /Xnfs/lbmcdb/Ricci_team/HIV_project/results/20180326_RP_U937CHX/Results/10_FLOSS_score/U937_CHX_all_hg38_NY5.bam
#samtools sort -o /Xnfs/lbmcdb/Ricci_team/HIV_project/results/20180326_RP_U937CHX/Results/10_FLOSS_score/U937_CHX_all_hg38_NY5_sort.bam /Xnfs/lbmcdb/Ricci_team/HIV_project/results/20180326_RP_U937CHX/Results/10_FLOSS_score/U937_CHX_all_hg38_NY5.bam
#samtools index /Xnfs/lbmcdb/Ricci_team/HIV_project/results/20180326_RP_U937CHX/Results/10_FLOSS_score/U937_CHX_all_hg38_NY5_sort.bam
#rm tmp.sam /Xnfs/lbmcdb/Ricci_team/HIV_project/results/20180326_RP_U937CHX/Results/10_FLOSS_score/U937_CHX_all_hg38_NY5.bam
### Calculating FLOSS scores
R --no-save < ldist-calc-template.R
# FLOSS scores needed to pick lincs in ldist plots
#R --no-save < floss-calc-mesc.R # this script is now merged into the first one
R --no-save < ldist-plots-template.R
R --no-save < floss-plots-template.R
library("biomaRt")
library("GenomicRanges")
library("zoo")
source("./src/FLOSS_score/footprint-assignment.R")
source("./src/FLOSS_score/floss.R")
source("./src/FLOSS_score/mouse-annotation.R")
source("./src/FLOSS_score/plot-utils.R")
Floss <- read.csv("results/chx-floss.txt", check.names=FALSE, row.names=1)
row.names(Floss) <- gsub("\\.[[:digit:]]*", "", row.names(Floss))
xref <- match(row.names(Floss), annots$ensembl_transcript_id)
table(is.na(xref))
trxs <- cbind(Floss, annots[xref,])
trxs$name <- row.names(trxs)
pcoding = trxs[trxs$name %in% codingIds & !(trxs$name %in% mitoIds) & !(trxs$name %in% dirtyPCodingIds),]
mitoch = trxs[trxs$name %in% codingIds & (trxs$name %in% mitoIds),]
lincs = trxs[trxs$name %in% lincIds ,]
snolincs = trxs[trxs$name %in% snohgIds & trxs$transcript_biotype == "lincRNA",]
miscs = trxs[trxs$name %in% miscIds,]
Cutoffs <- flossCutoffs(pcoding$cdsNRead, pcoding$cdsScore)
predx <- seq(1,6,0.1)
plotBase <- function(title) {
plot(log10(pcoding$cdsNRead), pcoding$cdsScore, xlim=c(0,6), ylim=c(0,1),
pch=".", col=solarGrey01,
axes=F, xlab="Total Reads", ylab="Fragment Length Distribution Score", cex.lab=1.33, main = title)
axis(1, log10(10**seq(0,6,1)), labels=c("1", "10", "100", "1k", "10k", "100k", "1M"), cex.axis=1)
axis(1, log10(c(sapply(10**seq(0,5,1), minor_decade))), labels=F, tcl=-0.5)
axis(2, seq(0,1,0.2))
}
plotDrug <- function() {
plotBase("Mito CDS, mics and lincs")
points(log10(mitoch$cdsNRead), mitoch$cdsScore,
pch=21, cex=0.75, col=solarCyan, bg=lighten(solarCyan))
points(log10(miscs$trxNRead), miscs$trxScore,
pch=21, cex=0.75, col=solarYellow, bg=lighten(solarYellow))
text(log10(miscs$trxNRead), miscs$trxScore,
labels=miscs$external_gene_name, col=solarYellow, pos=4 )
points(log10(lincs$trxNRead), lincs$trxScore,
pch=21, cex=0.75, col=solarMagenta, bg=lighten(solarMagenta))
plotBase("5'UTR of protein coding mRNA")
points(log10(pcoding$utr5NRead), pcoding$utr5Score
, pch=21, cex=0.25, col=solarRed, bg=lighten(solarRed))
}
pdf("results/10_FLOSS_score/hg38_NY5-floss-rplot.pdf", useDingbats=F)
plotDrug()
plotBase("5'UTR")
lines(predx, predict(Cutoffs$extremeLoess, predx))
pcoding$classifyUtr5 <- Cutoffs$classify(pcoding$utr5NRead, pcoding$utr5Score)
utr5Good <- pcoding[pcoding$classifyUtr5 != "Extreme" & !is.na(pcoding$classifyUtr5),]
utr5Bad <- pcoding[pcoding$classifyUtr5 == "Extreme" & !is.na(pcoding$classifyUtr5),]
points(log10(utr5Good$utr5NRead), utr5Good$utr5Score
, pch=21, cex=0.25, col=solarRed, bg=lighten(solarRed))
points(log10(utr5Bad$utr5NRead), utr5Bad$utr5Score
, pch=21, cex=0.25, col=solarCyan, bg=lighten(solarCyan))
plotBase("lincs")
lines(predx, predict(Cutoffs$extremeLoess, predx))
lincs$classify <- Cutoffs$classify(lincs$trxNRead, lincs$trxScore)
lincsGood <- lincs[lincs$classify != "Extreme" & !is.na(lincs$classify),]
lincsBad <- lincs[lincs$classify == "Extreme" & !is.na(lincs$classify),]
points(log10(lincsGood$trxNRead), lincsGood$trxScore,
pch=21, cex=0.25, col=solarMagenta, bg=lighten(solarMagenta))
points(log10(lincsBad$trxNRead), lincsBad$trxScore,
pch=21, cex=0.25, col=solarGreen, bg=lighten(solarGreen))
dev.off()
pcoding$classifyCds <- Cutoffs$classify(pcoding$cdsNRead, pcoding$cdsScore)
table(pcoding[pcoding$cdsNRead > 100, "classifyCds"])
sum(pcoding[pcoding$cdsNRead > 100, "classifyCds"] != "Extreme", na.rm=TRUE) / sum(!is.na(pcoding[pcoding$cdsNRead > 100, "classifyCds"]))
table(pcoding[pcoding$utr5NRead > 100, "classifyUtr5"])
sum(pcoding[pcoding$utr5NRead > 100, "classifyUtr5"] != "Extreme", na.rm=TRUE) / sum(!is.na(pcoding[pcoding$utr5NRead > 100, "classifyUtr5"]))
table(lincs[lincs$trxNRead > 100, "classify"])
sum(lincs[lincs$trxNRead > 100, "classify"] != "Extreme", na.rm=TRUE) / sum(!is.na(lincs[lincs$trxNRead > 100, "classify"]))
dir.create("results/tables", showWarnings = FALSE)
write.csv(pcoding[,c("cdsNRead", "cdsScore", "classifyCds")],
"results/tables/floss-refcoding-cds.txt")
write.csv(pcoding[,c("utr5NRead", "utr5Score", "classifyUtr5")],
"results/tables/floss-refcoding-utr5.txt")
write.csv(lincs[,c("trxNRead", "trxScore", "classify")],
"results/tables/floss-lincs.txt")
write.csv(mitoch[,c("cdsNRead", "cdsScore")],
"results/tables/floss-mitoch.txt")
write.csv(miscs[,c("trxNRead", "trxScore")],
"results/tables/floss-miscs.txt")
write.csv(lincs[,c("ensembl_gene_id", "trxNRead", "trxScore", "classify", "external_gene_name", "gene_biotype", "transcript_biotype", "chromosome_name", "start_position", "end_position", "strand", "description")],
"results/tables/floss-lincs-annotated.txt")
library("zoo")
## Length range to consider: 26 through 34 nucleotides
defaultLengthMin <- 26
defaultLengthMax <- 34
## Length counts for a GAlignments
alignLengths <- function(galns, lengthMin=defaultLengthMin, lengthMax=defaultLengthMax) {
lmax = 1 + lengthMax - lengthMin
ld <- tabulate(pmax(0, pmin(lmax, qwidth(galns) - (lengthMin - 1))), nbins=lmax)
names(ld) <- as.character(seq(lengthMin, lengthMax))
ld
}
normLDist <- function(ld) { ld/sum(ld) }
## L1 norm length distribution difference between two length counts
ldistDiff <- function(d1, d2) { sum(abs(colSums(rbind(d1/sum(d1), - d2/sum(d2))))) / 2.0 }
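## Worked example (hypothetical counts): with d1 <- c(2, 2) and d2 <- c(1, 3),
## the normalized distributions are (0.50, 0.50) and (0.25, 0.75), so the summed
## absolute difference is 0.5 and ldistDiff(d1, d2) == 0.25; scores lie in [0, 1].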
trxRegionLengthDists <- function(bamfile, trxRgns, lengthMin=defaultLengthMin, lengthMax=defaultLengthMax) {
alns <- getAlignments(bamfile, trxRgns$trx)
regionLengthDist <- function(rgn) {
alignLengths(alns[queryHits(findOverlaps(alns, rgn, type="within"))], lengthMin, lengthMax)
}
lapply( trxRgns, regionLengthDist )
}
calcTrxRegionLengthDists <- function(bamfile, trxBed, verbose=NA,
lengthMin=defaultLengthMin, lengthMax=defaultLengthMax, ...) {
ttl <- length(trxBed)
calcOne <- function(i) {
if (!is.na(verbose) && i %% verbose == 0) { cat(sprintf("%d\t%d\t%s\t%s\n", i, ttl, as.character(trxBed$name[i]), path(bamfile))) }
trxGR <- transcriptGRanges(trxBed[i])
cdsIR <- transcriptCdsIRanges(trxGR, start(trxBed$thick)[[i]], end(trxBed$thick)[[i]])
trxRegionLengthDists(bamfile, trxRegions(trxGR, cdsIR, ...), lengthMin, lengthMax)
}
ld <- mclapply(seq(1,ttl), calcOne)
names(ld) <- trxBed$name
ld
}
sliceRegionLengthDists <- function(trxLDist, region) {
df <- do.call(rbind.data.frame, lapply(trxLDist, function(r) { r[[region]] }))
colnames(df) <- names(trxLDist[[1]][[region]])
j=1
for (i in 1:length(names(trxLDist))){
if ( !is.null(trxLDist[[i]][[region]])) {
rownames(df)[j] = names(trxLDist)[i]
j=j+1
}
}
df
}
writeRegionLengthDists <- function(trxLDist, pfx) {
lapply(c("trx", "cds", "utr5", "utr3"),
function(rgn) { rld <- sliceRegionLengthDists(trxLDist, rgn)
write.csv(rld, sprintf("%s-%s-ldist.txt", pfx, rgn), row.names=T)
sum(rld)
} )
}
readLengthDists <- function( pfx ) {
read.csv(sprintf("%s-ldist.txt", pfx), row.names=1)
}
readRegionLengthDists <- function( pfx ) {
list( trx = readLengthDists(sprintf("%s-trx", pfx)),
cds = readLengthDists(sprintf("%s-cds", pfx)),
utr5 = readLengthDists(sprintf("%s-utr5", pfx)),
utr3 = readLengthDists(sprintf("%s-utr3", pfx)) )
}
flossLengthDist <- function( ldists, ldistRef ) {
floss <- apply(ldists, 1, function(l) { ldistDiff(l, ldistRef) })
nread <- apply(ldists, 1, function(l) { sum(l) })
ldists$Score <- floss
ldists$NRead <- nread
ldists
}
flossRegionLengthDists <- function( rgnLDists, ldistRef) {
list( trx = flossLengthDist( rgnLDists$trx, ldistRef ),
cds = flossLengthDist( rgnLDists$cds, ldistRef ),
utr5 = flossLengthDist( rgnLDists$utr5, ldistRef ),
utr3 = flossLengthDist( rgnLDists$utr3, ldistRef ) )
}
flossRegionScores <- function( rgnFloss ) {
floss <- data.frame( trxNRead = rgnFloss$trx$NRead,
trxScore = rgnFloss$trx$Score,
row.names = row.names(rgnFloss$trx) )
floss[row.names(rgnFloss$cds),"cdsNRead"] <- rgnFloss$cds$NRead
floss[row.names(rgnFloss$cds),"cdsScore"] <- rgnFloss$cds$Score
floss[row.names(rgnFloss$utr5),"utr5NRead"] <- rgnFloss$utr5$NRead
floss[row.names(rgnFloss$utr5),"utr5Score"] <- rgnFloss$utr5$Score
floss[row.names(rgnFloss$utr3),"utr3NRead"] <- rgnFloss$utr3$NRead
floss[row.names(rgnFloss$utr3),"utr3Score"] <- rgnFloss$utr3$Score
floss
}
flossCutoffs <- function(nreads, score) {
rollwidth <- 200
unsorted <- data.frame(nreads=nreads, score=score)
fr <- unsorted[order(unsorted$nreads),]
nr <- rollapply(fr$nreads, width=rollwidth, function(xs) { mean(xs, na.rm=T) })
medsc <- rollapply(fr$score, width=rollwidth, function(xs) { median(xs, na.rm=T) })
q1 <- rollapply(fr$score, width=rollwidth, function(xs) { quantile(xs, probs=0.25, na.rm=T) })
q3 <- rollapply(fr$score, width=rollwidth, function(xs) { quantile(xs, probs=0.75, na.rm=T) })
rawExtreme <- q3 + 3.0*(q3-q1)
extLoess <- loess(extreme ~ nread, data.frame(nread=log10(nr), extreme=rawExtreme))
classify <- function(nr, sc) {
extCutoff <- predict(extLoess, log10(nr))
factor(
if (is.na(extCutoff)) {
NA
} else if (sc > extCutoff) {
"Extreme"
} else {
"Good"
},
levels=c("Good", "Extreme")
)
}
list( l10nreads = log10(nr),
scoreMedian = medsc, scoreQ3 = q3, scoreExtreme = rawExtreme,
extremeLoess = extLoess,
isExtreme = function(nr, sc) { sc > predict(extLoess, log10(nr)) },
classify = Vectorize(classify)
)
}
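## Minimal usage sketch (illustrative numbers, not from this dataset): fit the
## cutoffs on reference CDS scores, then classify any region by depth and score.
## The cutoff at a given depth is Q3 + 3*IQR of the reference scores in a
## 200-transcript rolling window, smoothed by loess over log10(reads).
## co <- flossCutoffs(pcoding$cdsNRead, pcoding$cdsScore)
## co$classify(c(150, 2000), c(0.45, 0.12)) # e.g. "Extreme", "Good"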
library("GenomicRanges")
library("Rsamtools")
library("GenomicAlignments")
## Sample asiteOffsets
## asites <- data.frame(asite=c(14,14,14,14,15,15), row.names=seq(26,31))
## asite
## 26 14
## 27 14
## 28 14
## 29 14
## 30 15
## 31 15
# Using a data frame of A site offsets, convert a GAlignments of footprint
# alignments into a GRanges of A site nucleotides still in genomic coordinates.
alignASites <- function(asiteOffsets, alns) {
alnLengths <- qwidth(alns)
alnAOffs <- asiteOffsets[as.character(alnLengths), "asite"]
## e.g., 30mer runs 101 to 130 and offset is +14
## (+) strand, A site is 115
## start = 15 = offset + 1
## (-) strand, A site is 116
## offset from start is 30 - 14 = 16
alnDelta <- ifelse(strand(alns) == "-", alnLengths - alnAOffs, alnAOffs + 1)
alnsGood <- alns[!is.na(alnDelta)]
alnDelta <- alnDelta[!is.na(alnDelta)]
if (length(alnsGood) > 0) {
qnarrow(alnsGood, start = alnDelta, width = 1)
} else {
GRanges()
}
}
getAlignASites <- function(asiteOffsets, bamfile, trx) {
alignASites(asiteOffsets, getAlignments(bamfile, trx))
}
getASiteProfile <- function(asiteOffsets, bamfile, trx) {
asites <- getAlignASites(asiteOffsets, bamfile, trx)
if (length(asites) > 0) {
tabulate( relativeWithin(start(asites), trx),
nbins = sum(width(trx)) )
} else {
tabulate( as.integer(c()), nbins = sum(width(trx)) )
}
}
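## Usage sketch (assumes an indexed BAM and a transcript exon GRanges trxGR;
## "aln.bam" is a hypothetical path, offsets as in the sample table above):
## asites <- data.frame(asite=c(14,14,14,14,15,15), row.names=seq(26,31))
## prof <- getASiteProfile(asites, BamFile("aln.bam"), trxGR)
## prof is then a vector of A-site counts per transcript-local position.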
# Insets in nucleotides to avoid footprint pileups at the start and stop codons
defaultInsets <- list( utr5Inset3 = 6, cdsInset5 = 45, cdsInset3 = 15, utr3Inset5 = 6 )
zeroInsets <- list( utr5Inset3 = 0, cdsInset5 = 0, cdsInset3 = 0, utr3Inset5 = 0 )
trxRegionCountSizes <- function(insets, trx, cds) {
sizes <- list( trx = sum(width(trx)) )
if (length(cds) == 1) {
cdsQStart <- start(cds) + insets$cdsInset5
cdsQEnd <- end(cds) - insets$cdsInset3
sizes$cds <- if (cdsQEnd < cdsQStart) { NA } else { 1 + cdsQEnd - cdsQStart }
utr5QEnd <- start(cds) - (insets$utr5Inset3 + 1)
sizes$utr5 <- if (utr5QEnd < 0) { NA } else { 1 + utr5QEnd }
utr3QStart <- end(cds) + (insets$utr3Inset5 + 1)
sizes$utr3 <- if (utr3QStart >= sum(width(trx))) { NA } else { 1 + sum(width(trx)) - utr3QStart }
} else {
sizes$cds <- NA
sizes$utr5 <- NA
sizes$utr3 <- NA
}
sizes
}
trxRegionCountAligns <- function(asiteOffsets, insets, bamfile, trx, cds) {
genomicAsites <- getAlignASites(asiteOffsets, bamfile, trx)
if (length(genomicAsites) > 0) {
asites <- relativeWithin(start(genomicAsites), trx)
counts <- list( trx = sum(!is.na(asites)) )
if ( length(cds) == 1 ) {
cdsQStart <- start(cds) + insets$cdsInset5
cdsQEnd <- end(cds) - insets$cdsInset3
counts$cds <- if (cdsQEnd < cdsQStart) { NA } else { sum(asites >= cdsQStart & asites <= cdsQEnd, na.rm=TRUE) }
utr5QEnd <- start(cds) - (insets$utr5Inset3 + 1)
counts$utr5 <- if (utr5QEnd < 0) { NA } else { sum(asites <= utr5QEnd, na.rm=TRUE) }
utr3QStart <- end(cds) + (insets$utr3Inset5 + 1)
counts$utr3 <- if (utr3QStart >= sum(width(trx))) { NA } else { sum(asites >= utr3QStart, na.rm=TRUE) }
} else {
counts$cds <- NA
counts$utr5 <- NA
counts$utr3 <- NA
}
} else {
counts <- list( trx = 0 )
if ( length(cds) == 1) {
counts$cds <- 0
counts$utr5 <- 0
counts$utr3 <- 0
} else {
counts$cds <- NA
counts$utr5 <- NA
counts$utr3 <- NA
}
}
counts
}
countAligns <- function(asiteOffsets, insets, bamfile, trxBed) {
ttl <- length(trxBed)
countOne <- function(i) {
if (i %% 100 == 0) { print(c(i, ttl, as.character(trxBed$name[i]))) }
trxGR <- transcriptGRanges(trxBed[i])
cdsIR <- transcriptCdsIRanges(trxGR, start(trxBed$thick)[[i]], end(trxBed$thick)[[i]])
trxRegionCountAligns(asiteOffsets, insets, bamfile, trxGR, cdsIR)
}
counts <- mclapply(seq(1,ttl), countOne)
names(counts) <- trxBed$name
counts
}
countSizes <- function(insets, trxBed) {
countOne <- function(i) {
trxGR <- transcriptGRanges(trxBed[i])
cdsIR <- transcriptCdsIRanges(trxGR, start(trxBed$thick)[[i]], end(trxBed$thick)[[i]])
trxRegionCountSizes(insets, trxGR, cdsIR)
}
sizes <- mclapply(seq(1,length(trxBed)), countOne)
names(sizes) <- trxBed$name
sizes
}
regionCountFrame <- function(counts) {
data.frame(trx = unlist(lapply(counts, function(r) { r$trx })),
cds = unlist(lapply(counts, function(r) { r$cds })),
utr5 = unlist(lapply(counts, function(r) { r$utr5 })),
utr3 = unlist(lapply(counts, function(r) { r$utr3 })),
row.names = names(counts))
}
library("Rsamtools")
library("biomaRt")
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="uswest.ensembl.org")
annots <- getBM(attributes=c("ensembl_gene_id", "ensembl_transcript_id", "gene_biotype", "transcript_biotype",
"chromosome_name", "start_position", "end_position", "strand", "description", "hgnc_symbol"),
mart=ensembl)
colnames(annots)[length(colnames(annots))] <- "external_gene_name"
granges <- GRanges(seqnames = annots$chromosome_name,
ranges=IRanges(start = annots$start_position, end = annots$end_position),
strand = ifelse(annots$strand < 0, "-", "+"))
snornaGR <- granges[annots$transcript_biotype == "snoRNA",]
snohgGenes <- annots[queryHits(findOverlaps(granges, snornaGR, select="all")),"ensembl_gene_id"]
snohgIds <- annots[annots$ensembl_gene_id %in% snohgGenes,"ensembl_transcript_id"]
snrnaGR <- granges[annots$transcript_biotype == "snRNA",]
snhgGenes <- annots[queryHits(findOverlaps(granges, snrnaGR, select="all")),"ensembl_gene_id"]
snhgIds <- annots[annots$ensembl_gene_id %in% snhgGenes,"ensembl_transcript_id"]
miscGenes <- annots[annots$transcript_biotype == "misc_RNA", "ensembl_gene_id"]
miscIds <- annots[annots$ensembl_gene_id %in% miscGenes ,"ensembl_transcript_id"] # & !grepl("^Gm", annots$external_gene_id)
nonpcodingGR <- granges[annots$gene_biotype != "protein_coding"]
pcodingAnnots <- annots[annots$gene_biotype == "protein_coding",]
pcodingGR <- granges[annots$gene_biotype == "protein_coding"]
pcodingGR <- resize(pcodingGR, width=(width(pcodingGR) + 10000), fix="center")
dirtyPCodingGenes <- pcodingAnnots[queryHits(findOverlaps(pcodingGR, nonpcodingGR, select="all", ignore.strand=T)),"ensembl_gene_id"]
dirtyPCodingIds <- annots[annots$ensembl_gene_id %in% dirtyPCodingGenes,"ensembl_transcript_id"]
codingIds <- annots[annots$transcript_biotype == "protein_coding","ensembl_transcript_id"]
mitoIds <- annots[annots$chromosome_name == "MT","ensembl_transcript_id"]
lincIds <- annots[annots$gene_biotype == "lincRNA","ensembl_transcript_id"]
refCodingIds <- codingIds[!(codingIds %in% mitoIds) & !(codingIds %in% dirtyPCodingIds)]
mitoCodingIds <- codingIds[codingIds %in% mitoIds]
cleanLincIds <- lincIds[!(lincIds %in% snohgIds | lincIds %in% snhgIds | lincIds %in% miscIds)]
snohgLincIds <- lincIds[lincIds %in% snohgIds]
library("rtracklayer")
library("Rsamtools")
library("parallel")
library("biomaRt")
library("GenomicRanges")
source("./src/FLOSS_score/transcript-utils.R")
source("./src/FLOSS_score/footprint-assignment.R")
source("./src/FLOSS_score/floss.R")
source("./src/FLOSS_score/mouse-annotation.R")
options(mc.cores=8)
# Annotation file in BED format with no redundant transcripts
BedFile <- "data/wgEncodeGencodeBasicVM24_chr.bed"
## BAM files need to be sorted and indexed
chxBamFile <- BamFile("data/RibosomeProfiling_sort.bam")
aoffsets <- data.frame(asite=c(11,11,11,11,12,12), row.names=seq(26,31)) # offset determined by Ribocode software
# for Samples
Bed <- import(BedFile, format="BED")
Bed$name <- gsub("\\.[[:digit:]]*", "", Bed$name) # strip the transcript version suffix
Size <- regionCountFrame(countSizes(defaultInsets, Bed))
write.csv(Size, "results/sizes.txt", row.names=TRUE)
Chx <- regionCountFrame(countAligns(asiteOffsets = aoffsets, insets = defaultInsets, bamfile = chxBamFile, trxBed = Bed))
write.csv(Chx, "results/Chx.txt", row.names=TRUE)
chxLDists <- calcTrxRegionLengthDists(bamfile = chxBamFile, trxBed = Bed)
writeRegionLengthDists(chxLDists, "results/chxLDists")
###########
chxLDists <- readRegionLengthDists("results/chxLDists")
chxRef <- colSums(chxLDists$cds[refCodingIds,], na.rm = TRUE)
write.csv(chxRef, "results/chx-ref-ldist.txt", row.names = TRUE)
chxLDFloss <- flossRegionLengthDists( chxLDists, chxRef )
chxFloss <- flossRegionScores( chxLDFloss )
write.csv(chxFloss, "results/chx-floss.txt", row.names=TRUE)
library("biomaRt")
library("GenomicRanges")
library("boot")
source("./src/FLOSS_score/transcript-utils.R")
source("./src/FLOSS_score/footprint-assignment.R")
source("./src/FLOSS_score/floss.R")
source("./src/FLOSS_score/plot-utils.R")
source("./src/FLOSS_score/mouse-annotation.R")
cdist <- function(ldists) { normLDist(colSums(ldists, na.rm=TRUE)) }
lengthLevels <- seq(defaultLengthMin, defaultLengthMax)
RefLDist <- normLDist(read.csv("results/chx-ref-ldist.txt", check.names=F, row.names=1)[,1])
chxLDists <- readRegionLengthDists("results/chxLDists")
rownames(chxLDists$cds) <- gsub("\\.[[:digit:]]*","" ,rownames(chxLDists$cds) )
rownames(chxLDists$utr5) <- gsub("\\.[[:digit:]]*","" ,rownames(chxLDists$utr5) )
rownames(chxLDists$utr3) <- gsub("\\.[[:digit:]]*","" ,rownames(chxLDists$utr3) )
rownames(chxLDists$trx) <- gsub("\\.[[:digit:]]*","" ,rownames(chxLDists$trx) )
LDistCdses <- chxLDists$cds[rownames(chxLDists$cds) %in% refCodingIds,]
LDistUtr5s <- chxLDists$utr5[row.names(chxLDists$utr5) %in% refCodingIds,]
LDistLincs <- chxLDists$trx[row.names(chxLDists$trx) %in% cleanLincIds,]
LDistMitoCdses <- chxLDists$cds[row.names(chxLDists$cds) %in% mitoCodingIds,] # mitochondrial CDS set (kept distinct so the reference LDistCdses above is not overwritten)
CdsDist <- cdist(LDistCdses)
pdf("./results/examples-rplot.pdf")
plot(lengthLevels, CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
xlim=c(26,34), ylim=c(0,0.6), xlab="Fragment Length", main = "LincRNA")
lines(lengthLevels, cdist(LDistLincs), type="l", col=solarMagenta, lwd=2)
plot(lengthLevels, CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
xlim=c(26,34), ylim=c(0,0.5), xlab="Fragment Length", main = "5' UTR")
lines(lengthLevels, cdist(LDistUtr5s), type="l", col=solarRed, lwd=2)
dev.off()
# Write data tables
# write.csv(hg38_NY5RefCodingCdses, "results/10_FLOSS_score/tables/ldist-hg38_NY5-refcoding-cds.txt")
# write.csv(hg38_NY5Terc, "results/10_FLOSS_score/tables/ldist-hg38_NY5-terc-trx.txt")
# write.csv(hg38_NY5Vault, "results/10_FLOSS_score/tables/ldist-hg38_NY5-vault-trx.txt")
# write.csv(hg38_NY5MitoCdses, "results/10_FLOSS_score/tables/ldist-hg38_NY5-mito-cds.txt")
# write.csv(hg38_NY5Snhg5, "results/10_FLOSS_score/tables/ldist-hg38_NY5-snhg5-trx.txt")
# write.csv(Lincs, "results/10_FLOSS_score/tables/ldist-hg38_NY5-linc-trx.txt")
# write.csv(hg38_NY5RefCodingUtr5s, "results/10_FLOSS_score/tables/ldist-hg38_NY5-refcoding-utr5.txt")
# write.csv(hg38_NY5HIVCDS, "results/10_FLOSS_score/tables/ldist-hg38_NY5-hiv-cds.txt")
# write.csv(hg38_NY5HIVLTR, "results/10_FLOSS_score/tables/ldist-hg38_NY5-hiv-ltr.txt")
#
# write.csv(rbind(refCodingCdses = cdist(hg38_NY5RefCodingCdses),
# cleanLincs = cdist(hg38_NY5Lincs),
# refCodingUtr5s = cdist(hg38_NY5RefCodingUtr5s),
# terc = cdist(hg38_NY5Terc),
# vault = cdist(hg38_NY5Vault),
# snhg5 = cdist(hg38_NY5Snhg5),
# hivCds = cdist(hg38_NY5HIVCDS),
# hivLtr = cdist(hg38_NY5HIVLTR)),
# "results/FLOSS_score/tables/ldist-hg38_NY5.txt")
library("Rsamtools")
library("biomaRt")
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="mmusculus_gene_ensembl", host="uswest.ensembl.org")
annots <- getBM(attributes=c("ensembl_gene_id", "ensembl_transcript_id", "gene_biotype", "transcript_biotype",
"chromosome_name", "start_position", "end_position", "strand", "description", "external_gene_name"),
mart=ensembl)
granges <- GRanges(seqnames = annots$chromosome_name,
ranges=IRanges(start = annots$start_position, end = annots$end_position),
strand = ifelse(annots$strand < 0, "-", "+"))
snornaGR <- granges[annots$gene_biotype == "snoRNA",]
snohgGenes <- annots[queryHits(findOverlaps(granges, snornaGR, select="all")),"ensembl_gene_id"]
snohgIds <- annots[annots$ensembl_gene_id %in% snohgGenes,"ensembl_transcript_id"]
nonlincGR <- granges[annots$gene_biotype != "lincRNA",]
lincAnnots <- annots[annots$gene_biotype == "lincRNA",]
lincGR <- granges[annots$gene_biotype == "lincRNA",]
lincGR <- resize(lincGR, width=(width(lincGR) + 10000), fix="center")
dirtyLincGenes <- lincAnnots[queryHits(findOverlaps(lincGR, nonlincGR, select="all", ignore.strand=T)),"ensembl_gene_id"]
dirtyLincIds <- annots[annots$ensembl_gene_id %in% dirtyLincGenes,"ensembl_transcript_id"]
nonpcodingGR <- granges[annots$gene_biotype != "protein_coding"]
pcodingAnnots <- annots[annots$gene_biotype == "protein_coding",]
pcodingGR <- granges[annots$gene_biotype == "protein_coding"]
pcodingGR <- resize(pcodingGR, width=(width(pcodingGR) + 10000), fix="center")
dirtyPCodingGenes <- pcodingAnnots[queryHits(findOverlaps(pcodingGR, nonpcodingGR, select="all", ignore.strand=T)),"ensembl_gene_id"]
dirtyPCodingIds <- annots[annots$ensembl_gene_id %in% dirtyPCodingGenes,"ensembl_transcript_id"]
codingIds <- annots[annots$transcript_biotype == "protein_coding","ensembl_transcript_id"]
mitoIds <- annots[annots$chromosome_name == "MT","ensembl_transcript_id"]
lincIds <- annots[annots$gene_biotype == "lincRNA","ensembl_transcript_id"]
miscIds <- annots[annots$transcript_biotype == "misc_RNA" & !grepl("^Gm", annots$external_gene_name),"ensembl_transcript_id"]
refCodingIds <- codingIds[!(codingIds %in% mitoIds) & !(codingIds %in% dirtyPCodingIds)]
mitoCodingIds <- codingIds[codingIds %in% mitoIds]
cleanLincIds <- lincIds[!(lincIds %in% dirtyLincIds)]
solarGrey01 <- rgb(88,110,117,maxColorValue=255)
solarBlack <- rgb(0, 43, 54, maxColorValue=255)
solarMagenta <- rgb(211,54,130,maxColorValue=255)
solarRed <- rgb(220,50,47,maxColorValue=255)
solarOrange <- rgb(203, 75, 22, maxColorValue=255)
solarYellow <- rgb(181,137,0,maxColorValue=255)
solarGreen <- rgb(133,153,0,maxColorValue=255)
solarCyan <- rgb(42,161,152,maxColorValue=255)
solarBlue <- rgb(38,139,210,maxColorValue=255)
solarViolet <- rgb(108,113,196,maxColorValue=255)
lighten <- function(corig,lfact=0.6) {
crgb <- col2rgb(corig)
clight <- 255 - ((255 - crgb) * lfact)
rgb(clight["red",], clight["green",], clight["blue",], maxColorValue=255)
}
minor_decade <- function(d) { seq(2*d, 9*d, d) }
library("IRanges")
library("GenomicRanges")
library("Rsamtools")
# Convert an absolute genomic query position (qpos) to a
# transcript-relative coordinate position within a transcript whose
# coordinates are given by a GRanges of exons (outer). NOTE that the
# exons in outer are in ascending numeric order regardless of the
# strand.
relativeWithin <- function(qpos, outer) {
outerOffsets <- start(outer)
withinOffsets <- head(c(0, cumsum(width(outer))), -1)
startHits <- findOverlaps(IRanges(start=qpos, width=1), ranges(outer), select="first")
within <- (qpos - outerOffsets[startHits]) + withinOffsets[startHits]
if (as.vector(strand(outer))[1] == "-") {
within <- (sum(width(outer)) - 1) - within
}
within
}
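# Example: for a (+)-strand transcript with exons 101-110 and 201-210,
# genomic position 205 lies 4 bases into the second exon, so
# relativeWithin(205, GRanges("chr1", IRanges(c(101, 201), c(110, 210)), "+"))
# returns 10 + 4 = 14 (a 0-based transcript coordinate).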
# Convert a transcript-relative query position (qpos) to an absolute
# genomic coordinate based on a transcript whose coordinates are given
# by a GRanges of exons (outer). NOTE that the exons are
# in ascending numeric order regardless of the strand.
absoluteOuter <- function(qpos, outer) {
outerOffsets <- start(outer)
withinOffsets <- head(c(0, cumsum(width(outer))), -1)
inner <- IRanges(start=withinOffsets, width=width(outer))
if (as.vector(strand(outer))[1] == "-") {
qpos <- (sum(width(outer)) - 1) - qpos
}
startHits <- findOverlaps(IRanges(start=qpos, width=1), inner, select="first")
(qpos - withinOffsets[startHits]) + outerOffsets[startHits]
}
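# Inverse of the example above: absoluteOuter(14, <the same exons>) returns 205.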
# Convert transcript-relative query IRanges (qranges) to an absolute
# genomic GRanges based on a transcript whose coordinate are given by
# a GRanges of exons (outer).
irangeOuter <- function(qranges, outer) {
starts <- absoluteOuter(start(qranges), outer)
ends <- absoluteOuter(end(qranges), outer)
if (as.vector(strand(outer))[1] == "-") {
masks <- GRanges( seqnames = as.vector(seqnames(outer))[[1]],
ranges = IRanges( start = ends, end = starts ),
strand = "-" )
} else {
masks <- GRanges( seqnames = as.vector(seqnames(outer))[[1]],
ranges = IRanges( start = starts, end = ends ),
strand = "+" )
}
intersect(masks, outer)
}
# relativeWithin(qpos) = relativeWithin(absoluteOuter(relativeWithin(qpos)))
# absoluteOuter(qpos) = absoluteOuter(relativeWithin(absoluteOuter(qpos)))
# relativeWithin(qpos) and absoluteOuter(qpos) may be NA.
# bedGRange should be a 1-element GRanges slice.
# The metadata columns should include a "blocks" entry as from an rtracklayer BED file.
# A GRanges object is returned with one range per exon.
# NOTE that the exons are in ascending numeric order regardless of the transcript strand.
transcriptGRanges <- function(bedGRange) {
blocks <- mcols(bedGRange)$blocks[[1]]
exons <- shift(blocks, shift = start(bedGRange)[[1]] - 1)
sn <- seqnames(bedGRange)[1]
runLength(sn) <- length(exons)
st <- strand(bedGRange)[1]
runLength(st) <- length(exons)
GRanges(seqnames = sn,
ranges = exons,
strand = st)
}
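# Sketch (assumes a BED import as in the driver scripts; "annots.bed" is a
# hypothetical path):
# bed <- import("annots.bed", format = "BED")
# trxGR <- transcriptGRanges(bed[1]) # one range per exon of the first transcript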
# trx is a transcript GRanges with one range per exon, and thickStart
# & thickEnd are the genomic start and end of the coding sequence,
# e.g. taken from start(...) and end(...) on the $thick IRanges from a
# bed file.
# Return an IRanges for the CDS in transcript-local coordinates.
transcriptCdsIRanges <- function(trx, thickStart, thickEnd) {
startWithin <- relativeWithin(thickStart, trx)
endWithin <- relativeWithin(thickEnd, trx)
if (is.na(startWithin) || is.na(endWithin)) {
IRanges()
} else {
IRanges( start = min( startWithin, endWithin ), end = max( startWithin, endWithin ) )
}
}
# For each transcript in a BED file and the associated transcript
# GRanges as computed by bedGRangesList, find the transcript-local CDS
# as per transcriptCdsIRanges and collect these as an IRangesList.
bedCdsIRangesList <- function(bed, bedgrl) {
irl <- IRangesList(mclapply(1:length(bed), function(i) { transcriptCdsIRanges(bedgrl[[i]], start(bed$thick)[[i]], end(bed$thick)[[i]] ) } ))
names(irl) <- bed$name
irl
}
# For each transcript in a BED file GRanges, get the GRanges of its
# exons as per transcriptGRanges and collect these into a GRangesList
# with names taken from the name metadata column of the BED file
# GRanges.
bedGRangesList <- function(bed) {
grl <- GRangesList(mclapply(1:length(bed), function(i) { transcriptGRanges(bed[i]) }))
names(grl) <- bed$name
grl
}
# Calculate a GRangesList with named GRanges giving genomic
# coordinates of transcript regions: trx for the entire transcript,
# cds for the coding sequence, utr5 for the 5' UTR, and utr3 for the
# 3' UTR.
trxRegions <- function(trx, cds,
insetUtr5Start=0, insetUtr5End=0,
insetCdsStart=0, insetCdsEnd=0,
insetUtr3Start=0, insetUtr3End=0) {
rgns <- list( trx = trx )
if (length(cds) == 1) {
utr5Start <- insetUtr5Start
utr5End <- (start(cds) - 1) - insetUtr5End
cdsStart <- start(cds) + insetCdsStart
cdsEnd <- end(cds) - insetCdsEnd
utr3Start <- (end(cds) + 1) + insetUtr3Start
utr3End <- (sum(width(trx)) - 1) - insetUtr3End
if (cdsStart < cdsEnd) {
rgns$cds <- irangeOuter( IRanges( start = cdsStart, end = cdsEnd ), trx )
}
if (utr5Start < utr5End) {
rgns$utr5 <- irangeOuter( IRanges( start = utr5Start, end = utr5End ), trx )
}
if (utr3Start < utr3End) {
rgns$utr3 <- irangeOuter( IRanges( start = utr3Start, end = utr3End ), trx )
}
}
rgns
}
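# E.g., mirroring defaultInsets from footprint-assignment.R, trim the CDS by
# 45 nt at its 5' end and 15 nt at its 3' end before extracting regions:
# rgns <- trxRegions(trxGR, cdsIR, insetCdsStart = 45, insetCdsEnd = 15)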
# GAlignments containing all reads overlapping the genomic extent of a
# primary transcript on the correct strand, including unspliced and
# purely intronic reads.
getAllAlignments <- function(bamfile, trx) {
range <- GRanges(seqnames = seqnames(trx)[1],
ranges = IRanges(start = min(start(trx)), end = max(end(trx))))
isminus <- (as.vector(strand(trx))[1] == "-")
readGAlignments(bamfile, param=ScanBamParam(which = range, flag=scanBamFlag(isMinusStrand = isminus)))
}
# GAlignments containing all reads compatible with a processed
# transcript on the correct strand, as per findSpliceOverlaps.
getAlignments <- function(bamfile, trx) {
alns <- getAllAlignments(bamfile, trx)
spliceHits <- findSpliceOverlaps(alns, GRangesList(trx))
compatibles <- spliceHits[mcols(spliceHits)$compatible]
alns[queryHits(compatibles)]
}
# DNAString representing the transcript-strand sequence of a transcript (trx)
getSequence <- function(fafile, trx) {
gseq <- unlist(scanFa(fafile, trx))
if (as.vector(strand(trx))[1] == "-") {
reverseComplement(gseq)
} else {
gseq
}
}
profiles {
sge {
process{
withName: trimming {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/cutadapt_2.1"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
}
withName: rRNA_removal {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 16
memory = "30GB"
time = "24h"
queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
penv = 'openmp16'
}
withName: hisat2_human {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "hisat2/2.1.0:samtools/1.7"
executor = "sge"
clusterOptions = "-cwd -V"
memory = "20GB"
cpus = 16
time = "12h"
queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
penv = 'openmp16'
}
withName: bam_to_bigwig {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "deeptools/3.0.2"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 16
memory = "30GB"
time = "24h"
queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
penv = 'openmp16'
}
}
}
docker {
docker.temp = 'auto'
docker.enabled = true
process {
withName: adaptor_removal {
container = "lbmc/cutadapt:2.1"
cpus = 1
}
withName: rRNA_removal {
container = "lbmc/bowtie2:2.3.4.1"
cpus = 4
}
withName: hisat2_human {
cpus = 4
container = "lbmc/hisat2:2.1.0"
}
withName: index_bam {
container = "lbmc/samtools:1.7"
cpus = 4
}
withName: bam_to_bigwig {
container = "lbmc/deeptools:3.0.2"
cpus = 4
}
withName: counting {
container = "lbmc/htseq:0.11.2"
cpus = 1
}
}
}
}
/*
* RibosomeProfiling Analysis pipeline
*/
/* Trimming */
params.output = "results"
params.fastq_raw = "${params.output}/00_demultiplexing/*.fastq.gz"
Channel
.fromPath( params.fastq_raw )
.ifEmpty { error "Cannot find any files matching: ${params.fastq_raw}" }
.map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
.set { fastq_raw_flow }
process trimming {
tag "$file_id"
input:
set file_id, file(fastq_raw) from fastq_raw_flow
output:
set file_id, "*_cut.fastq.gz" into fastq_trim_filt
file "*.txt" into log_trim
script:
"""
cutadapt -a AAAAAAAAAAAAAAAAAAAAAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG -m 15\
-u 2 -o ${file_id}_cut.fastq.gz \
${fastq_raw} > ${file_id}_report.txt
"""
}
/* rRNA and tRNA filtering */
params.indexrRNA = "results/human_rRNA_tRNA/*.bt2"
log.info "index files rRNA : ${params.indexrRNA}"
Channel
.fromPath( params.indexrRNA )
.ifEmpty { error "Cannot find any index files matching: ${params.indexrRNA}" }
.set { rRNA_index_files }
process rRNA_removal {
tag "$file_id"
publishDir "${params.output}/02_rRNA_depletion/", mode: 'copy'
input:
set file_id, file(reads) from fastq_trim_filt
file index from rRNA_index_files.toList()
output:
set file_id, "*.fastq.gz" into rRNA_removed_reads
file "*.txt" into bowtie_report
script:
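// Derive the bowtie2 index basename: pick the "<name>.1.bt2" file among the
// staged index files, skipping the reverse index "<name>.rev.1.bt2".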
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
}
}
"""
zcat ${reads} | bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \
-U - --un-gz ${file_id}_mRNA.fastq.gz 2> \
${file_id}_bowtie2_report.txt > /dev/null
if grep -q "Error " ${file_id}_bowtie2_report.txt; then
exit 1
fi
"""
}
/* mapping against human genome with hisat2 */
params.index_hg38 = "/media/adminmanu/Stockage/HISAT2_index_hg38_tran/*.ht2"
log.info "index : ${params.index_hg38}"
Channel
.fromPath ( params.index_hg38 )
.ifEmpty { error "Cannot find any hg38 index files matching: ${params.index_hg38}" }
.set { index_file_hg38 }
process hisat2_human {
tag "$file_id"
input:
set file_id, file(fastq_filtred) from rRNA_removed_reads
file index from index_file_hg38.toList()
output:
set file_id, "*.fastq.gz" into reads_non_aligned_hg38
set file_id, "*.bam" into reads_aligned_hg38
file "*.txt" into hisat_report
script:
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
}
}
"""
hisat2 -x ${index_id} -p ${task.cpus} \
-U ${fastq_filtred} --un-gz ${file_id}_notaligned.fastq.gz \
--end-to-end --rna-strandness 'F' \
2> ${file_id}_hisat2.txt | samtools view -bS -F 4 -o ${file_id}.bam -
"""
}
/* sorting and indexing */
process index_bam {
tag "$file_id"
publishDir "${params.output}/03_mapping/", mode: 'copy'
input:
set file_id, file(bam) from reads_aligned_hg38
file report from hisat_report
output:
set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files
file "*.log" into hisat_report_bis
script:
"""
samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
samtools index ${file_id}_sorted.bam
cat ${report} > ${file_id}.log
"""
}
sorted_bam_files.set{for_htseq}
/* HTseq */
params.gtf = "$baseDir/data/annotation/*.gtf"
log.info "gtf files : ${params.gtf}"
Channel
.fromPath( params.gtf )
.ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
.set { gtf_file }
process counting {
tag "$file_id"
publishDir "${params.output}/04_HTseq/", mode: 'copy'
input:
set file_id, file(bam) from for_htseq
file gtf from gtf_file.toList()
output:
file "*.count" into count_files
script:
"""
htseq-count ${bam[0]} ${gtf} \
--mode=union \
-a 10 \
-s yes \
-t exon \
-i gene_id \
-f bam \
> ${file_id}_exon.count
"""
}
profiles {
psmn {
singularity.enabled = true
singularity.cacheDir = "$baseDir/.singularity_psmn/"
singularity.runOptions = "--bind /Xnfs,/scratch"
process{
withName: fastqc_raw {
container = "lbmc/fastqc:0.11.5"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 8
penv = 'openmp8'
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: trimming {
container = "lbmc/cutadapt:2.1"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: fastqc_cut {
container = "lbmc/fastqc:0.11.5"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 8
penv = 'openmp8'
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: rRNA_removal {
container = "lbmc/bowtie2:2.3.4.1"
executor = "sge"
clusterOptions = "-cwd -V"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
penv = 'openmp8'
cpus = 8
}
withName: fastqc_filter {
container = "lbmc/fastqc:0.11.5"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 8
penv = 'openmp8'
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: hisat2_genome {
container = "lbmc/hisat2:2.1.0"
executor = "sge"
clusterOptions = "-cwd -V"
memory = "20GB"
cpus = 16
penv = 'openmp16'
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: calc_scalingFactor{
container = "lbmc/hisat2:2.1.0"
executor = "sge"
clusterOptions = "-cwd -V"
memory = "20GB"
cpus = 1
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: coverage {
container = "lbmc/deeptools:3.0.2"
executor = "sge"
clusterOptions = "-cwd -V"
memory = "20GB"
cpus = 16
penv = 'openmp16'
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: calc_scalingFactor_with_postgenome{
container = "lbmc/hisat2:2.1.0"
executor = "sge"
clusterOptions = "-cwd -V"
memory = "20GB"
cpus = 1
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: coverage_with_postgenome {
container = "lbmc/deeptools:3.0.2"
executor = "sge"
clusterOptions = "-cwd -V"
memory = "20GB"
cpus = 16
penv = 'openmp16'
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: fastqc_genome {
container = "lbmc/fastqc:0.11.5"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 8
penv = 'openmp8'
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: dedup_genome {
container = "lbmc/hisat2:2.1.0"
beforeScript = "source ~/.bashrc"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: hisat2_postgenome {
container = "lbmc/hisat2:2.1.0"
executor = "sge"
clusterOptions = "-cwd -V"
memory = "20GB"
cpus = 16
penv = 'openmp16'
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: dedup_postgenome {
container = "lbmc/hisat2:2.1.0"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: sort_bam {
container = "lbmc/hisat2:2.1.0"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 8
penv = 'openmp8'
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: counting {
container = "lbmc/htseq:0.11.2"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: multiqc {
container = "ewels/multiqc:1.9"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: rnaseq_qc {
module = "RNAseq-QC/2.3.6"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
withName: fastqc_postgenome {
container = "lbmc/fastqc:0.11.5"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 8
penv = 'openmp8'
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C'
}
}
}
docker {
docker.temp = 'auto'
docker.enabled = true
process {
withName: fastqc_raw {
container = "lbmc/fastqc:0.11.5"
cpus = 4
}
withName: fastqc_cut {
container = "lbmc/fastqc:0.11.5"
cpus = 4
}
withName: fastqc_filter {
container = "lbmc/fastqc:0.11.5"
cpus = 4
}
withName: trimming {
container = "lbmc/cutadapt:2.1"
cpus = 4
}
withName: rRNA_removal {
container = "lbmc/bowtie2:2.3.4.1"
cpus = 8
}
withName: hisat2_genome {
cpus = 8
container = "lbmc/hisat2:2.1.0"
}
withName: fastqc_genome {
container = "lbmc/fastqc:0.11.5"
cpus = 4
}
withName: dedup_genome {
container = "lbmc/hisat2:2.1.0"
cpus = 1
}
withName: sort_bam {
container = "lbmc/samtools:1.7"
cpus = 1
}
withName: counting {
container = "lbmc/htseq:0.11.2"
cpus = 1
}
withName: rnaseq_qc {
container = "gcr.io/broad-cga-aarong-gtex/rnaseqc:latest"
cpus = 1
}
withName: hisat2_postgenome {
cpus = 4
container = "lbmc/hisat2:2.1.0"
}
withName: dedup_postgenome {
container = "lbmc/hisat2:2.1.0"
cpus = 1
}
withName: fastqc_postgenome {
container = "lbmc/fastqc:0.11.5"
cpus = 4
}
withName: multiqc {
container = "lbmc/multiqc:1.7"
cpus = 1
}
withName: coverage {
container = "lbmc/deeptools:3.0.2"
cpus = 8
}
}
}
}
/*
* RNAseq Analysis pipeline
*/
//////////////////////////////////////////////////////
// PARAMETERS //
//////////////////////////////////////////////////////
///////////////////////
// PARAMS : FILE PATH //
///////////////////////
params.fastq_raw = "data/fastq/*{_R1,_R2}.fastq.gz"
params.output = "results"
params.filter = "data/filter/human_rRNA_tRNA/*.bt2"
params.index_genome = "data/genome/*.ht2"
params.gtf = "data/annotation/*.gtf"
params.gtf_collapse = "data/annotation/*.gtf"
params.index_postgenome = "data/post_genome/*.ht2"
/////////////////////
// PARAMS : OPTIONS //
/////////////////////
params.do_fastqc = true
params.do_dedup = true
params.do_postgenome = true
///////////////////////
// LIBRARIES OPTIONS //
///////////////////////
params.adaptorR1 = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"
params.adaptorR2 = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"
params.strand = "FR"
//////////////
// LOG INFO //
//////////////
log.info "input raw : ${params.fastq_raw}"
log.info "outut directory : ${params.output}"
log.info "filter index files : ${params.filter}"
log.info "genome index : ${params.index_genome}"
log.info "gtf file : ${params.gtf}"
log.info "collapsed gtf file for rnaseqc: ${params.gtf_collapse}"
log.info "post-genome index : ${params.index_postgenome}"
log.info ""
log.info "do fastqc ? : ${params.do_fastqc}"
log.info "do deduplication ? : ${params.do_dedup}"
log.info "do post genome alignement ? : ${params.do_postgenome}"
log.info ""
//////////////
// CHANNELS //
//////////////
Channel
.fromFilePairs(params.fastq_raw)
.ifEmpty { error "Cannot find any file matching: ${params.fastq_raw}" }
.into {INPUT_FASTQC_RAW;
INPUT_CUTADAPT}
Channel
.fromPath( params.filter )
.ifEmpty { error "Cannot find any index files matching: ${params.filter}" }
.set { FILTER_INDEX }
Channel
.fromPath ( params.index_genome )
.ifEmpty { error "Cannot find any index files matching: ${params.index_genome}" }
.set { GENOME_INDEX }
Channel
.fromPath( params.gtf )
.ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
.set { GTF_FILE }
Channel
.fromPath( params.gtf_collapse )
.ifEmpty { error "Cannot find any gtf file matching: ${params.gtf_collapse}" }
.set { GTF_COLLAPSE }
Channel
.fromPath ( params.index_postgenome )
.ifEmpty { error "Cannot find any index files matching: ${params.index_postgenome}" }
.set { POSTGENOME_INDEX }
//////////////////////////////////////////////////////
// PROCESS //
//////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
//////////////////////////// PRE PROCESS ///////////////////////////////
////////////////////////////////////////////////////////////////////////
/////////////////////////
/* Fastqc of raw input */
/////////////////////////
process fastqc_raw {
tag "$file_id"
publishDir "${params.output}/00_fastqc/raw/", mode: 'copy'
input:
set file_id, file(reads) from INPUT_FASTQC_RAW
output:
file "*_fastqc.{html,zip}" into OUTPUT_FASTQC_RAW
when:
params.do_fastqc
"""
fastqc ${file_id}* -t ${task.cpus}
"""
}
///////////////////////
/* Trimming adaptors */
///////////////////////
process trimming {
tag "$file_id"
// publishDir "${params.output}/01_cutadapt/", mode: 'copy'
echo true
input:
set file_id, file(reads) from INPUT_CUTADAPT
output:
set file_id, "*cut_{R1,R2}.fastq.gz" into CUTADAPT_OUTPUT
file "*first_report.txt" into CUTADAPT_LOG
// file "*{second,third}_report.txt" into CUTADAPT_LOG_2
script:
"""
cutadapt -j ${task.cpus} \
-a ${params.adaptorR1} \
-A ${params.adaptorR2} \
-u -60 \
-U -60 \
-o ${file_id}_cut_R1.fastq.gz \
-p ${file_id}_cut_R2.fastq.gz \
--minimum-length 50 \
${reads[0]} ${reads[1]} > ${file_id}_first_report.txt
# cutadapt -j ${task.cpus} \
# -a ${params.adaptorR1} \
# -A ${params.adaptorR2} \
# -o ${file_id}_tmp_R1.fastq.gz \
# -p ${file_id}_tmp_R2.fastq.gz \
# --minimum-length 70 \
# ${reads[0]} ${reads[1]} > ${file_id}_first_report.txt
# cutadapt -j ${task.cpus} \
# -a "A{100}" \
# -o ${file_id}_cut_R1.fastq.gz \
# ${file_id}_tmp_R1.fastq.gz \
# > ${file_id}_second_report.txt
# cutadapt -j ${task.cpus} \
# -u -60 \
# -o ${file_id}_cut_R2.fastq.gz \
# ${file_id}_tmp_R2.fastq.gz \
# > ${file_id}_third_report.txt
"""
}
CUTADAPT_OUTPUT.into{CUTADAPT_OUTPUT_FASTQC;
CUTADAPT_OUTPUT_FILTER}
/////////////////////////
/* Fastqc of trimmed reads */
/////////////////////////
process fastqc_cut {
tag "$file_id"
publishDir "${params.output}/00_fastqc/cut/", mode: 'copy'
input:
set file_id, file(reads) from CUTADAPT_OUTPUT_FASTQC
output:
file "*.{html,zip}" into OUTPUT_FASTQC_CUT
when:
params.do_fastqc
"""
fastqc ${file_id}* -t ${task.cpus}
"""
}
/////////////////////////////
/* rRNA and tRNA filtering */
/////////////////////////////
process rRNA_removal {
tag "$file_id"
// publishDir "${params.output}/02_rRNA_depletion/", mode: 'copy'
input:
set file_id, file(reads) from CUTADAPT_OUTPUT_FILTER
file index from FILTER_INDEX.toList()
output:
set file_id, "*.fastq.gz" into FILTER_FASTQ
set file_id, "*.bam*" into FILTER_BAM
file "*.{txt,stats}" into FILTER_LOG
script:
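// Derive the bowtie2 index basename: pick the "<name>.1.bt2" file among the
// staged index files, skipping the reverse index "<name>.rev.1.bt2".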
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
}
}
"""
bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \
-1 ${reads[0]} -2 ${reads[1]} --no-unal \
--un-conc-gz ${file_id}_R%.fastq.gz 2> \
${file_id}_filter.txt | samtools view -bS - \
| samtools sort -@ ${task.cpus} -o ${file_id}.filter.bam \
&& samtools index ${file_id}.filter.bam \
&& samtools idxstats ${file_id}.filter.bam > \
${file_id}.filter.stats
if grep -q "Error " ${file_id}_filter.txt; then
exit 1
fi
"""
}
FILTER_FASTQ.into{FILTER_FASTQ_FASTQC;
FILTER_FASTQ_HISAT;
FILTER_FASTQ_POSTGENOME
}
/////////////////////////////
/* Fastqc of filtered reads */
/////////////////////////////
process fastqc_filter {
tag "$file_id"
publishDir "${params.output}/00_fastqc/filter/", mode: 'copy'
input:
set file_id, file(reads) from FILTER_FASTQ_FASTQC
output:
set file_id, "*.{html,zip}" into OUTPUT_FASTQC_FILTER
when:
params.do_fastqc
"""
fastqc ${file_id}* -t ${task.cpus}
"""
}
///////////////////////////////////////////////////////////////////
//////////////////////////// GENOME ///////////////////////////////
///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////
/* mapping against human genome with hisat2 */
///////////////////////////////////////////////
process hisat2_genome {
tag "$file_id"
publishDir "${params.output}/03_hisat2/", mode: 'copy'
input:
set file_id, file(fastq_filtred) from FILTER_FASTQ_HISAT
file index from GENOME_INDEX.toList()
output:
set file_id, "*_notaligned_R{1,2}.fastq.gz" into HISAT_UNALIGNED
set file_id, "*.{bam,bam.bai}" into HISAT_ALIGNED
file "*.txt" into HISAT_LOG
script:
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
}
}
"""
hisat2 -x ${index_id} \
-p ${task.cpus} \
-1 ${fastq_filtred[0]} \
-2 ${fastq_filtred[1]} \
--un-conc-gz ${file_id}_notaligned_R%.fastq.gz \
--rna-strandness ${params.strand} \
--dta \
--no-softclip\
--trim3 1\
--trim5 1\
2> ${file_id}_genome.txt \
| samtools view -bS -F 4 - \
| samtools sort -@ ${task.cpus} -o ${file_id}.bam \
&& samtools index ${file_id}.bam
if grep -q "ERR" ${file_id}.txt; then
exit 1
fi
"""
}
HISAT_ALIGNED.into{HISAT_ALIGNED_FASTQC;
HISAT_ALIGNED_DEDUP;
HISAT_ALIGNED_COV;
HISAT_ALIGNED_COV_2}
////////////////////////////
/* Fastqc of genome reads */
////////////////////////////
process fastqc_genome {
tag "$file_id"
publishDir "${params.output}/00_fastqc/genome/", mode: 'copy'
input:
set file_id, file(reads) from HISAT_ALIGNED_FASTQC
output:
set file_id, "*.{html,zip}" into OUTPUT_FASTQC_GENOME
when:
params.do_fastqc
"""
fastqc ${file_id}* -t ${task.cpus}
"""
}
////////////////////////////
/* Deduplication of reads */
////////////////////////////
if (params.do_dedup) {
process dedup_genome {
tag "$file_id"
publishDir "${params.output}/03_hisat2/dedup/", mode: 'copy'
input:
set file_id, file(bam) from HISAT_ALIGNED_DEDUP
output:
set file_id, "*.{bam, bai}" into DEDUP_GENOME
file "*.log" into DEDUP_LOG
when:
params.do_dedup
shell:
'''
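# Deduplicate on a composite key: the last 6 characters of the read name
# (assumed to carry the UMI under this pipeline's read naming), chromosome
# ($3), position ($4) and sequence ($10); awk keeps only the first occurrence.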
samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | samtools view -bS -o !{file_id}_dedup.bam
input=$(samtools view -h !{bam[0]} | wc -l)
output=$(samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | wc -l)
diff=$(($input - $output))
per=$(($diff * 100 / $input))
echo "Input : $input reads" > !{file_id}_dedup.log
echo "Output : $output reads" >> !{file_id}_dedup.log
echo "$per % duplicated reads" >> !{file_id}_dedup.log
'''
}
} else {
HISAT_ALIGNED_DEDUP.set{DEDUP_GENOME}
}
DEDUP_GENOME.into{DEDUP_GENOME_HTSEQ;
DEDUP_GENOME_RNASEQC
}
///////////
/* HTseq */
///////////
process sort_bam {
tag "$file_id"
input:
set file_id, file(bam) from DEDUP_GENOME_HTSEQ
output:
set file_id, "*_htseq.bam" into SORTED_NAME_GENOME
script:
"""
samtools sort -@ ${task.cpus} -n -O BAM -o ${file_id}_htseq.bam ${bam[0]}
"""
}
process counting {
tag "$file_id"
publishDir "${params.output}/04_HTseq/", mode: 'copy'
input:
set file_id, file(bam) from SORTED_NAME_GENOME
file gtf from GTF_FILE.toList()
output:
file "*.count" into HTSEQ_COUNT
script:
"""
htseq-count ${bam[0]} ${gtf} \
--mode=intersection-nonempty \
-a 10 \
-s yes \
-t CDS \
-i gene_id \
-f bam \
> ${file_id}_CDS.count
htseq-count ${bam[0]} ${gtf} \
--mode=intersection-nonempty \
-a 10 \
-s yes \
-t exon \
-i gene_id \
-f bam \
> ${file_id}.count
"""
}
///////////////
/* RNASEQ QC */
///////////////
process rnaseq_qc {
tag "$file_id"
publishDir "${params.output}/06_RNAseqQC/", mode: 'copy'
input:
set file_id, file(bam) from DEDUP_GENOME_RNASEQC
file (gtf) from GTF_COLLAPSE.collect()
output:
file "*" into RNASEQC_OUTPUT
script:
"""
rnaseqc ${gtf} ${bam[0]} -s ${file_id} ./ \
--stranded 'FR'
"""
}
////////////////////////////////////////////////////////////////////////
//////////////////////////// POST GENOME ///////////////////////////////
////////////////////////////////////////////////////////////////////////
/////////////////////////
/* HISAT2 POST_GENOMIC */
/////////////////////////
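// note: post-genome mapping starts from the rRNA/tRNA-filtered reads,
// not from the genome-unaligned output of hisat2_genome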
process hisat2_postgenome {
tag "$file_id"
publishDir "${params.output}/05_post_genome_hisat2/", mode: 'copy'
input:
set file_id, file(fastq_unaligned) from FILTER_FASTQ_POSTGENOME
file index2 from POSTGENOME_INDEX.collect()
output:
set file_id, "*.{bam,bam.bai}" into POSTGENOME_ALIGNED
file "*_postgenome.txt" into POSTGENOME_LOG
when:
params.do_postgenome
script:
index2_id = index2[0]
for (index2_file in index2) {
if (index2_file =~ /.*\.1\.ht2/ && !(index2_file =~ /.*\.rev\.1\.ht2/)) {
index2_id = ( index2_file =~ /(.*)\.1\.ht2/)[0][1]
}
}
"""
hisat2 -x ${index2_id} \
-p ${task.cpus} \
-1 ${fastq_unaligned[0]} \
-2 ${fastq_unaligned[1]} \
--rna-strandness ${params.strand} \
--dta \
--no-softclip \
--trim3 1 \
--trim5 1 \
2> ${file_id}_postgenome.txt \
| samtools view -bS -F 4 - \
| samtools sort -@ ${task.cpus} -o ${file_id}_postgenome.bam \
&& samtools index ${file_id}_postgenome.bam
if grep -q "ERR" ${file_id}_postgenome.txt; then
exit 1
fi
"""
}
POSTGENOME_ALIGNED.into{POSTGENOME_ALIGNED_FASTQC;
POSTGENOME_ALIGNED_DEDUP;
POSTGENOME_ALIGNED_COV}
////////////////////////////
/* Deduplication of reads */
////////////////////////////
if (params.do_dedup) {
process dedup_postgenome {
tag "$file_id"
publishDir "${params.output}/05_post_genome_hisat2/dedup/", mode: 'copy'
input:
set file_id, file(bam) from POSTGENOME_ALIGNED_DEDUP
output:
set file_id, "*.{bam, bai}" into DEDUP_POSTGENOME
file "*.log" into DEDUP_POSTGENOME_LOG
when:
params.do_dedup
shell:
'''
# duplicates share the same UMI (last 6 characters of the read name),
# chromosome, position and sequence; keep the first occurrence only
samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | samtools view -bS -o !{file_id}_dedup.bam
samtools index !{file_id}_dedup.bam
input=$(samtools view -c !{bam[0]})
output=$(samtools view -c !{file_id}_dedup.bam)
diff=$(($input - $output))
per=$(($diff * 100 / $input))
echo "Input : $input reads" > !{file_id}_dedup.log
echo "Output : $output reads" >> !{file_id}_dedup.log
echo "$per % duplicated reads" >> !{file_id}_dedup.log
'''
}
} else {
POSTGENOME_ALIGNED_DEDUP.set{DEDUP_POSTGENOME}
}
process fastqc_postgenome {
tag "$file_id"
publishDir "${params.output}/00_fastqc/postgenome/", mode: 'copy'
input:
set file_id, file(reads) from POSTGENOME_ALIGNED_FASTQC
output:
set file_id, "*.{html,zip}" into OUTPUT_FASTQC_POSTGENOME
when:
params.do_fastqc
"""
fastqc ${file_id}* -t ${task.cpus}
"""
}
////////////////////////////////////////////////////////////////////////
//////////////////////////// POST PROCESS //////////////////////////////
////////////////////////////////////////////////////////////////////////
//////////////
/* Coverage */
//////////////
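// coverage tracks are scaled to counts per million: the scaling factor is
// 1e6 divided by the number of distinct aligned read names (genome +
// post-genome when available)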
if (params.do_postgenome){
HISAT_ALIGNED_COV.join(POSTGENOME_ALIGNED_COV)
.into{COVERAGE_MERGED_VALUES;COVERAGE_MERGED_BAM}
process calc_scalingFactor_with_postgenome{
tag "$file_id"
input:
set file_id, file(genome), file(post_genome) from COVERAGE_MERGED_VALUES
output:
set file_id, env(genome), env(postgenome), env(factor) into COVERAGE_MERGED_FACTOR
shell:
'''
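# count distinct read names per BAM (a read pair counts once), then derive
# a counts-per-million scaling factor from the combined total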
genome=$(samtools view !{genome[0]} | awk '{print $1}' | sort | uniq | wc -l)
postgenome=$(samtools view !{post_genome[0]} | awk '{print $1}' | sort | uniq | wc -l)
total=$(($genome + $postgenome))
factor=$(awk -v c=$total 'BEGIN {print 1000000/c}')
'''
}
COVERAGE_MERGED_FACTOR.join(COVERAGE_MERGED_BAM)
.set{COVERAGE_MERGED_BIGWIG}
process coverage_with_postgenome{
tag "$file_id"
publishDir "${params.output}/07_coverage/", mode: 'copy'
input:
set file_id, val(genome), val(postgenome), val(factor), file(genome_bam), file(post_genome_bam) from COVERAGE_MERGED_BIGWIG
output:
file "*.bigwig" into COVERAGE_OUTPUT
file "*.txt" into COVERAGE_LOG
shell:
'''
total=$((!{genome} + !{postgenome}))
echo "genome aligment : !{genome} counts" >> !{file_id}.txt
echo "postgenome aligment : !{postgenome} counts" >> !{file_id}.txt
echo "total counts : $total" >> !{file_id}.txt
echo "scaling factor : !{factor}" >> !{file_id}.txt
if [ !{genome} -gt 0 ]
then
bamCoverage -p !{task.cpus} --binSize 1 --scaleFactor !{factor} -b !{genome_bam[0]} -o !{file_id}_genome.bigwig
fi
if [ !{postgenome} -gt 0 ]
then
bamCoverage -p !{task.cpus} --binSize 1 --scaleFactor !{factor} -b !{post_genome_bam[0]} -o !{file_id}_postgenome.bigwig
fi
'''
}
} else {
HISAT_ALIGNED_COV.into{COVERAGE_MERGED_VALUES;COVERAGE_MERGED_BAM}
process calc_scalingFactor{
tag "$file_id"
input:
set file_id, file(genome) from COVERAGE_MERGED_VALUES
output:
set file_id, env(genome), env(factor) into COVERAGE_MERGED_FACTOR
shell:
'''
genome=$(samtools view !{genome[0]} | awk '{print $1}' | sort | uniq | wc -l)
factor=$(awk -v c=$genome 'BEGIN {print 1000000/c}')
'''
}
COVERAGE_MERGED_FACTOR.join(COVERAGE_MERGED_BAM)
.set{COVERAGE_MERGED_BIGWIG}
process coverage {
tag "$file_id"
publishDir "${params.output}/07_coverage/", mode: 'copy'
input:
set file_id, val(genome), val(factor), file(genome_bam) from COVERAGE_MERGED_BIGWIG
output:
file "*.bigwig" into COVERAGE_OUTPUT
file "*.txt" into COVERAGE_LOG
shell:
'''
echo "genome aligment : !{genome} counts" >> !{file_id}.txt
echo "scaling factor : !{factor}" >> !{file_id}.txt
if [ !{genome} -gt 0 ]
then
bamCoverage -p !{task.cpus} --binSize 1 --scaleFactor !{factor} -b !{genome_bam[0]} -o !{file_id}_genome.bigwig
fi
'''
}
}
/////////////
/* MultiQC */
/////////////
process multiqc {
tag "multiqc"
publishDir "${params.output}/multiqc", mode: 'copy'
input:
// file ('fastqc/*') from OUTPUT_FASTQC_RAW.collect().ifEmpty([])
// file ('*') from CUTADAPT_LOG.collect().ifEmpty([])
file ('fastqc/*') from OUTPUT_FASTQC_CUT.collect().ifEmpty([])
file ('*') from FILTER_LOG.collect().ifEmpty([])
// file ('fastqc/*') from OUTPUT_FASTQC_FILTER.collect().ifEmpty([])
file ('*') from HISAT_LOG.collect().ifEmpty([])
// file ('fastqc/*') from OUTPUT_FASTQC_GENOME.collect().ifEmpty([])
file ('*') from HTSEQ_COUNT.collect().ifEmpty([])
file ('*') from POSTGENOME_LOG.collect().ifEmpty([])
// file ('fastqc/*') from OUTPUT_FASTQC_POSTGENOME.collect().ifEmpty([])
// file ('*') from RNASEQC_OUTPUT.collect().ifEmpty([])
output:
file "multiqc_report.html" into multiqc_report
file "multiqc_data"
script:
"""
multiqc ./
"""
}
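////////////////////////
/* Execution profiles */
////////////////////////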
profiles {
slurm {
process{
withName: trimming {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/cutadapt_2.4"
executor = "slurm"
cpus = 1
memory = "128GB"
time = "24h"
clusterOptions = "--partition=Lake"
}
withName: rRNA_removal {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1"
executor = "slurm"
cpus = 32
memory = "192GB"
time = "24h"
clusterOptions = "--partition=Lake"
}
withName: hisat2_genome {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "hisat2/2.1.0:samtools/1.7"
executor = "slurm"
cpus = 32
memory = "64GB"
time = "24h"
clusterOptions = "--partition=Lake"
}
withName: sort_bam {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/samtools_1.7"
executor = "slurm"
cpus = 1
memory = "20GB"
time = "24h"
clusterOptions = "--partition=Lake"
}
withName: counting {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "htseq/0.11.2"
executor = "slurm"
cpus = 1
memory = "20GB"
time = "24h"
clusterOptions = "--partition=Lake"
}
}
}
sge {
process{
withName: trimming {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/cutadapt_2.4"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
}
withName: rRNA_removal {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 16
memory = "30GB"
time = "24h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
penv = 'openmp16'
}
withName: hisat2_genome {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "hisat2/2.1.0:samtools/1.7"
executor = "sge"
clusterOptions = "-cwd -V"
memory = "20GB"
cpus = 16
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
penv = 'openmp16'
}
withName: sort_bam {
beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/samtools_1.7"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
}
withName: counting {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "htseq/0.11.2"
executor = "sge"
clusterOptions = "-cwd -V"
cpus = 1
memory = "20GB"
time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
}
}
}
docker {
docker.temp = 'auto'
docker.enabled = true
process {
withName: trimming {
container = "lbmc/cutadapt:2.1"
cpus = 1
}
withName: rRNA_removal {
container = "lbmc/bowtie2:2.3.4.1"
cpus = 4
}
withName: hisat2_genome {
cpus = 4
container = "lbmc/hisat2:2.1.0"
}
withName: sort_bam {
container = "lbmc/samtools:1.7"
cpus = 1
}
withName: counting {
container = "lbmc/htseq:0.11.2"
cpus = 1
}
}
}
}
/*
* Ribosome Profiling Analysis pipeline
*/
//////////////////////////////////////////////////////
// PARAMETERS //
//////////////////////////////////////////////////////
///////////////////////
// PARMS : FILE PATH //
///////////////////////
params.fastq_raw = "data/fastq/*.fastq.gz"
params.output = "results"
params.filter = "data/filter/human_rRNA_tRNA/*.bt2"
params.index_genome = "data/genome/*.ht2"
params.gtf = "data/annotation/*.gtf"
params.gtf_collapse = "data/annotation/*.gtf"
params.index_postgenome = "data/post_genome/*.ht2"
/////////////////////
// PARMS : OPTIONS //
/////////////////////
params.do_fastqc = true
params.do_dedup = true
params.do_postgenome = true
///////////////////////
// LIBRARIES OPTIONS //
///////////////////////
params.adaptorR1 = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"
params.strand = "F"
//////////////
// LOG INFO //
//////////////
log.info "input raw : ${params.fastq_raw}"
log.info "outut directory : ${params.output}"
log.info "filter index files : ${params.filter}"
log.info "genome index : ${params.index_genome}"
log.info "gtf file : ${params.gtf}"
log.info "collapsed gtf file for rnaseqc: ${params.gtf_collapse}"
log.info "post-genome index : ${params.index_postgenome}"
log.info ""
log.info "adaptor sequence : ${params.adaptorR1}"
log.info "strand of the library : ${params.strand}"
log.info ""
log.info "do fastqc ? : ${params.do_fastqc}"
log.info "do deduplication ? : ${params.do_dedup}"
log.info "do post genome alignement ? : ${params.do_postgenome}"
log.info ""
//////////////
// CHANNELS //
//////////////
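// each fastq file is keyed by an identifier taken from its base name
// (everything before the first dot)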
Channel
.fromPath(params.fastq_raw)
.ifEmpty { error "Cannot find any file matching: ${params.fastq_raw}" }
.map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
.into {INPUT_FASTQC_RAW;
INPUT_CUTADAPT}
Channel
.fromPath( params.filter )
.ifEmpty { error "Cannot find any index files matching: ${params.filter}" }
.set { FILTER_INDEX }
Channel
.fromPath ( params.index_genome )
.ifEmpty { error "Cannot find any index files matching: ${params.index_genome}" }
.set { GENOME_INDEX }
Channel
.fromPath( params.gtf )
.ifEmpty { error "Cannot find any gtf file matching: ${params.gtf}" }
.set { GTF_FILE }
Channel
.fromPath( params.gtf_collapse )
.ifEmpty { error "Cannot find any gtf file matching: ${params.gtf_collapse}" }
.set { GTF_COLLAPSE }
Channel
.fromPath ( params.index_postgenome )
.ifEmpty { error "Cannot find any index files matching: ${params.index_postgenome}" }
.set { POSTGENOME_INDEX }
//////////////////////////////////////////////////////
// PROCESS //
//////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
//////////////////////////// PRE PROCESS ///////////////////////////////
////////////////////////////////////////////////////////////////////////
/////////////////////////
/* Fastqc of raw input */
/////////////////////////
process fastqc_raw {
tag "$file_id"
publishDir "${params.output}/00_fastqc/raw/", mode: 'copy'
input:
set file_id, file(reads) from INPUT_FASTQC_RAW
output:
file "*_fastqc.{html,zip}" into OUTPUT_FASTQC_RAW
when:
params.do_fastqc
"""
fastqc ${file_id}* -t ${task.cpus}
"""
}
///////////////////////
/* Trimming adaptors */
///////////////////////
process trimming {
tag "$file_id"
//publishDir "${params.output}/01_cutadapt/", mode: 'copy'
echo true
input:
set file_id, file(reads) from INPUT_CUTADAPT
output:
set file_id, "*cut.fastq.gz" into CUTADAPT_OUTPUT
file "*first_report.txt" into CUTADAPT_LOG
script:
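// trim the 3' adapter and discard reads shorter than 15 nt after trimming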
"""
cutadapt -j ${task.cpus} \
-a ${params.adaptorR1} \
-o ${file_id}_cut.fastq.gz \
--minimum-length 15 \
${reads[0]} > ${file_id}_first_report.txt
"""
}
CUTADAPT_OUTPUT.into{CUTADAPT_OUTPUT_FASTQC;
CUTADAPT_OUTPUT_FILTER}
/////////////////////////////
/* Fastqc of trimmed reads */
/////////////////////////////
process fastqc_cut {
tag "$file_id"
publishDir "${params.output}/00_fastqc/cut/", mode: 'copy'
input:
set file_id, file(reads) from CUTADAPT_OUTPUT_FASTQC
output:
file "*.{html,zip}" into OUTPUT_FASTQC_CUT
when:
params.do_fastqc
"""
fastqc ${file_id}* -t ${task.cpus}
"""
}
/////////////////////////////
/* rRNA and tRNA filtering */
/////////////////////////////
process rRNA_removal {
tag "$file_id"
//publishDir "${params.output}/02_rRNA_depletion/", mode: 'copy'
input:
set file_id, file(reads) from CUTADAPT_OUTPUT_FILTER
file index from FILTER_INDEX.toList()
output:
set file_id, "*.fastq.gz" into FILTER_FASTQ
set file_id, "*.bam*" into FILTER_BAM
file "*.{txt,stats}" into FILTER_LOG
script:
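// recover the bowtie2 index basename: the prefix of "<prefix>.1.bt2"
// (ignoring the reverse index "<prefix>.rev.1.bt2")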
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
}
}
"""
bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \
-U ${reads[0]} --no-unal \
--un-gz ${file_id}_filter.fastq.gz 2> \
${file_id}_filter.txt | samtools view -bS - \
| samtools sort -@ ${task.cpus} -o ${file_id}.filter.bam \
&& samtools index ${file_id}.filter.bam \
&& samtools idxstats ${file_id}.filter.bam > \
${file_id}.filter.stats
if grep -q "Error " ${file_id}_filter.txt; then
exit 1
fi
"""
}
FILTER_FASTQ.into{FILTER_FASTQ_FASTQC;
FILTER_FASTQ_HISAT;
FILTER_FASTQ_POSTGENOME
}
//////////////////////////////
/* Fastqc of filtered reads */
//////////////////////////////
process fastqc_filter {
tag "$file_id"
publishDir "${params.output}/00_fastqc/filter/", mode: 'copy'
input:
set file_id, file(reads) from FILTER_FASTQ_FASTQC
output:
set file_id, "*.{html,zip}" into OUTPUT_FASTQC_FILTER
when:
params.do_fastqc
"""
fastqc ${file_id}* -t ${task.cpus}
"""
}
///////////////////////////////////////////////////////////////////
//////////////////////////// GENOME ///////////////////////////////
///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////
/* mapping against human genome with hisat2 */
///////////////////////////////////////////////
process hisat2_genome {
tag "$file_id"
publishDir "${params.output}/03_hisat2/", mode: 'copy'
input:
set file_id, file(fastq_filtred) from FILTER_FASTQ_HISAT
file index from GENOME_INDEX.toList()
output:
set file_id, "*_notaligned.fastq.gz" into HISAT_UNALIGNED
set file_id, "*.{bam,bam.bai}" into HISAT_ALIGNED
file "*.txt" into HISAT_LOG
script:
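// recover the hisat2 index basename from the staged files: the prefix of
// "<prefix>.1.ht2" (ignoring the reverse index "<prefix>.rev.1.ht2")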
index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
}
}
"""
hisat2 -x ${index_id} \
-p ${task.cpus} \
-U ${fastq_filtred[0]} \
--un-gz ${file_id}_notaligned.fastq.gz \
--rna-strandness ${params.strand} \
--dta \
--no-softclip \
--trim5 1 \
--trim3 1 \
2> ${file_id}_genome.txt \
| samtools view -bS -F 4 - \
| samtools sort -@ ${task.cpus} -o ${file_id}_genome.bam \
&& samtools index ${file_id}_genome.bam
if grep -q "ERR" ${file_id}.txt; then
exit 1
fi
"""
}
HISAT_ALIGNED.into{HISAT_ALIGNED_FASTQC;
HISAT_ALIGNED_DEDUP;
HISAT_ALIGNED_COV;
HISAT_ALIGNED_COV_2}
////////////////////////////
/* Fastqc of genome reads */
////////////////////////////
process fastqc_genome {
tag "$file_id"
publishDir "${params.output}/00_fastqc/genome/", mode: 'copy'
input:
set file_id, file(reads) from HISAT_ALIGNED_FASTQC
output:
set file_id, "*.{html,zip}" into OUTPUT_FASTQC_GENOME
when:
params.do_fastqc
"""
fastqc ${file_id}* -t ${task.cpus}
"""
}
////////////////////////////
/* Deduplication of reads */
////////////////////////////
if (params.do_dedup) {
process dedup_genome {
tag "$file_id"
publishDir "${params.output}/03_hisat2/dedup/", mode: 'copy'
input:
set file_id, file(bam) from HISAT_ALIGNED_DEDUP
output:
set file_id, "*.{bam, bai}" into DEDUP_GENOME
file "*.log" into DEDUP_LOG
when:
params.do_dedup
shell:
'''
# duplicates share the same UMI (last 6 characters of the read name),
# chromosome, position and sequence; keep the first occurrence only
samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | samtools view -bS -o !{file_id}_dedup.bam
samtools index !{file_id}_dedup.bam
input=$(samtools view -c !{bam[0]})
output=$(samtools view -c !{file_id}_dedup.bam)
diff=$(($input - $output))
per=$(($diff * 100 / $input))
echo "Input : $input reads" > !{file_id}_dedup.log
echo "Output : $output reads" >> !{file_id}_dedup.log
echo "$per % duplicated reads" >> !{file_id}_dedup.log
'''
}
} else {
HISAT_ALIGNED_DEDUP.set{DEDUP_GENOME}
}
DEDUP_GENOME.into{DEDUP_GENOME_HTSEQ;
DEDUP_GENOME_RNASEQC
}
///////////
/* HTseq */
///////////
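// htseq-count assumes name-sorted alignments by default, hence a dedicated sort step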
process sort_bam {
tag "$file_id"
input:
set file_id, file(bam) from DEDUP_GENOME_HTSEQ
output:
set file_id, "*_htseq.bam" into SORTED_NAME_GENOME
script:
"""
samtools sort -@ ${task.cpus} -n -O BAM -o ${file_id}_htseq.bam ${bam[0]}
"""
}
process counting {
tag "$file_id"
publishDir "${params.output}/04_HTseq/", mode: 'copy'
input:
set file_id, file(bam) from SORTED_NAME_GENOME
file gtf from GTF_FILE.toList()
output:
file "*.count" into HTSEQ_COUNT
script:
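// count each library twice: once over CDS features and once over exons,
// producing two count tables per sample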
"""
htseq-count ${bam[0]} ${gtf} \
--mode=intersection-nonempty \
-a 10 \
-s yes \
-t CDS \
-i gene_id \
-f bam \
> ${file_id}_CDS.count
htseq-count ${bam[0]} ${gtf} \
--mode=intersection-nonempty \
-a 10 \
-s yes \
-t exon \
-i gene_id \
-f bam \
> ${file_id}.count
"""
}
///////////////
/* RNASEQ QC */
///////////////
process rnaseq_qc {
tag "$file_id"
publishDir "${params.output}/06_RNAseqQC/", mode: 'copy'
input:
set file_id, file(bam) from DEDUP_GENOME_RNASEQC
file (gtf) from GTF_COLLAPSE.collect()
output:
file "*" into RNASEQC_OUTPUT
script:
"""
rnaseqc ${gtf} ${bam[0]} -s ${file_id} ./
"""
}
////////////////////////////////////////////////////////////////////////
//////////////////////////// POST GENOME ///////////////////////////////
////////////////////////////////////////////////////////////////////////
/////////////////////////
/* HISAT2 POST_GENOMIC */
/////////////////////////
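// note: post-genome mapping starts from the rRNA/tRNA-filtered reads,
// not from the genome-unaligned output of hisat2_genome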
process hisat2_postgenome {
tag "$file_id"
publishDir "${params.output}/05_post_genome_hisat2/", mode: 'copy'
input:
set file_id, file(fastq_unaligned) from FILTER_FASTQ_POSTGENOME
file index2 from POSTGENOME_INDEX.collect()
output:
set file_id, "*.{bam,bam.bai}" into POSTGENOME_ALIGNED
file "*_postgenome.txt" into POSTGENOME_LOG
when:
params.do_postgenome
script:
index2_id = index2[0]
for (index2_file in index2) {
if (index2_file =~ /.*\.1\.ht2/ && !(index2_file =~ /.*\.rev\.1\.ht2/)) {
index2_id = ( index2_file =~ /(.*)\.1\.ht2/)[0][1]
}
}
"""
hisat2 -x ${index2_id} \
-p ${task.cpus} \
-U ${fastq_unaligned[0]} \
--rna-strandness ${params.strand} \
--dta \
--no-softclip \
--trim3 1 \
--trim5 1 \
2> ${file_id}_postgenome.txt \
| samtools view -bS -F 276 - \
| samtools sort -@ ${task.cpus} -o ${file_id}_postgenome.bam \
&& samtools index ${file_id}_postgenome.bam
if grep -q "ERR" ${file_id}_postgenome.txt; then
exit 1
fi
"""
}
POSTGENOME_ALIGNED.into{POSTGENOME_ALIGNED_FASTQC;
POSTGENOME_ALIGNED_DEDUP;
POSTGENOME_ALIGNED_COV}
////////////////////////////
/* Deduplication of reads */
////////////////////////////
if (params.do_dedup){
process dedup_postgenome {
tag "$file_id"
publishDir "${params.output}/05_post_genome_hisat2/dedup/", mode: 'copy'
input:
set file_id, file(bam) from POSTGENOME_ALIGNED_DEDUP
output:
set file_id, "*.{bam, bai}" into DEDUP_POSTGENOME
file "*.log" into DEDUP_POSTGENOME_LOG
when:
params.do_dedup
shell:
'''
# duplicates share the same UMI (last 6 characters of the read name),
# chromosome, position and sequence; keep the first occurrence only
samtools view -h !{bam[0]} | awk '!seen[substr($1, length($1)-5) $3 $4 $10]++' | samtools view -bS -o !{file_id}_dedup.bam
samtools index !{file_id}_dedup.bam
input=$(samtools view -c !{bam[0]})
output=$(samtools view -c !{file_id}_dedup.bam)
diff=$(($input - $output))
per=$(($diff * 100 / $input))
echo "Input : $input reads" > !{file_id}_dedup.log
echo "Output : $output reads" >> !{file_id}_dedup.log
echo "$per % duplicated reads" >> !{file_id}_dedup.log
'''
}
} else {
POSTGENOME_ALIGNED_DEDUP.set{DEDUP_POSTGENOME}
}
process fastqc_postgenome {
tag "$file_id"
publishDir "${params.output}/00_fastqc/postgenome/", mode: 'copy'
input:
set file_id, file(reads) from POSTGENOME_ALIGNED_FASTQC
output:
set file_id, "*.{html,zip}" into OUTPUT_FASTQC_POSTGENOME
when:
params.do_fastqc
"""
fastqc ${file_id}* -t ${task.cpus}
"""
}
////////////////////////////////////////////////////////////////////////
//////////////////////////// POST PROCESS //////////////////////////////
////////////////////////////////////////////////////////////////////////
//////////////
/* Coverage */
//////////////
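// coverage tracks are scaled to counts per million: the scaling factor is
// 1e6 divided by the number of distinct aligned read names (genome +
// post-genome when available)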
if (params.do_postgenome){
HISAT_ALIGNED_COV.join(POSTGENOME_ALIGNED_COV)
.into{COVERAGE_MERGED_VALUES;COVERAGE_MERGED_BAM}
process calc_scalingFactor_with_postgenome{
tag "$file_id"
input:
set file_id, file(genome), file(post_genome) from COVERAGE_MERGED_VALUES
output:
set file_id, env(genome), env(postgenome), env(factor) into COVERAGE_MERGED_FACTOR
shell:
'''
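# count distinct read names per BAM, then derive a counts-per-million
# scaling factor from the combined total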
genome=$(samtools view !{genome[0]} | awk '{print $1}' | sort | uniq | wc -l)
postgenome=$(samtools view !{post_genome[0]} | awk '{print $1}' | sort | uniq | wc -l)
total=$(($genome + $postgenome))
factor=$(awk -v c=$total 'BEGIN {print 1000000/c}')
'''
}
COVERAGE_MERGED_FACTOR.join(COVERAGE_MERGED_BAM)
.set{COVERAGE_MERGED_BIGWIG}
process coverage_with_postgenome{
tag "$file_id"
publishDir "${params.output}/07_coverage/", mode: 'copy'
input:
set file_id, val(genome), val(postgenome), val(factor), file(genome_bam), file(post_genome_bam) from COVERAGE_MERGED_BIGWIG
output:
file "*.bigwig" into COVERAGE_OUTPUT
file "*.txt" into COVERAGE_LOG
shell:
'''
total=$((!{genome} + !{postgenome}))
echo "genome aligment : !{genome} counts" >> !{file_id}.txt
echo "postgenome aligment : !{postgenome} counts" >> !{file_id}.txt
echo "total counts : $total" >> !{file_id}.txt
echo "scaling factor : !{factor}" >> !{file_id}.txt
if [ !{genome} -gt 0 ]
then
bamCoverage -p !{task.cpus} --binSize 1 --scaleFactor !{factor} -b !{genome_bam[0]} -o !{file_id}_genome.bigwig
fi
if [ !{postgenome} -gt 0 ]
then
bamCoverage -p !{task.cpus} --binSize 1 --scaleFactor !{factor} -b !{post_genome_bam[0]} -o !{file_id}_postgenome.bigwig
fi
'''
}
} else {
HISAT_ALIGNED_COV.into{COVERAGE_MERGED_VALUES;COVERAGE_MERGED_BAM}
process calc_scalingFactor{
tag "$file_id"
input:
set file_id, file(genome) from COVERAGE_MERGED_VALUES
output:
set file_id, env(genome), env(factor) into COVERAGE_MERGED_FACTOR
shell:
'''
genome=$(samtools view !{genome[0]} | awk '{print $1}' | sort | uniq | wc -l)
factor=$(awk -v c=$genome 'BEGIN {print 1000000/c}')
'''
}
COVERAGE_MERGED_FACTOR.join(COVERAGE_MERGED_BAM)
.set{COVERAGE_MERGED_BIGWIG}
process coverage {
tag "$file_id"
publishDir "${params.output}/07_coverage/", mode: 'copy'
input:
set file_id, val(genome), val(factor), file(genome_bam) from COVERAGE_MERGED_BIGWIG
output:
file "*.bigwig" into COVERAGE_OUTPUT
file "*.txt" into COVERAGE_LOG
shell:
'''
echo "genome aligment : !{genome} counts" >> !{file_id}.txt
echo "scaling factor : !{factor}" >> !{file_id}.txt
if [ !{genome} -gt 0 ]
then
bamCoverage -p !{task.cpus} --binSize 1 --scaleFactor !{factor} -b !{genome_bam[0]} -o !{file_id}_genome.bigwig
fi
'''
}
}
/////////////
/* MultiQC */
/////////////
process multiqc {
tag "multiqc"
publishDir "${params.output}/multiqc", mode: 'copy'
input:
// file ('fastqc/*') from OUTPUT_FASTQC_RAW.collect().ifEmpty([])
// file ('*') from CUTADAPT_LOG.collect().ifEmpty([])
file ('fastqc/*') from OUTPUT_FASTQC_CUT.collect().ifEmpty([])
file ('*') from FILTER_LOG.collect().ifEmpty([])
// file ('fastqc/*') from OUTPUT_FASTQC_FILTER.collect().ifEmpty([])
file ('*') from HISAT_LOG.collect().ifEmpty([])
// file ('fastqc/*') from OUTPUT_FASTQC_GENOME.collect().ifEmpty([])
file ('*') from HTSEQ_COUNT.collect().ifEmpty([])
file ('*') from POSTGENOME_LOG.collect().ifEmpty([])
// file ('fastqc/*') from OUTPUT_FASTQC_POSTGENOME.collect().ifEmpty([])
// file ('*') from RNASEQC_OUTPUT.collect().ifEmpty([])
output:
file "multiqc_report.html" into multiqc_report
file "multiqc_data"
script:
"""
multiqc ./
"""
}