From 12b1d9fb428749071270eb3e744523633813fa3c Mon Sep 17 00:00:00 2001
From: Emmanuel Labaronne <emmanuel.labaronne@ens-lyon.fr>
Date: Thu, 30 Apr 2020 18:50:02 +0200
Subject: [PATCH] src/FLOSS_score : clean folder to keep only useful files

---
 src/FLOSS_score/Makefile-align-mesc           | 108 -------
 src/FLOSS_score/Makefile-align-pull           |  91 ------
 src/FLOSS_score/analysis.sh                   |   6 +-
 src/FLOSS_score/count-calc-hek.R              |  50 ----
 src/FLOSS_score/count-mapped.R                |  14 -
 src/FLOSS_score/count-plot-hek.R              | 265 -----------------
 src/FLOSS_score/count-score-hek.R             |  92 ------
 src/FLOSS_score/floss-calc-mesc.R             |  19 --
 src/FLOSS_score/floss-plots-mesc_IP.R         | 144 ---------
 ...ss-plots-mesc.R => floss-plots-template.R} |  71 ++---
 src/FLOSS_score/human-annotation.R            |   1 +
 src/FLOSS_score/ldist-calc-mesc.R             |  76 -----
 src/FLOSS_score/ldist-calc-template.R         |  47 +++
 src/FLOSS_score/ldist-plots-mesc.R            | 281 ------------------
 src/FLOSS_score/ldist-plots-mesc_ip.R         | 281 ------------------
 src/FLOSS_score/ldist-plots-template.R        |  60 ++++
 src/FLOSS_score/mouse-annotation.R            |   2 +-
 src/FLOSS_score/orf-org-plot.R                | 117 --------
 src/FLOSS_score/orf-organization.R            | 205 -------------
 src/FLOSS_score/scspike.sh                    |  31 --
 20 files changed, 132 insertions(+), 1829 deletions(-)
 delete mode 100644 src/FLOSS_score/Makefile-align-mesc
 delete mode 100644 src/FLOSS_score/Makefile-align-pull
 delete mode 100644 src/FLOSS_score/count-calc-hek.R
 delete mode 100644 src/FLOSS_score/count-mapped.R
 delete mode 100644 src/FLOSS_score/count-plot-hek.R
 delete mode 100644 src/FLOSS_score/count-score-hek.R
 delete mode 100644 src/FLOSS_score/floss-calc-mesc.R
 delete mode 100644 src/FLOSS_score/floss-plots-mesc_IP.R
 rename src/FLOSS_score/{floss-plots-mesc.R => floss-plots-template.R} (55%)
 delete mode 100644 src/FLOSS_score/ldist-calc-mesc.R
 create mode 100644 src/FLOSS_score/ldist-calc-template.R
 delete mode 100644 src/FLOSS_score/ldist-plots-mesc.R
 delete mode 100644 src/FLOSS_score/ldist-plots-mesc_ip.R
 create mode 100644 src/FLOSS_score/ldist-plots-template.R
 delete mode 100644 src/FLOSS_score/orf-org-plot.R
 delete mode 100644 src/FLOSS_score/orf-organization.R
 delete mode 100644 src/FLOSS_score/scspike.sh

diff --git a/src/FLOSS_score/Makefile-align-mesc b/src/FLOSS_score/Makefile-align-mesc
deleted file mode 100644
index 9e884e27..00000000
--- a/src/FLOSS_score/Makefile-align-mesc
+++ /dev/null
@@ -1,108 +0,0 @@
-SHELL=/bin/bash -x
-
-all:
-
-.SECONDARY:
-
-include samples.d
-
-NEW_SAMPLES=ribo_chx_mesc ribo_emet_mesc
-OLD_SAMPLES=ribo_chx_oldmesc ribo_emet_oldmesc ribo_harr120_oldmesc ribo_harr150_oldmesc
-RIBO_SAMPLES=$(NEW_SAMPLES) $(OLD_SAMPLES)
-SAMPLES=$(RIBO_SAMPLES)
-PROJECTS=/mnt/ingolialab/FastQ/121204_SN375_0200_AC1A7LACXX/FastQ/Project_NI-nickingolia-ESdrug/ \
-	/mnt/ingolialab/FastQ/130305_SN375_0214_AC1CACACXX/FastQ/Project_NI-nickingolia/ \
-	/mnt/ingolialab/FastQ/130709_SN375_0236_BD294UACXX/FastQ-i4n3/Project_NI-nickingolia/
-
-ribo_emet_oldmesc_trim.fq:  /mnt/ingolialab/ingolia/MouseES/RawData/100426/s_3_sequence.txt.bz2
-ribo_chx_oldmesc_trim.fq: /mnt/ingolialab/ingolia/MouseES/RawData/100426/s_5_sequence.txt.bz2
-ribo_chx_oldmesc_trim.fq: /mnt/ingolialab/ingolia/MouseES/RawData/100809/s_4_sequence.txt.bz2
-ribo_harr120_oldmesc_trim.fq: /mnt/ingolialab/ingolia/MouseES/RawData/100706/s_5_sequence.txt.bz2
-ribo_harr120_oldmesc_trim.fq: /mnt/ingolialab/ingolia/MouseES/RawData/100706/s_6_sequence.txt.bz2
-ribo_harr150_oldmesc_trim.fq: /mnt/ingolialab/ingolia/MouseES/RawData/100624/s_2_sequence.txt.bz2
-ribo_harr150_oldmesc_trim.fq: /mnt/ingolialab/ingolia/MouseES/RawData/100706/s_2_sequence.txt.bz2
-
-# RUNTIME CONFIGURATION
-BOWTIE_PARALLEL=-p 32
-TOPHAT_KEEP_TEMP=#--keep-tmp
-
-# LINKER TRIMMING CONFIGURATION
-NI_NI_9=CTGTAGGCACCATCAATAGA
-
-FP_LINKER=$(NI_NI_9)
-FP_MIN_INSERT=24
-
-# RRNA ALIGNMENT CONFIGURATION
-RRNA_MAQ_ERR=60
-RRNA_SEEDLEN=23
-RRNA_MAXREAD=51
-
-RRNA_EBWT=/mnt/ingolialab/ingolia/Genomes/SpikeIn/mm-scspike-rrna
-
-# GENOME ALIGNMENT CONFIGURATION
-TOPHAT_EBWT=/mnt/ingolialab/ingolia/Genomes/SpikeIn/mm10-scspike
-TOPHAT_GTF=Mm.GRCm38.72-plus-scspike.gtf
-
-# BINARIES
-TOPHAT=tophat
-BOWTIE=bowtie
-SAMTOOLS=samtools
-TAM_TO_BAM=$(SAMTOOLS) view -b -S -h -
-
-# Fix up chromosome names for Ensembl
-Mm.GRCm38.72.gtf: Mus_musculus.GRCm38.72.gtf
-	awk -F$$'\t' '/^[0-9XY]/ { printf("chr%s\n", $$0) } /^MT/ { gsub(/^MT/, "chrM", $$0); print }' $< > $@
-
-$(TOPHAT_GTF): Mm.GRCm38.72.gtf /mnt/ingolialab/ingolia/Genomes/SpikeIn/sac_cer_spike.gtf
-	cat $^ > $@
-
-samples.d: $(PROJECTS)
-	ls $(addsuffix /Sample_*/*.fastq.gz,$(PROJECTS)) | \
-	  sed 's,.*Sample_\(.*\)/.*\.fastq\.gz$$,\1_trim.fq: &,' | \
-	  sed s/ribo_mesc_chx/ribo_chx_mesc/ | \
-	  sed s/ribo_mesc_emet/ribo_emet_mesc/ \
-	  > $@
-
-trims:   $(addsuffix _trim.fq, $(SAMPLES))
-norrnas: $(addsuffix _norrna.fq, $(SAMPLES))
-genomic: $(addsuffix _genome_sorted.bam.bai, $(SAMPLES))
-
-all: trims
-all: norrnas
-all: genomic
-
-# RIBO LINKER TRIMMING
-$(addsuffix _trim.fq, $(NEW_SAMPLES)): %_trim.fq:
-	mkdir -p Statistics
-	zcat $^ \
-	  | fastx_clipper -Q33 -a $(FP_LINKER) -l $(FP_MIN_INSERT) -c -n -v 2>Statistics/$*_clip.txt \
-	  > $@
-
-$(addsuffix _trim.fq, $(OLD_SAMPLES)): %_trim.fq:
-	mkdir -p Statistics
-	bzcat $^ \
-	  | fastx_clipper -Q33 -a $(FP_LINKER) -l $(FP_MIN_INSERT) -c -n -v 2>Statistics/$*_clip.txt \
-	  > $@
-
-# RRNA ALIGNMENTS
-RRNA_BOWTIE_ARGS=$(BOWTIE_PARALLEL) --solexa-quals --maqerr=$(RRNA_MAQ_ERR) --seedlen=$(RRNA_SEEDLEN)
-
-$(addsuffix _norrna.fq, $(RIBO_SAMPLES)):%_norrna.fq: %_trim.fq
-	mkdir -p Statistics
-	bowtie $(RRNA_BOWTIE_ARGS) --un $@ --sam $(RRNA_EBWT) $< 2>Statistics/$*_rrna.txt > /dev/null
-
-# GENOME ALIGNMENTS
-TOPHAT_ARGS=$(BOWTIE_PARALLEL) --solexa-quals --GTF $(TOPHAT_GTF) --no-novel-juncs --library-type fr-unstranded $(TOPHAT_KEEP_TEMP)
-
-$(addsuffix _genome/accepted_hits.bam, $(SAMPLES)): %_genome/accepted_hits.bam: %_norrna.fq $(TOPHAT_EBWT).1.ebwt $(TOPHAT_GTF)
-	mkdir -p Statistics
-	$(TOPHAT) $(TOPHAT_ARGS) --output-dir $(dir $@) $(TOPHAT_EBWT) $< 2>Statistics/$*_genome.txt
-
-%_sorted.bam.bai: %_sorted.bam
-	$(SAMTOOLS) index $< 2>Statistics/$*_genome_index.txt
-
-$(addsuffix _genome_sorted.bam, $(SAMPLES)): %_genome_sorted.bam: %_genome/accepted_hits.bam
-	samtools view -h $^ | \
-	  grep -E '(NM:i:[01])|(^@)' | \
-	  samtools view -S -b - > $@
-
diff --git a/src/FLOSS_score/Makefile-align-pull b/src/FLOSS_score/Makefile-align-pull
deleted file mode 100644
index 37559b1b..00000000
--- a/src/FLOSS_score/Makefile-align-pull
+++ /dev/null
@@ -1,91 +0,0 @@
-SHELL=/bin/bash -x
-
-all:
-
-.SECONDARY:
-
-include samples.d
-
-RIBO_SAMPLES=input bound
-SAMPLES=$(RIBO_SAMPLES)
-PROJECTS=/mnt/ingolialab/FastQ/130628_SN375_0234_AD24HWACXX/FastQ/Project_NI-mikeharris/ \
-	/mnt/ingolialab/FastQ/130709_SN375_0236_BD294UACXX/FastQ-i4n3/Project_NI-mikeharris/
-
-# RUNTIME CONFIGURATION
-BOWTIE_PARALLEL=-p 32
-TOPHAT_KEEP_TEMP=#--keep-tmp
-
-# LINKER TRIMMING CONFIGURATION
-NI_NI_9=CTGTAGGCACCATCAATAGA
-
-FP_LINKER=$(NI_NI_9)
-FP_MIN_INSERT=24
-
-# RRNA ALIGNMENT CONFIGURATION
-RRNA_MAQ_ERR=60
-RRNA_SEEDLEN=23
-RRNA_MAXREAD=51
-
-RRNA_EBWT=/mnt/ingolialab/ingolia/Genomes/SpikeIn/hs-scspike-rrna
-
-# GENOME ALIGNMENT CONFIGURATION
-TOPHAT_EBWT=/mnt/ingolialab/ingolia/Genomes/SpikeIn/hg19-scspike
-TOPHAT_GTF=gencode.v17-plus-scspike.gtf
-
-# BINARIES
-TOPHAT=tophat --bowtie1
-BOWTIE=bowtie
-
-SAMTOOLS=samtools
-TAM_TO_BAM=$(SAMTOOLS) view -b -S -h -
-
-gencode.v17.annotation.gtf.gz:
-	curl -O 'ftp://ftp.sanger.ac.uk/pub/gencode/release_17/gencode.v17.annotation.gtf.gz'
-
-gencode.v17.annotation.gtf: gencode.v17.annotation.gtf.gz
-	gunzip $<
-
-$(TOPHAT_GTF): gencode.v17.annotation.gtf /mnt/ingolialab/ingolia/Genomes/SpikeIn/sac_cer_spike.gtf
-	cat $^ > $@
-
-samples.d: $(PROJECTS)
-	ls $(addsuffix /Sample_*/*.fastq.gz,$(PROJECTS)) | \
-	  sed 's,.*Sample_\(.*\)/.*\.fastq\.gz$$,\1_trim.fq: &,' \
-	  > $@
-
-trims:   $(addsuffix _trim.fq, $(SAMPLES))
-norrnas: $(addsuffix _norrna.fq, $(SAMPLES))
-genomic: $(addsuffix _genome_sorted.bam.bai, $(SAMPLES))
-
-all: trims
-all: norrnas
-all: genomic
-
-# RIBO LINKER TRIMMING
-$(addsuffix _trim.fq, $(RIBO_SAMPLES)): %_trim.fq:
-	mkdir -p Statistics
-	zcat $^ \
-	  | fastx_clipper -Q33 -a $(FP_LINKER) -l $(FP_MIN_INSERT) -c -n -v 2>Statistics/$*_clip.txt \
-	  > $@
-
-# RRNA ALIGNMENTS
-RRNA_BOWTIE_ARGS=$(BOWTIE_PARALLEL) --solexa-quals --maqerr=$(RRNA_MAQ_ERR) --seedlen=$(RRNA_SEEDLEN)
-
-$(addsuffix _norrna.fq, $(RIBO_SAMPLES)):%_norrna.fq: %_trim.fq
-	mkdir -p Statistics
-	bowtie $(RRNA_BOWTIE_ARGS) --un $@ --sam $(RRNA_EBWT) $< 2>Statistics/$*_rrna.txt > /dev/null
-
-# GENOME ALIGNMENTS
-TOPHAT_ARGS=$(BOWTIE_PARALLEL) --solexa-quals --GTF $(TOPHAT_GTF) --no-novel-juncs --library-type fr-unstranded $(TOPHAT_KEEP_TEMP)
-
-$(addsuffix _genome/accepted_hits.bam, $(SAMPLES)): %_genome/accepted_hits.bam: %_norrna.fq $(TOPHAT_EBWT).1.ebwt $(TOPHAT_GTF)
-	mkdir -p Statistics
-	$(TOPHAT) $(TOPHAT_ARGS) --output-dir $(dir $@) $(TOPHAT_EBWT) $< 2>Statistics/$*_genome.txt
-
-%_sorted.bam.bai: %_sorted.bam
-	$(SAMTOOLS) index $< 2>Statistics/$*_genome_index.txt
-
-$(addsuffix _genome_sorted.bam, $(SAMPLES)): %_genome_sorted.bam: %_genome/accepted_hits.bam
-	samtools view -h $^ | \
-	  grep -E '(NM:i:[01])|(^@)' | \
-	  samtools view -S -b - > $@
diff --git a/src/FLOSS_score/analysis.sh b/src/FLOSS_score/analysis.sh
index 240ffaf5..2da4cecc 100644
--- a/src/FLOSS_score/analysis.sh
+++ b/src/FLOSS_score/analysis.sh
@@ -41,8 +41,8 @@
 #rm tmp.sam /Xnfs/lbmcdb/Ricci_team/HIV_project/results/20180326_RP_U937CHX/Results/10_FLOSS_score/U937_CHX_all_hg38_NY5.bam
 
 ### Calculating FLOSS scores
-R --no-save < ldist-calc-mesc.R
+R --no-save < ldist-calc-template.R
 # FLOSS scores needed to pick lincs in ldist plots
 #R --no-save < floss-calc-mesc.R ####### ---> this script is now merged in the first one
-R --no-save < ldist-plots-mesc.R
-R --no-save < floss-plots-mesc.R
+R --no-save < ldist-plots-template.R
+R --no-save < floss-plots-template.R
diff --git a/src/FLOSS_score/count-calc-hek.R b/src/FLOSS_score/count-calc-hek.R
deleted file mode 100644
index 39a34a02..00000000
--- a/src/FLOSS_score/count-calc-hek.R
+++ /dev/null
@@ -1,50 +0,0 @@
-library("rtracklayer")
-library("Rsamtools")
-library("biomaRt")
-
-source("../shared/transcript-utils.R")
-source("../shared/footprint-assignment.R")
-source("../shared/floss.R")
-
-options(mc.cores=32)
-
-hekBedFile <- "/mnt/ingolialab/mharr/schsgenome/gencode.v17.annotation.bed"
-yeastBedFile <- "/mnt/ingolialab/ingolia/RiboIP/Alignment-mesc-130709/sac_cer_spike.bed"
-
-boundBamFile <- BamFile("/mnt/ingolialab/mharr/130709_YeastMix/RawData/bound.bam")
-inputBamFile <- BamFile("/mnt/ingolialab/mharr/130709_YeastMix/RawData/input.bam")
-
-aoffsets <- data.frame(asite=c(14,14,14,14,15,15), row.names=seq(26,31))
-
-yeastBed <- import(yeastBedFile, format="BED", asRangedData = FALSE)
-hekBed <- import(hekBedFile, format="BED", asRangedData = FALSE)
-
-yeastSize <- regionCountFrame(countSizes(defaultInsets, yeastBed))
-yeastInput <- regionCountFrame(countAligns(aoffsets, defaultInsets, inputBamFile, yeastBed))
-yeastBound <- regionCountFrame(countAligns(aoffsets, defaultInsets, boundBamFile, yeastBed))
-
-write.csv(yeastSize, "data/yeast-pulldown-sizes.txt", row.names=TRUE)
-write.csv(yeastInput, "data/yeast-pulldown-input.txt", row.names=TRUE)
-write.csv(yeastBound, "data/yeast-pulldown-bound.txt", row.names=TRUE)
-
-yeastInputLDists <- calcTrxRegionLengthDists(inputBamFile, yeastBed)
-writeRegionLengthDists(yeastInputLDists, "data/yeast-pulldown-input")
-
-yeastBoundLDists <- calcTrxRegionLengthDists(boundBamFile, yeastBed)
-writeRegionLengthDists(yeastBoundLDists, "data/yeast-pulldown-bound")
-
-hekSize <- regionCountFrame(countSizes(defaultInsets, hekBed))
-hekInput <- regionCountFrame(countAligns(aoffsets, defaultInsets, inputBamFile, hekBed))
-hekBound <- regionCountFrame(countAligns(aoffsets, defaultInsets, boundBamFile, hekBed))
-
-write.csv(hekSize, "data/hek-pulldown-sizes.txt", row.names=TRUE)
-write.csv(hekInput, "data/hek-pulldown-input.txt", row.names=TRUE)
-write.csv(hekBound, "data/hek-pulldown-bound.txt", row.names=TRUE)
-
-hekInputLDists <- calcTrxRegionLengthDists(inputBamFile, hekBed)
-writeRegionLengthDists(hekInputLDists, "data/hek-pulldown-input")
-
-hekBoundLDists <- calcTrxRegionLengthDists(boundBamFile, hekBed)
-writeRegionLengthDists(hekBoundLDists, "data/hek-pulldown-bound")
-
-
diff --git a/src/FLOSS_score/count-mapped.R b/src/FLOSS_score/count-mapped.R
deleted file mode 100644
index 39e80caf..00000000
--- a/src/FLOSS_score/count-mapped.R
+++ /dev/null
@@ -1,14 +0,0 @@
-options(mc.cores=16)
-bamFiles <- c("/mnt/ingolialab/ingolia/RiboIP/Alignment-mesc-130709/ribo_chx_mesc_genome_sorted.bam",
-              "/mnt/ingolialab/ingolia/RiboIP/Alignment-mesc-130709/ribo_emet_mesc_genome_sorted.bam",
-              "/mnt/ingolialab/mharr/130709_YeastMix/RawData/bound.bam",
-              "/mnt/ingolialab/mharr/130709_YeastMix/RawData/input.bam",
-              "/mnt/ingolialab/ingolia/RiboIP/Alignment-mesc-130709/ribo_harr120_oldmesc_genome_sorted.bam",
-              "/mnt/ingolialab/ingolia/RiboIP/Alignment-mesc-130709/ribo_harr150_oldmesc_genome_sorted.bam")
-
-counts <- mclapply(bamFiles, function(bamFile) {
-    res <- system(sprintf("samtools flagstat %s", bamFile), intern=TRUE)
-    c(bamFile, res)
-})
-
-cat(do.call(c, counts), file="data/mapped-count.txt", sep="\n")
diff --git a/src/FLOSS_score/count-plot-hek.R b/src/FLOSS_score/count-plot-hek.R
deleted file mode 100644
index 1792526e..00000000
--- a/src/FLOSS_score/count-plot-hek.R
+++ /dev/null
@@ -1,265 +0,0 @@
-library("biomaRt")
-library("GenomicRanges")
-
-source("../shared/transcript-utils.R")
-source("../shared/footprint-assignment.R")
-source("../shared/floss.R")
-source("../shared/plot-utils.R")
-
-source("human-annotation.R")
-
-lengthLevels <- seq(defaultLengthMin, defaultLengthMax)
-
-yeastCount <- read.csv("data/yeast-pulldown-count.txt", row.names=1)
-yeastFloss <- read.csv("data/yeast-pulldown-bound-floss.txt", check.names=FALSE, row.names=1)
-yeastCount <- cbind(yeastCount, yeastFloss[match(row.names(yeastCount), row.names(yeastFloss)),])
-
-hekCount <- read.csv("data/hek-pulldown-count.txt", row.names=1)
-hekFloss <- read.csv("data/hek-pulldown-input-floss.txt", check.names=FALSE, row.names=1)
-hekCount <- cbind(hekCount, hekFloss[match(row.names(hekCount), row.names(hekFloss)),])
-
-xref <- match(row.names(hekCount), annots$ensembl_transcript_id)
-table(is.na(xref))
-hekCount <- cbind(hekCount, annots[xref,])
-
-hekexpr <- hekCount[hekCount$inputCdsCount > 100 & !is.na(hekCount$inputCdsCount),]
-hekmed  <- median(log2(hekexpr$boundCdsCount / hekexpr$inputCdsCount), na.rm=T)
-
-yeastexpr <- yeastCount[yeastCount$inputTrxCount > 100 & !is.na(yeastCount$inputTrxCount),]
-yeastmed <- median(log2(yeastexpr$boundTrxCount / yeastexpr$inputTrxCount), na.rm=T)
-
-pcoding <- hekCount[row.names(hekCount) %in% refCodingIds,]
-lincs <- hekCount[row.names(hekCount) %in% cleanLincIds,]
-mitoch <- hekCount[row.names(hekCount) %in% mitoCodingIds,]
-miscs <- hekCount[row.names(hekCount) %in% miscIds,]
-snornas <- hekCount[hekCount$transcript_biotype == "snoRNA" & !is.na(hekCount$transcript_biotype),]
-
-pcodingPresent <- pcoding[pcoding$cdsNRead >= 10,]
-utr5Present <- pcoding[pcoding$utr5NRead >= 10,]
-lincsPresent <- lincs[lincs$trxNRead >= 10,]
-
-cutoff <- flossCutoffs(pcoding$cdsNRead, pcoding$cdsScore)
-lincsPresent$trxClassify <- cutoff$classify(lincsPresent$trxNRead, lincsPresent$trxScore)
-
-plotBase <- function() {
-  plot(log10(pcoding$cdsNRead), pcoding$cdsScore, xlim=c(1,log10(2e5)), ylim=c(0,1),
-       pch=".", col=solarGrey01,
-       axes=F, xlab="Total Reads", ylab="Fragment Length Distribution Score", cex.lab=1.33)
-  axis(1, log10(10**seq(1,5,1)), labels=c("10", "100", "1k", "10k", "100k"), cex.axis=1)
-  axis(1, log10(c(sapply(10**seq(1,5,1), minor_decade))), labels=F, tcl=-0.5)
-  axis(2, seq(0,1,0.2))
-
-  lines(seq(1,5,0.1), predict(cutoff$extremeLoess, seq(1,5,0.1)))
-}
-
-pdf("hek-input-ldist-rplot.pdf", useDingbats=F)
-
-plotBase()
-points(log10(mitoch$cdsNRead), mitoch$cdsScore,
-       pch=21, cex=0.75, col=solarCyan, bg=lighten(solarCyan))
-points(log10(miscs$trxNRead), miscs$trxScore,
-       pch=21, cex=0.75, col=solarYellow, bg=lighten(solarYellow))
-
-plotBase()
-title(main="5' leaders")
-points(log10(utr5Present$utr5NRead), utr5Present$utr5Score,
-       pch=21, cex=0.75, col=solarRed, bg=lighten(solarRed))
-
-plotBase()
-title(main="lincRNAs")
-points(log10(lincsPresent[lincsPresent$trxClassify != "Extreme","trxNRead"]),
-       lincsPresent[lincsPresent$trxClassify != "Extreme", "trxScore"],
-       pch=21, cex=0.75, col=solarMagenta, bg=lighten(solarMagenta))
-points(log10(lincsPresent[lincsPresent$trxClassify == "Extreme","trxNRead"]),
-       lincsPresent[lincsPresent$trxClassify == "Extreme", "trxScore"],
-       pch=21, cex=0.75, col=solarCyan, bg=lighten(solarCyan))
-
-plotBase()
-title(main="snoRNAs")
-points(log10(snornas$trxNRead), snornas$trxScore,
-       pch=21, cex=0.75, col=solarGreen, bg=lighten(solarGreen))
-
-dev.off()
-
-write.csv(pcoding[,c("cdsNRead", "cdsScore")], "tables/hek-input-floss-refcoding-cds.txt")
-write.csv(mitoch[,c("cdsNRead", "cdsScore")], "tables/hek-input-floss-mitoch-cds.txt")
-write.csv(miscs[,c("trxNRead", "trxScore")], "tables/hek-input-floss-miscs.txt")
-write.csv(pcoding[,c("utr5NRead", "utr5Score")], "tables/hek-input-floss-refcoding-utr5.txt")
-write.csv(lincsPresent[,c("trxNRead", "trxScore", "trxClassify")], "tables/hek-input-floss-lincs.txt")
-write.csv(snornas[,c("trxNRead", "trxScore")], "tables/hek-input-floss-snornas.txt")
-
-pdf("yeast-bound-ldist-rplot.pdf", useDingbats=F)
-
-yeastInputRefLDist <- normLDist(read.csv("data/yeast-pulldown-input-ref-ldist.txt", check.names=F, row.names=1)[,1])
-yeastBoundRefLDist <- normLDist(read.csv("data/yeast-pulldown-bound-ref-ldist.txt", check.names=F, row.names=1)[,1])
-hekInputRefLDist <- normLDist(read.csv("data/hek-pulldown-input-ref-ldist.txt", check.names=F, row.names=1)[,1])
-hekBoundRefLDist <- normLDist(read.csv("data/hek-pulldown-bound-ref-ldist.txt", check.names=F, row.names=1)[,1])
-
-#plot(lengthLevels, yeastInputRefLDist, type="l", col=solarGreen, lwd=2,
-#     xlim=c(26,34), ylim=c(0,0.5), xlab="Fragment Length")
-#lines(lengthLevels, yeastBoundRefLDist, type="l", col=solarBlue, lwd=2)
-#lines(lengthLevels, hekInputRefLDist, type="l", col=solarBlack, lwd=2)
-#lines(lengthLevels, hekBoundRefLDist, type="l", col=solarYellow, lwd=2)
-
-plot(lengthLevels, cumsum(yeastInputRefLDist), type="l", col=solarGreen, lwd=2,
-     xlim=c(26,34), ylim=c(0,1), xlab="Fragment Length")
-lines(lengthLevels, cumsum(yeastBoundRefLDist), type="l", col=solarBlue, lwd=2)
-lines(lengthLevels, cumsum(hekInputRefLDist), type="l", col=solarBlack, lwd=2)
-lines(lengthLevels, cumsum(hekBoundRefLDist), type="l", col=solarYellow, lwd=2)
-legend(x = "bottomright", legend=c("Yeast Input", "Yeast Bound", "HEK Input", "HEK Bound"),
-       col=c(solarGreen, solarBlue, solarBlack, solarYellow),
-       lwd=2)
-
-plot(log10(yeastCount$vsyeastNRead), yeastCount$vsyeastScore, xlim=c(1,log10(2e5)), ylim=c(0,1),
-     pch=".", col=solarBlue,
-     axes=F, xlab="Total Reads", ylab="FLOSS vs Yeast Input", cex.lab=1.33)
-axis(1, log10(10**seq(1,5,1)), labels=c("10", "100", "1k", "10k", "100k"), cex.axis=1)
-axis(1, log10(c(sapply(10**seq(1,5,1), minor_decade))), labels=F, tcl=-0.5)
-axis(2, seq(0,1,0.2))
-title(main="Yeast mRNAs in Bound")
-
-plot(log10(yeastCount$vshekNRead), yeastCount$vshekScore, xlim=c(1,log10(2e5)), ylim=c(0,1),
-     pch=".", col=solarMagenta,
-     axes=F, xlab="Total Reads", ylab="FLOSS vs HEK Input", cex.lab=1.33)
-axis(1, log10(10**seq(1,5,1)), labels=c("10", "100", "1k", "10k", "100k"), cex.axis=1)
-axis(1, log10(c(sapply(10**seq(1,5,1), minor_decade))), labels=F, tcl=-0.5)
-axis(2, seq(0,1,0.2))
-title(main="Yeast mRNAs in Bound")
-
-plot(log10(yeastCount$vsyeastNRead), yeastCount$vsyeastScore, xlim=c(1,log10(2e5)), ylim=c(0,1),
-     pch=".", col=solarBlue,
-     axes=F, xlab="Total Reads", ylab="FLOSS", cex.lab=1.33)
-points(log10(yeastCount$vshekNRead), yeastCount$vshekScore, pch=".", col=solarMagenta)
-axis(1, log10(10**seq(1,5,1)), labels=c("10", "100", "1k", "10k", "100k"), cex.axis=1)
-axis(1, log10(c(sapply(10**seq(1,5,1), minor_decade))), labels=F, tcl=-0.5)
-axis(2, seq(0,1,0.2))
-legend(x = "topright", legend=c("vs Yeast Input", "vs HEK Input"),
-       col=c(solarBlue, solarMagenta), pt.bg=c(solarBlue, solarMagenta), pch=21)
-title(main="Yeast mRNAs in Bound")
-
-dev.off()
-
-plotBase <- function() {
-  plot(log10(pcodingPresent$inputCdsCount), log2(pcodingPresent$boundCdsCount / pcodingPresent$inputCdsCount), xlim=c(1,log10(2e5)), ylim=c(-6,6),
-       pch=".", col=solarGrey01,
-       axes=F, xlab="Input Reads", ylab="Bound / Input", cex.lab=1.33)
-  axis(1, log10(10**seq(1,5,1)), labels=c("10", "100", "1k", "10k", "100k"), cex.axis=1)
-  axis(1, log10(c(sapply(10**seq(1,5,1), minor_decade))), labels=F, tcl=-0.5)
-  axis(2, seq(-6,6,3), labels=c("1/64", "1/8", "1", "8", "64"), cex.axis=1)
-  axis(2, seq(-6,6), labels=F, tcl=-0.5)
-}
-
-pdf("hek-pulldown-enrich-rplot.pdf", useDingbats=F)
-
-plotBase()
-abline(h=hekmed, col=lighten(solarGrey01), lwd=2)
-points(log10(yeastCount$inputTrxCount), log2(yeastCount$boundTrxCount / yeastCount$inputTrxCount),
-       pch=".", col=solarBlue)
-abline(h=yeastmed, col=lighten(solarBlue), lwd=2)
-acc1=yeastCount["YNR016C",]
-points(log10(acc1$inputTrxCount), log2(acc1$boundTrxCount / acc1$inputTrxCount),
-       pch=21, cex=0.75, col=solarBlue, bg=lighten(solarBlue))
-text(log10(acc1$inputTrxCount), log2(acc1$boundTrxCount / acc1$inputTrxCount),
-     labels="ACC1", col=solarBlue, pos=4)
-text(4, (yeastmed + hekmed), labels=sprintf("%0.1fx\n", 2**(hekmed - yeastmed)), col=solarBlue, cex=1.5, pos=4)
-text(4,5,labels="HEK CDSes", col=solarGrey01, cex=1.5)
-text(4,-5,labels="Yeast CDSes", col=solarBlue, cex=1.5)
-
-plotBase()
-points(log10(mitoch$inputCdsCount), log2(mitoch$boundCdsCount / mitoch$inputCdsCount),
-       pch=21, cex=0.75, col=solarCyan, bg=lighten(solarCyan))
-points(log10(miscs$inputTrxCount), log2(miscs$boundTrxCount / miscs$inputTrxCount),
-       pch=21, cex=0.75, col=solarYellow, bg=lighten(solarYellow))
-text(log10(miscs[miscs$inputTrxCount>100,]$inputTrxCount),
-     log2(miscs[miscs$inputTrxCount>100,]$boundTrxCount / miscs[miscs$inputTrxCount>100,]$inputTrxCount),
-     labels=miscs[miscs$inputTrxCount>100,]$external_gene_id,
-     col=solarYellow, pos=4, cex=0.75)
-text(4,4,labels="ncRNAs", col=solarYellow, cex=1.5)
-text(4,3,labels="mito CDSes", col=solarCyan, cex=1.5)
-
-plotBase()
-points(log10(snornas$inputTrxCount), log2(snornas$boundTrxCount / snornas$inputTrxCount),
-       pch=21, cex=0.50, col=solarGreen, bg=lighten(solarGreen))
-text(4,5,labels="snoRNAs", col=solarGreen, cex=1.5)
-
-plotBase()
-title(main="5' Leaders")
-points(log10(utr5Present$inputUtr5Count), log2(utr5Present$boundUtr5Count / utr5Present$inputUtr5Count),
-       pch=21, cex=0.33, col=solarRed, bg=lighten(solarRed))
-text(4,5,labels="5' leaders", col=solarRed, cex=1.5)
-
-plotBase()
-lincsGood <- lincsPresent[lincsPresent$trxClassify != "Extreme",]
-points(log10(lincsGood$inputTrxCount), log2(lincsGood$boundTrxCount / lincsGood$inputTrxCount),
-       pch=21, cex=0.75, col=solarMagenta, bg=lighten(solarMagenta))
-lincsExtreme <- lincsPresent[lincsPresent$trxClassify == "Extreme",]
-points(log10(lincsExtreme$inputTrxCount), log2(lincsExtreme$boundTrxCount / lincsExtreme$inputTrxCount),
-       pch=21, cex=0.75, col=solarCyan, bg=lighten(solarCyan))
-lincsLabel <- lincsExtreme[lincsExtreme$inputTrxCount > 100,]
-text(log10(lincsLabel$inputTrxCount), log2(lincsLabel$boundTrxCount / lincsLabel$inputTrxCount),
-     labels=lincsLabel$external_gene_id, col=solarCyan, pos=4, cex=0.75)
-text(4,5,labels="lincRNAs", col=solarMagenta, cex=1.5)
-
-dev.off()
-
-write.csv(pcodingPresent[,c("inputCdsCount", "boundCdsCount", "cdsScore")],
-          "tables/hek-pulldown-refcoding-cds.txt")
-write.csv(yeastCount[,c("inputTrxCount", "boundTrxCount")], "tables/hek-pulldown-yeast.txt")
-write.csv(mitoch[,c("inputCdsCount", "boundCdsCount", "cdsScore")],
-          "tables/hek-pulldown-mitoch-cds.txt")
-write.csv(miscs[,c("inputTrxCount", "boundTrxCount", "trxScore")],
-          "tables/hek-pulldown-miscs.txt")
-write.csv(snornas[,c("inputTrxCount", "boundTrxCount", "trxScore")],
-          "tables/hek-pulldown-snornas.txt")
-write.csv(utr5Present[,c("inputUtr5Count", "boundUtr5Count", "utr5Score")],
-          "tables/hek-pulldown-utr5s.txt")
-write.csv(lincsPresent[,c("inputTrxCount", "boundTrxCount", "trxScore", "trxClassify")],
-          "tables/hek-pulldown-lincs.txt")
-
-write.csv(lincsPresent[,c("ensembl_gene_id", "inputTrxCount", "boundTrxCount", "trxNRead", "trxScore", "trxClassify", "external_gene_id", "external_gene_db", "gene_biotype", "transcript_biotype", "chromosome_name", "start_position", "end_position", "strand", "description")],
-          "tables/hek-pulldown-lincs-annotated.txt")
-
-l2cdf <- function(x0) { l2x <- log2(x0[!is.na(x0)])
-                        cdf <- data.frame( x = l2x[order(l2x)], n = seq(1,length(l2x))/length(l2x))
-                        cdf[ cdf$x >= -5 & cdf$x < 5, ] }
-
-pdf("hek-pulldown-enrich-chist-rplot.pdf", useDingbats=F)
-
-plot(l2cdf(with(pcodingPresent, ifelse(inputCdsCount >= 100, boundCdsCount / inputCdsCount, NA))),
-     xlim=c(-4,4), ylim=c(0,1),
-     type="l", col=solarGrey01, lwd=4)
-lines(l2cdf(with(yeastCount, ifelse(inputTrxCount >= 100, boundTrxCount / inputTrxCount, NA))),
-      col=solarBlue, lwd=4)
-lines(l2cdf(with(miscs, ifelse(inputTrxCount >= 100, boundTrxCount / inputTrxCount, NA))),
-      col=solarYellow, lwd=4)
-
-plot(l2cdf(with(pcodingPresent, ifelse(inputCdsCount >= 100, boundCdsCount / inputCdsCount, NA))),
-     xlim=c(-4,4), ylim=c(0,1),
-     type="l", col=solarGrey01, lwd=4)
-title(main="5' Leaders")
-lines(l2cdf(with(miscs, ifelse(inputTrxCount >= 100, boundTrxCount / inputTrxCount, NA))),
-      col=solarYellow, lwd=4)
-lines(l2cdf(with(utr5Present, ifelse(inputUtr5Count >= 100, boundUtr5Count / inputUtr5Count, NA))),
-      col=solarRed, lwd=4)
-
-plot(l2cdf(with(pcodingPresent, ifelse(inputCdsCount >= 100, boundCdsCount / inputCdsCount, NA))),
-     xlim=c(-4,4), ylim=c(0,1),
-     type="l", col=solarGrey01, lwd=4)
-title(main="snornas")
-lines(l2cdf(with(miscs, ifelse(inputTrxCount >= 100, boundTrxCount / inputTrxCount, NA))),
-      col=solarYellow, lwd=4)
-lines(l2cdf(with(snornas, ifelse(inputTrxCount >= 100, boundTrxCount / inputTrxCount, NA))),
-      col=solarGreen, lwd=4)
-
-plot(l2cdf(with(pcodingPresent, ifelse(inputCdsCount >= 100, boundCdsCount / inputCdsCount, NA))),
-     xlim=c(-4,4), ylim=c(0,1),
-     type="l", col=solarGrey01, lwd=4)
-title(main="lincRNAs")
-lines(l2cdf(with(miscs, ifelse(inputTrxCount >= 100, boundTrxCount / inputTrxCount, NA))),
-      col=solarYellow, lwd=4)
-lines(l2cdf(with(lincsPresent, ifelse(inputTrxCount >= 100 & trxClassify != "Extreme", boundTrxCount / inputTrxCount, NA))),
-      col=solarMagenta, lwd=4)
-lines(l2cdf(with(lincsPresent, ifelse(inputTrxCount >= 100 & trxClassify == "Extreme", boundTrxCount / inputTrxCount, NA))),
-      col=solarCyan, lwd=4)
-
-dev.off()
diff --git a/src/FLOSS_score/count-score-hek.R b/src/FLOSS_score/count-score-hek.R
deleted file mode 100644
index 0de05eb4..00000000
--- a/src/FLOSS_score/count-score-hek.R
+++ /dev/null
@@ -1,92 +0,0 @@
-#library("biomaRt")
-
-source("../shared/transcript-utils.R")
-source("../shared/footprint-assignment.R")
-source("../shared/floss.R")
-
-options(mc.cores=32)
-
-yeastInputCount <- read.csv("data/yeast-pulldown-input.txt", row.names=1)
-yeastBoundCount <- read.csv("data/yeast-pulldown-bound.txt", row.names=1)
-
-yeastCount <- data.frame(inputTrxCount  = yeastInputCount$trx,
-                         inputCdsCount  = yeastInputCount$cds,
-                         inputUtr5Count = yeastInputCount$utr5,
-                         inputUtr3Count = yeastInputCount$utr3,
-                         row.names = row.names(yeastInputCount) )
-yeastCount[row.names(yeastBoundCount),"boundTrxCount"] <- yeastBoundCount$trx
-yeastCount[row.names(yeastBoundCount),"boundCdsCount"] <- yeastBoundCount$cds
-yeastCount[row.names(yeastBoundCount),"boundUtr5Count"] <- yeastBoundCount$utr5
-yeastCount[row.names(yeastBoundCount),"boundUtr3Count"] <- yeastBoundCount$utr3
-
-write.csv(yeastCount, "data/yeast-pulldown-count.txt", row.names=TRUE)
-
-source("human-annotation.R")
-
-hekInputCount <- read.csv("data/hek-pulldown-input.txt", row.names=1)
-hekBoundCount <- read.csv("data/hek-pulldown-bound.txt", row.names=1)
-
-row.names(hekInputCount) <- sub("[.][0-9]*$", "", row.names(hekInputCount))
-row.names(hekBoundCount) <- sub("[.][0-9]*$", "", row.names(hekBoundCount))
-
-hekCount <- data.frame( inputTrxCount  = hekInputCount$trx,
-                        inputCdsCount  = hekInputCount$cds,
-                        inputUtr5Count = hekInputCount$utr5,
-                        inputUtr3Count = hekInputCount$utr3,
-                        row.names = row.names(hekInputCount) )
-hekCount[row.names(hekBoundCount),"boundTrxCount"] <- hekBoundCount$trx
-hekCount[row.names(hekBoundCount),"boundCdsCount"] <- hekBoundCount$cds
-hekCount[row.names(hekBoundCount),"boundUtr5Count"] <- hekBoundCount$utr5
-hekCount[row.names(hekBoundCount),"boundUtr3Count"] <- hekBoundCount$utr3
-
-write.csv(hekCount, "data/hek-pulldown-count.txt", row.names=TRUE)
-
-inputHekLDists <- readRegionLengthDists("data/hek-pulldown-input")
-boundHekLDists <- readRegionLengthDists("data/hek-pulldown-bound")
-
-inputHekLDists <- lapply(inputHekLDists,
-                         function (r) { row.names(r) <- sub("[.][0-9]*$", "", row.names(r)); r })
-boundHekLDists <- lapply(boundHekLDists,
-                         function (r) { row.names(r) <- sub("[.][0-9]*$", "", row.names(r)); r })
-
-inputHekRefLDist <- colSums(inputHekLDists$cds[refCodingIds,], na.rm=TRUE)
-write.csv(inputHekRefLDist, "data/hek-pulldown-input-ref-ldist.txt", row.names=TRUE)
-
-boundHekRefLDist <- colSums(boundHekLDists$cds[refCodingIds,], na.rm=TRUE)
-write.csv(boundHekRefLDist, "data/hek-pulldown-bound-ref-ldist.txt", row.names=TRUE)
-
-inputHekLDFloss <- flossRegionLengthDists( inputHekLDists, inputHekRefLDist )
-inputHekFloss <- flossRegionScores( inputHekLDFloss )
-write.csv(inputHekFloss, "data/hek-pulldown-input-floss.txt", row.names=TRUE)
-
-boundHekLDFloss <- flossRegionLengthDists( boundHekLDists, boundHekRefLDist )
-boundHekFloss <- flossRegionScores( boundHekLDFloss )
-write.csv(boundHekFloss, "data/hek-pulldown-bound-floss.txt", row.names=TRUE)
-
-
-inputYeastLDists <- readLengthDists("data/yeast-pulldown-input-trx")
-boundYeastLDists <- readLengthDists("data/yeast-pulldown-bound-trx")
-
-inputYeastRefLDist <- colSums(inputYeastLDists, na.rm=TRUE)
-write.csv(inputYeastRefLDist, "data/yeast-pulldown-input-ref-ldist.txt", row.names=TRUE)
-
-boundYeastRefLDist <- colSums(boundYeastLDists, na.rm=TRUE)
-write.csv(boundYeastRefLDist, "data/yeast-pulldown-bound-ref-ldist.txt", row.names=TRUE)
-
-inputYeastVsYeastFloss <- flossLengthDist( inputYeastLDists, inputYeastRefLDist )
-inputYeastVsHEKFloss <- flossLengthDist( inputYeastLDists, inputHekRefLDist )
-inputYeastFloss <- data.frame(vsyeastNRead = inputYeastVsYeastFloss$NRead,
-                              vsyeastScore = inputYeastVsYeastFloss$Score,
-                              vshekNRead = inputYeastVsHEKFloss$NRead,
-                              vshekScore = inputYeastVsHEKFloss$Score,
-                              row.names = row.names(inputYeastVsYeastFloss))
-write.csv(inputYeastFloss, "data/yeast-pulldown-input-floss.txt", row.names=TRUE)
-
-boundYeastVsYeastFloss <- flossLengthDist( boundYeastLDists, inputYeastRefLDist )
-boundYeastVsHEKFloss <- flossLengthDist( boundYeastLDists, inputHekRefLDist )
-boundYeastFloss <- data.frame(vsyeastNRead = boundYeastVsYeastFloss$NRead,
-                              vsyeastScore = boundYeastVsYeastFloss$Score,
-                              vshekNRead = boundYeastVsHEKFloss$NRead,
-                              vshekScore = boundYeastVsHEKFloss$Score,
-                              row.names = row.names(boundYeastVsYeastFloss))
-write.csv(boundYeastFloss, "data/yeast-pulldown-bound-floss.txt", row.names=TRUE)
diff --git a/src/FLOSS_score/floss-calc-mesc.R b/src/FLOSS_score/floss-calc-mesc.R
deleted file mode 100644
index df603a13..00000000
--- a/src/FLOSS_score/floss-calc-mesc.R
+++ /dev/null
@@ -1,19 +0,0 @@
-library("biomaRt")
-library("GenomicRanges")
-
-setwd("~/GoogleDrive/Post-Doc_ENS/Documents/RMI2/gitlab/HIV_project")
-source("./src/FLOSS_score/transcript-utils.R")
-source("./src/FLOSS_score/footprint-assignment.R")
-source("./src/FLOSS_score/floss.R")
-
-source("./src/FLOSS_score/human-annotation.R")
-
-
-chxLDists <- readRegionLengthDists("results/FLOSS_score/mesc-chx")
-
-chxRef <- colSums(chxLDists$cds[refCodingIds,], na.rm=TRUE)
-write.csv(chxRef, "results/FLOSS_score/mesc-chx-ref-ldist.txt", row.names=TRUE)
-
-chxLDFloss <- flossRegionLengthDists( chxLDists, chxRef )
-chxFloss <- flossRegionScores( chxLDFloss )
-write.csv(chxFloss, "results/FLOSS_score/mesc-chx-floss.txt", row.names=TRUE)
diff --git a/src/FLOSS_score/floss-plots-mesc_IP.R b/src/FLOSS_score/floss-plots-mesc_IP.R
deleted file mode 100644
index e622c2a7..00000000
--- a/src/FLOSS_score/floss-plots-mesc_IP.R
+++ /dev/null
@@ -1,144 +0,0 @@
-library("biomaRt")
-library("GenomicRanges")
-library("zoo")
-
-setwd("~/GoogleDrive/Post-Doc_ENS/Documents/RMI2/gitlab/HIV_project")
-#setwd("/Xnfs/lbmcdb/Ricci_team/HIV_project")
-
-source("./src/FLOSS_score/footprint-assignment.R")
-source("./src/FLOSS_score/floss.R")
-source("./src/FLOSS_score/human-annotation.R")
-source("./src/FLOSS_score/plot-utils.R")
-
-hg38_NY5Floss <- read.csv("results/10_FLOSS_score/hg38_NY5_chx-floss_IP.txt", check.names=FALSE, row.names=1)
-
-xref <- match(row.names(hg38_NY5Floss), annots$ensembl_transcript_id)
-table(is.na(xref))
-trxs <- cbind(hg38_NY5Floss, annots[xref,])
-trxs$name <- row.names(trxs)
-
-pcoding   = trxs[trxs$name %in% codingIds & !(trxs$name %in% mitoIds) & !(trxs$name %in% dirtyPCodingIds),]
-mitoch    = trxs[trxs$name %in% codingIds &  (trxs$name %in% mitoIds),]
-lincs     = trxs[trxs$name %in% lincIds ,]
-snolincs  = trxs[trxs$name %in% snohgIds  &   trxs$transcript_biotype == "lincRNA",]
-miscs     = trxs[trxs$name %in% miscIds,]
-hiv_genes <- c("GAG","POL","VIF", "VPU", "VPR", "ENV", "REV", "VIF", "TAT", "NEF")
-hivCds    = trxs[trxs$name %in% hiv_genes,]
-hivLtr    = trxs[trxs$name %in% "LTR",]
-
-hg38_NY5Cutoffs <- flossCutoffs(pcoding$cdsNRead, pcoding$cdsScore)
-predx <- seq(1,6,0.1)
-
-plotBase <- function(title) {
-  plot(log10(pcoding$cdsNRead), pcoding$cdsScore, xlim=c(0,6), ylim=c(0,1),
-       pch=".", col=solarGrey01,
-       axes=F, xlab="Total Reads", ylab="Fragment Length Distribution Score", cex.lab=1.33, main = title)
-  axis(1, log10(10**seq(0,6,1)), labels=c("1", "10", "100", "1k", "10k", "100k", "1M"), cex.axis=1)
-  axis(1, log10(c(sapply(10**seq(0,5,1), minor_decade))), labels=F, tcl=-0.5)
-  axis(2, seq(0,1,0.2))
-}
-
-plotDrug <- function() {
-  plotBase("Mito CDS, mics and lincs")
-  points(log10(mitoch$cdsNRead), mitoch$cdsScore,
-         pch=21, cex=0.75, col=solarCyan, bg=lighten(solarCyan))
-  points(log10(miscs$trxNRead), miscs$trxScore,
-         pch=21, cex=0.75, col=solarYellow, bg=lighten(solarYellow))
-  text(log10(miscs$trxNRead), miscs$trxScore,
-       labels=miscs$external_gene_id, col=solarYellow, pos=4 )
-  points(log10(lincs$trxNRead), lincs$trxScore,
-         pch=21, cex=0.75, col=solarMagenta, bg=lighten(solarMagenta))
-  
-  plotBase("5'UTR of protein coding mRNA")
-  points(log10(pcoding$utr5NRead), pcoding$utr5Score
-         , pch=21, cex=0.25, col=solarRed, bg=lighten(solarRed))
-}
-
-pdf("results/10_FLOSS_score/hg38_NY5-floss-rplot_IP.pdf", useDingbats=F)
-plotDrug()
-
-plotBase("5'UTR")
-lines(predx, predict(hg38_NY5Cutoffs$extremeLoess, predx))
-
-pcoding$classifyUtr5 <- hg38_NY5Cutoffs$classify(pcoding$utr5NRead, pcoding$utr5Score)
-utr5Good <- pcoding[pcoding$classifyUtr5 != "Extreme" & !is.na(pcoding$classifyUtr5),]
-utr5Bad <- pcoding[pcoding$classifyUtr5 == "Extreme" & !is.na(pcoding$classifyUtr5),]
-
-points(log10(utr5Good$utr5NRead), utr5Good$utr5Score
-       , pch=21, cex=0.25, col=solarRed, bg=lighten(solarRed))
-points(log10(utr5Bad$utr5NRead), utr5Bad$utr5Score
-       , pch=21, cex=0.25, col=solarCyan, bg=lighten(solarCyan))
-
-plotBase("lincs")
-lines(predx, predict(hg38_NY5Cutoffs$extremeLoess, predx))
-
-lincs$classify <- hg38_NY5Cutoffs$classify(lincs$trxNRead, lincs$trxScore)
-lincsGood <- lincs[lincs$classify != "Extreme" & !is.na(lincs$classify),]
-lincsBad <- lincs[lincs$classify == "Extreme" & !is.na(lincs$classify),]
-
-points(log10(lincsGood$trxNRead), lincsGood$trxScore,
-       pch=21, cex=0.25, col=solarMagenta, bg=lighten(solarMagenta))
-points(log10(lincsBad$trxNRead), lincsBad$trxScore,
-       pch=21, cex=0.25, col=solarGreen, bg=lighten(solarGreen))
-
-plotBase("HIV CDS")
-lines(predx, predict(hg38_NY5Cutoffs$extremeLoess, predx))
-
-hivCds$classify <- hg38_NY5Cutoffs$classify(hivCds$trxNRead, hivCds$trxScore)
-hivCdsGood <- hivCds[hivCds$classify != "Extreme" & !is.na(hivCds$classify),]
-hivCdsBad <- hivCds[hivCds$classify == "Extreme" & !is.na(hivCds$classify),]
-
-points(log10(hivCdsGood$trxNRead), hivCdsGood$trxScore
-       , pch=21, cex=0.75, col=solarRed, bg=lighten(solarRed))
-points(log10(hivCdsBad$trxNRead), hivCdsBad$trxScore
-       , pch=21, cex=0.75, col=solarCyan, bg=lighten(solarCyan))
-
-plotBase("HIV LTR")
-lines(predx, predict(hg38_NY5Cutoffs$extremeLoess, predx))
-
-hivLtr$classify <- hg38_NY5Cutoffs$classify(hivLtr$trxNRead, hivLtr$trxScore)
-hivLtrGood <- hivLtr[hivLtr$classify != "Extreme" & !is.na(hivLtr$classify),]
-hivLtrBad <- hivLtr[hivLtr$classify == "Extreme" & !is.na(hivLtr$classify),]
-
-points(log10(hivLtrGood$trxNRead), hivLtrGood$trxScore
-       , pch=21, cex=0.75, col=solarRed, bg=lighten(solarRed))
-points(log10(hivLtrBad$trxNRead), hivLtrBad$trxScore
-       , pch=21, cex=0.75, col=solarCyan, bg=lighten(solarCyan))
-
-plotBase("HIV")
-lines(predx, predict(hg38_NY5Cutoffs$extremeLoess, predx))
-points(log10(hivCdsGood$trxNRead), hivCdsGood$trxScore
-       , pch=21, cex=0.75, col=solarRed, bg=lighten(solarRed))
-points(log10(hivCdsBad$trxNRead), hivCdsBad$trxScore
-       , pch=21, cex=0.75, col=solarCyan, bg=lighten(solarCyan))
-points(log10(hivLtrGood$trxNRead), hivLtrGood$trxScore
-       , pch=21, cex=0.75, col=solarRed, bg=lighten(solarGreen))
-
-dev.off()
-
-pcoding$classifyCds <- hg38_NY5Cutoffs$classify(pcoding$cdsNRead, pcoding$cdsScore)
-table(pcoding[pcoding$cdsNRead > 100, "classifyCds"])
-sum(pcoding[pcoding$cdsNRead > 100, "classifyCds"] != "Extreme", na.rm=TRUE) / sum(!is.na(pcoding[pcoding$cdsNRead > 100, "classifyCds"]))
-
-table(pcoding[pcoding$utr5NRead > 100, "classifyUtr5"])
-sum(pcoding[pcoding$utr5NRead > 100, "classifyUtr5"] != "Extreme", na.rm=TRUE) / sum(!is.na(pcoding[pcoding$utr5NRead > 100, "classifyUtr5"]))
-
-table(lincs[lincs$trxNRead > 100, "classify"])
-sum(lincs[lincs$trxNRead > 100, "classify"] != "Extreme", na.rm=TRUE) / sum(!is.na(lincs[lincs$trxNRead > 100, "classify"]))
-
-write.csv(pcoding[,c("cdsNRead", "cdsScore", "classifyCds")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-refcoding-cds_IP.txt")
-write.csv(pcoding[,c("utr5NRead", "utr5Score", "classifyUtr5")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-refcoding-utr5_IP.txt")
-write.csv(lincs[,c("trxNRead", "trxScore", "classify")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-lincs_IP.txt")
-write.csv(mitoch[,c("cdsNRead", "cdsScore")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-mitoch_IP.txt")
-write.csv(miscs[,c("trxNRead", "trxScore")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-miscs_IP.txt")
-write.csv(hivCds[,c("trxNRead", "trxScore")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-hivCds_IP.txt")
-write.csv(hivLtr[,c("trxNRead", "trxScore")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-hivLtr_IP.txt")
-write.csv(lincs[,c("ensembl_gene_id", "trxNRead", "trxScore", "classify", "external_gene_id", "external_gene_db", "gene_biotype", "transcript_biotype", "chromosome_name", "start_position", "end_position", "strand", "description")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-lincs-annotated_IP.txt")
diff --git a/src/FLOSS_score/floss-plots-mesc.R b/src/FLOSS_score/floss-plots-template.R
similarity index 55%
rename from src/FLOSS_score/floss-plots-mesc.R
rename to src/FLOSS_score/floss-plots-template.R
index cf991cfc..197c68c2 100644
--- a/src/FLOSS_score/floss-plots-mesc.R
+++ b/src/FLOSS_score/floss-plots-template.R
@@ -2,19 +2,17 @@ library("biomaRt")
 library("GenomicRanges")
 library("zoo")
 
-setwd("~/GoogleDrive/Post-Doc_ENS/Documents/RMI2/gitlab/HIV_project")
-#setwd("/Xnfs/lbmcdb/Ricci_team/HIV_project")
-
 source("./src/FLOSS_score/footprint-assignment.R")
 source("./src/FLOSS_score/floss.R")
-source("./src/FLOSS_score/human-annotation.R")
+source("./src/FLOSS_score/mouse-annotation.R")
 source("./src/FLOSS_score/plot-utils.R")
 
-hg38_NY5Floss <- read.csv("results/10_FLOSS_score/hg38_NY5_chx-floss.txt", check.names=FALSE, row.names=1)
+Floss <- read.csv("results/chx-floss.txt", check.names=FALSE, row.names=1)
+row.names(Floss) <- gsub("\\.[[:digit:]]*", "", row.names(Floss))
 
-xref <- match(row.names(hg38_NY5Floss), annots$ensembl_transcript_id)
+xref <- match(row.names(Floss), annots$ensembl_transcript_id)
 table(is.na(xref))
-trxs <- cbind(hg38_NY5Floss, annots[xref,])
+trxs <- cbind(Floss, annots[xref,])
 trxs$name <- row.names(trxs)
 
 pcoding   = trxs[trxs$name %in% codingIds & !(trxs$name %in% mitoIds) & !(trxs$name %in% dirtyPCodingIds),]
@@ -22,11 +20,8 @@ mitoch    = trxs[trxs$name %in% codingIds &  (trxs$name %in% mitoIds),]
 lincs     = trxs[trxs$name %in% lincIds ,]
 snolincs  = trxs[trxs$name %in% snohgIds  &   trxs$transcript_biotype == "lincRNA",]
 miscs     = trxs[trxs$name %in% miscIds,]
-hiv_genes <- c("GAG","POL","VIF", "VPU", "VPR", "ENV", "REV", "VIF", "TAT", "NEF")
-hivCds    = trxs[trxs$name %in% hiv_genes,]
-hivLtr    = trxs[trxs$name %in% "LTR",]
 
-hg38_NY5Cutoffs <- flossCutoffs(pcoding$cdsNRead, pcoding$cdsScore)
+Cutoffs <- flossCutoffs(pcoding$cdsNRead, pcoding$cdsScore)
 predx <- seq(1,6,0.1)
 
 plotBase <- function(title) {
@@ -58,9 +53,9 @@ pdf("results/10_FLOSS_score/hg38_NY5-floss-rplot.pdf", useDingbats=F)
 plotDrug()
 
 plotBase("5'UTR")
-lines(predx, predict(hg38_NY5Cutoffs$extremeLoess, predx))
+lines(predx, predict(Cutoffs$extremeLoess, predx))
 
-pcoding$classifyUtr5 <- hg38_NY5Cutoffs$classify(pcoding$utr5NRead, pcoding$utr5Score)
+pcoding$classifyUtr5 <- Cutoffs$classify(pcoding$utr5NRead, pcoding$utr5Score)
 utr5Good <- pcoding[pcoding$classifyUtr5 != "Extreme" & !is.na(pcoding$classifyUtr5),]
 utr5Bad <- pcoding[pcoding$classifyUtr5 == "Extreme" & !is.na(pcoding$classifyUtr5),]
 
@@ -70,9 +65,9 @@ points(log10(utr5Bad$utr5NRead), utr5Bad$utr5Score
        , pch=21, cex=0.25, col=solarCyan, bg=lighten(solarCyan))
 
 plotBase("lincs")
-lines(predx, predict(hg38_NY5Cutoffs$extremeLoess, predx))
+lines(predx, predict(Cutoffs$extremeLoess, predx))
 
-lincs$classify <- hg38_NY5Cutoffs$classify(lincs$trxNRead, lincs$trxScore)
+lincs$classify <- Cutoffs$classify(lincs$trxNRead, lincs$trxScore)
 lincsGood <- lincs[lincs$classify != "Extreme" & !is.na(lincs$classify),]
 lincsBad <- lincs[lincs$classify == "Extreme" & !is.na(lincs$classify),]
 
@@ -81,33 +76,9 @@ points(log10(lincsGood$trxNRead), lincsGood$trxScore,
 points(log10(lincsBad$trxNRead), lincsBad$trxScore,
        pch=21, cex=0.25, col=solarGreen, bg=lighten(solarGreen))
 
-plotBase("HIV CDS")
-lines(predx, predict(hg38_NY5Cutoffs$extremeLoess, predx))
-
-hivCds$classify <- hg38_NY5Cutoffs$classify(hivCds$trxNRead, hivCds$trxScore)
-hivCdsGood <- hivCds[hivCds$classify != "Extreme" & !is.na(hivCds$classify),]
-hivCdsBad <- hivCds[hivCds$classify == "Extreme" & !is.na(hivCds$classify),]
-
-points(log10(hivCdsGood$trxNRead), hivCdsGood$trxScore
-       , pch=21, cex=0.75, col=solarRed, bg=lighten(solarRed))
-points(log10(hivCdsBad$trxNRead), hivCdsBad$trxScore
-       , pch=21, cex=0.75, col=solarCyan, bg=lighten(solarCyan))
-
-plotBase("HIV LTR")
-lines(predx, predict(hg38_NY5Cutoffs$extremeLoess, predx))
-
-hivLtr$classify <- hg38_NY5Cutoffs$classify(hivLtr$trxNRead, hivLtr$trxScore)
-hivLtrGood <- hivLtr[hivLtr$classify != "Extreme" & !is.na(hivLtr$classify),]
-hivLtrBad <- hivLtr[hivLtr$classify == "Extreme" & !is.na(hivLtr$classify),]
-
-points(log10(hivLtrGood$trxNRead), hivLtrGood$trxScore
-       , pch=21, cex=0.75, col=solarRed, bg=lighten(solarRed))
-points(log10(hivLtrBad$trxNRead), hivLtrBad$trxScore
-       , pch=21, cex=0.75, col=solarCyan, bg=lighten(solarCyan))
-
 dev.off()
 
-pcoding$classifyCds <- hg38_NY5Cutoffs$classify(pcoding$cdsNRead, pcoding$cdsScore)
+pcoding$classifyCds <- Cutoffs$classify(pcoding$cdsNRead, pcoding$cdsScore)
 table(pcoding[pcoding$cdsNRead > 100, "classifyCds"])
 sum(pcoding[pcoding$cdsNRead > 100, "classifyCds"] != "Extreme", na.rm=TRUE) / sum(!is.na(pcoding[pcoding$cdsNRead > 100, "classifyCds"]))
 
@@ -117,19 +88,17 @@ sum(pcoding[pcoding$utr5NRead > 100, "classifyUtr5"] != "Extreme", na.rm=TRUE) /
 table(lincs[lincs$trxNRead > 100, "classify"])
 sum(lincs[lincs$trxNRead > 100, "classify"] != "Extreme", na.rm=TRUE) / sum(!is.na(lincs[lincs$trxNRead > 100, "classify"]))
 
+dir.create("results/tables", showWarnings = FALSE)
+
 write.csv(pcoding[,c("cdsNRead", "cdsScore", "classifyCds")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-refcoding-cds.txt")
+          "results/tables/floss-refcoding-cds.txt")
 write.csv(pcoding[,c("utr5NRead", "utr5Score", "classifyUtr5")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-refcoding-utr5.txt")
+          "results/tables/floss-refcoding-utr5.txt")
 write.csv(lincs[,c("trxNRead", "trxScore", "classify")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-lincs.txt")
+          "results/tables/floss-lincs.txt")
 write.csv(mitoch[,c("cdsNRead", "cdsScore")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-mitoch.txt")
+          "results/tables/floss-mitoch.txt")
 write.csv(miscs[,c("trxNRead", "trxScore")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-miscs.txt")
-write.csv(hivCds[,c("trxNRead", "trxScore")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-hivCds.txt")
-write.csv(hivLtr[,c("trxNRead", "trxScore")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-hivLtr.txt")
-write.csv(lincs[,c("ensembl_gene_id", "trxNRead", "trxScore", "classify", "external_gene_id", "external_gene_db", "gene_biotype", "transcript_biotype", "chromosome_name", "start_position", "end_position", "strand", "description")],
-          "results/10_FLOSS_score/tables/hg38_NY5-floss-lincs-annotated.txt")
+          "results/tables/floss-miscs.txt")
+write.csv(lincs[,c("ensembl_gene_id", "trxNRead", "trxScore", "classify", "external_gene_name", "gene_biotype", "transcript_biotype", "chromosome_name", "start_position", "end_position", "strand", "description")],
+          "results/tables/floss-lincs-annotated.txt")
diff --git a/src/FLOSS_score/human-annotation.R b/src/FLOSS_score/human-annotation.R
index cfed5ff5..c40597dc 100644
--- a/src/FLOSS_score/human-annotation.R
+++ b/src/FLOSS_score/human-annotation.R
@@ -6,6 +6,7 @@ ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host
 annots <- getBM(attributes=c("ensembl_gene_id", "ensembl_transcript_id", "gene_biotype", "transcript_biotype",
                   "chromosome_name", "start_position", "end_position", "strand", "description", "hgnc_symbol"),
                 mart=ensembl)
+colnames(annots)[length(colnames(annots))] <- "external_gene_name"
 
 granges <- GRanges(seqnames = annots$chromosome_name,
                    ranges=IRanges(start = annots$start_position, end = annots$end_position),
diff --git a/src/FLOSS_score/ldist-calc-mesc.R b/src/FLOSS_score/ldist-calc-mesc.R
deleted file mode 100644
index 013a4d47..00000000
--- a/src/FLOSS_score/ldist-calc-mesc.R
+++ /dev/null
@@ -1,76 +0,0 @@
-library("rtracklayer")
-library("Rsamtools")
-library("parallel")
-library("biomaRt")
-library("GenomicRanges")
-
-setwd("~/GoogleDrive/Post-Doc_ENS/Documents/RMI2/gitlab/HIV_project")
-# setwd("/Xnfs/lbmcdb/Ricci_team/HIV_project")
-source("./src/FLOSS_score/transcript-utils.R")
-source("./src/FLOSS_score/footprint-assignment.R")
-source("./src/FLOSS_score/floss.R")
-
-source("./src/FLOSS_score/human-annotation.R")
-
-
-options(mc.cores=8)
-
-# Annotation file which in bed format with no reduncance in transcripts
-hg38_ny5_BedFile <- "/media/manu/ManuDisque/HIV_project/Ribosome_Profiling/U937_bam/FLOSS/hg38_exon_uniq.bed"
-hg38_pnl4.3_BedFile <- "/media/manu/ManuDisque/HIV_project/RibosomeProfiling_2/HEK_IP_FLOSS/hg38_exon_uniq_NC_001802.1.bed"
-
-## Bamfiles need to be sorted and indexed
-## Bamfiles need to contain alignment on human AND viral genome
-chxBamFile <- BamFile("/media/manu/ManuDisque/HIV_project/Ribosome_Profiling/U937_bam/FLOSS/U937_CHX_all_hg38_NY5_sort.bam")
-chxBamFileIP <- BamFile("/media/manu/ManuDisque/HIV_project/RibosomeProfiling_2/HEK_IP_FLOSS/HEK_IP_HIV1_CHX_rep1_hg38_pNL43.bam")
-
-aoffsets <- data.frame(asite=c(11,11,11,11,12,12), row.names=seq(26,31)) # offset determined by Ribocode software
-
-# for U937 - NY5
-hg38_NY5_Bed <- import(hg38_ny5_BedFile, format="BED")
-hg38_NY5_Bed$name <- gsub("\\.[[:digit:]]","" ,hg38_NY5_Bed$name ) # remove version of transcript
-
-hg38_NY5_Size <- regionCountFrame(countSizes(defaultInsets, hg38_NY5_Bed))
-write.csv(hg38_NY5_Size, "results/10_FLOSS_score/hg38_NY5_sizes.txt", row.names=TRUE)
-
-hg38_NY5_Chx <- regionCountFrame(countAligns(aoffsets, defaultInsets, chxBamFile, hg38_NY5_Bed))
-write.csv(hg38_NY5_Chx, "results/10_FLOSS_score/hg38_NY5_chx.txt", row.names=TRUE)
-
-chxLDists <- calcTrxRegionLengthDists(chxBamFile, hg38_NY5_Bed)
-writeRegionLengthDists(chxLDists, "results/10_FLOSS_score/hg38_NY5_Chx")
-
-# for HEK - pNL4.3 -> IP
-
-hg38_pnl4.3_Bed <- import(hg38_pnl4.3_BedFile, format="BED")
-hg38_pnl4.3_Bed$name <- gsub("\\.[[:digit:]]","" ,hg38_pnl4.3_Bed$name ) # remove version of transcript
-
-hg38_NY5_Chx_IP <- regionCountFrame(countAligns(aoffsets, defaultInsets, chxBamFileIP, hg38_pnl4.3_Bed))
-write.csv(hg38_NY5_Chx_IP, "results/10_FLOSS_score/hg38_pNL43_chx_IP.txt", row.names=TRUE)
-
-chxLDists_IP <- calcTrxRegionLengthDists(chxBamFileIP, hg38_pnl4.3_Bed)
-writeRegionLengthDists(chxLDists_IP, "results/10_FLOSS_score/hg38_pNL43_chx_IP")
-
-
-
-###########
-
-chxLDists <- readRegionLengthDists("results/10_FLOSS_score/hg38_NY5_Chx")
-
-
-chxRef <- colSums(chxLDists$cds[refCodingIds,], na.rm=TRUE)
-write.csv(chxRef, "results/10_FLOSS_score/hg38_NY5_chx-ref-ldist.txt", row.names=TRUE)
-
-chxLDFloss <- flossRegionLengthDists( chxLDists, chxRef )
-chxFloss <- flossRegionScores( chxLDFloss )
-write.csv(chxFloss, "results/10_FLOSS_score/hg38_NY5_chx-floss.txt", row.names=TRUE)
-
-# for HEK - pNL4.3 -> IP
-
-chxLDists_IP_tralala <- readRegionLengthDists("results/10_FLOSS_score/hg38_pNL43_chx_IP")
-
-chxRef_IP <- colSums(chxLDists_IP_tralala$cds[refCodingIds,], na.rm=TRUE)
-write.csv(chxRef_IP, "results/10_FLOSS_score/hg38_NY5_chx-ref-ldist_IP.txt", row.names=TRUE)
-
-chxLDFloss_IP <- flossRegionLengthDists( chxLDists_IP_tralala, chxRef_IP )
-chxFloss_IP <- flossRegionScores( chxLDFloss_IP )
-write.csv(chxFloss_IP, "results/10_FLOSS_score/hg38_NY5_chx-floss_IP.txt", row.names=TRUE)
diff --git a/src/FLOSS_score/ldist-calc-template.R b/src/FLOSS_score/ldist-calc-template.R
new file mode 100644
index 00000000..501dc3f3
--- /dev/null
+++ b/src/FLOSS_score/ldist-calc-template.R
@@ -0,0 +1,47 @@
+library("rtracklayer")
+library("Rsamtools")
+library("parallel")
+library("biomaRt")
+library("GenomicRanges")
+
+source("./src/FLOSS_score/transcript-utils.R")
+source("./src/FLOSS_score/footprint-assignment.R")
+source("./src/FLOSS_score/floss.R")
+
+source("./src/FLOSS_score/mouse-annotation.R")
+
+
+options(mc.cores=8)
+
+# Annotation file which in bed format with no reduncance in transcripts
+BedFile <- "data/wgEncodeGencodeBasicVM24_chr.bed"
+
+## Bamfiles need to be sorted and indexed
+chxBamFile <- BamFile("data/RibosomeProfiling_sort.bam")
+
+aoffsets <- data.frame(asite=c(11,11,11,11,12,12), row.names=seq(26,31)) # offset determined by Ribocode software
+
+# for Samples
+Bed <- import(BedFile, format="BED")
+Bed$name <- gsub("\\.[[:digit:]]","" ,Bed$name ) # remove version of transcript
+
+Size <- regionCountFrame(countSizes(defaultInsets, Bed))
+write.csv(Size, "results/sizes.txt", row.names=TRUE)
+
+Chx <- regionCountFrame(countAligns(asiteOffsets = aoffsets, insets = defaultInsets, bamfile = chxBamFile, trxBed = Bed))
+write.csv(Chx, "results/Chx.txt", row.names=TRUE)
+
+chxLDists <- calcTrxRegionLengthDists(bamfile = chxBamFile, trxBed = Bed)
+writeRegionLengthDists(chxLDists, "results/chxLDists")
+
+###########
+
+chxLDists <- readRegionLengthDists("results/chxLDists")
+
+
+chxRef <- colSums(chxLDists$cds[refCodingIds,], na.rm = TRUE)
+write.csv(chxRef, "results/chx-ref-ldist.txt", row.names = TRUE)
+
+chxLDFloss <- flossRegionLengthDists( chxLDists, chxRef )
+chxFloss <- flossRegionScores( chxLDFloss )
+write.csv(chxFloss, "results/chx-floss.txt", row.names=TRUE)
diff --git a/src/FLOSS_score/ldist-plots-mesc.R b/src/FLOSS_score/ldist-plots-mesc.R
deleted file mode 100644
index 5a521eae..00000000
--- a/src/FLOSS_score/ldist-plots-mesc.R
+++ /dev/null
@@ -1,281 +0,0 @@
-library("biomaRt")
-library("GenomicRanges")
-library("boot")
-
-setwd("~/GoogleDrive/Post-Doc_ENS/Documents/RMI2/gitlab/HIV_project")
-#setwd("/Xnfs/lbmcdb/Ricci_team/HIV_project")
-
-source("./src/FLOSS_score/transcript-utils.R")
-source("./src/FLOSS_score/footprint-assignment.R")
-source("./src/FLOSS_score/floss.R")
-source("./src/FLOSS_score/plot-utils.R")
-
-source("./src/FLOSS_score/human-annotation.R")
-
-cdist <- function(ldists) { normLDist(colSums(ldists, na.rm=TRUE)) }
-
-lengthLevels <- seq(defaultLengthMin, defaultLengthMax)
-
-hg38_NY5RefLDist <- normLDist(read.csv("results/10_FLOSS_score/hg38_NY5_chx-ref-ldist.txt", check.names=F, row.names=1)[,1])
-hg38_NY5LDists <- readRegionLengthDists("results/10_FLOSS_score/hg38_NY5_Chx")
-
-hg38_NY5RefCodingCdses <- hg38_NY5LDists$cds[row.names(hg38_NY5LDists$cds) %in% refCodingIds,]
-hg38_NY5RefCodingUtr5s <- hg38_NY5LDists$utr5[row.names(hg38_NY5LDists$utr5) %in% refCodingIds,]
-hg38_NY5Lincs          <- hg38_NY5LDists$trx[row.names(hg38_NY5LDists$trx) %in% cleanLincIds,]
-hg38_NY5MitoCdses      <- hg38_NY5LDists$cds[row.names(hg38_NY5LDists$cds) %in% mitoCodingIds,]
-
-vault <- annots[annots$hgnc_symbol == "VTRNA1-1", "ensembl_transcript_id"]
-terc <- annots[annots$hgnc_symbol == "TERC", "ensembl_transcript_id"]
-snhg5 <- annots[annots$hgnc_symbol == "SNHG5", "ensembl_transcript_id"]
-
-hg38_NY5Vault          <- hg38_NY5LDists$trx[row.names(hg38_NY5LDists$trx) %in% vault,]
-hg38_NY5Terc           <- hg38_NY5LDists$trx[row.names(hg38_NY5LDists$trx) %in% terc,]
-hg38_NY5Snhg5          <- hg38_NY5LDists$trx[row.names(hg38_NY5LDists$trx) %in% snhg5,]
-hiv_genes <- c("GAG","POL","VIF", "VPU", "VPR", "ENV", "REV", "VIF", "TAT", "NEF")
-hg38_NY5HIVCDS          <- hg38_NY5LDists$trx[row.names(hg38_NY5LDists$trx) %in% hiv_genes,]
-hg38_NY5HIVLTR          <- hg38_NY5LDists$trx[row.names(hg38_NY5LDists$trx) %in% "LTR",]
-
-hg38_NY5CdsDist <- cdist(hg38_NY5RefCodingCdses)
-
-pdf("./results/10_FLOSS_score/hg38_NY5-examples-rplot.pdf")
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.6), xlab="Fragment Length", main = "LincRNA")
-lines(lengthLevels, cdist(hg38_NY5Lincs), type="l", col=solarMagenta, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.5), xlab="Fragment Length", main = "5' UTR")
-lines(lengthLevels, cdist(hg38_NY5RefCodingUtr5s), type="l", col=solarRed, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.5), xlab="Fragment Length", main = "Mitochondrial genes")
-lines(lengthLevels, cdist(hg38_NY5MitoCdses), type="l", col=solarCyan, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.5), xlab="Fragment Length", main = "Vault")
-lines(lengthLevels, cdist(hg38_NY5Vault),
-      type="l", col=solarYellow, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.5), xlab="Fragment Length", main = "Terc")
-lines(lengthLevels, cdist(hg38_NY5Terc),
-      type="l", col=solarYellow, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.35), xlab="Fragment Length", main = "Snhg5")
-lines(lengthLevels, cdist(hg38_NY5Snhg5),
-      type="l", col=solarGreen, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.35), xlab="Fragment Length", main = "HIV CDS")
-lines(lengthLevels, cdist(hg38_NY5HIVCDS),
-      type="l", col=solarBlue, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.35), xlab="Fragment Length", main = "HIV LTR")
-lines(lengthLevels, cdist(hg38_NY5HIVLTR),
-      type="l", col=solarOrange, lwd=2)
-
-dev.off()
-
-# Write data tables
-write.csv(hg38_NY5RefCodingCdses,  "results/10_FLOSS_score/tables/ldist-hg38_NY5-refcoding-cds.txt")
-write.csv(hg38_NY5Terc,            "results/10_FLOSS_score/tables/ldist-hg38_NY5-terc-trx.txt")
-write.csv(hg38_NY5Vault,           "results/10_FLOSS_score/tables/ldist-hg38_NY5-vault-trx.txt")
-write.csv(hg38_NY5MitoCdses,       "results/10_FLOSS_score/tables/ldist-hg38_NY5-mito-cds.txt")
-write.csv(hg38_NY5Snhg5,           "results/10_FLOSS_score/tables/ldist-hg38_NY5-snhg5-trx.txt")
-write.csv(hg38_NY5Lincs,           "results/10_FLOSS_score/tables/ldist-hg38_NY5-linc-trx.txt")
-write.csv(hg38_NY5RefCodingUtr5s,  "results/10_FLOSS_score/tables/ldist-hg38_NY5-refcoding-utr5.txt")
-write.csv(hg38_NY5HIVCDS,  "results/10_FLOSS_score/tables/ldist-hg38_NY5-hiv-cds.txt")
-write.csv(hg38_NY5HIVLTR,  "results/10_FLOSS_score/tables/ldist-hg38_NY5-hiv-ltr.txt")
-
-write.csv(rbind(refCodingCdses  = cdist(hg38_NY5RefCodingCdses),
-                cleanLincs      = cdist(hg38_NY5Lincs),
-                refCodingUtr5s  = cdist(hg38_NY5RefCodingUtr5s),
-                terc            = cdist(hg38_NY5Terc),
-                vault           = cdist(hg38_NY5Vault),
-                snhg5           = cdist(hg38_NY5Snhg5),
-                hivCds          = cdist(hg38_NY5HIVCDS),
-                hivLtr          = cdist(hg38_NY5HIVLTR)),
-          "results/FLOSS_score/tables/ldist-hg38_NY5.txt")
-
-## STOP HERE ##
-## I'm not sure that I need the following code
-
-
-# # Need FLOSS to pick out real lincRNAs.
-# emetFloss <- read.csv("results/FLOSS_score/mesc-chx-floss.txt", check.names=FALSE, row.names=1)
-# emetCutoffs <- flossCutoffs(emetFloss[refCodingIds,]$cdsNRead, emetFloss[refCodingIds,]$cdsScore)
-# emetLincFloss <- emetFloss[cleanLincIds,]
-# emetLincFloss$classify <- emetCutoffs$classify(emetLincFloss$trxNRead, emetLincFloss$trxScore)
-# flossLincIds <- row.names(emetLincFloss[emetLincFloss$classify != "Extreme" & !is.na(emetLincFloss$classify),])
-#                            
-# mescChxCdsLDist <- cdist(mescChxLDists$cds[refCodingIds,])
-# mescEmetCdsLDist <- cdist(mescEmetLDists$cds[refCodingIds,])
-# 
-# mescChxUtr5LDist <- cdist(mescChxLDists$utr5[refCodingIds,])
-# mescEmetUtr5LDist <- cdist(mescEmetLDists$utr5[refCodingIds,])
-# 
-# mescChxLincLDist <- cdist(mescChxLDists$trx[flossLincIds,])
-# mescEmetLincLDist <- cdist(mescEmetLDists$trx[flossLincIds,])
-# 
-# mescChxMitoLDist <- cdist(mescChxLDists$cds[mitoCodingIds,])
-# mescEmetMitoLDist <- cdist(mescEmetLDists$cds[mitoCodingIds,])
-# 
-# yeastChxLDists <- readLengthDists("data/yeast-spike-chx-trx")
-# yeastEmetLDists <- readLengthDists("data/yeast-spike-emet-trx")
-# 
-# yeastChxLDist <- cdist(yeastChxLDists)
-# yeastEmetLDist <- cdist(yeastEmetLDists)
-# 
-# pdf("length-shift-cumul-rplot.pdf")
-# plot(lengthLevels, mescChxCdsLDist, type="l", cex=2, col=solarOrange, xlim=c(26,34), ylim=c(0,0.3), lwd=2)
-# lines(lengthLevels, mescEmetCdsLDist, cex=2, col=solarBlue, lwd=2)
-# 
-# plot(lengthLevels, cumsum(mescChxCdsLDist), type="l", cex=2, col=solarOrange, xlim=c(26,34), ylim=c(0,1.0), lwd=2)
-# lines(lengthLevels, cumsum(mescEmetCdsLDist), cex=2, col=solarBlue, lwd=2)
-# 
-# plotFour <- function( chx, emet, name ) {
-#   plot(lengthLevels, chx, type="l", cex=2, col=solarYellow, xlim=c(26,34), ylim=c(0,0.3), lwd=2, main=name)
-#   lines(lengthLevels, emet, cex=2, col=solarCyan, lwd=2)
-#   lines(lengthLevels, mescChxCdsLDist, cex=2, col=lighten(solarOrange, lfact=0.4), lwd=2)
-#   lines(lengthLevels, mescEmetCdsLDist, cex=2, col=lighten(solarBlue, lfact=0.4), lwd=2)
-#   
-#   plot(lengthLevels, cumsum(chx), type="l", cex=2, col=solarYellow, xlim=c(26,34), ylim=c(0,1.0), lwd=2, main=name)
-#   lines(lengthLevels, cumsum(emet), cex=2, col=solarCyan, lwd=2)
-#   lines(lengthLevels, cumsum(mescChxCdsLDist), cex=2, col=lighten(solarOrange, lfact=0.4), lwd=2)
-#   lines(lengthLevels, cumsum(mescEmetCdsLDist), cex=2, col=lighten(solarBlue, lfact=0.4), lwd=2)
-# 
-#   write.csv(rbind(chxCodingCds = cumsum(mescChxCdsLDist),
-#                   emetCodingCds = cumsum(mescEmetCdsLDist),
-#                   chx = cumsum(chx),
-#                   emet = cumsum(emet)),
-#             sprintf("tables/lshift-%s.txt", name))
-# }
-# 
-# plotFour( mescChxUtr5LDist, mescEmetUtr5LDist, "5' UTR" )
-# plotFour( mescChxLincLDist, mescEmetLincLDist, "All lincRNAs" )
-# plotFour( yeastChxLDist, yeastEmetLDist, "Yeast" )
-# 
-# plotFour(cdist(mescChxLDists$trx[terc,]), 
-#          cdist(mescEmetLDists$trx[terc,]), "Terc")
-# 
-# plotFour(cdist(mescChxLDists$trx[vault,]),
-#          cdist(mescEmetLDists$trx[vault,]), "Vaultrc5")
-# 
-# plotFour(cdist(mescChxLDists$trx[snhg5,]),
-#          cdist(mescEmetLDists$trx[snhg5,]), "Snhg5")
-# 
-# dev.off()
-# 
-# shiftScore <- function(ldShort, ldLong) {
-#     cldShort <- cumsum(ldShort)
-#     cldLong <- cumsum(ldLong)
-#     sum(cldShort - cldLong)
-# }
-# 
-# ldistShiftStat <- function(ldists, sampleIndex) {
-#     lastChx <- floor(length(sampleIndex) / 2)
-# 
-#     chxLD <- cdist(ldists[sampleIndex[1:lastChx],])
-#     names(chxLD) <- sub("^", "chx", names(chxLD))
-# 
-#     emetLD <- cdist(ldists[sampleIndex[(lastChx+1):(nrow(ldists))],])
-#     names(emetLD) <- sub("^", "emet", names(emetLD))
-# 
-#     v <- append(chxLD, emetLD)
-#     v <- append(v, ldistDiff(chxLD, emetLD))
-#     v <- append(v, shiftScore(chxLD, emetLD))
-#     v
-# }
-# 
-# cdsShiftBoot <- boot(data = rbind(mescChxLDists$cds[refCodingIds,],
-#                          mescEmetLDists$cds[refCodingIds,]),
-#                      statistic = ldistShiftStat,
-#                      R = 10000,
-#                      parallel = "multicore", ncpus=32)
-# 
-# utr5ShiftBoot <- boot(data = rbind(mescChxLDists$utr5[refCodingIds,],
-#                           mescEmetLDists$utr5[refCodingIds,]),
-#                       statistic = ldistShiftStat,
-#                       R = 10000,
-#                       parallel = "multicore", ncpus=32)
-# 
-# lincShiftBoot <- boot(data = rbind(mescChxLDists$trx[cleanLincIds,],
-#                           mescEmetLDists$trx[cleanLincIds,]),
-#                       statistic = ldistShiftStat,
-#                       R = 10000,
-#                       parallel = "multicore", ncpus=32)
-# 
-# yeastShiftBoot <- boot(data = rbind(yeastChxLDists, yeastEmetLDists),
-#                      statistic = ldistShiftStat,
-#                      R = 10000,
-#                      parallel = "multicore", ncpus=32)
-# 
-# capture.output(summary(cdsShiftBoot$t[,20]), file="data/mesc-bootstrap-shift.txt")
-# capture.output(cdsShiftBoot$t0[20],          file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(cdsShiftBoot$t[,20]) >= abs(cdsShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(cdsShiftBoot$t[,20]) < abs(cdsShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# 
-# capture.output(summary(utr5ShiftBoot$t[,20]), file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(utr5ShiftBoot$t0[20],          file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(utr5ShiftBoot$t[,20]) >= abs(utr5ShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(utr5ShiftBoot$t[,20]) < abs(utr5ShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# 
-# capture.output(summary(lincShiftBoot$t[,20]), file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(lincShiftBoot$t0[20],          file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(lincShiftBoot$t[,20]) >= abs(lincShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(lincShiftBoot$t[,20]) < abs(lincShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# 
-# capture.output(summary(yeastShiftBoot$t[,20]), file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(yeastShiftBoot$t0[20],          file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(yeastShiftBoot$t[,20]) >= abs(yeastShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(yeastShiftBoot$t[,20]) < abs(yeastShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# 
-# pdf("ldist-shift-bootstrap-plot.pdf")
-# 
-# bootplot <- function(b, t) {
-#     hist(b$t[,20], xlim=c(-1,1))
-#     abline(v = b$t0[20])
-#     title(main = t)
-#     
-#     boxplot(b$t[,20], ylim=c(-1,1),
-#             pars=list(outpch=21, outcex=0.5,
-#                 boxwex=0.2, boxlty = "blank", boxfill=solarGrey01, medlwd = 0.33, medcol = "white"
-#                 ))
-#     points(c(1), c(b$t0[20]), pch=21, col=solarRed, bg=lighten(solarRed))
-#     axis(side=2, at=seq(-1,1,0.1), labels = FALSE)
-#     title(main = t)
-# }
-# 
-# plot(cdsShiftBoot, index=20)
-# plot(utr5ShiftBoot, index=20)
-# plot(lincShiftBoot, index=20)
-# plot(yeastShiftBoot, index=20)
-# 
-# bootplot(cdsShiftBoot, "Coding CDS")
-# bootplot(utr5ShiftBoot, "Coding 5 UTR")
-# bootplot(lincShiftBoot, "linc")
-# bootplot(yeastShiftBoot, "Yeast")
-# 
-# boxplot(cdsShiftBoot$t[,20], utr5ShiftBoot$t[,20],
-#         lincShiftBoot$t[,20], yeastShiftBoot$t[,20],
-#         ylim=c(-1,1),
-#         names=c("CDS", "Utr5", "linc", "yeast"),
-#         pars=list(outpch=21, outcex=0.5,
-#             boxwex=0.4, boxlty = "blank", boxfill=solarGrey01, medlwd = 0.33, medcol = "white"
-#             ))
-# points(seq(1,4),
-#        c(cdsShiftBoot$t0[20], utr5ShiftBoot$t0[20],
-#          lincShiftBoot$t0[20], yeastShiftBoot$t0[20]),
-#        pch=21, col=solarRed, bg=lighten(solarRed))
-# axis(side=2, at=seq(-1,1,0.1), labels = FALSE)
-# 
-# dev.off()
diff --git a/src/FLOSS_score/ldist-plots-mesc_ip.R b/src/FLOSS_score/ldist-plots-mesc_ip.R
deleted file mode 100644
index 373c8822..00000000
--- a/src/FLOSS_score/ldist-plots-mesc_ip.R
+++ /dev/null
@@ -1,281 +0,0 @@
-library("biomaRt")
-library("GenomicRanges")
-library("boot")
-
-setwd("~/GoogleDrive/Post-Doc_ENS/Documents/RMI2/gitlab/HIV_project")
-#setwd("/Xnfs/lbmcdb/Ricci_team/HIV_project")
-
-source("./src/FLOSS_score/transcript-utils.R")
-source("./src/FLOSS_score/footprint-assignment.R")
-source("./src/FLOSS_score/floss.R")
-source("./src/FLOSS_score/plot-utils.R")
-
-source("./src/FLOSS_score/human-annotation.R")
-
-cdist <- function(ldists) { normLDist(colSums(ldists, na.rm=TRUE)) }
-
-lengthLevels <- seq(defaultLengthMin, defaultLengthMax)
-
-hg38_NY5RefLDist <- normLDist(read.csv("results/10_FLOSS_score/hg38_NY5_chx-ref-ldist_IP.txt", check.names=F, row.names=1)[,1])
-hg38_NY5LDists <- readRegionLengthDists("results/10_FLOSS_score/hg38_pNL43_chx_IP")
-
-hg38_NY5RefCodingCdses <- hg38_NY5LDists$cds[row.names(hg38_NY5LDists$cds) %in% refCodingIds,]
-hg38_NY5RefCodingUtr5s <- hg38_NY5LDists$utr5[row.names(hg38_NY5LDists$utr5) %in% refCodingIds,]
-hg38_NY5Lincs          <- hg38_NY5LDists$trx[row.names(hg38_NY5LDists$trx) %in% cleanLincIds,]
-hg38_NY5MitoCdses      <- hg38_NY5LDists$cds[row.names(hg38_NY5LDists$cds) %in% mitoCodingIds,]
-
-vault <- annots[annots$hgnc_symbol == "VTRNA1-1", "ensembl_transcript_id"]
-terc <- annots[annots$hgnc_symbol == "TERC", "ensembl_transcript_id"]
-snhg5 <- annots[annots$hgnc_symbol == "SNHG5", "ensembl_transcript_id"]
-
-hg38_NY5Vault          <- hg38_NY5LDists$trx[row.names(hg38_NY5LDists$trx) %in% vault,]
-hg38_NY5Terc           <- hg38_NY5LDists$trx[row.names(hg38_NY5LDists$trx) %in% terc,]
-hg38_NY5Snhg5          <- hg38_NY5LDists$trx[row.names(hg38_NY5LDists$trx) %in% snhg5,]
-hiv_genes <- c("GAG","POL","VIF", "VPU", "VPR", "ENV", "REV", "VIF", "TAT", "NEF")
-hg38_NY5HIVCDS          <- hg38_NY5LDists$trx[row.names(hg38_NY5LDists$trx) %in% hiv_genes,]
-hg38_NY5HIVLTR          <- hg38_NY5LDists$trx[row.names(hg38_NY5LDists$trx) %in% "LTR",]
-
-hg38_NY5CdsDist <- cdist(hg38_NY5RefCodingCdses)
-
-pdf("./results/10_FLOSS_score/hg38_pNL43-examples-rplot_IP.pdf")
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.6), xlab="Fragment Length", main = "LincRNA")
-lines(lengthLevels, cdist(hg38_NY5Lincs), type="l", col=solarMagenta, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.5), xlab="Fragment Length", main = "5' UTR")
-lines(lengthLevels, cdist(hg38_NY5RefCodingUtr5s), type="l", col=solarRed, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.5), xlab="Fragment Length", main = "Mitochondrial genes")
-lines(lengthLevels, cdist(hg38_NY5MitoCdses), type="l", col=solarCyan, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.5), xlab="Fragment Length", main = "Vault")
-lines(lengthLevels, cdist(hg38_NY5Vault),
-      type="l", col=solarYellow, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.5), xlab="Fragment Length", main = "Terc")
-lines(lengthLevels, cdist(hg38_NY5Terc),
-      type="l", col=solarYellow, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.35), xlab="Fragment Length", main = "Snhg5")
-lines(lengthLevels, cdist(hg38_NY5Snhg5),
-      type="l", col=solarGreen, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.35), xlab="Fragment Length", main = "HIV CDS")
-lines(lengthLevels, cdist(hg38_NY5HIVCDS),
-      type="l", col=solarBlue, lwd=2)
-
-plot(lengthLevels, hg38_NY5CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
-     xlim=c(26,34), ylim=c(0,0.35), xlab="Fragment Length", main = "HIV LTR")
-lines(lengthLevels, cdist(hg38_NY5HIVLTR),
-      type="l", col=solarOrange, lwd=2)
-
-dev.off()
-
-# Write data tables
-write.csv(hg38_NY5RefCodingCdses,  "results/10_FLOSS_score/tables/ldist-hg38_NY5-refcoding-cds.txt")
-write.csv(hg38_NY5Terc,            "results/10_FLOSS_score/tables/ldist-hg38_NY5-terc-trx.txt")
-write.csv(hg38_NY5Vault,           "results/10_FLOSS_score/tables/ldist-hg38_NY5-vault-trx.txt")
-write.csv(hg38_NY5MitoCdses,       "results/10_FLOSS_score/tables/ldist-hg38_NY5-mito-cds.txt")
-write.csv(hg38_NY5Snhg5,           "results/10_FLOSS_score/tables/ldist-hg38_NY5-snhg5-trx.txt")
-write.csv(hg38_NY5Lincs,           "results/10_FLOSS_score/tables/ldist-hg38_NY5-linc-trx.txt")
-write.csv(hg38_NY5RefCodingUtr5s,  "results/10_FLOSS_score/tables/ldist-hg38_NY5-refcoding-utr5.txt")
-write.csv(hg38_NY5HIVCDS,  "results/10_FLOSS_score/tables/ldist-hg38_NY5-hiv-cds.txt")
-write.csv(hg38_NY5HIVLTR,  "results/10_FLOSS_score/tables/ldist-hg38_NY5-hiv-ltr.txt")
-
-write.csv(rbind(refCodingCdses  = cdist(hg38_NY5RefCodingCdses),
-                cleanLincs      = cdist(hg38_NY5Lincs),
-                refCodingUtr5s  = cdist(hg38_NY5RefCodingUtr5s),
-                terc            = cdist(hg38_NY5Terc),
-                vault           = cdist(hg38_NY5Vault),
-                snhg5           = cdist(hg38_NY5Snhg5),
-                hivCds          = cdist(hg38_NY5HIVCDS),
-                hivLtr          = cdist(hg38_NY5HIVLTR)),
-          "results/FLOSS_score/tables/ldist-hg38_NY5.txt")
-
-## STOP HERE ##
-## I'm not sure that I need the following code
-
-
-# # Need FLOSS to pick out real lincRNAs.
-# emetFloss <- read.csv("results/FLOSS_score/mesc-chx-floss.txt", check.names=FALSE, row.names=1)
-# emetCutoffs <- flossCutoffs(emetFloss[refCodingIds,]$cdsNRead, emetFloss[refCodingIds,]$cdsScore)
-# emetLincFloss <- emetFloss[cleanLincIds,]
-# emetLincFloss$classify <- emetCutoffs$classify(emetLincFloss$trxNRead, emetLincFloss$trxScore)
-# flossLincIds <- row.names(emetLincFloss[emetLincFloss$classify != "Extreme" & !is.na(emetLincFloss$classify),])
-#                            
-# mescChxCdsLDist <- cdist(mescChxLDists$cds[refCodingIds,])
-# mescEmetCdsLDist <- cdist(mescEmetLDists$cds[refCodingIds,])
-# 
-# mescChxUtr5LDist <- cdist(mescChxLDists$utr5[refCodingIds,])
-# mescEmetUtr5LDist <- cdist(mescEmetLDists$utr5[refCodingIds,])
-# 
-# mescChxLincLDist <- cdist(mescChxLDists$trx[flossLincIds,])
-# mescEmetLincLDist <- cdist(mescEmetLDists$trx[flossLincIds,])
-# 
-# mescChxMitoLDist <- cdist(mescChxLDists$cds[mitoCodingIds,])
-# mescEmetMitoLDist <- cdist(mescEmetLDists$cds[mitoCodingIds,])
-# 
-# yeastChxLDists <- readLengthDists("data/yeast-spike-chx-trx")
-# yeastEmetLDists <- readLengthDists("data/yeast-spike-emet-trx")
-# 
-# yeastChxLDist <- cdist(yeastChxLDists)
-# yeastEmetLDist <- cdist(yeastEmetLDists)
-# 
-# pdf("length-shift-cumul-rplot.pdf")
-# plot(lengthLevels, mescChxCdsLDist, type="l", cex=2, col=solarOrange, xlim=c(26,34), ylim=c(0,0.3), lwd=2)
-# lines(lengthLevels, mescEmetCdsLDist, cex=2, col=solarBlue, lwd=2)
-# 
-# plot(lengthLevels, cumsum(mescChxCdsLDist), type="l", cex=2, col=solarOrange, xlim=c(26,34), ylim=c(0,1.0), lwd=2)
-# lines(lengthLevels, cumsum(mescEmetCdsLDist), cex=2, col=solarBlue, lwd=2)
-# 
-# plotFour <- function( chx, emet, name ) {
-#   plot(lengthLevels, chx, type="l", cex=2, col=solarYellow, xlim=c(26,34), ylim=c(0,0.3), lwd=2, main=name)
-#   lines(lengthLevels, emet, cex=2, col=solarCyan, lwd=2)
-#   lines(lengthLevels, mescChxCdsLDist, cex=2, col=lighten(solarOrange, lfact=0.4), lwd=2)
-#   lines(lengthLevels, mescEmetCdsLDist, cex=2, col=lighten(solarBlue, lfact=0.4), lwd=2)
-#   
-#   plot(lengthLevels, cumsum(chx), type="l", cex=2, col=solarYellow, xlim=c(26,34), ylim=c(0,1.0), lwd=2, main=name)
-#   lines(lengthLevels, cumsum(emet), cex=2, col=solarCyan, lwd=2)
-#   lines(lengthLevels, cumsum(mescChxCdsLDist), cex=2, col=lighten(solarOrange, lfact=0.4), lwd=2)
-#   lines(lengthLevels, cumsum(mescEmetCdsLDist), cex=2, col=lighten(solarBlue, lfact=0.4), lwd=2)
-# 
-#   write.csv(rbind(chxCodingCds = cumsum(mescChxCdsLDist),
-#                   emetCodingCds = cumsum(mescEmetCdsLDist),
-#                   chx = cumsum(chx),
-#                   emet = cumsum(emet)),
-#             sprintf("tables/lshift-%s.txt", name))
-# }
-# 
-# plotFour( mescChxUtr5LDist, mescEmetUtr5LDist, "5' UTR" )
-# plotFour( mescChxLincLDist, mescEmetLincLDist, "All lincRNAs" )
-# plotFour( yeastChxLDist, yeastEmetLDist, "Yeast" )
-# 
-# plotFour(cdist(mescChxLDists$trx[terc,]), 
-#          cdist(mescEmetLDists$trx[terc,]), "Terc")
-# 
-# plotFour(cdist(mescChxLDists$trx[vault,]),
-#          cdist(mescEmetLDists$trx[vault,]), "Vaultrc5")
-# 
-# plotFour(cdist(mescChxLDists$trx[snhg5,]),
-#          cdist(mescEmetLDists$trx[snhg5,]), "Snhg5")
-# 
-# dev.off()
-# 
-# shiftScore <- function(ldShort, ldLong) {
-#     cldShort <- cumsum(ldShort)
-#     cldLong <- cumsum(ldLong)
-#     sum(cldShort - cldLong)
-# }
-# 
-# ldistShiftStat <- function(ldists, sampleIndex) {
-#     lastChx <- floor(length(sampleIndex) / 2)
-# 
-#     chxLD <- cdist(ldists[sampleIndex[1:lastChx],])
-#     names(chxLD) <- sub("^", "chx", names(chxLD))
-# 
-#     emetLD <- cdist(ldists[sampleIndex[(lastChx+1):(nrow(ldists))],])
-#     names(emetLD) <- sub("^", "emet", names(emetLD))
-# 
-#     v <- append(chxLD, emetLD)
-#     v <- append(v, ldistDiff(chxLD, emetLD))
-#     v <- append(v, shiftScore(chxLD, emetLD))
-#     v
-# }
-# 
-# cdsShiftBoot <- boot(data = rbind(mescChxLDists$cds[refCodingIds,],
-#                          mescEmetLDists$cds[refCodingIds,]),
-#                      statistic = ldistShiftStat,
-#                      R = 10000,
-#                      parallel = "multicore", ncpus=32)
-# 
-# utr5ShiftBoot <- boot(data = rbind(mescChxLDists$utr5[refCodingIds,],
-#                           mescEmetLDists$utr5[refCodingIds,]),
-#                       statistic = ldistShiftStat,
-#                       R = 10000,
-#                       parallel = "multicore", ncpus=32)
-# 
-# lincShiftBoot <- boot(data = rbind(mescChxLDists$trx[cleanLincIds,],
-#                           mescEmetLDists$trx[cleanLincIds,]),
-#                       statistic = ldistShiftStat,
-#                       R = 10000,
-#                       parallel = "multicore", ncpus=32)
-# 
-# yeastShiftBoot <- boot(data = rbind(yeastChxLDists, yeastEmetLDists),
-#                      statistic = ldistShiftStat,
-#                      R = 10000,
-#                      parallel = "multicore", ncpus=32)
-# 
-# capture.output(summary(cdsShiftBoot$t[,20]), file="data/mesc-bootstrap-shift.txt")
-# capture.output(cdsShiftBoot$t0[20],          file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(cdsShiftBoot$t[,20]) >= abs(cdsShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(cdsShiftBoot$t[,20]) < abs(cdsShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# 
-# capture.output(summary(utr5ShiftBoot$t[,20]), file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(utr5ShiftBoot$t0[20],          file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(utr5ShiftBoot$t[,20]) >= abs(utr5ShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(utr5ShiftBoot$t[,20]) < abs(utr5ShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# 
-# capture.output(summary(lincShiftBoot$t[,20]), file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(lincShiftBoot$t0[20],          file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(lincShiftBoot$t[,20]) >= abs(lincShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(lincShiftBoot$t[,20]) < abs(lincShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# 
-# capture.output(summary(yeastShiftBoot$t[,20]), file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(yeastShiftBoot$t0[20],          file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(yeastShiftBoot$t[,20]) >= abs(yeastShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# capture.output(sum(abs(yeastShiftBoot$t[,20]) < abs(yeastShiftBoot$t0[20])),
-#                file="data/mesc-bootstrap-shift.txt", append=TRUE)
-# 
-# pdf("ldist-shift-bootstrap-plot.pdf")
-# 
-# bootplot <- function(b, t) {
-#     hist(b$t[,20], xlim=c(-1,1))
-#     abline(v = b$t0[20])
-#     title(main = t)
-#     
-#     boxplot(b$t[,20], ylim=c(-1,1),
-#             pars=list(outpch=21, outcex=0.5,
-#                 boxwex=0.2, boxlty = "blank", boxfill=solarGrey01, medlwd = 0.33, medcol = "white"
-#                 ))
-#     points(c(1), c(b$t0[20]), pch=21, col=solarRed, bg=lighten(solarRed))
-#     axis(side=2, at=seq(-1,1,0.1), labels = FALSE)
-#     title(main = t)
-# }
-# 
-# plot(cdsShiftBoot, index=20)
-# plot(utr5ShiftBoot, index=20)
-# plot(lincShiftBoot, index=20)
-# plot(yeastShiftBoot, index=20)
-# 
-# bootplot(cdsShiftBoot, "Coding CDS")
-# bootplot(utr5ShiftBoot, "Coding 5 UTR")
-# bootplot(lincShiftBoot, "linc")
-# bootplot(yeastShiftBoot, "Yeast")
-# 
-# boxplot(cdsShiftBoot$t[,20], utr5ShiftBoot$t[,20],
-#         lincShiftBoot$t[,20], yeastShiftBoot$t[,20],
-#         ylim=c(-1,1),
-#         names=c("CDS", "Utr5", "linc", "yeast"),
-#         pars=list(outpch=21, outcex=0.5,
-#             boxwex=0.4, boxlty = "blank", boxfill=solarGrey01, medlwd = 0.33, medcol = "white"
-#             ))
-# points(seq(1,4),
-#        c(cdsShiftBoot$t0[20], utr5ShiftBoot$t0[20],
-#          lincShiftBoot$t0[20], yeastShiftBoot$t0[20]),
-#        pch=21, col=solarRed, bg=lighten(solarRed))
-# axis(side=2, at=seq(-1,1,0.1), labels = FALSE)
-# 
-# dev.off()
diff --git a/src/FLOSS_score/ldist-plots-template.R b/src/FLOSS_score/ldist-plots-template.R
new file mode 100644
index 00000000..48b421d7
--- /dev/null
+++ b/src/FLOSS_score/ldist-plots-template.R
@@ -0,0 +1,60 @@
+library("biomaRt")
+library("GenomicRanges")
+library("boot")
+
+source("./src/FLOSS_score/transcript-utils.R")
+source("./src/FLOSS_score/footprint-assignment.R")
+source("./src/FLOSS_score/floss.R")
+source("./src/FLOSS_score/plot-utils.R")
+
+source("./src/FLOSS_score/mouse-annotation.R")
+
+cdist <- function(ldists) { normLDist(colSums(ldists, na.rm=TRUE)) }
+
+lengthLevels <- seq(defaultLengthMin, defaultLengthMax)
+
+RefLDist <- normLDist(read.csv("results/chx-ref-ldist.txt", check.names=F, row.names=1)[,1])
+chxLDists <- readRegionLengthDists("results/chxLDists")
+rownames(chxLDists$cds) <- gsub("\\.[[:digit:]]*","" ,rownames(chxLDists$cds) )
+rownames(chxLDists$utr5) <- gsub("\\.[[:digit:]]*","" ,rownames(chxLDists$utr5) )
+rownames(chxLDists$utr3) <- gsub("\\.[[:digit:]]*","" ,rownames(chxLDists$utr3) )
+rownames(chxLDists$trx) <- gsub("\\.[[:digit:]]*","" ,rownames(chxLDists$trx) )
+
+LDistCdses <- chxLDists$cds[rownames(chxLDists$cds) %in% refCodingIds,]
+LDistUtr5s <- chxLDists$utr5[row.names(chxLDists$utr5) %in% refCodingIds,]
+LDistLincs <- chxLDists$trx[row.names(chxLDists$trx) %in% cleanLincIds,]
+LDistCdses <- chxLDists$cds[row.names(chxLDists$cds) %in% mitoCodingIds,]
+
+CdsDist <- cdist(LDistCdses)
+
+pdf("./results/examples-rplot.pdf")
+plot(lengthLevels, CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
+     xlim=c(26,34), ylim=c(0,0.6), xlab="Fragment Length", main = "LincRNA")
+lines(lengthLevels, cdist(LDistLincs), type="l", col=solarMagenta, lwd=2)
+
+plot(lengthLevels, CdsDist, type="l", col=lighten(solarGrey01), lwd=4,
+     xlim=c(26,34), ylim=c(0,0.5), xlab="Fragment Length", main = "5' UTR")
+lines(lengthLevels, cdist(LDistUtr5s), type="l", col=solarRed, lwd=2)
+
+dev.off()
+
+# Write data tables
+# write.csv(hg38_NY5RefCodingCdses,  "results/10_FLOSS_score/tables/ldist-hg38_NY5-refcoding-cds.txt")
+# write.csv(hg38_NY5Terc,            "results/10_FLOSS_score/tables/ldist-hg38_NY5-terc-trx.txt")
+# write.csv(hg38_NY5Vault,           "results/10_FLOSS_score/tables/ldist-hg38_NY5-vault-trx.txt")
+# write.csv(hg38_NY5MitoCdses,       "results/10_FLOSS_score/tables/ldist-hg38_NY5-mito-cds.txt")
+# write.csv(hg38_NY5Snhg5,           "results/10_FLOSS_score/tables/ldist-hg38_NY5-snhg5-trx.txt")
+# write.csv(Lincs,           "results/10_FLOSS_score/tables/ldist-hg38_NY5-linc-trx.txt")
+# write.csv(hg38_NY5RefCodingUtr5s,  "results/10_FLOSS_score/tables/ldist-hg38_NY5-refcoding-utr5.txt")
+# write.csv(hg38_NY5HIVCDS,  "results/10_FLOSS_score/tables/ldist-hg38_NY5-hiv-cds.txt")
+# write.csv(hg38_NY5HIVLTR,  "results/10_FLOSS_score/tables/ldist-hg38_NY5-hiv-ltr.txt")
+# 
+# write.csv(rbind(refCodingCdses  = cdist(hg38_NY5RefCodingCdses),
+#                 cleanLincs      = cdist(hg38_NY5Lincs),
+#                 refCodingUtr5s  = cdist(hg38_NY5RefCodingUtr5s),
+#                 terc            = cdist(hg38_NY5Terc),
+#                 vault           = cdist(hg38_NY5Vault),
+#                 snhg5           = cdist(hg38_NY5Snhg5),
+#                 hivCds          = cdist(hg38_NY5HIVCDS),
+#                 hivLtr          = cdist(hg38_NY5HIVLTR)),
+#           "results/FLOSS_score/tables/ldist-hg38_NY5.txt")
diff --git a/src/FLOSS_score/mouse-annotation.R b/src/FLOSS_score/mouse-annotation.R
index 26d22312..0d75fdd2 100644
--- a/src/FLOSS_score/mouse-annotation.R
+++ b/src/FLOSS_score/mouse-annotation.R
@@ -4,7 +4,7 @@ library("biomaRt")
 ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="mmusculus_gene_ensembl", host="uswest.ensembl.org")
 
 annots <- getBM(attributes=c("ensembl_gene_id", "ensembl_transcript_id", "gene_biotype", "transcript_biotype",
-                "chromosome_name", "start_position", "end_position", "strand", "description", "external_gene_id", "external_gene_db"),
+                "chromosome_name", "start_position", "end_position", "strand", "description", "external_gene_name"),
                 mart=ensembl)
 
 granges <- GRanges(seqnames = annots$chromosome_name,
diff --git a/src/FLOSS_score/orf-org-plot.R b/src/FLOSS_score/orf-org-plot.R
deleted file mode 100644
index 5d1e53e8..00000000
--- a/src/FLOSS_score/orf-org-plot.R
+++ /dev/null
@@ -1,117 +0,0 @@
-source("../shared/plot-utils.R")
-
-allLincTable <- read.csv("data/mesc-lincs-aug-analysis.txt")
-lincTable <- allLincTable[!is.na(allLincTable$bestAugHarr),]
-
-allPcodingTable <- read.csv("data/mesc-pcoding-aug-analysis.txt")
-pcodingTable <- allPcodingTable[!is.na(allPcodingTable$bestAugHarr),]
-
-pdf("mesc-lincs-aug-analysis.pdf", width = 6, height = 3, useDingbats = FALSE)
-
-bestAugCodonRankHist <- hist(lincTable$bestAugCodonRank, breaks=c(1:12), plot=FALSE)
-barplot(bestAugCodonRankHist$counts, ylim=c(0,80),
-        main = "Rank of harringonine peak at AUG start (versus all codons)",
-        ylab = "Number of transcripts",
-        names.arg = c(1:11), las=3
-        )
-
-bestAugNtHist <- hist(lincTable$bestAugNt, breaks=c(seq(0,2000,200), 1e6), plot=FALSE)
-barplot(bestAugNtHist$counts, ylim=c(0,50),
-        main = "Absolute position of AUG start [nt]",
-        ylab = "Number of transcripts",
-        names.arg = c(lapply(seq(0,1800,200), function(x) { sprintf("%d-%d", x, x+200) }), ">2000"), las=3
-        )
-
-bestAugRelHist <- hist(lincTable$bestAugRel, breaks=seq(0,1,0.1), plot=FALSE)
-barplot(bestAugRelHist$counts, ylim=c(0,30),
-        main = "Relative position of AUG start",
-        ylab = "Number of transcripts",
-        names.arg = c(sapply(seq(0,9), function(i) { c(sprintf("%0.d%%", i*10)) })), las=3
-        )
-
-bestAugFractHist <- hist(lincTable$bestAugFract, breaks=seq(0,1,0.05), plot=FALSE)
-barplot(bestAugFractHist$counts, ylim=c(0,50),
-        main = "Relative index of AUG start",
-        xlab = "Relative AUG index",
-        ylab = NA,
-        names.arg = c(sapply(seq(0,9), function(i) { c("", sprintf("%0.0f%%", 5 + i*10)) })), las=3
-        )
-
-print(table(lincTable$bestAugOrder))
-print(sum(lincTable$bestAugOrder == 1))
-print(sum(lincTable$bestAugOrder >= 2 & lincTable$bestAugOrder <= 5))
-print(sum(lincTable$bestAugOrder >= 6 & lincTable$bestAugOrder <= 20))
-print(sum(lincTable$bestAugOrder >= 21 & lincTable$bestAugOrder <= 100))
-
-upTotal <- sum( lincTable$upFrame0, lincTable$upFrame1, lincTable$upFrame2, na.rm=TRUE)
-upFrame0 <- sum( lincTable$upFrame0, na.rm=TRUE) / upTotal
-upFrame1 <- sum( lincTable$upFrame1, na.rm=TRUE) / upTotal
-upFrame2 <- sum( lincTable$upFrame2, na.rm=TRUE) / upTotal
-
-orfTotal <- sum( lincTable$orfFrame0, lincTable$orfFrame1, lincTable$orfFrame2, na.rm=TRUE)
-orfFrame0 <- sum( lincTable$orfFrame0, na.rm=TRUE) / orfTotal
-orfFrame1 <- sum( lincTable$orfFrame1, na.rm=TRUE) / orfTotal
-orfFrame2 <- sum( lincTable$orfFrame2, na.rm=TRUE) / orfTotal
-
-downTotal <- sum( lincTable$downFrame0, lincTable$downFrame1, lincTable$downFrame2, na.rm=TRUE)
-downFrame0 <- sum( lincTable$downFrame0, na.rm=TRUE) / downTotal
-downFrame1 <- sum( lincTable$downFrame1, na.rm=TRUE) / downTotal
-downFrame2 <- sum( lincTable$downFrame2, na.rm=TRUE) / downTotal
-
-framing <- matrix( data = c( upFrame0, upFrame1, upFrame2,
-                     orfFrame0, orfFrame1, orfFrame2,
-                     downFrame0, downFrame1, downFrame2 ),
-                  nrow = 3, ncol = 3 )
-
-barplot(framing, beside=TRUE,
-        
-        main = "Footprint frame versus AUG start, lincRNAs",
-        ylab = "Fraction of reads",
-        ylim = c(0,0.75),
-        names.arg = c("", "Upstream", "", "", "ORF", "", "", "Downstream", "")
-        )
-
-upTotal <- sum( pcodingTable$upFrame0, pcodingTable$upFrame1, pcodingTable$upFrame2, na.rm=TRUE)
-upFrame0 <- sum( pcodingTable$upFrame0, na.rm=TRUE) / upTotal
-upFrame1 <- sum( pcodingTable$upFrame1, na.rm=TRUE) / upTotal
-upFrame2 <- sum( pcodingTable$upFrame2, na.rm=TRUE) / upTotal
-
-orfTotal <- sum( pcodingTable$orfFrame0, pcodingTable$orfFrame1, pcodingTable$orfFrame2, na.rm=TRUE)
-orfFrame0 <- sum( pcodingTable$orfFrame0, na.rm=TRUE) / orfTotal
-orfFrame1 <- sum( pcodingTable$orfFrame1, na.rm=TRUE) / orfTotal
-orfFrame2 <- sum( pcodingTable$orfFrame2, na.rm=TRUE) / orfTotal
-
-downTotal <- sum( pcodingTable$downFrame0, pcodingTable$downFrame1, pcodingTable$downFrame2, na.rm=TRUE)
-downFrame0 <- sum( pcodingTable$downFrame0, na.rm=TRUE) / downTotal
-downFrame1 <- sum( pcodingTable$downFrame1, na.rm=TRUE) / downTotal
-downFrame2 <- sum( pcodingTable$downFrame2, na.rm=TRUE) / downTotal
-
-framing <- matrix( data = c( upFrame0, upFrame1, upFrame2,
-                     orfFrame0, orfFrame1, orfFrame2,
-                     downFrame0, downFrame1, downFrame2 ),
-                  nrow = 3, ncol = 3 )
-
-barplot(framing, beside=TRUE,
-        
-        main = "Footprint frame versus AUG start, PCoding",
-        ylab = "Fraction of reads",
-        ylim = c(0,0.75),
-        names.arg = c("", "Upstream", "", "", "ORF", "", "", "Downstream", "")
-        )
-
-dev.off()
-
-pdf("mesc-linc-orf-analysis.pdf", width = 6, height = 6, useDingbats = FALSE)
-
-plot(log10(lincTable$expInOrf), log10(lincTable$obsInOrf),
-     xlab = "Expected CHX footprints in ORF",
-     ylab = "Observed CHX footprints in ORF",
-     xlim = c(0,3), ylim = c(0,3),
-     axes = F)
-abline(a=0,b=1)
-axis(1, log10(10**seq(0,3)), labels=c("1", "10", "100", "1k"), cex.axis=1)
-axis(1, log10(c(sapply(10**seq(0,2), minor_decade))), labels=F, tcl=-0.5)
-axis(2, log10(10**seq(0,3)), labels=c("1", "10", "100", "1k"), cex.axis=1)
-axis(2, log10(c(sapply(10**seq(0,2), minor_decade))), labels=F, tcl=-0.5)
-
-dev.off()
diff --git a/src/FLOSS_score/orf-organization.R b/src/FLOSS_score/orf-organization.R
deleted file mode 100644
index b9efc5b3..00000000
--- a/src/FLOSS_score/orf-organization.R
+++ /dev/null
@@ -1,205 +0,0 @@
-library("Rsamtools")
-library("Biostrings")
-library("rtracklayer")
-
-source("../shared/transcript-utils.R")
-source("../shared/footprint-assignment.R")
-source("../shared/floss.R")
-source("mouse-annotation.R")
-
-emetFloss <- read.csv("data/mesc-emet-floss.txt", check.names=FALSE, row.names=1)
-
-xref <- match(row.names(emetFloss), annots$ensembl_transcript_id)
-table(is.na(xref))
-trxs <- cbind(emetFloss, annots[xref,])
-trxs$name <- row.names(trxs)
-
-pcoding   = trxs[trxs$name %in% codingIds & !(trxs$name %in% mitoIds) & !(trxs$name %in% dirtyPCodingIds),]
-lincs     = trxs[trxs$name %in% lincIds   & !(trxs$name %in% dirtyLincIds),]
-
-emetCutoffs <- flossCutoffs(pcoding$cdsNRead, pcoding$cdsScore)
-
-pcoding$classifyTrx <- emetCutoffs$classify(pcoding$trxNRead, pcoding$trxScore)
-lincs$classifyTrx <- emetCutoffs$classify(lincs$trxNRead, lincs$trxScore)
-
-options(mc.cores=36)
-
-bedFile <- "/mnt/ingolialab/ingolia/RiboIP/Alignment-mesc-130709/Mm.GRCm38.72.bed"
-##lincBedFile <- "data/orflincs.bed"
-mescFaFile  <- "/mnt/ingolialab/ingolia/Genomes/SpikeIn/mm10-scspike.fa"
-
-chxBamFile <- BamFile("/mnt/ingolialab/ingolia/RiboIP/Alignment-mesc-130709/ribo_chx_mesc_genome_sorted.bam")
-harr120BamFile <- BamFile("/mnt/ingolialab/ingolia/RiboIP/Alignment-mesc-130709/ribo_harr120_oldmesc_genome_sorted.bam")
-harr150BamFile <- BamFile("/mnt/ingolialab/ingolia/RiboIP/Alignment-mesc-130709/ribo_harr150_oldmesc_genome_sorted.bam")
-
-allBed <- import(bedFile, format="BED", asRangedData = FALSE)
-
-pcodingBed <- allBed[match(pcoding$name, allBed$name)]
-lincsBed <- allBed[match(lincs$name, allBed$name)]
-
-# A sites from Cell 2011 data sets (harringtonine)
-harrASites <- data.frame(asite=c(14,14,15,16,16,16,17,17), row.names=seq(28,35))
-chxASites <- data.frame(asite=c(16,16,17,17,17), row.names=seq(28,32))
-
-getTrxSeq <- function(faidxfile, trx) {
-  fa <- open(FaFile(faidxfile))
-  ss <- getSeq(fa, param=GRanges(seqnames=seqnames(trx), ranges=ranges(trx)))
-  if (as.factor(strand(trx)) == "-") {
-    toString(reverseComplement(unlist(ss)))
-  } else {
-    toString(unlist(ss))
-  }
-}
-
-augIndices <- function(sequ) {
-  gregexpr("ATG", sequ)[[1]]
-}
-
-harrScore <- function(harrDensity, augidx) {
-  startOcc <- sum(harrDensity[augidx + (2:4)])
-  after <- sum(harrDensity[augidx + (5:7)])
-  before <- sum(harrDensity[(max(0, augidx - 4)):(augidx + 1)])
-  ifelse( startOcc > max(0, before, after), startOcc, NA )
-}
-
-augBestScoreIndex <- function(harrDensity120, harrDensity150, sequ) {
-  augs <- augIndices(sequ)
-  augScores <- lapply(augs, function(aug) { harrScore(harrDensity120, aug) + harrScore(harrDensity150, aug) })
-  bestAug <- which.max(augScores)
-  ifelse( length(bestAug) == 1, augs[bestAug], NA )
-}
-
-inFrameStopIndex <- function(sequ, augidx) {
-  isstop <- Vectorize(function(idx) { substr(sequ, idx, idx+2) %in% c("TGA", "TAG", "TAA") })
-  frame <- seq(augidx, nchar(sequ) - 2, 3)
-  stops <- which(isstop(frame))
-  if (length(stops) > 0) frame[min(stops)] else NA
-}
-
-ntToRelativeCodonProfile <- function(ntprof, startidx) {
-  lb <- - (startidx %/% 3)
-  ub <- (1 + length(ntprof) - startidx) %/% 3
-  relprof <- data.frame( relpos = lb:ub )
-  relprof$counts <- lapply(relprof$relpos, function(rc) { sum(ntprof[max(0, startidx + 3*rc - 1):(max(0, startidx + 3*rc + 1))]) })
-  relprof
-}
-
-analyzeTrx <- function( trxGR ) {
-  sequ <- getTrxSeq(mescFaFile, trxGR)
-
-  profChx <- getASiteProfile(chxASites, chxBamFile, trxGR)
-  prof120 <- getASiteProfile(harrASites, harr120BamFile, trxGR)
-  prof150 <- getASiteProfile(harrASites, harr150BamFile, trxGR)
-
-  res <- list(trxLength = nchar(sequ),
-              trxChxTotal = sum(profChx) )
-  
-  bestAug <- augBestScoreIndex( prof120, prof150, sequ )
-  
-  if (!is.na(bestAug)) {
-    res$bestAugHarr = sum(prof120[(bestAug + 2):(bestAug + 4)], prof150[(bestAug + 2):(bestAug + 4)])
-    
-    res$bestAugNt = bestAug
-    res$bestAugRel = (bestAug - 1) / (nchar(sequ) - 1)
-    
-    augIdxs <- augIndices(sequ)
-    res$bestAugOrder = match(bestAug, augIdxs)
-    res$bestAugFract = (res$bestAugOrder - 1) / (length(augIdxs) - 1)
-
-    codonRelProf <- ntToRelativeCodonProfile(prof120 + prof150, bestAug)
-    augCounts <- codonRelProf$counts[match(1, codonRelProf$relpos)][[1]]
-    nabove <- sum(codonRelProf$counts > augCounts, na.rm=T)
-    res$bestAugCodonRank <- 1 + nabove
-
-    stopNt <- inFrameStopIndex(sequ, bestAug)
-    res$stopNt <- stopNt
-    if (is.na(stopNt)) stopNt <- nchar(sequ)
-    res$obsInOrf <- sum(profChx[(bestAug + 2):(stopNt + 1)])
-    res$expInOrf <- res$trxChxTotal * ( stopNt - bestAug ) / nchar(sequ)
-
-    res$upFrame0 <- sum(profChx[seq(bestAug + 0, 1, -3)], na.rm=TRUE)
-    res$upFrame1 <- sum(profChx[seq(bestAug + 1, 1, -3)], na.rm=TRUE)
-    res$upFrame2 <- sum(profChx[seq(bestAug - 1, 1, -3)], na.rm=TRUE)
-
-    res$orfFrame0 <- sum(profChx[seq(bestAug + 3, stopNt + 0, 3)], na.rm=TRUE)
-    res$orfFrame1 <- sum(profChx[seq(bestAug + 4, stopNt + 1, 3)], na.rm=TRUE)
-    res$orfFrame2 <- sum(profChx[seq(bestAug + 2, stopNt - 1, 3)], na.rm=TRUE)
-
-    res$downFrame0 <- sum(profChx[seq(min(stopNt + 3, nchar(sequ)), nchar(sequ), 3)], na.rm=TRUE)
-    res$downFrame1 <- sum(profChx[seq(min(stopNt + 4, nchar(sequ)), nchar(sequ), 3)], na.rm=TRUE)
-    res$downFrame2 <- sum(profChx[seq(min(stopNt + 2, nchar(sequ)), nchar(sequ), 3)], na.rm=TRUE)
-  } else {
-    res$bestAugHarr <- NA
-
-    res$bestAugNt <- NA
-    res$bestAugRel <- NA
-
-    res$bestAugOrder <- NA
-    res$bestAugFract <- NA
-
-    res$bestAugCodonRank <- NA
-
-    res$stopNt <- NA
-    res$obsInOrf <- NA
-    res$expInOrf <- NA
-
-    res$upFrame0 <- NA
-    res$upFrame1 <- NA
-    res$upFrame2 <- NA
-
-    res$orfFrame0 <- NA
-    res$orfFrame1 <- NA
-    res$orfFrame2 <- NA
-
-    res$downFrame0 <- NA
-    res$downFrame1 <- NA
-    res$downFrame2 <- NA
-  }
-
-  res
-}
-
-bedAnalysis <- function(bed) {
-    rows <- mclapply( 1:length(bed), function(idx) {
-        trx <- transcriptGRanges(bed[idx])
-        names(trx) <- bed$name[[idx]]
-        cat(sprintf("%6d\t%s\n", idx, names(trx)[[1]]))
-        res <- analyzeTrx(trx)
-        res$trx <- names(trx)[[1]]
-        res
-    })
-
-    analysis <- as.data.frame(do.call(rbind, rows))
-
-    data.frame( trx = as.vector( analysis$trx, mode="character" ),
-               trxLength = as.vector( analysis$trxLength, mode="integer" ),
-               trxChxTotal = as.vector( analysis$trxChxTotal, mode="integer" ),
-               bestAugHarr = as.vector( analysis$bestAugHarr, mode="integer" ),
-               bestAugNt = as.vector( analysis$bestAugNt, mode="integer" ),
-               bestAugRel = as.vector( analysis$bestAugRel, mode="numeric" ),
-               bestAugOrder = as.vector( analysis$bestAugOrder, mode="integer" ),
-               bestAugFract = as.vector( analysis$bestAugFract, mode="numeric" ),
-               bestAugCodonRank = as.vector( analysis$bestAugCodonRank, mode="integer" ),
-               stopNt = as.vector( analysis$stopNt, mode="integer" ),
-               obsInOrf = as.vector( analysis$obsInOrf, mode="integer" ),
-               expInOrf = as.vector( analysis$expInOrf, mode="integer" ),
-               upFrame0 = as.vector( analysis$upFrame0, mode="integer" ),
-               upFrame1 = as.vector( analysis$upFrame1, mode="integer" ),
-               upFrame2 = as.vector( analysis$upFrame2, mode="integer" ),
-               orfFrame0 = as.vector( analysis$orfFrame0, mode="integer" ),
-               orfFrame1 = as.vector( analysis$orfFrame1, mode="integer" ),
-               orfFrame2 = as.vector( analysis$orfFrame2, mode="integer" ),
-               downFrame0 = as.vector( analysis$downFrame0, mode="integer" ),
-               downFrame1 = as.vector( analysis$downFrame1, mode="integer" ),
-               downFrame2 = as.vector( analysis$downFrame2, mode="integer" )
-               )
-}
-
-pcodingTable <- bedAnalysis(pcodingBed)
-pcodingTable <- cbind.data.frame(pcoding, pcodingTable)
-write.csv(pcodingTable, "data/mesc-pcoding-aug-analysis.txt")
-
-lincsTable <- bedAnalysis(lincsBed)
-lincsTable <- cbind.data.frame(lincs, lincsTable)
-write.csv(lincsTable, "data/mesc-lincs-aug-analysis.txt")
-
diff --git a/src/FLOSS_score/scspike.sh b/src/FLOSS_score/scspike.sh
deleted file mode 100644
index b4169bf6..00000000
--- a/src/FLOSS_score/scspike.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# http://downloads.yeastgenome.org/sequence/S288C_reference/genome_releases/S288C_reference_genome_R64-1-1_20110203.tgz
-# With ugly Fasta headers converted to chr01 etc.
-
-sed 's/^>chr/>saccer/' ../Saccharomyces_cerevisiae/YeastGenome/saccharomyces_cerevisiae.fa \
-    > sac_cer_spike.fa
-
-sed 's/^chr/saccer/' ../Saccharomyces_cerevisiae/YeastGenome/saccharomyces_cerevisiae.gtf \
-    > sac_cer_spike.gtf
-
-cat ../Mus_musculus/mm9.fa sac_cer_spike.fa > mm9-scspike.fa
-cat ../Mus_musculus/mm9_knownGene.gtf sac_cer_spike.gtf > mm9-scspike_knownGene.gtf
-
-cat /mnt/sequence/genomes/mouse/mm10.fa sac_cer_spike.fa > mm10-scspike.fa
-
-cat ../Homo_sapiens/hg19.fa sac_cer_spike.fa > hg19-scspike.fa
-cat ../Homo_sapiens/hg19_knownGene.gtf sac_cer_spike.gtf > hg19-scspike_knownGene.gtf
-
-bowtie-build mm9-scspike.fa mm9-scspike
-bowtie-build hg19-scspike.fa hg19-scspike
-
-cat ../Saccharomyces_cerevisiae/sc-rrna.fa ../Mus_musculus/mm_rrna.fa > mm-scspike-rrna.fa
-cat ../Saccharomyces_cerevisiae/sc-rrna.fa ../Homo_sapiens/refseq_rrna.fa > hs-scspike-rrna.fa
-
-bowtie-build mm-scspike-rrna.fa mm-scspike-rrna
-bowtie-build hs-scspike-rrna.fa hs-scspike-rrna
-bowtie-build mm10-scspike.fa mm10-scspike
-bowtie2-build mm10-scspike.fa mm10-scspike
-- 
GitLab