Skip to content
Snippets Groups Projects

ribotish and umi_tools merge request

Closed elabaron requested to merge LBMC/RMI2/rmi2_pipelines:master into master
Compare and
41 files
+ 3937
43
Compare changes
  • Side-by-side
  • Inline
Files
41
+ 108
0
 
# Run every recipe under bash with -x so each command is traced.
SHELL=/bin/bash -x

# Declared first so that `all` is the default goal; its prerequisites are
# attached further down in this file.
all:

# With no prerequisites, .SECONDARY treats every target as secondary, so
# intermediate files are never deleted automatically.
.SECONDARY:

# Recipes redirect straight into $@, so a failing command would otherwise
# leave a truncated target behind that looks up to date on the next run.
.DELETE_ON_ERROR:

# Generated rules listing the raw input files of each <sample>_trim.fq.
include samples.d
 
 
# Samples whose trimming recipe decompresses its inputs with zcat.
NEW_SAMPLES = ribo_chx_mesc \
              ribo_emet_mesc

# Samples whose trimming recipe decompresses its inputs with bzcat.
OLD_SAMPLES = ribo_chx_oldmesc \
              ribo_emet_oldmesc \
              ribo_harr120_oldmesc \
              ribo_harr150_oldmesc

# All ribo samples: new ones first, then old ones.
RIBO_SAMPLES = $(NEW_SAMPLES) $(OLD_SAMPLES)

# Complete list of samples the aggregate targets operate on.
SAMPLES = $(RIBO_SAMPLES)
 
# Project directories scanned for Sample_*/*.fastq.gz when building samples.d.
PROJECTS = \
    /mnt/ingolialab/FastQ/121204_SN375_0200_AC1A7LACXX/FastQ/Project_NI-nickingolia-ESdrug/ \
    /mnt/ingolialab/FastQ/130305_SN375_0214_AC1CACACXX/FastQ/Project_NI-nickingolia/ \
    /mnt/ingolialab/FastQ/130709_SN375_0236_BD294UACXX/FastQ-i4n3/Project_NI-nickingolia/
 
 
# Raw bzip2-compressed inputs of the old samples. When a sample lists several
# files, all of them reach its trimming recipe through $^ in the order given.
ribo_emet_oldmesc_trim.fq: \
    /mnt/ingolialab/ingolia/MouseES/RawData/100426/s_3_sequence.txt.bz2

ribo_chx_oldmesc_trim.fq: \
    /mnt/ingolialab/ingolia/MouseES/RawData/100426/s_5_sequence.txt.bz2 \
    /mnt/ingolialab/ingolia/MouseES/RawData/100809/s_4_sequence.txt.bz2

ribo_harr120_oldmesc_trim.fq: \
    /mnt/ingolialab/ingolia/MouseES/RawData/100706/s_5_sequence.txt.bz2 \
    /mnt/ingolialab/ingolia/MouseES/RawData/100706/s_6_sequence.txt.bz2

ribo_harr150_oldmesc_trim.fq: \
    /mnt/ingolialab/ingolia/MouseES/RawData/100624/s_2_sequence.txt.bz2 \
    /mnt/ingolialab/ingolia/MouseES/RawData/100706/s_2_sequence.txt.bz2
 
 
# RUNTIME CONFIGURATION

# Option string shared by the bowtie and tophat argument lists.
BOWTIE_PARALLEL = -p 32

# Extra tophat option, empty by default; set it to --keep-tmp to enable it.
TOPHAT_KEEP_TEMP =
 
 
# LINKER TRIMMING CONFIGURATION

# Named linker sequence.
NI_NI_9 = CTGTAGGCACCATCAATAGA

# Sequence handed to fastx_clipper through -a.
FP_LINKER = $(NI_NI_9)

# Value handed to fastx_clipper through -l.
FP_MIN_INSERT = 24
 
 
# RRNA ALIGNMENT CONFIGURATION

# Values for bowtie's --maqerr and --seedlen options in the rRNA alignment.
RRNA_MAQ_ERR = 60
RRNA_SEEDLEN = 23

# NOTE(review): not referenced by any rule visible in this file.
RRNA_MAXREAD = 51

# Index argument given to bowtie for the rRNA alignment.
RRNA_EBWT = /mnt/ingolialab/ingolia/Genomes/SpikeIn/mm-scspike-rrna
 
 
# GENOME ALIGNMENT CONFIGURATION

# Index argument given to tophat; the alignment rule also requires the file
# $(TOPHAT_EBWT).1.ebwt to exist.
TOPHAT_EBWT = /mnt/ingolialab/ingolia/Genomes/SpikeIn/mm10-scspike

# Annotation passed to tophat through --GTF; built by a rule in this file.
TOPHAT_GTF = Mm.GRCm38.72-plus-scspike.gtf
 
 
# BINARIES

# Tool names; override on the command line to use a different executable.
TOPHAT = tophat
BOWTIE = bowtie
SAMTOOLS = samtools

# samtools invocation reading from stdin (-S -) and writing BAM (-b).
# NOTE(review): not referenced by any rule visible in this file.
TAM_TO_BAM = $(SAMTOOLS) view -b -S -h -
 
 
# Fix up chromosome names for Ensembl: lines starting with a digit, X or Y get
# a "chr" prefix, a leading MT becomes chrM, and every other line is dropped.
# The output goes to a temporary file that is renamed only when awk succeeds,
# so a failed run never leaves a truncated GTF that looks up to date.
Mm.GRCm38.72.gtf: Mus_musculus.GRCm38.72.gtf
	awk -F$$'\t' '/^[0-9XY]/ { printf("chr%s\n", $$0) } /^MT/ { gsub(/^MT/, "chrM", $$0); print }' $< > $@.tmp \
	&& mv -f $@.tmp $@
 
 
# Annotation used by tophat: the chr-renamed GTF followed by the spike GTF,
# concatenated in prerequisite order.
$(TOPHAT_GTF): \
    Mm.GRCm38.72.gtf \
    /mnt/ingolialab/ingolia/Genomes/SpikeIn/sac_cer_spike.gtf
	cat $^ > $@
 
 
# Build the included samples.d: one "<sample>_trim.fq: <path>" line for every
# Sample_<sample>/*.fastq.gz found under the project directories.
# The two renaming expressions have no g flag, so they rewrite only the first
# occurrence on each line (the target name) and leave the file path untouched.
samples.d: $(PROJECTS)
	ls $(addsuffix /Sample_*/*.fastq.gz,$(PROJECTS)) \
	| sed -e 's,.*Sample_\(.*\)/.*\.fastq\.gz$$,\1_trim.fq: &,' \
	      -e s/ribo_mesc_chx/ribo_chx_mesc/ \
	      -e s/ribo_mesc_emet/ribo_emet_mesc/ \
	> $@
 
 
# Aggregate targets, one per pipeline stage, covering every sample.
# None of them names a real file, so they are declared phony: a stray file
# called "all", "trims", "norrnas" or "genomic" must not make a stage look
# up to date.
.PHONY: all trims norrnas genomic

trims: $(addsuffix _trim.fq, $(SAMPLES))
norrnas: $(addsuffix _norrna.fq, $(SAMPLES))
genomic: $(addsuffix _genome_sorted.bam.bai, $(SAMPLES))

# The default goal builds every stage.
all: trims norrnas genomic
 
 
# RIBO LINKER TRIMMING

# New samples: decompress all prerequisites (supplied by samples.d) with zcat
# and clip them with fastx_clipper; its stderr goes to
# Statistics/<sample>_clip.txt.
# pipefail makes a zcat failure (e.g. a truncated .gz) fail the recipe; without
# it only fastx_clipper's exit status is checked.
$(addsuffix _trim.fq, $(NEW_SAMPLES)): %_trim.fq:
	mkdir -p Statistics
	set -o pipefail; zcat $^ \
	| fastx_clipper -Q33 -a $(FP_LINKER) -l $(FP_MIN_INSERT) -c -n -v 2>Statistics/$*_clip.txt \
	> $@
 
 
# Old samples: decompress all prerequisites (listed explicitly in this file)
# with bzcat and clip them with fastx_clipper; its stderr goes to
# Statistics/<sample>_clip.txt.
# pipefail makes a bzcat failure (e.g. a corrupt .bz2) fail the recipe; without
# it only fastx_clipper's exit status is checked.
$(addsuffix _trim.fq, $(OLD_SAMPLES)): %_trim.fq:
	mkdir -p Statistics
	set -o pipefail; bzcat $^ \
	| fastx_clipper -Q33 -a $(FP_LINKER) -l $(FP_MIN_INSERT) -c -n -v 2>Statistics/$*_clip.txt \
	> $@
 
 
# RRNA ALIGNMENTS

# Options for the rRNA bowtie run, assembled from the configuration above.
RRNA_BOWTIE_ARGS=$(BOWTIE_PARALLEL) --solexa-quals --maqerr=$(RRNA_MAQ_ERR) --seedlen=$(RRNA_SEEDLEN)

# Align the trimmed reads against the rRNA index. The target is the file bowtie
# writes through --un; the SAM output on stdout is discarded and stderr is kept
# in Statistics/<sample>_rrna.txt.
# The bowtie executable comes from $(BOWTIE), like the other tools in this
# file, so it can be overridden in one place.
$(addsuffix _norrna.fq, $(RIBO_SAMPLES)): %_norrna.fq: %_trim.fq
	mkdir -p Statistics
	$(BOWTIE) $(RRNA_BOWTIE_ARGS) --un $@ --sam $(RRNA_EBWT) $< 2>Statistics/$*_rrna.txt > /dev/null
 
 
# GENOME ALIGNMENTS

# Options for every tophat run; $(TOPHAT_KEEP_TEMP) is empty unless configured.
TOPHAT_ARGS = $(BOWTIE_PARALLEL) \
              --solexa-quals \
              --GTF $(TOPHAT_GTF) \
              --no-novel-juncs \
              --library-type fr-unstranded \
              $(TOPHAT_KEEP_TEMP)

# Run tophat on the rRNA-filtered reads of one sample. Output goes to the
# <sample>_genome/ directory and stderr to Statistics/<sample>_genome.txt.
# The index file and the GTF are prerequisites, so they must exist first.
$(addsuffix _genome/accepted_hits.bam, $(SAMPLES)): \
    %_genome/accepted_hits.bam: %_norrna.fq $(TOPHAT_EBWT).1.ebwt $(TOPHAT_GTF)
	mkdir -p Statistics
	$(TOPHAT) $(TOPHAT_ARGS) \
	    --output-dir $(dir $@) \
	    $(TOPHAT_EBWT) $< \
	    2>Statistics/$*_genome.txt
 
 
# Index a *_sorted.bam file; samtools' stderr is kept under Statistics/.
# The directory is created here as in the other logging rules, so the
# redirection cannot fail when Statistics/ is missing.
# For the targets requested by `genomic` the stem $* already ends in _genome,
# so the log is named <sample>_genome_genome_index.txt.
%_sorted.bam.bai: %_sorted.bam
	mkdir -p Statistics
	$(SAMTOOLS) index $< 2>Statistics/$*_genome_index.txt
 
 
# Filter tophat's alignments: keep header lines (^@) and records whose NM tag
# is 0 or 1, then convert back to BAM.
# The tag is matched as a whole whitespace-delimited field; a bare NM:i:[01]
# would also accept NM:i:10, NM:i:11 and so on.
# The samtools executable comes from $(SAMTOOLS), as in the index rule.
# NOTE(review): no sorting happens here despite the target name; presumably
# accepted_hits.bam is already coordinate-sorted -- confirm.
$(addsuffix _genome_sorted.bam, $(SAMPLES)): %_genome_sorted.bam: %_genome/accepted_hits.bam
	$(SAMTOOLS) view -h $< | \
	grep -E '([[:space:]]NM:i:[01]([[:space:]]|$$))|(^@)' | \
	$(SAMTOOLS) view -S -b - > $@
 
Loading