#!/usr/bin/env bash
#
# Prepare the input data for a mixed GRCh37/GRCm38 run and launch the
# Nextflow pipeline on it.
#
# Steps:
#   1. Download every run listed in data/SRR_Acc_List.txt with fasterq-dump
#      and gzip the resulting FASTQ files.
#   2. Sync the GRCh37 and GRCm38 whole-genome FASTA directories from the
#      ngi-igenomes S3 bucket (unsigned requests, region eu-west-1).
#   3. Concatenate both genome.fa files into data/GRCh37_GRCm38_mix.fa,
#      tagging every header line with the assembly it comes from.
#   4. Run main.nf with the hicpro workflow on data/sample.csv.
#
# Must be started from the directory that contains main.nf and data/.
# Requires on PATH: fasterq-dump, gzip, aws, sed, nextflow.

# Abort on the first failing command, unset variable or failing pipeline
# stage, so that the pipeline is never launched on partially prepared data.
set -euo pipefail

readonly IGENOMES_BUCKET='s3://ngi-igenomes/igenomes'
readonly MIXED_FASTA='GRCh37_GRCm38_mix.fa'

# Print an error message to stderr and exit with status 1.
die() {
  printf 'run.sh: %s\n' "$*" >&2
  exit 1
}

# Download the FASTQ of every accession listed in SRR_Acc_List.txt (one per
# line, read from the current directory), then gzip every *.fastq file found
# in the current directory.
download_fastq() {
  local accession fastq

  while IFS= read -r accession || [[ -n "${accession}" ]]; do
    # Tolerate Windows line endings and blank lines in the accession list.
    accession=${accession%$'\r'}
    [[ -n "${accession}" ]] || continue
    # stdin is detached so the tool cannot swallow the rest of the list.
    fasterq-dump "${accession}" </dev/null
  done <SRR_Acc_List.txt

  for fastq in *.fastq; do
    # An unmatched glob stays literal; skip it instead of gzipping nothing.
    [[ -e "${fastq}" ]] || continue
    gzip -- "${fastq}"
  done
}

# Sync the whole-genome FASTA directory of both assemblies into ./GRCh37/
# and ./GRCm38/.
download_fasta() {
  aws s3 --no-sign-request --region eu-west-1 sync \
    "${IGENOMES_BUCKET}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/" \
    ./GRCh37/
  aws s3 --no-sign-request --region eu-west-1 sync \
    "${IGENOMES_BUCKET}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/" \
    ./GRCm38/
}

# Write <assembly>/genome.fa to stdout with "_<assembly>" appended to every
# line that contains a '>' (i.e. the FASTA header lines).
# Arguments: $1 - assembly name, also the directory holding genome.fa
#
# NOTE(review): the suffix goes at the very end of the header line. If the
# headers carry a description after the sequence ID, the ID itself is left
# unchanged - confirm this is what the downstream indexing expects.
tag_fasta_headers() {
  local assembly=$1
  sed -E "s/>.*/&_${assembly}/" "${assembly}/genome.fa"
}

# Concatenate the tagged GRCh37 and GRCm38 genomes, in that order, into the
# mixed FASTA file (overwriting any previous one).
build_mixed_fasta() {
  {
    tag_fasta_headers GRCh37
    tag_fasta_headers GRCm38
  } >"${MIXED_FASTA}"
}

# Run all data preparation steps from inside data/. The body is a subshell,
# so the caller's working directory is left untouched.
prepare_data() (
  cd data/ || die "cannot change directory to data/"
  download_fastq
  download_fasta
  build_mixed_fasta
)

# Launch the pipeline on the prepared data; -resume reuses cached results of
# a previous run when available.
run_pipeline() {
  nextflow run main.nf \
    -profile psmn \
    --workflow hicpro \
    --input data/sample.csv \
    --fasta "data/${MIXED_FASTA}" \
    --outdir results/ \
    --digestion hindiii \
    --keep_dups \
    --filter_pcr_picard \
    -resume
}

main() {
  prepare_data
  run_pipeline
}

main "$@"