Commit f0e7bbd3 authored by elabaron's avatar elabaron
Browse files

Initial commit

parents
This diff is collapsed.
files location /Xnfs/lbmcdb/Ricci_team/HIV_project/data/RNAseq Molecular Barcode
single/paired ends paired end start read1 position
reads length 150 6 nt longueur
adaptor position 3’ read1 ^(\w{6}) code
adaptor sequence AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
expected insert size
min insert size 50
Library name File nick-name Barcode1 name Barcode2 name Barcode1 Barcode2
U937_RNAseq_0h_rep1_R1 LineU937CHX Bcd1 RT3 end header (\w{6})$ after 6 nt read1 ^\w{6}(\w{7}) position
U937_RNAseq_6h_rep1_R1 LineU937CHX Bcd2 RT4 Name1 Seq1 Name2 Seq2
U937_RNAseq_9h_rep1_R1 LineU937CHX Bcd3 RT1 Bcd1 ATCACG RT1 TAGTG
U937_RNAseq_12h_rep1_R1 LineU937CHX Bcd4 RT2 Bcd2 CGATGT RT2 GCTAC
U937_RNAseq_15h_rep1_R1 LineU937CHX Bcd5 RT3 Bcd3 TTAGGC RT3 ATCGA
U937_RNAseq_18h_rep1_R1 LineU937CHX Bcd6 RT4 Bcd4 TGACCA RT4 CGACT
U937_RNAseq_21h_rep1_R1 LineU937CHX Bcd7 RT1 Bcd5 ACAGTG
U937_RNAseq_24h_rep1_R1 LineU937CHX Bcd8 RT2 Bcd6 GCCAAT
U937_RNAseq_0h_rep2_R1 LineU937CHX Bcd9 RT3 Bcd7 CAGATC
U937_RNAseq_6h_rep2_R1 LineU937CHX Bcd10 RT4 Bcd8 ACTTGA
U937_RNAseq_9h_rep2_R1 LineU937CHX Bcd11 RT1 Bcd9 GATCAG
U937_RNAseq_12h_rep2_R1 LineU937CHX Bcd12 RT2 Bcd10 GGCTAC
U937_RNAseq_15h_rep2_R1 LineU937CHX Bcd13 RT3 Bcd11 CTTGTA
U937_RNAseq_18h_rep2_R1 LineU937CHX Bcd14 RT4 Bcd12 ACTGAT
U937_RNAseq_21h_rep2_R1 LineU937CHX Bcd15 RT1 Bcd13 ATGAGC
U937_RNAseq_24h_rep2_R1 LineU937CHX Bcd16 RT2 Bcd14 ATTCCT
U937_RNAseq_0h_rep3_R1 LineU937CHX Bcd17 RT3 Bcd15 CAAAAG
U937_RNAseq_6h_rep3_R1 LineU937CHX Bcd18 RT4 Bcd16 CAACTA
U937_RNAseq_9h_rep3_R1 LineU937CHX Bcd19 RT1 Bcd17 CACCGG
U937_RNAseq_12h_rep3_R1 LineU937CHX Bcd20 RT2 Bcd18 CACGAT
U937_RNAseq_15h_rep3_R1 LineU937CHX Bcd21 RT3 Bcd19 CACTCA
U937_RNAseq_18h_rep3_R1 LineU937CHX Bcd22 RT4 Bcd20 CAGGCG
U937_RNAseq_21h_rep3_R1 LineU937CHX Bcd23 RT1 Bcd21 CATGGC
U937_RNAseq_24h_rep3_R1 LineU937CHX Bcd24 RT2 Bcd22 CATTTT
Bcd23 CCAACA
Bcd24 GATGCT
File nick-name File name
LineU937CHX U937_R1_001.fastq
This diff is collapsed.
This diff is collapsed.
files location /Xnfs/lbmcdb/Ricci_team/HIV_project/data/RNAseq Molecular Barcode
single/paired ends paired end start read1 position
reads length 150 6 nt longueur
adaptor position 3’ read1 ^(\w{6}) code
adaptor sequence AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
expected insert size
min insert size 50
Library name File nick-name Barcode1 name Barcode2 name Barcode1 Barcode2
U937_RNAseq_0h_rep1 LineU937CHX Bcd1 RT3 end header (\w{6})$ after 6 nt read1 ^\w{6}(\w{7}) position
U937_RNAseq_3h_rep1 LineU937CHX Bcd2 RT4 Name1 Seq1 Name2 Seq2
U937_RNAseq_6h_rep1 LineU937CHX Bcd3 RT1 Bcd1 ATCACG RT1 TAGTG
U937_RNAseq_9h_rep1 LineU937CHX Bcd4 RT2 Bcd2 CGATGT RT2 GCTAC
U937_RNAseq_12h_rep1 LineU937CHX Bcd5 RT3 Bcd3 TTAGGC RT3 ATCGA
U937_RNAseq_15h_rep1 LineU937CHX Bcd6 RT4 Bcd4 TGACCA RT4 CGACT
U937_RNAseq_18h_rep1 LineU937CHX Bcd7 RT1 Bcd5 ACAGTG
U937_RNAseq_24h_rep1 LineU937CHX Bcd8 RT2 Bcd6 GCCAAT
U937_RNAseq_0h_rep2 LineU937CHX Bcd9 RT3 Bcd7 CAGATC
U937_RNAseq_3h_rep2 LineU937CHX Bcd10 RT4 Bcd8 ACTTGA
U937_RNAseq_6h_rep2 LineU937CHX Bcd11 RT1 Bcd9 GATCAG
U937_RNAseq_9h_rep2 LineU937CHX Bcd12 RT2 Bcd10 GGCTAC
U937_RNAseq_12h_rep2 LineU937CHX Bcd13 RT3 Bcd11 CTTGTA
U937_RNAseq_15h_rep2 LineU937CHX Bcd14 RT4 Bcd12 ACTGAT
U937_RNAseq_18h_rep2 LineU937CHX Bcd15 RT1 Bcd13 ATGAGC
U937_RNAseq_24h_rep2 LineU937CHX Bcd16 RT2 Bcd14 ATTCCT
U937_RNAseq_0h_rep3 LineU937CHX Bcd17 RT3 Bcd15 CAAAAG
U937_RNAseq_3h_rep3 LineU937CHX Bcd18 RT4 Bcd16 CAACTA
U937_RNAseq_6h_rep3 LineU937CHX Bcd19 RT1 Bcd17 CACCGG
U937_RNAseq_9h_rep3 LineU937CHX Bcd20 RT2 Bcd18 CACGAT
U937_RNAseq_12h_rep3 LineU937CHX Bcd21 RT3 Bcd19 CACTCA
U937_RNAseq_15h_rep3 LineU937CHX Bcd22 RT4 Bcd20 CAGGCG
U937_RNAseq_18h_rep3 LineU937CHX Bcd23 RT1 Bcd21 CATGGC
U937_RNAseq_24h_rep3 LineU937CHX Bcd24 RT2 Bcd22 CATTTT
Bcd23 CCAACA
Bcd24 GATGCT
File nick-name File name
LineU937CHX U937_R1_001.fastq
This diff is collapsed.
// Execution profiles.
//   sge    : submit each pipeline step to the SGE scheduler.
//   docker : run each pipeline step inside the rmi_splitter container image.
profiles {
    sge {
        process {
            // NOTE(review): trimming_bcd uses the queue pattern '*E5-26*' while the
            // two other steps use 'E5-26*' - confirm the difference is intentional.
            withName: demultiplexing {
                executor = 'sge'
                queue = 'E5-26*'
                penv = 'openmp8'
                cpus = 8
                beforeScript = 'source ~/.bashrc'
            }
            withName: trimming_bcd {
                executor = 'sge'
                queue = '*E5-26*'
                penv = 'openmp8'
                cpus = 8
                beforeScript = 'source ~/.bashrc'
            }
            withName: R2_splitting {
                executor = 'sge'
                queue = 'E5-26*'
                penv = 'openmp8'
                cpus = 8
                // This step additionally loads the Python module before running.
                beforeScript = 'source ~/.bashrc ; module load Python/3.6.1'
            }
        }
    }
    docker {
        docker.enabled = true
        docker.temp = 'auto'
        process {
            withName: demultiplexing {
                container = 'rmi_splitter:1.0.0'
            }
            withName: trimming_bcd {
                container = 'rmi_splitter:1.0.0'
            }
            withName: R2_splitting {
                container = 'rmi_splitter:1.0.0'
            }
        }
    }
}
/*
 * RNAseq Analysis pipeline
 */

/* demultiplexing */

// Default inputs of the demultiplexing step: a glob covering the R1 reads,
// the barcode sheet and the index reads, plus the splitter shell script.
params.fastq = "data/RNA_seq_{sub_R1.fastq,barcodes.csv,Index.fastq}"
params.script="src/rmi_splitter.sh"

log.info "R1 fastq files : ${params.fastq}"
log.info "R1 splitting script : ${params.script}"

// The three files matched by the glob are emitted together as one group.
Channel
    .fromFilePairs(params.fastq, size: 3)
    .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
    .set { fastq_files }

// The splitter script is staged into the task like any other input file.
Channel
    .fromPath(params.script)
    .ifEmpty { error "Cannot find any script matching: ${params.script}" }
    .set { script_flow }
// Runs the splitter script on one group of input files and collects the
// per-library fastq files plus the log/tmp files the script leaves behind.
process demultiplexing {
    publishDir "results/RNAseq/U937/rmi_splitter", mode: 'copy'
    tag "${group_id}"

    input:
    set group_id, file(inputs) from fastq_files
    file splitter from script_flow

    output:
    file "*.fastq" into fastq_files_demultiplex
    file "*.{log,tmp}" into log_files_demultiplex

    script:
    // inputs[1] is passed as the config file (-c), inputs[2] as the reads (-i)
    // and inputs[0] as the index file.
    // NOTE(review): this relies on the order in which the grouped files are
    // emitted (presumably sorted by name) - confirm against the input glob.
    """
    bash ${splitter} -c ${inputs[1]} -j ${group_id} -i ${inputs[2]} --indexFile ${inputs[0]} --fast
    """
}
// Emit every demultiplexed file on its own, paired with its base name, so the
// trimming step receives one [baseName, file] item per library.
fastq_files_demultiplex
    .flatten()
    .map { fq -> [fq.baseName, fq] }
    .set { fastq_files_demultiplex_trim }
/* Trimming after barcode splitting */
// Trims each demultiplexed fastq with fastx_trimmer and writes the result
// under the library name (the input base name minus its last six characters).
process trimming_bcd {
    publishDir "results/RNAseq/U937/01_demultiplexing", mode: 'copy'
    cpus 4
    tag "$file_id"

    input:
    set file_idi, file(fastq) from fastq_files_demultiplex_trim

    output:
    file "*.fastq" into R1

    script:
    // Drop the last six characters of the base name
    // (presumably the "_bcded" suffix added by the splitter - confirm).
    file_id = file_idi[0..-7]
    // The staged input is removed afterwards so that only the trimmed file
    // remains in the task directory.
    """
    fastx_trimmer -i ${fastq} -f 14 > ${file_id}.fastq
    rm ${fastq}
    """
}
/* splitting R2 file */

// Default inputs of the R2 splitting step: the R2 reads and the Python
// script that splits them.
params.R2fastq = "data/RNA_seq_sub_R2.fastq"
log.info "R2 fastq file : ${params.R2fastq}"
params.R2script = "src/splitting_R2.py"
log.info "R2 splitting script : ${params.R2script}"

// `error "..."` must be a call: written as `error : "..."` it is a labelled
// statement, the closure merely returns the message and a missing input goes
// unnoticed instead of aborting the run.
Channel
    .fromPath(params.R2fastq)
    .ifEmpty { error "Cannot find any file matching ${params.R2fastq}" }
    .set { R2 }

Channel
    .fromPath(params.R2script)
    .ifEmpty { error "Cannot find any file matching ${params.R2script}" }
    .set { R2script_flow }
// Runs the R2 splitting script on the R2 reads. All trimmed R1 files are
// staged into the task directory, which is passed to the script with -d
// (presumably so it can match R2 reads to the demultiplexed R1 files -
// confirm against splitting_R2.py).
process R2_splitting {
    // Interpolate the file's base name; the previous literal string tagged
    // every task with the text "R2_fastq.baseName".
    tag "${R2_fastq.baseName}"
    publishDir "results/RNAseq/U937/01_demultiplexing", mode: 'copy'

    input:
    file R2_fastq from R2
    file R1_fastq from R1.toList()
    file R2script from R2script_flow

    output:
    file "*" into splitted_reads

    script:
    """
    python ${R2script} -f ${R2_fastq} -d ./
    """
}
#! /bin/bash
##########################################################
################ STILL TO BE IMPLEMENTED #################
## paired-end files
##
##
################## Settings ##############################
# Usage text shown when the command line is invalid.
# Arguments: name of the config file, job name and input file.
USAGE="rmisplitter.sh [OPTIONS] -c [configFile] -j [nameJob] -i [inputFile]
OPTIONS :
--fast search only the barcode combinaison given for each sample. Run faster but can not find barcode contaminations
--num-bcd N number of barcode used (1 or 2) (default : 2 ; barcode 1 is in the header)
--paired-end <paired file>
--indexFile <Index File> If Illumina Indexes are not in the header but in a separate file"
### DEFAULT OPTION
# Flags start as the literal FALSE and are switched to the literal true by
# the option parser; later tests compare against exactly these two strings.
indexFile=""
bcd1InSeparateFile=FALSE
pairedEnd=FALSE
FAST=FALSE
NUMBERBARCODE=2
### READ OPTIONS FROM TERMINAL
# Walks the positional parameters and fills:
#   initial_fastq (-i), configFile (-c), jobname (-j), FAST (--fast),
#   NUMBERBARCODE (--num-bcd), pairedEnd/paire2 (--paired-end),
#   bcd1InSeparateFile/indexFile (--indexFile).
# Any invalid command line prints the usage on stderr and exits with status 1.
while test $# -gt 0; do
    # Options that take a value must actually be followed by one.
    case $1 in
        -i | -c | -j | --num-bcd | --paired-end | --indexFile)
            if test $# -lt 2; then
                >&2 echo "ERROR : option $1 requires a value"
                >&2 echo "$USAGE"
                exit 1
            fi
            ;;
    esac
    case $1 in
        -i)
            initial_fastq=$2
            shift 2
            ;;
        -c)
            configFile=$2
            shift 2
            ;;
        -j)
            jobname=$2
            shift 2
            ;;
        --fast)
            FAST=true
            shift
            ;;
        --num-bcd)
            # Later numeric tests only handle 1 or 2 barcodes.
            case $2 in
                1 | 2)
                    NUMBERBARCODE=$2
                    ;;
                *)
                    >&2 echo "ERROR : --num-bcd must be 1 or 2 (got : $2)"
                    >&2 echo "$USAGE"
                    exit 1
                    ;;
            esac
            shift 2
            ;;
        --paired-end)
            pairedEnd=true
            paire2=$2
            shift 2
            ;;
        --indexFile)
            bcd1InSeparateFile=true
            indexFile=$2
            shift 2
            ;;
        *)
            # An unknown option is an error: report it and fail (non-zero status)
            # so that callers do not mistake it for a successful run.
            >&2 echo "invalide option : $1"
            >&2 echo "$USAGE"
            exit 1
            ;;
    esac
done
### READ OPTIONS FROM CONFIG FILES
# The config file (-c), the job name (-j) and the input fastq (-i) are all
# mandatory: stop as soon as any one of them is missing.
if [[ -z $configFile || -z $jobname || -z $initial_fastq ]]
then
    >&2 echo "ERROR : missing argument(s)"
    >&2 echo "USAGE : $USAGE"
    exit 1
fi
# Every parameter below is read from the config file, so it must be readable.
if [[ ! -r $configFile ]]
then
    >&2 echo "ERROR : cannot read config file : $configFile"
    exit 1
fi
### Read the global parameters from the config file ###
# Each parameter sits on the row whose 2nd tab-separated column holds its
# label; the value is taken from the 3rd column.
workdir=$(awk -F "\t" '$2 ~ /files location/{print $3}' "$configFile")
seq_adaptor=$(awk -F "\t" '$2 ~ /adaptor sequence/{print $3}' "$configFile")
min_insert_size=$(awk -F "\t" '$2 ~ /min insert size/ {print $3}' "$configFile")
# Ignore stray whitespace around the number.
min_insert_size=${min_insert_size//[[:space:]]/}

# Fail early on an incomplete config instead of handing empty values to the
# downstream tools.
if [[ -z $seq_adaptor ]] ; then
    >&2 echo "ERROR : 'adaptor sequence' not found in $configFile"
    exit 1
fi
if ! [[ $min_insert_size =~ ^[0-9]+$ ]] ; then
    >&2 echo "ERROR : 'min insert size' is missing or not a number in $configFile"
    exit 1
fi
# Minimal read length = 6 + 7 + minimal insert size.
min_read_length=$((6 + 7 + min_insert_size))

### log informations ###
log="$jobname.log"
echo "informations are in the log file : $log"
date > "$log"
echo "parameters used :
input file : $initial_fastq
Job Name : $jobname
Config File : $configFile
fast : $FAST
number of barcodes used : $NUMBERBARCODE
paired-end : $pairedEnd
bcd1 in separate file : $bcd1InSeparateFile
Index File : $indexFile
" >> "$log"
echo "
Config file : $configFile
workdir : $workdir
seq_adaptor : $seq_adaptor
min_read_length : $min_read_length
" >> "$log"
###############################################
############## BARCODE RETRIEVAL ##############
###############################################
# name1 = names of the first barcodes, read from column 7 of the config file
# (rows 13 to 47 only).
## the outer parentheses in ($(...)) store the output of awk in an array
name1=($(awk -F "\t" 'NR > 12 && NR < 48 && $7 ~ /[a-z]/ {print $7}' $configFile))
# The barcode sequences (column 8) are loaded in two steps:
## 1. into a plain indexed array
## NOTE(review): the character class [A|T|G|C] also matches a literal "|".
tmp1=($(awk -F "\t" 'NR > 12 && NR < 48 && $8 ~ /[A|T|G|C]/ {print $8}' $configFile))
## 2. then copied into an associative array whose keys are the barcode names
##    instead of numbers, pairing names and sequences by position
declare -A seq1
for idx in "${!name1[@]}" ; do
    seq1[${name1[$idx]}]=${tmp1[$idx]}
done

# Same procedure for the second barcodes (columns 9 and 10), only when two
# barcodes are in use.
if [ $NUMBERBARCODE -eq 2 ] ; then
    name2=($(awk -F "\t" 'NR > 12 && NR < 48 && $9 ~ /[a-z]|[A-Z]/ {print $9}' $configFile))
    tmp2=($(awk -F "\t" 'NR > 12 && NR < 48 && $(10) ~ /[A|T|G|C]/ {print $(10)}' $configFile))
    declare -A seq2
    for idx in "${!name2[@]}" ; do
        seq2[${name2[$idx]}]=${tmp2[$idx]}
    done
fi
### Read the library table (rows 11 to 47 whose 2nd column has a lowercase letter) ###
# Column 2 = library name, column 4 = name of barcode 1, column 5 = name of barcode 2.
libraries=($(awk -F "\t" 'NR>10&&NR<48&&$2~/[a-z]/ {print $2}' $configFile))
lib_bcd1=($(awk -F "\t" 'NR>10&&NR<48&&$2~/[a-z]/ {print $4}' $configFile))
if [ $NUMBERBARCODE -eq 2 ] ; then
    lib_bcd2=($(awk -F "\t" 'NR>10&&NR<48&&$2~/[a-z]/ {print $5}' $configFile))
fi
# NOTE(review): lib_file reads column 2 exactly like `libraries`, so comparing
# their lengths below can never fail; column 3 may have been intended - confirm.
lib_file=($(awk -F "\t" 'NR>10&&NR<48&&$2~/[a-z]/ {print $2}' $configFile))

### Number of items read from the config file:
echo "Bcd1 used : ${#name1[@]}" >> $log
if [ $NUMBERBARCODE -eq 2 ] ; then
    echo "Bcd2 used : ${#name2[@]}" >> $log
fi
n_libraries=${#libraries[@]}
echo "libraries to analyse : $n_libraries" >> $log

# Check that every cell is filled: each per-library column must hold as many
# entries as there are libraries.
if [ $NUMBERBARCODE -eq 1 ] ; then
    if ! { [ $n_libraries = ${#lib_bcd1[@]} ] &&
           [ $n_libraries = ${#lib_file[@]} ] ; } ; then
        >&2 echo "Missing data in the config file"
        exit 1
    fi
elif [ $NUMBERBARCODE -eq 2 ] ; then
    if ! { [ $n_libraries = ${#lib_bcd1[@]} ] &&
           [ $n_libraries = ${#lib_bcd2[@]} ] &&
           [ $n_libraries = ${#lib_file[@]} ] ; } ; then
        >&2 echo "Missing data in the config file"
        exit 1
    fi
fi
echo "done" >> $log
###############################################################################
################### PRE-PROCESSING OF THE INPUT FILES #########################
###############################################################################
### Create the parameter file(s) used for the barcode splitting.
# Each line written is "<name> \t <sequence>".
#   1 barcode              : one line per library with its barcode 1 sequence
#   2 barcodes, not fast   : one file per barcode set (bcd1Param.tmp, bcd2Param.tmp)
#   2 barcodes, fast       : one line per library with both sequences concatenated
echo "Creating splitting parameters file
" >> $log
if [ $NUMBERBARCODE -eq 1 ] ; then
    for k in "${!libraries[@]}" ; do
        echo -e "${libraries[$k]} \t ${seq1[${lib_bcd1[$k]}]}" >> bcd1Param.tmp
    done
elif [ $NUMBERBARCODE -eq 2 ] ; then
    if [ $FAST == FALSE ] ; then
        for k in "${!name1[@]}" ; do
            echo -e "${name1[$k]} \t ${seq1[${name1[$k]}]} " >> bcd1Param.tmp
        done
        for k in "${!name2[@]}" ; do
            echo -e "${name2[$k]} \t ${seq2[${name2[$k]}]} " >> bcd2Param.tmp
        done
    elif [ $FAST == true ] ; then
        for k in "${!libraries[@]}" ; do
            echo -e "${libraries[$k]} \t ${seq1[${lib_bcd1[$k]}]}${seq2[${lib_bcd2[$k]}]}" >> bcd1Param.tmp
        done
    fi
fi
echo 'done' >> $log
date >> $log
### barcode splitting ###
echo "Beginning barcode splitting" >> $log
if [ $bcd1InSeparateFile == FALSE ] ; then
if [ $NUMBERBARCODE -eq 1 ] || ([ $NUMBERBARCODE -eq 2 ] && [ $FAST == true ]) ; then
fastx_clipper -v -a $seq_adaptor -l $min_read_length -i $initial_fastq 2>> $log | # enlève l'adaptateur puis les séquences inférieurs à 30 nt ( 6 bc moleculaire + 7 bcd RT + 18 insert)
fastx_trimmer -f 7 | # enlève le barcode moléculaire (je n'enleve pas les duplicats de PCR)
awk '$1 {print $0 ;
if ($0 ~ /@/) {
bcd = substr($2,length($2)-6+1,6);
getline ;
print bcd$1;
}
if ($1 ~ /+/) {
getline ;
print "xxxxxx"$1;
}
}' | # commande awk copie le barcode du header dans la sequence
fastx_barcode_splitter.pl -bcfile bcd1Param.tmp --mismatches 2 --prefix ./ --suffix "_bcded.fastq" --bol >> $log # split par barcode : si plusieurs correspondent à cause des mismatch permis, prends celui qui a le moins
:' if [ $NUMBERBARCODE -eq 1 ] ; then
for (( i=0 ; $i < ${#libraries[@]} ; i++ )) ; do
if [ -s ./${libraries[$i]}_bcded.fastq ] ; then
fastx_trimmer -i ./${libraries[$i]}_bcded.fastq -f 8 > ${libraries[$i]}.fastq
rm ./${libraries[$i]}_bcded.fastq
else
continue
fi
done
elif [$NUMBERBARCODE -eq 2 ] ; then
for (( i=0 ; $i < ${#libraries[@]} ; i++ )) ; do
if [ -s ./${libraries[$i]}_bcded.fastq ] ; then
fastx_trimmer -i ./${libraries[$i]}_bcded.fastq -f 13 > ${libraries[$i]}.fastq
rm ./${libraries[$i]}_bcded.fastq
else
continue
fi
done
fi
'
elif [ $NUMBERBARCODE -eq 2 ] && [ $FAST == FALSE ] ; then
mkdir toremove
fastx_clipper -v -a $seq_adaptor -l $min_read_length -i $initial_fastq 2>> $log | # enlève l'adaptateur puis les séquences inférieurs à 30 nt ( 6 bc moleculaire + 7 bcd RT + 18 insert)
fastx_trimmer -f 7 | # enlève le barcode moléculaire (je n'enleve pas les duplicats de PCR)
awk '$1 {print $0 ;
if ($0 ~ /@/) {
bcd = substr($2,length($2)-6+1,6);
getline ;
print bcd$1;
}
if ($1 ~ /+/) {
getline ;
print "xxxxxx"$1;
}
}' | # commande awk copie le barcode du header dans la sequence
fastx_barcode_splitter.pl -bcfile bcd1Param.tmp --mismatches 2 --prefix ./toremove/ --suffix ".fastq" --bol >> $log # split par barcode : si plusieurs correspondent à cause des mismatch permis, prends celui qui a le moins
cd toremove
for f in *.fastq ; do
if [ $(cat $f | wc -l) -gt 0 ] ; then
lib=${f::-6}
cat $f | fastx_trimmer -f 7 | # on enlève le barcode de PCR pour le second trimming
fastx_barcode_splitter.pl -bcfile ../bcd2Param.tmp --mismatches 1 --prefix ../$lib- --suffix ".fastq" --bol >> $log
else
continue
fi
done
cd ..
for (( i=0 ; $i < ${#libraries[@]} ; i++ )) ; do
if [ -s ./${lib_bcd1[$i]}-${lib_bcd2[$i]}.fastq ] ; then
fastx_trimmer -i ./${lib_bcd1[$i]}-${lib_bcd2[$i]}.fastq -f 8 > ${libraries[$i]}.fastq
else
continue
fi
done
rm -r toremove
for f in * ; do
if [ ! -s $f ] ; then
rm $f
else
continue
fi
done
fi
elif [ $bcd1InSeparateFile == true ] ; then
if [ $NUMBERBARCODE -eq 1 ] || ([ $NUMBERBARCODE -eq 2 ] && [ $FAST == true ]) ; then
awk '$1 ~ /@/ {getline ; print ":"$1"\n\n\n"}' $indexFile | \
paste -d "" $initial_fastq - |
fastx_clipper -v -a $seq_adaptor -l $min_read_length 2>> $log | # enlève l'adaptateur puis les séquences inférieurs à 30 nt ( 6 bc moleculaire + 7 bcd RT + 18 insert)
fastx_trimmer -f 7 | # enlève le barcode moléculaire (je n'enleve pas les duplicats de PCR)
awk '$1 {print $0 ;
if ($0 ~ /@/) {
bcd = substr($2,length($2)-6+1,6);
getline ;
print bcd$1;
}
if ($1 ~ /+/) {
getline ;
print "xxxxxx"$1;
}
}' | # commande awk copie le barcode du header dans la sequence
fastx_barcode_splitter.pl -bcfile bcd1Param.tmp --mismatches 2 --prefix ./ --suffix "_bcded.fastq" --bol >> $log # split par barcode : si plusieurs correspondent à cause des mismatch permis, prends celui qui a le moins
:' if [ $NUMBERBARCODE -eq 1 ] ; then
for (( i=0 ; $i < ${#libraries[@]} ; i++ )) ; do
if [ -s ./${libraries[$i]}_bcded.fastq ] ; then
fastx_trimmer -i ./${libraries[$i]}_bcded.fastq -f 8 > ${libraries[$i]}.fastq
rm ./${libraries[$i]}_bcded.fastq
else
continue
fi
done
elif [$NUMBERBARCODE -eq 2 ] ; then
for (( i=0 ; $i < ${#libraries[@]} ; i++ )) ; do
if [ -s ./${libraries[$i]}_bcded.fastq ] ; then
fastx_trimmer -i ./${libraries[$i]}_bcded.fastq -f 13 > ${libraries[$i]}.fastq
rm ./${libraries[$i]}_bcded.fastq
else
continue
fi
done
fi
'
elif [ $NUMBERBARCODE -eq 2 ] && [ $FAST == FALSE ] ; then
mkdir toremove
awk '$1 ~ /@/ {getline ; print ":"$1"\n\n\n"}' $indexFile | \
paste -d "" $initial_fastq - |
fastx_clipper -v -a $seq_adaptor -l $min_read_length 2>> $log | # enlève l'adaptateur puis les séquences inférieurs à 30 nt ( 6 bc moleculaire + 7 bcd RT + 18 insert)
fastx_trimmer -f 7 | # enlève le barcode moléculaire (je n'enleve pas les duplicats de PCR)
awk '$1 {print $0 ;
if ($0 ~ /@/) {
bcd = substr($2,length($2)-6+1,6);
getline ;
print bcd$1;
}
if ($1 ~ /+/) {
getline ;
print "xxxxxx"$1;
}
}' | # commande awk copie le barcode du header dans la sequence
fastx_barcode_splitter.pl -bcfile bcd1Param.tmp --mismatches 2 --prefix ./toremove/ --suffix ".fastq" --bol >> $log # split par barcode : si plusieurs correspondent à cause des mismatch permis, prends celui qui a le moins
cd toremove
for f in *.fastq ; do
if [ $(head $f | wc -l) -gt 0 ] ; then
lib=${f::-6}
cat $f | fastx_trimmer -f 7 | # on enlève le barcode de PCR pour le second trimming
fastx_barcode_splitter.pl -bcfile ../bcd2Param.tmp --mismatches 1 --prefix ../$lib- --suffix ".fastq" --bol >> $log
else
continue
fi
done
cd ..
for (( i=0 ; $i < ${#libraries[@]} ; i++ )) ; do
if [ -s ./${lib_bcd1[$i]}-${lib_bcd2[$i]}.fastq ] ; then
fastx_trimmer -i ./${lib_bcd1[$i]}-${lib_bcd2[$i]}.fastq -f 8 > ${libraries[$i]}.fastq
else
continue
fi
done
rm -r toremove
for f in * ; do
if [ ! -s $f ] ; then