From 75f2e6d4ad52470661b35fe7806f2adce7f7bbeb Mon Sep 17 00:00:00 2001 From: nservant <nicolas.servant@curie.fr> Date: Tue, 26 Mar 2019 19:40:53 +0100 Subject: [PATCH] first simple version from fastq to maps --- bin/build_matrix | Bin 0 -> 64760 bytes bin/cutsite_trimming | Bin 0 -> 20080 bytes bin/mapped_2hic_fragments.py | 837 +++++++++++++++++++++++++ bin/mergeSAM.py | 326 ++++++++++ bin/scrape_software_versions.py | 4 - bin/src/build_matrix.cpp | 1037 +++++++++++++++++++++++++++++++ bin/src/cutsite_trimming.cpp | 153 +++++ conf/base.config | 37 +- conf/curie.config | 16 + conf/hicpro.config | 17 + conf/test.config | 7 +- main.nf | 267 ++++++-- nextflow.config | 3 +- 13 files changed, 2621 insertions(+), 83 deletions(-) create mode 100755 bin/build_matrix create mode 100755 bin/cutsite_trimming create mode 100755 bin/mapped_2hic_fragments.py create mode 100755 bin/mergeSAM.py create mode 100644 bin/src/build_matrix.cpp create mode 100644 bin/src/cutsite_trimming.cpp create mode 100644 conf/curie.config create mode 100644 conf/hicpro.config diff --git a/bin/build_matrix b/bin/build_matrix new file mode 100755 index 0000000000000000000000000000000000000000..c61c6176c46edf71a6be8dcd3d090c0c1a0b9c4a GIT binary patch literal 64760 zcmeEvdtg+>_5aNaLcnZL)c9z<g+LR8kRV_Jf*X>+UEN@WsQ92sNCL!$BqkdOiW*FU zT-Vjq`c-VL#YbCOtzxB!R6}?Mt&fN=d_sNRYeXX!h|m3fK6B@8Zo;m$-{1G~`{Q?& z%$zf4&YU@O=FFM7v%A;lduK&QMJe_Zt;|peEj-IDP}W~X$vP}RiOW)kC~oBp<#c5b zFmd?j79~uroSpVc)^-{%>0*$^;$K#@g~*C_i;SI~ZlTeF_1F?JWqW;(TV}0Pf6xN$ zR8drcU_HvZ@6Q56S|T!b`WA)3qrEZM%E)>YkM(+Gy<S<*P8BJ?o!b2)JN_$@<wenM zK!n{)lsxgbFO|(Jg&+9JnOA>h5vad%i;SJ#&5q!)Q@h@Us7E<}=#wM~=gRgX=_OUl zZ>O?(WkOZu!YPv`RF#jfs;q8k9N(BVW&D&$>Gd_~6GgwtpR}{*TqIhu^{4`(%p>rR zHo)=?Uvw^7yZY)4RjaRgGiCdX*WSM3(B~{;P1fUx#2!q#A|)yi9sT|=K=Dy5yA=O^ zhJPtbhvjZKSYCAB=wGb(`$t7r-Lf>er|!u?Q`elf`2la|vt5Bp-JeI#37ngn|B5&9 z%Ofw2SUW8Dp3`eLcX$HluJHVP6IeuiHlgw3!;~(JfX{@$;pm?o8XkWDN(x7RBm(`F z5%AwYsp0s%2ZIcU|4D>)Ic{p0{+<y5e>fpL|HmTm?~EY-{SokgN4w$V9}5EshhH5* 
zo|hu<Pl7px<FjQ*c)TY9{}mDNTO+hPJ;HcB4(Aq5p0gt8e^!L?&4V)t$7gc{c`k^+ z|GEhJ|8)fY)I{L_VFdgG5ytU^2=c!YK|e<$@OeK1eq#i^JsLsIQzMM;(-H6q5yrPX zf`8!p7;e0xBG~7_2y*U^fG>`qhrdLSXKw^~9*@wkH4*ILjR^V~5}{vrN07fVf_;7y zf&b(P^6!hl-xa~G&WPaWy%G4|8o_RZ5$vrdg5KVbAdeEEzZDVW`AvlRJ1IiDTO-Kt zjv#+l1iNaEpy!t&&_5hOo;?xt|4jt`tr7Hgegu3H^f_EPRcU&x1e#KmB<PXPJ;472 z_;njg1l%P4D#}0QeAzJCEiqQI3iE>WC6AW~M4>Ew4e}G;@fQo<B=J)rKk;tat}5}@ zLC=EE0Q`#>7vk4#DiM@6{kOnVez$Db#$Shij8u+O+Aehq`X%Bo;w`EFrUCMQhklX2 z`qvUcY0EzZ@(BG%yse)Tq?{d6o+iov6Qpccxy&svwGv+-$FVlm!b`NGAdal}m6x4) zCS%7bN^$Yxr8U*X^}f<NUvaTgeBQF+xfP2m>wOh<xmBh0^%eC>v8Sk@xV)lHqKgW0 zt7@t%ib@w&Raj-=ONwiKOX@00%Zq<rv7)#PIUmR`n=`L|5?ZLu%qS+)dS6}5iu}3f z&nxwm<%?op=2;6%>nqEO7gbhOENochDVyiZ%q&|{T376=E3Nd^gD^iYGk+PW&!6X; zT;^L*TLEO|S>U+5vb>_YxU#y^XAz+8WQ!eIs;pidfuU$VKQBX8s?W{LFFe0&X`ft_ zp;#-eUsAkqg|DK%@cd;<mkM!fve9Z~b$vyh&$Bc?f9|}RB53h4(4Spg3<cCIFJ1^8 zlvY*MlnIglJ&9yL`zfumzP6^mqMy=gCNHXLs9z$)M*~xwO_!C{`|OspEnQ%EsSNzB zC|w#(!BQV~>%PoMvf`r1b@C^dRZ%XsWyw%u5!>nsH;{8dDaTt<dy0ySGK%x(O(`xs zf8JTeRf5~Z?Bcl#G5!@5l38(Sb$M}J#lq66((1B`g@s^~6(XswDX)Om>nrAE6wjT< z;egdilPS6wq(7uLm>Fe_rD#J8qQwJd0h6egL+{I~tf?36O!C0sY@3|tv$b3UTSaRF zjdzkTPm0`UnivPmG&76sY4AgYgg|m=r@+2St13c5NE7a}V9b^Jv&(9h)|S>)(6~j9 zLaM0wfde}SQvZPYIjN?$qMEIcyx0RZT>!*Vj0>6-LtyLZhgo8OVJp8+*zau`;)ez; zccQI3Tb2mp1-q^&t;<JOCW@}0J3i^m;D54;;l_NWl~ofniVN#1msY@Jme5hu)>KuN zt;n1N|4>~~L2D&Cf2nOfKSYoHrW6{G9;K$iS6s8GxURH%afKX8jP9gGmDT0>WtLz1 zVIwQ;2VLu%J(bmP)=Mj@eVBhvd}vl-Fy{H{!rHdwHyo3io`ya~PU^EU9~XM&O)8!@ zxwyzO^?q&iwcQWxXj)9lg;<KJi_1!VWlLyJMIinmvqe~uwfsPr^NK5$*7{apTxzlW z)zp<PuCS-x53q1cbH$4r8;ffz>gsE%ORFluY*~iT&e@AAd^o4UvINlv%>RE^N6?lf z_>|IGOrmnJGRdygFE6ddnu9gknpJ%c<zKCJLd;2Ju%u(_hMdtcvQt6T%V}Zrd{y<H zV><_7LJq=q=w(H4=ab>i>2t(j*%R)+eDa6fU!{mIDjE?ol=(bYa$&BVD^_O44F77* zW{Nr6=qq0Mb6>vI+ey%TRk+~{*W=I<h7Qk}=L`|b`$7b#FUqrhQKfA;`GsXwmXEGE ztG2<H|L^!~%LQ_!;$RGLzhVNRGok4-&o`;8qOMMe8k$GnyB<g_gL$9VD6g=+MnAM; z=5i613#ipr$P<g@Q@SFtIQ)S5XR(m0o~#Rv!bNjMmf<6!LMdx`RiPaN{qQ+*Hq4-Y 
zMRgg*LO5;8W}9w=wSwXGQY*49tEj9}e02@gWkfD6D=V&NL8elCQT1|oYDB^H4NEIn zSZ3A0bSSG`fxv@tH^T8nSXC4teKjJfs;HJh309xRN}sZ{VrhMaPhpVFeA&`k1%9WZ zdYMvRUshVZ2-J%jxmGD6GE=xHlW=LxvI_ELh2mniFXR8ErIppn?A+Yqv(ht^*<R1g z+~SGp6VoSK`T6sKpOl`NrR2_^pEJ`_oSA;sz_PO(W&LYq_5-t>5cek0;=dUDjlp?! zG~PL2gCy}WzzxEwG|OU?c%;nnuT>9QECIzzti{Wwv3RkjRj04isy9U9*<yldmo4!> zD;+8!)=rRln+MaV5H1QeqVQh>_L@nMNyWdyF)IKKRYv0cg%8V19OVtCR$?a}tBiG& z-*!xSjH8@wSp3OvN$;IdLxM5|_1R{_i;tl85G7y2WuiP@nJeL!9{D+T=7W_|373lU zK}wZ`_3P8XFHX5e!oIZIu=9*obijOiA1M(jw!!FC-(&B$!Cbs2uuJUyHu0C)NTel? zl?aIV%SF!mW1kBU&vy>?W1kBUf3b~3YVV(kFLB_T<o=oXQU~7N&l7)z18?uEiC^fz z+xvXt7dY_txdZX@9r)x8B?7|sTa1%kq&o1lTl-0M;AxNcli|SQs3Y`bIq+=Te%uZ` z$Hsm%2c9z7PoV>k0S-O$9e5aK=qYjFaa0p}mN@WEd#H8baXJxt8Xfq=5LQu|9C()l zzs7;5ecMl)13%nGB3<jiJI{C5Iq)Yq=+`^&d`D(K8yxr(Z6wmo4*W?De1`*nvID=z zflqeeyBzpa9Qb_>{HYH70SEpx2i|btiyinL2mVJ6e6IulV+US&v_wF(kxLx-cn6;E zr0gfjf&Yn(M4Iftk96Qw2Y!?TpX$J;IPe(`{AdS0%Yh%`z`GszpE~fG1E1=^7dr51 z4*Yxveyjsu;=rHjz%Oy&$2st|4*YlrzR`hCci@{G_z4dD8V5eZfp2r*GadM~4*Wz1 zew_n<mIJ@ufuH2SZ*brzJMfzw_$dy2hXa4M1HZ?C&vM|q9QbSpexCzB)qy|Yz)y4F z4F`U@1K;Do&v4*-9r$w{ctxHU)BfEKe7pmn<G?35@G~9wWCuRifma>)JO@72fzNl~ zGaUF?4t$maKih$KJMfwVuQ~7@2fomOKhJ@m@4%n$z?V4i<qrH32j1(z*E;Y84t%2n zKgWS@a^U%H)PB}D@P#%KX`2Iop##6xfuHNZuXEt%Iq>Tp_#y{hYyKo&^T*}itSVY- zhcC|9fkLf$XFOBK6P!E|B*D>>@Yi*Q8#%&DNMRlb;u$@bFhe%8OTZ%uGXyX@1biyt z7{VI_d^}+;`Q|zSClKaRZ?*|InlP7mvq`{T`2o{x%~}B;BFrV-ED`X>gt?TPg#vz; zZ~|esfZrg@rQ6I9@Ls}<_Dog4&k#P2aFT$x6Xw!wDgxd@m`k?V^BoYQ|3a8cwRu3m ze<D1baF>AZBYZsJ4gvp)@Ck%B2>3R_T#C(g0=|hbmteC^z&8-)(rY#e_-evja?M%+ z*AeDYYnBN3O2S-X%|Zb$B+RANbPM=0!dz0#3<1w2%xh4lD&X@7a|tz*1U!>4mrhd= z@KnNF8qJ<>+5d@zRl)}ZJeDw*NV7}8BMEb9G&=-*D&bLtHwgH6!dwc?bplQx%q7rl z6L2(PE`4T`fWNv4Fqb^DR=|e{bEz{+1pF~!E^%g|fZruNmatpEZxB9{aE5^Q66R87 zsset7@OZ*W0^UxTOP8q#cne`JS!T~SV*CknsWJ};_)mm033mzjKEe|TcL?}bgt;V{ z8w7kCVJ=1HIsxBAm`jk^Cg2+gbLlag1bj7NE;(kcfa?fzsWD3gd?jHnF=nBF7ZT>u zV!8!<8DTCdW`=<05<Z8pD&X@7b7?V?1U!>4mlRVG@KnNFO3a?G#rP9;6Fwl|v4pvF 
zm|X%MNtjE9*&*Ol3Fi{tAmHN(bBQq52{?f;mj<&<z|n-cB$!PC{z?bTrNFEe@FBt) z;SvFVOqj0UEEMp&gwG@F7VsN{&nKKA;Jt+D%1u?k&k!ykoFw4wgz36XMZjAK(`B1I zUy1Q2Ojm6l5b&P}Ur4x1!1ocRYc@Lo7i#{&T1=ziU#>PkA7unEr7qjiR``Un)~#$g z6>hV=*6&ufX5H>qj3%^~b4kv{ITz*V-{ur~^nYG7Ph0iLdhpWxgPMa;njZJi&r%iF zEic2r24)Af&b$<{XfQJ$WC}ASc1C07Z*<K>i|Ax`UP^sber6D`j|E7pnC7(8BLWhh z9I1$vSj`Q*qUWV#bPuEdZ{87=ll}H}rCQkzkAK`wEihsi37W1|#y7mTb>bavMbkg^ z>Ysn{dshqJ?QWd`)DsCH0iMXMaP?b%M&!^Cvh@hE;JiDob3v-Y2Cn~Y^Ge9zYIz4d zG~_kpzrrfQyc_uptRT!6BELxH=Oe!knjyZ#k<U_mLxBRjRw_>OLq}e}H>J>voLfUq z)BHs#S$<#2Bn@3nCB=OI$`oIoeohK8s%sMzhfFd`s*oljU4k^;co0j0zC#OaOld<W zwz?lLQH(pk3<kATJ2`OrzRMTnT#>UN=Vv*^J6vZdIa{f2^RKW2*BO`vTPvxGc;JM7 z8eF^Mwp{@on7<Sh%GRq`&gH}^KY4fD{Vd-d_iI5`CCJ>$)@IT{7D=~w+7&4oP#;w0 z@%u)^Yk?<)fLh@8lv_asZ5!)gpEkbzm#A-C2TW^+tMx1hX&Zy)Z;>@Oike!W(J=nn z8w`r|p#9INr|F+*dQnQF@iSO(YX_Ct*wA1}aD|Xy<%syqBU@+&Y<M=Bf%2-%hv8v4 zT3HyHlh6~YaW#mxP)S>PR<gTAAQFsu6xKiaq03XP0%IhU+}`#}w%%!+2*5muO6?n+ z@;op4BjoYTYHxQIJ=Ry`YW)T^Eph)01M&F#i@O%gA?_qI2#08qtrlc?BjV8=8c3G; z9?C+>TuWtUq8gQXx>P2rDCWcLrL<43U+nQsDCpN?A`*;J)JKoETxRz;14?i2kUbs+ zz(^kqme)x({()?WNH9*d*wlyEm`A~;SF(8vK=<SAdz`9af8{9prnSH9EV{O@$kqBQ zbj(u6a^yVzK6SXwmthH3+2x|FzeZ<4cxcUM{uyK;t=&VdeGP%AwOXk)n2%yM$}ZkG zu!}?_7-xeWG}U>*Jrm;;1_<+xYSbY~w{~U26t%AR@syj0)f|C&XbVDz&>MhsVj` zR>?sh$bpCi<1vfFun>nE$l)@{p@AF<B!@sh4x0Z*ugy=Tp%b5ATuDayJGAO^$zr%< zF%bZK0P#N+*-m3WZD1@MDiqf8cNlc$o5*?gnM<M8_P3k^w-a3Z%}n2J=YSQOqd32$ zlW2c{BXJd4;Yf^^BLOd~n4glfhT^R}fi!+|Sm<pKIsm~m|HX#!CklO%XuH$67Uh{i z<Bop<_n}jsNe+2Vw+v+>R#&G?nt3a%t^IRnwQq$?mV=pLRr?xRwX1P?gCOwU<~*9c z<(O}V;Fe?l?h7LK8%NN3d#{u61A=iw4S5Rou9o}4F)lH`qOpOo!!6!|283I@2mHHl zHtvMcLI^Ml<2tVf4OWi~b2j+52Q49t6shdDVClvssNX&a<1SpFkwxuw1Z*&kU{44< zm0<EMcIi-pB_Z$|wAQ{Z1Rf-~F9g0rF!ffu?h6DPLtqEN%^~nng7IxO@qGl{A@G+3 z*M-0qf<_2zAeiwByY3=_Z6WXyK^y`#g7a^)iL(jr34vn?DsW$xa!w<d5dwz+Y~B+z z55XNXUco*$d28~TC5kckP%yY<34*P5f?IiE&A11+t;?WjV-nP+1&UH!*bTQ|Pt_}+ za{Wi#Sr?@$#xiiV>Jqz^m-UQ=sN2pJZR>%@OAukj?P^g0!4f*-w)bVX%4}!cQzF-N 
zF3Pt^r12D+#l+F${6HCrm}*u#9pJgOYd$)l$1P#e7oUT&d1vyD%!3~N#h+>VyIS-6 zJ%vS{&JH&^rFCxWx`|1rI}O6Mg8HBA|K_cKDN!!c#&1TVHTNcJ`UlIS7wp(91jWYx zp9{9KD0skiMiX(H3EA}9dSgJZ>D#q!AD^p5b!jj5HlXqrtMXn{*7V(0{dZ;krs;od zR`Hr<_(iB`T!$&WV0YXdKt+9l@CO1mPv5fw1il}KL>O_=)}B8@I{kxS*8=kzR6(3% z)?4XC+Gb?<oH0+FS!8!EAFBn%eJf<pKQn0Wn*MR-5uwH%hW|66r+AH#*AC-ujNGEO zMd_|HT3ADw2Y0(<UaHdWMZ|4c(Ca<qARO-V$hL2$I^kuv5$FYAk^P0BcC!uM&szQ; zo!*~UfLM;_JH`#9zX+|6SA#|U>D+!T7xz&Yk-Ah;&jzp=rl4HD<6rk59qiZtPXXf` z$L>F@)Y<=>qKoubGWX8Q^DhR;*LnJZw4OtnYix(!bM&y@!R($xuAgl)MnMKZ<Mx0g zb8qvusH2}}?$ys9q2~?*VQvz`+8vA9z|X7ing>=GoumR-)O1gvF`4>o-lZB#(-cL^ zHe4+`(HqTQn5wnzb+tS$fSK{ynb8_-OwSzwi7k%)n)vL)byu}Kt-t1Ky&iOW51OqX zcJ!$3OBrSf*kP>3AXB&wgzUr84(I4QA(3u~ju@48VdmbPqyI$h-MWEHU`|x??r1ch z|2(=8YEas(UNcOuN$Q+Ef@%*HE^1>ID+YTj&U2pVxC<M%k3r?wxOE<+%{HN?!rhxE zw0p~Z6IyYb?I@7t9)G@~O`C7HZXAzzPz$U~Q89IC-<tl0SO3&FpYuH60~%#X#!7-% zE&w(f+NvP;7p_(l3`vx<m43n)?4fdbkSAhMSIaq&&x0sD-ZQNzCCSxtg!I(>2h8Zv zST9rJwdRia{FWmgtW@!mCD2J)U_n&B#xlSR!nN~E!HXH^p@p%jU9Ohh<k#FQcA`o1 zAxn<y_T8?QlSF^EiO#0IZuR#>vO%9RWJed$_|b<9tE0ap$aT@#cFW0N^aR>K%}bKB z)~8)9)4>J~zQ?sK8o|k}N3e*JOO`PggJ6CHQ0%KRJIvEjw@s9J^^c6l#wiMV>k=&4 z5IQOv$8P4MZZK)y>9)6OSllP)USJ24Pke%@nfxDaD52AM2!IBwgHc%F&fm~13+BP< z#NN*MBe*$b_APP9`<RqB!?+Ke%m)B&jR9}tEHY+TAtiH(fTDnsAz;ZTcR@F_j}A70 zjtjccpzGe@FYFDR7u2<0y{r2+$qf3mi4+pu;~+A(2z9xYKL5z`2ao<djrf(EqWsMF zaRkx)NtWiHh2!FMO@~ct*}G_Fu$lWYFy?s}5J!Dj-3YxfDQ4aX5~b>eBRu|#V22mM z3?Jb@(hTomhGv*Atk9z)^-tR}xY7lsE;o!>P-^C0kG|8ZAN3qv7Vp*n;n80chVz~A z<t%dP@U65=-sgiMC&AmqYuQz>Y6y_3=}BIHc}l9Q<sr1RHJQ!%7bwO%94zej;i%sC z|Dr=kF&`b71yh00CeuZ>{VcTYCzV!T4^L)Z5-Q)SqOx%d`B}3r*_Vouh}SxSXXoBA zGCyWxI5W^X-Q1XCaduM@h~iC2_Ui9?_3zL>W5ooZ&;>IK++l`Uwi)8YfR;5x2)!~T z*_dD=IJ_EvFt)~DgdyxV#-g}S|61SzOt7!aNpfTXmtgKBo1+9m5$8+#cO2j6c*OKF z-1l?yyfC~~pETlB7!m6pB-{xU;#2}oHwnX-;@Z@bvH|!t@I*!`J4OO{Fyl0eK>*tW zBhdy10f-VnOG*<AL<CA&rv+HLXh~Uv0z2Pk=L3y}q+4sl$ggwc*E{lJi39`EcR28S zWFF`)k!8%)lCn>t4NXicEYdA0WMQlo5j{ExKnn~I*pRizp~ub#HaE$3dnF2`xJ81; 
zYAask!TT-hiUC1O3OO0$WLpOSFy1U=gUNC!D8c>?j0o~!ld2<cf3}&-`1x6;8A$W= zjVW%V#tViV3yj<r{1ayQE7}W&mBXp|-G;v<wiem-Q4R6y<6JEdlFUE3=serqJ@XOV zT@T_M+TK$DWNh>o;&;mk<#)uxwqaWvQ)-8S1y6;3`(cnLiz;<On`mN>pBZfdg|vwU zwuH2nLK~p$xi$dVjFSL>yNW|J8l_`GG)=$XlZw?8o*cTKey0FhR}DuS{<xq|RTv2F zN2FnBoj0t(0TOtl2;i#iJWuk^Odic0WAmO^eP(iIhjAtb$cs+&fDXYdMb~$E^+U$- zkb&FZbMbDa^`L8w|3pQ}d^4w=6n*n;i_$e%X;(IvDDV-%4_M{8WdSX?nJ8`5wkRZ4 z+}jbAxwok?nBcm7yB775mc6H;J5TS~az<<Sg-1U|Zd>me+rAo<w!L!i9<-2?{i<(h zPV?@l?%xQ1L@z$WaY&SxG9%qMSBy@l@du>cd#y0_?+_cJJn+6{3MJT~7|Ss(91jA* zE*NOsfJ!inrlGV2SL=nei8xoQ2PqrF$gWJO-I9EVTQM%d>mTB3?ODGi10ot3=&rCV z=-9R_WNphjG=%8MU(L2Hs}}U_i{(OECMuJsTuAF6WcxL;G@qq`hb+^ANeRChGA)<} zs(mVK?o4M#-w{k{ma_CXl+utmkXK@B-88)d;vj<hoZZy-@q{}`nCpv%Sg(L<J`kl1 z!O|u4exxO(7X65JwN}D@o%6`+e<0S4A}l4YmagM5WKH5Avn2<%ox=m;T%j&B<Z9ou zB?x<kRL0*R5xprVO-qOGx0-HtpV((l=L~VRXkg&l6r~~PZs}+^h-`l55#&xa&q0Z` zKCuu5Ejak`0C~=khJ)TfWz_B{glMKHXgJM0k&LXk)ImQ)(t}o<$YvhF`sNL+z(J-L z2ikw4La{={BdT_7%8ByoUv`f`qb(S6HsoqqgX4*T@&$!_D^YTcd=~;q`LsX|<@4yf zx{nL-!@`GtB()$YNCREdS3?>;fjGNisN7p;9x+aZJURL<EpWTAUz%m1S3hFB`<`Xg zme=;B6dG-KONG_g)xr;@dHuCX^iXiHj==esn8XFGhyqjK1Dq_~Sd5KZB}>aW`%>J- zberX5!Lm`XboXOfBUlDqEib~pJpN^fKrT)eQBD`<Mc`^|h7gGx3%RT!I6=g-UDdL? 
zTrK|q&(NOeW%hph&OBOY%L`&gMQi@LcroX->>l4wggyGR0vF%~$+LBX@}`x<E*r)m zzIl6;mc6^-{T%<Sq`W|RbTFs+8_1I*<XIl?^)E{neO`tpJb~+yJ<YpS<E7J}qwF46 z%jHff<_IZfSW;|;p=kd8QjC)JB;|OHax6JUj@gi-igJ7lIiA2#O!Jc|o6-MhSBpCi z9lYgiu$H(OSIcR@Nhr4AOnc?X)8Q6BG^Wl(;GX@7?}R@5yLp&MBZ1eNIenX^x8(}8 zUBEMV5u9(yZQOX(&dXLNXnK#f&4|;EzKb)A!CK|sb*rPWBYoA?IugW9(=W1WKF_Yg zF4n{+Dhjue&ugu3y4HMroCWQ`mUvJ5$?%23$sTfDdfcdsP~;lU8MF2PcB8H&ZmF<j ze)TT=*p+gfycR|FcEQ-dlBKewj3w5o!&=~3GYRHr`f<RD^AB!Wn)cJ6VpBx$1Vpef z!~CNtb}RXr2Tc<_z&!NmFTf47-o?)Ad(4NC_W2W0{yfWHM@`n+k2JHxp3jKz&9lfy z&Tmi<#aF|^-r#x6zdFkE&RjI?kZbImq(eEbu{*^{f#)4|e}3F4Meuw6i<1svUoOnh z^uYr0veE1W1Ft_p^IskBO?yGx7K~~BEUM9ypV`svlJ*p64CZ9_UOzb}5S#U4uU6LO z2@HGDpXt9a7HeQm_CM=}<Yd3>T75k1GwRTbALnJiyW-irfEz`<*ZsX|dLV}e&^iZ) z*jr#{O8b*oNW@qb_^acMlb~#mzrn(YQ|ee?#MEkk-CA<smW<D!aF`s!{n&q?2D}b- z)rgr2{13!2KoGwES{Q}x>to@hEMM<x?_s?sQIBIe0{qRbID<BlFuHkXR`+Rbm(bb? zZ(CY>Z!WcViPRb<jkz1zYMTC(TlDA}dIG#JN`_aMAr@5R>9`nMG58X3_Ytg*Sbb7J zbZ)%Fv!We?%^G0=g7`~}mspt?kbPnUAUF04V59J})`9tRZ;{rC`P(s=KOTd*6BzSr z>(#*5=i1BvRp@d1d2)d-2Y@Jqs=AbTJFxTu;G*1r!IzPMc><zm=_nAgd<RRmyC|K- zak#i(ehB=r<~Eb&77shi%kEqm$<7Yb&R%h?K7FulXCGU3hDz7M&PJm*u(J>G{@@rp zTO4mZ4Cy`oNh!kegq__D%m6#vY1{}TTo~v0-B5XWJ8ND0Ev)C+H!y{=KXI-84yMg> z(V^HR6487Rh(&ECgITP3YkI%IG8r=~39GjG4c~px4_YB%7TPd>4W+bS1?OYo-hUI| zpX^Cuyh1S6_HV{+facqHCDHz7h~zIM=?Q`N6O0MfxSciTL1Gy@BKXIA*`k8LNX80D zBO*o^9-OTeL4JzfiI_^PG}hh(S;nIWuqH4b-G?;9vyD8{$aCk2kj38tix(5~dSte0 zq7f@{G(Lr*?Hze1zj6S)^xgJ;ywj};Oed!rN;=(ckz&J}jCECQnw~WsC7M19Tk2U^ zzWhG@*|Di{H+s;n+gb^sTZs<oGlZ^Yf2A>dk#?Xxul^y<Z|OUdphL}%`OWY=W(@p- zzLVM!RL~zi5F#HD_D*^oW`M}Ebr~!bK`O)eXl@w0K*UGvc&)6+ACP%hYZWyLw%iSn zY6D7mbZ75^Yys)`7~~d$wb1WYIA7cGJ;}TbayJSV99gn-wRagGzQ%rDTVhP#$Ei4a z&SI4>I5)s^LY<#^{nPV+5#k~YE_|$n+ct=@o&<P6`zW(Kr^H4M`*c~re1{{y$C2-n zd9VI}h_1c*ld)oJiaL#GhL0y^Wr|^3B8K5wjoe7lh~6{bh5S01Z$iFnlZ9U+Ym=tU zkzeb`uXE(rJMtT3-l)TfX?|N**UrZ!8S)eD;GGMy9d<RoMjb^Jf_x8<P-Rl))0ulE z4ei)CLG+;^NvL@>dBZ|FP{PX?Bt({@Y9wF?Shh}-bsAe<4F+K_J>ZM!^%bzzS=M$i 
zgeQVw2ujHfc8Tc_Z<IqE3`V?blQ%^L(tmqO2NqgCoJ@{5y~CXG=p8u8OU8LBMSoI= z(08WC{%7vh^ljqIT01)Vu1XcczRJa{15y~j$b|4X*BhfX<HnBqM9EsWvgIEru}<38 z1~BaxK={lz&5`1e-0H7&1Rgt)b0d{@y$@BiX)P%Skb3op#I$j><VRcFV@TC(Q6W^f z6xA=FzY;Tw1w!-CWGyrm(LykC{VJPSR52vNv}_YH?nKuuQ^uV5KJ&DVkTb&gTv&iL zKl`<DIohzNysPy$QI@y^r}Mz++nR$F!dUq-mk~D<^O2#xLb3YCm+2s~&%lH)qUrGH zMhg@s8@D83dA;YTwc>n=#W*KsHpYLKc|6|2wI^YOtkq>TIJdh);BtaXLf{gBW&z(K z_$R-0wjCbKC6RqPkPm>?--Izlj3AcM3o-Vg1A-J1L_Q$Eyb?97GlIeBw7DBc=mP^M zO@GbcH*;CBQ!6ihggWo&H=~qag3P>M5IiZIEcc)fV-saQj#ZE9eDS4Y)%oFJa<m+y zabPb1gfCr;P+ctn`eA_kEU*Hgy(X~hq+(UNXWB&%Qa;T0T3M7^3!PX0m}mv2z=wHS zt(~{MAMrFwKE{1eN}tQO+F_rjiZc}3?4;kYHO*7LC)f=LNVzHCI&gyk#)V*L3uNPY z;$eydg+8|;Iz&dbWE6l%5L>Kk8rIqERQ57@5LqIQa<mfKAB!%R&V~KcT7$T`eu|Li zNueMMGgxA}AQ>^m#^D#OlSv@N+-_qk&xlNvnNMR7x$ncd5B4%0=FmgzWD(!<bTz*l zrQvvJNEZq4c69#J<~wje_Hk7A!_1L$_XDBO)m(zv!MP(^f|fXkM9^ca$oL%X($9$5 zB@bpnWFMcUVVYrr{B4q)Afb9`=$$=LLdX%K%d$C47<<6DlqAD2?TKR-81J$5h=KM# zM3qKim(>M&7O@@RYlpuLL>hlQ26N{zm{x(2C&HUuP`*@&aXtknfVj_?VFPg1My3s5 zy)$A7;6>OuFf!xu7x0FygUbWIKFL}hW+DbNW{76wv7vQGrwQX?k8}|AG^}Ew!^2OB zK;!J8ajzH;$KhcYO2sQ7@hS~Vzk}6Aly(1B7Id%xW8EWCSoR$(eO8vPlcgK%(nhw5 z^|8-{FP0d=Kp1tN#B{MzD^5|aLELYo18ZJ|99hdD?%oGLylD;iQqC2@;mIIlKpA|V z4OvT^9XNhX9Li3R)<>!Avi&IQ*89D?oaK0Tx#ml<Jz^M?;u-+`DAOF2ydy=*c2XV$ z;CSiSz0nE0*pJ5;2agq!$JOLf#o46ezJZy7-a8q*EVXQDH;lVVK&MRJy9S}ziqx_k zo=RGl^J`j8EY)Ir91Nw<dm!YHKHw3}vBe>;v`ds*hG(w&+G=bS8%5}bh7BJKRfGWF zyZj?OppWi?ZxbN^?bE#2lHCqT?7^D&oZ#L+(io3X?Y<KYcE685*e`ZJ$ZoIas*ZH1 zRKrzbm50w~>;UOL<W>L{N!WP$Sr7wQ%R*s{k4ua=bD%NKU?_1fu;wI-bNBP2Ik)v8 z1LSS%G=BpEOXFXJ=4v`MejAxWrc+1&Bg706ulRk14kIWOuNtkHWz?d_<_uu^k*7Pz zeNOTmk}m|tX)b?q0%C^BX=J`<jao?X&jIs(i%0)PyaUJ4Z!xYQyxHLDPVYNk)AVl5 zKM#Q<jxDdYFK9G)=&Dxz9UW{v*zlIPZQ=2slY&i-yf1IO{|tkLb5cCE3uY#XYA`&k z*DUYDv|F0F04*Q!ow5bS&#Mo^TlZdnw}$!dGA@T2yW9Nd^#-mA>UF*PGu`j?Nw=uY zzL}%>XTwqkclD1~8&1*!*JNp(G2H1GJ)8q<ct<}4pBz%~^)Fg&egp}-cR4lro_&7! 
zFWURDv%My^2Sv??F229rr>%Bt*zQyP+LfP|C=x@YX?|QP!a*ZmTPJJT+vFP^>{wha zV`0sh4|M-3PW8P0#(4kyB)@w^j{kyT+551`N#g4dK=DY2xmw<X=0O~v^Au_-1^PER zX}j_QwcG^_%hx~4*AF)vQK&Yedl2~M=+E)sCl6N;hUEsvO~~{6QbuTg9@b$A&UfV= zHh#fz#h9IA9qH?xxUG855aVi~t%I;O&Ik}pcMwb;Vq6M>oWL@?p6N7}N)qc;ATZ!D zV~8<@Ja~hRx9dHU=3EDxa}$i?L4(_^{NzG>PQV+7W31U+NpOU<OY>Hs5-+zD_66 zyi;?$b?vjLf%?92J(SZl-8C+io@A2vv;@v4Z(IvB)ep^E*==kBgR%#gkl+Z!i#O1O z-cK>e*9x^LT+T3Fdm0iz7`4G*tt4&B3rFX@_q$*3RB@Ic!?S$8QPH+lrNqZ>IRQ-i z4rYuqVN6>%k=k2Bhg~NVC+I>)GDZw=-N#c5gclBZLYhbxpPX^Eyi7O9J0mz;XBdc1 z;eN@UzPE%;qCR^EqjGx#OEU8OwQ523XRg-&K#{ou4Eo-8Vb16K7pr;tksN<bipBw3 zOZz`|+)JSaaX5r{SW*la_H(Hx%XW$+#ehNok>C+DTKB-+Wbg9movs!WLN!faFcu@` zYN>(=a96%hJL-c<aXF{;QlR?Ybr^4;s(2YP9Q|Mfyo2Y_Ns4Pt4;T>v-S9|TZ2j<R zBrg6Oc(E_g@)is<tYBB@b;vE`#Se?TW8BYjtig)?Mg(+5HOnz`pzF_!RiMBXAbAAu z@jn#H2H{a%EnARjx)!Gdg|1Ecx!8Q+^xepz3NT0R0u%Fg7_oTo&M9cD?nDs{ildi| z_XNk9xc#0K4ae`M6#4SfivAhiyLA6rmRtSh9^q+oFsBbRr@sNa;3mP^A#{ih#A|Hg z80#_%u+K1#?xb){=mf6X>aR$jnQR1ik>O{?atgq4#b#)~p@SOp5xXDWXvIA#;S8ZJ zt@*QL<4jp+6tqP@g!^0G!18!}hRpJbnA&35a<%*k&3XM}3iQJTP>Rug67{1StsRZ$ zdi1w7|K(iWeCKHXYgC$@zsB%nW2ip%xMsa(?1TneI~w+S)4tVKp%209uYU#^uxz^a zZ>Wnmo^LknEXe-UHw!OXz~Cdz|1&Vy5A6sB%@%{1p0xd*Rj41l{<;&;D_rd|9FC?9 zWrF9)R#}guOuUcE(|cPxR_yLRAobVJAK{wXY`pHAKt}{?=V0U3*%D6Rz-((i3j+Wo zEFm8EJ82(0Z{5N1f{DbtS{@ZM-kqxX!K^mkoBg?~h4(`7qOJR6*{-L#lb`)5$o|gN z!reIEE@@(J{}l5&RvcKY<IrGUpXqSKXAUuRpyj;2hXW0YaSn=cLyW~@OvTHQm6Bq- zgJS#;BOesj%frhhLArw=J;6u;LDTfcF=CQm0Cns8w(P?DP`nF0`YzstS}#JgUvjlh zL?LCEAQ*^Aj74Qay6mCx2F(xCY9=_cyRE}fA>_|I&AXtWD(JcOQE0GfdK{Z<aAElR z-DC5|Exb^LhTQPzYil=_U>?8eo7Hp;=5VTOQ*JIMuyu5Fjj;Vr<0Z@L`!T^}sM-!` z{4v&jGJQX%=1$`e+Yl43yA%7tRt$v|)!zZ{Z={2|Z|$PJ|L5X4H|OD_XeDAiHv)yn z#&Xbtb*(va8239dn~mEcY4=_I!Z=~?IYkKktPoWHqG4F~Po@2dxU(LX`;mdeMX2QG z@R|FK<D`rSIByvL53D~aQvG^N9`FcUlWMFXLon;;usZybV1^;j6ZhvIUk4>?xbPj= z+J>BQmoT7fN{n@!LVjd!WW~TW8OALZCc|hT#(xdG1W#-h3uqbv*X8D=fI{|k`HlhO z1wWSaB$sGJc;b>d0;)@JbNpm&+gCASz4`?T>2DD9#T(!3z~u7BX#Vr@G9IsQf%B$q 
z)3$vH)C@%5&w*IMM;Uy}G#x#jCJ58M6a=!=8>l)F=J~WQQPaPpJMN`Y{nKdbmi&L% z3WR>9H|hhg{&Tbgy}>_Qu4;#+CoK?ryq3Lv*~y;Ho!Gku&CBoy<Kfji@ru2Bo%RG^ zK4)m#-i?(+h>pCjS?`+Zz}x=q-|au_=)b)G)^Iuc-~K<>e|{dio#W3H&EAW_HP40( z4Cbc?c;lz_ZD9jPEgRr?dHe_t3;eMuc%|Xxy<FY!rgdn|2YS6x1{PM|@xnInxoX@) z);b6ARf&fmR9G)KZRag2+B@ywIL$v%(=)ZsXj0+&4zBQQ!)@b_8~))5%qMFtZ671k zg22jMN!{@x?r7c-@A2bfw<|)M!i%-+ZjZjN;eGD=EJOO)Q}C(!xRipZ!v*?E2&}`C z!j@7U%bsmdBXDh{YuQv!=Wbz7iLenEQ}-t6T4swK;uK+4*dfO2ds;iL574=sVVsTe z>R#nAx$I|_Kkm_Yd$xTTYwp4v_2=|rXQN-$+x?7l`l0{S`Z4pbzwYl68k6&HC2Ym( z&qdTR4f7AdFc$a@Ev=WD*DTEsb!gt3-RZ+;uD`-9IN49=dyv$(RJ1pHJN5wqG@8Z_ zBE?{SzJkuuk^K0{?=eJc`~sHVhS~FQF7(!46CWVR^lW=S);t?UIhT0!qt=HwUZK4` z`vew#Mrq$_f$LS?g{S}N<t=rue|fwy4E^OLythDww+Wj5lC^*LHzeg}J`Ejfo%u;9 z!_odl9HE9&yRWd~(!rWO$m56Co=6*;t?AF>l6^8_aetzA^oxQ(?5iGs7P{H+GGbAX z!B88L3Urvq48-jZvAx!(&(|kCkB@^jY_s>@n*UmS9^`Md4IH3q`hs{c^Xd%<US9KT z`zVGMmVAC7b_RkQwmU)d5BKOJQn)47RjoNc8TSr%VqFm{NZ;qyy@+qR+Ziuiltg;3 zUXkSSqh9M?--+0EQ}Ck=80_8xJ>^v`CLbXeI;<ykKPbilLYgC}dPEXgFh_`#YCMAT z3?4syglWJ9=L{5x!xG2qu7=~d#!UvL(S$$!P9}cZcQWyYwTu>^=i)5N_@(4I6g-_L z1}`}c9Pz-vIo_v(-GK4Fl>3hV)$u+Vv;P0#@zyX)#dvSsY>oF(j1tFt0t&*9cRnbM zHvIV)<Glbqx5oQ^$@4Vu{J!xH=g+<;o_L5;>AyIhSP!B5#S@o2PJR5(#1mXkp#yPV z23J;tIf{jcJ0`KS?xJ78vg!4w7Wl6<x>M|UaXJc`R>orUl}mpluEFzdJwoGMa1Pk< zedfWA52w@E^m+8R3S6^3!e?9zKRSzzV<$WVSIOz{0oPymC|D%pG!z9L?iSoXe%GUa z(olwd7(Wm*->NdfoA$NFvvgFz=9lg)6lKY;({SU8{ntXU!TLUk_IjJ;yxz_1aIBzO zpayGl3+UK4Z1J@0CCR=Fgm!4vxOx*uV;Fi|tBSs)u^;TbH~VWZmL}J%m&}EbBC`XV zVY3mrJ{QuKa|u@_I9Gmb3LmQxn$^D$>qIs_3HOlLOU_Zf{*hjPrNOWS;m7kLULA%_ zDn86>Kvnp}0YVOlW?T-V$0v$}K-`VCeoUzuy%<t?UO|gXLoXQVg1F&yPuf9`+?6LI zgBne)o3|m!<Nn^YW|QEt?-&ijpNnyI1*jcQ_Ro08fE~LJ3)z07<ge6BBea6+Ngs+^ zY8Mz<wyEjEQlOO=jYnX&7{W6st(InLtG?u5xo*9a&8_+pf16e&Fc5L+<e%B$X}&gD z(OmgEJg&UAIaFxNF;Iw)NqaiDm^JtV7&74q@ewkFMAvbmh;f2Mh&X7c)i`B1h$Dtq z3{{9jVO*^g!JBqao>jPRSqYfyuKdi^;cvjGzctDDqxt$&#rHZ6O;ko9I1ACxg4e$o z)_`dLUG)1E<8hb_`km}a`^2*foNyTO2V~Gh)5^j4yeus7PFl+w=FQ)+7l@<eSqap^ 
z0nr;!2^7uO2z9&;mEf@Y4RZm=`;1HMSNrDcs!!IA1S9{Ctxw?vp0w_OV%wt{ul$9I zi5rtI?pc+Hb2kz|B+Y-^$v}&D|1ieB%U1JAg$I|dO09!Byov`bHtBc^CBDRsFmDq` zTIL%a`6fHB-yCbzjk_<;YHSUv^5M5186Rwfd;!THJep9D_1BV+R3*#@8u58o+MG=o zz!m<$X5MNr9&j+QKiVWJbhGD}FrnUB%v~*W_)ZIL<0AwogDEa7tplSY+i|xrU1V_k zX*4pd*@kXpc09qBG7sX=?sSN!e4#z4cdf@KKTgImWuO0$oe(D(cgNO}1Fnj<`~uDK zIK$NvMRFWzXaT&A$LB(9(d<uw7@H|_^RB2SG>mdo`90eA2JkvS3wZg7&f8PkC_N<u zL1st$S2)dpTv?P0={Zb|Ht=cg#r^q5P+fdnqWxviS?4ID9z!E=cB%k1e}*`cIu6(^ zxX-7w?*}D<F@|5gl!wQSI6r`zx8MrA(%x;A8*iepyAq#tpEv^F@xh+9@)|N|Uwo9+ z@YZ3w)#9U|+;TGOJ{*$bH)MT`6gQHRxAj8Tej9)h^ThhPDs~*j!e)^kV=1s8mT$m0 zl5w@Lj3>pN7~?s%huV8&ZBmJMTO&y2kBghBazYGZ0y6PJ6Q81MA~{FYHe)mpvR|4j zMp?X&T|oY;c5)*2`(`IN4|+Io$-aU|h&RCoG=Du-#iL&Tbw)w<w^&Jplez{R=rar8 z_tSbkt9pg|fY;_JBD{7XYUb%3^xFA?$u-tyciL<mWxhcS7F}$wig=3wn}H6j-PjsI z$0d%a+11*?YfaOaAD@cvS~!ne)V_~Ig!w$gD-B~9K%d?34Djr9<LwRTFGBWwjI|ga zKko5B8JHF=pTqMtO+S$>xLN~{3>WiehWs52q~l@g+<8`K8mMFWLkNt?U1WWN(6V-X zvlp-3@`yIxh+)el^5P7~jW;W)1$^`T2}~VV`VGwwJ$H|26W+G*T(AChf&Y4ZtT*U8 z$K$^qdOt#+T_Ak6(0dGe0KM-m==0gnNS}S}7SzQ@WP%Ml<?!+Aox%^_lMw6shR=(d za`<wP#m8;z{lBZVh!+bj`?w!voc>5K{a>Tu&|BD(9%$ou(USG91D%Ce7(fsO0w@SR zqX>!%m99)F(XU9Ug%VH4KcBHhjDk?4)S-dqsMQjoL~6794BLJ!ed3jhtxtR|>sv<Y z;dJ`QBSIO^J9N6+*6Akdw7VSwo(@}XnF=#!FS7T!R!>BJ%P0uZz9;l`8a_&?e`<M> z8^Kqc-dU5<f4BJXg>XJE#o2E5VdUh=F%Ri-X)mzUUxShLIsBhI#LxfJPJ|t8%|MGD zeZMzQn{2$o7Wl#K9X`DN&W%SAyiIPBOp`?l*-JWM*C^6+RldwA5eGn>{LBzXY@V3k z`gg{W!(@jKtiS4;54NIxym-Jciciwv<D`7$)$?ai(WDVp;0~(W9VL#jII~;FU}AVN z18aG0=EhGE0Hp!L{Wjz`|B4~NH3QXg$H3M4Tr36KZ%vmG4_dt84R)BF2%6#Sda_li zK>Nhq)$%|{f1FAEPTr%0*711^h>5AZ{3|GuiZIr5Yz9tMd`g>2fY#)_`cAi{Js4f8 zkq_O=*;|W=ix1qrB$ROlzjSXU&Sd>R7cZbNR}d{Vo?D<FE$}bH{5`_?dmVhhI|Jr# zE~NUN`71u+*09r?4d&J>9}m8A5$eIG-{1xgV+e#B_y&T(9$RO?i3u0nBVDlTmNS47 zLqNgtVg76End{MK<F)v2sBUPX(4K1Csw@y{=gVP#PP{fP2R`}&c9ao3JhX2(eKZWG zmWH!MSdVy>EZjC~^xYYOYs09??Jf`V@Ld=4(In11#FdXEy$f>RDuxzzQerIp6O3dt z>RA3X8*jvMr&Jh9oyx$K3y<c%0O9cqn*NNYds9-Og4c{MAu}|YM$=5q40>_;Z3pgi 
zSltTT-H%7PZk_>H-?Pm)L+>;vA}?2|l|wBTr#}aG-#5A7G{od;<lwh<;AOKf+tWE) zOfICtJdlPaS7A;kJR&D0UdKU6h13rS#O(3}E)iA$C9ge1ewbZ&qxX_;Zr=pcX@BSi zj2_>e@|_qw<0k0Enq}4ugUOj=DJNE>^z|(3w<3LuY3A{xEyqx_{sWBVY{dEOXU3*K zAPCwR`X0|%V;7bGN6dO*&1Nt8TO&|~;lk%ORpBvVdtSUp#cNToc#rz3u@V)b{82ov zf$Dp#J)P^;$*eXV%{B?ej{#_@{dB<(FWl)EJ~hZ3hGlLS108qV6crla2V6VFr`xS> z8T~1x<JY$tf#0J7Z(~_|0plqu0E##aihl2;rrCCet3{&cu3Htf%tpUN=glvK2J!Zp zw<l?T3^t61!M^)G=UXdmV+0Y(@sV6Xq=U%3n%zLd4^g20BQOn)#(4jC0|Z*&5Xg1w z2zoTZQ3MBzAQDB1H@`+@-Uh(b<fPf_n`qaQ6UL7KOAh4`Bb{C0#X_gZzs2Wfdjo~| zknzR6xU>mXwUHkJ<C;HG=zRznpF10RX3s7(5F@5Ek-&f4R)M{FgaDl(qa9j0L&uR) z^;d&w_d{SNE^e|HP!k-#(A5H{!Q|}(<D~~e-oUyRcLnzP!V`$^z4?i_i4GMTmr=dP z1MH`U(0EEU%!C3tnBz!28GwIgFTSQgKd;yP9U;dz$E@?`QXTrd-tJvE-{8T<5pl4A zLvS8|V>un{^^fZ*Xg%nw_26q&a7vB?5U>CI<bv#HePe|`!kHPr463leui<Fei>os@ z9rk3u+Asq&xNd-N9BFvd<3ATs7WiFaXwxn>8lEv0V1O_^wqb8R;AVh*4uyh#hda*g zavt7STr56uLV2z4&&4}Ky!hjn%6yKl^GF<CWWRP4&FUt;<VJiC%csT&+9w1)gV#-v z;{HL0cy-GoaK}5ztty7zxP$gb54YPl3gZ?p*Fv0<7}GFrp3Yg)+07Epjvw{m8H(6q z{puss1@rEE8Lo^2CoUvK_hZa|i$Aox*EmTqSuQ}~!2bQuj>mJb$N#JGz?+l9VnpQI zTMV`}8fD<j(SXB{qwy+qWC(XB^lyy=FA~J#+KU~Z66E*;!CA*QGl!*7ifb;uB7b7{ z>3vGm{JBXv{FWCnswY9a2&3@&3)fR}lktgAjAFyb`7IdA*BeIhyF>P6zo@xp@AkN6 zW6Tc8*0n%!ymdZ=8rj7>w({YE?S5nWeHd;1b$@~4FT`@Sqx;Fero}wSxd@I4=Yy}{ zU{K5-oo9nMfyCKh6Qs)XkIb94NUc8`p&)L@_3+gs7#%g<yBD7komPVU_GNWED!SS_ zIm1!WOO{mpTHzhbsxf<1E&GtKfwdOE`ed#8bNUXLwrj3DXRSUVHF4fGoJ?|X<tcln zS~t!*CxfwmX8eY3UyOig4e=-de5O8guYB8$@7b!x)Cui>LtiZPPj+|fd~}h+lWdeh zGAJ85@|_nUa+F_>#JlW%r874Jbs3ar$+VCu1MxsE&Tezv_@`MhIL<;pGj5}(SOCVe zl|VtfuSkU947`_Pl<;aRRlM555KGyu_i(uQfJ+Zn6n)D(AjGil{w-3jFX--o_xjn- z`2LUy@2O_zs&3b9o!$5JDHG?F|I7CHX9)h^V~_K|`QNw4=lZPpf5{%j+Ya6v&;pOF zg#pdeF%do~(foI=qdDRG17S3-mP@hE^XLchX`k*S+J!jnb|a$b@Fix~s`a1YcnG&w zIvNgnvvC3OAnN91H>j=~-$SmzpPy9_sL1f@A7Ih5z86jUr>zWTJ-A<122BY2-{ni| z%m<RToiAcz`xP-#v0Ug2)Xw}AK@qq}^#ra><;u;Y6(Q<-)&@gvG7kd6*P~(YY_xIk zoAV2!n)6d=>LvEkx?}yqmo*mn-PmY#_*^;J^VRxK0pjeVqv3NLaJyQ(LEI8vpt^3z 
zMF#fV{g~zV@X|dc12MXsjPiTo8MV)+pq@ZJ+(I6{NF9F)aop5>yj2eh)%<fsWR{p+ zC-gWMdfeM^2wx-Ti-T&HrEym^%p>D29)q62V^Da$-Mr_kZaMB(ZpFBt5um>oleIAw z>#p(M8|Z9cRtDBuArn&JAUygkH~wi1lil<OnI8S(EbbCCk3PrEOd&E5bY{Ge8eMP; zUH6HPnLl5AD-IZ8pDX)l)UruG&XGYbM&f>~E8sT_<7@X9M*w~OJ<r)+Z}wr|pq%VE z>N0$PDejOvJ1owtbW`+{8q6>@BCbU@J^qVfS&L{{RN9H%XWDU){z_jvm-TPw1L$>u zx~xIk-a+G|d;Zh5cO7i+Q)4%k&uMa(=(>Ht@gN{+dn0~B+nZGw1<Mtyt<(JeUE?u7 z=I(~~gkw{TslVdD$Kq!BE^`$3p!gn;mIP!hFPtyDu=ypnadwS(M~$U0V+@a}WA=!e zH1K{krvahiGz{y>4t{9rrhVkJ-`I;D;qxDt<mf>e0*h7L8yMX)QiY!njPFG{&ztrE zzU6b<w=rDSjo+<BWyHllw&NA-`!IR)z#}-~E#CIVC7_0n`<7JkBGd8xv|sFiZ81Q= zI*aTv4}Xo(`Ak??SyNfPsAfU~ep0i30)MD|S!uOzLQZXM{RFKtx3I2eLVa0XWv#D% zLVaD?goO>2RprG?OYsBgjp=2zwKgwx{P>0VnfJ=8E7X~uIrBW1<*yVN{z^T{%9hlz ztX`ev@#X`r6nFwSvgD&$&-n2*wRIJXDjU_px%sm^^VQ1*m(ubp8|r-v2-W*4YSno~ z`Go>Pn>C9T;a5J@3ueulmtV9%va|W2f+g$tMKyKEsrc>F`jrj%q2RieRW-};mp}Pk zg<l0-08&VqqvmRJ)6>%}nP*B!v~^WOWrZa}ZC%acx{CUGfwq3#dVC0HNq}FBt*M); zF03dQJ*}?ssr5B=zKU`cLhz@L)lpWz@k696<Zq9vmDRoq{vdB<O|{BuQ&lBl`VV^S zZY%2S3ZD-@ZVX;lj|H!qI@Oj>U07NN>L0{)dP0aVRD)5kP3LcDr&gCPtw>X+PgiX$ zelIsQO^)OE@ylRIY_{JsRV}ZqudOOwp~_;5wR)`KSk<7r%4(HHBM2p(Q0C11JYhzH zQ0Q1_OD(IZ#t)5Gv-h+`H5KBi_<h$pp9%rfLVSRB7^9SO>M=Y(R<pFGzGi8K5Tia+ zzt4CbJ)8=S<A;ck8AGd^3gsVQ+E(SMDt=>{6{X$REK(O&RKpBtE9@@%VVSU6T3t?q z5*?BHuqK8>40Wn{c}*=XsI+Q<DvNDR){j%mD;AYDR6)#obx~<my`@|+{~S{*B0n+t zsJniQGSNpVPru11*z}V}42B#Z)uKIylQ1K}um8-Qb-~<%oTC2ylend@s)ZF(scO+? 
z_&?*t<e#caZ9)TmiY_fHtEnreWWE|XkmaSmQZ-fT%QBvS#b5Xp3}|)L3e~ryLKSeh z_7c8dPT`pxd<6WT;A8u?siB^#3u}B!)LKl5dgxyWDb<l`2}>Ij&81C~_8wlD0&7{k zP}EZRXL|x#3aEjPV?(Os3?F1MR;_XBbGQ9k=w~lwHC6B<^%dYF=0jB_T7lf93+u}& z>+4JDfaDylpXRjf(yA(F8TBpN6At~~Z%^0;ByjTl*X@Y~qhDhH;0J3|dVJd*mR7G& zmzUPjZOM5Qa@Eedeg4xfmg6&i{D2k6s?|qt756Owsnrz~<@NM8Rh3ICt4r%vz)4or z)^qsqi`!xnRKmdv{+KRMG;w_kJVvr_ZMU3_pykYk)e7%ZZ|OTnI4!AqVIvFSO|e|p zS1yGcDy^<*sIOXq+Rl1#SHjQJ7W-P23uT1*V(DB4kEZ@)x>{UZ-B4AfBsA2QF6JT% zE}=yS`6U%qwF$~-;3tGzU|}qI4NGg;XKQXGD3w^3$vD(Xy0zk|&ea>P*<Q6(ti39K zcU%mrU7(IsMmm;%5}^TAgbI+SULD<l{}t=^+41}1s62IQeZxXzp`V88pI6r`uXdQA zsx0EKm!~c&AE%C~_lXrbLs?X=&X}%dDE(v$EqOAm=vQdtDD?8WN`?>6!CXsxE~O+$ zP{3oRL%ZrshJsElNh_WT^A?LY`c_+04}O@#%G7Y7j6>BjQo<^GY-oZoB;6Vcr5IB` z9kfa@=A_6t0z@S=&WbZmwIdD{<@V%3R!mQ%1Evo!R$DS;R2VWU!aFL$fho3I1tljA z5~(9A$Mw<5St}g!B4R))BLL*=0kN_<Rx%R_jZ;au06)xe!JK*Mc;CQV6APMjOi7!K zoOLAco3B=!Voy+Ki5-d9Wpgel&hzAI7buzO6V=NnW@JttpP4a!V#Wea>3+|$n#yuD zcS+scip3b#`l(Z;8=F~Co?7oKpE|V$)>=`zbjPB77lTvk!$tEoFDr?Ijgb*0B1 zJ7Y|mMdWBfcBt5=_9vb?wG?h>adk0Nok~YV&YU4^$cpERF#|-2gWWONzUHS+?aN2x z=wRM2yRfFF%I+c9NNtQwQ{W*~r|lIZ9`&Ubi2YJXCuK`Y>&A|=R8xsLJV0TxTb4Te zivJaCX+`pO3Mf(i%0oJ{dtnpVx*Ug4fqiuJ4)W8si+y9PSpOL10%6;S9OJ1|IawNf z74uNBo&mYV;fE<R)WD@~E8BB!)M+u(?#2PZL?AZkc;|t9&n>}V61KI2ZVd+cz2+*? 
zeYhT0i_LTfE~Cx7%lbK#4D9ROC~rc_3yB%O#sv?;NR1d=^g`N&SLO%q4h9*dtov;+ zxCdzoj^2`S>8%!N2GS;^P2j(wJs9jj{o1X;U@y`GNLBDD<ma7`c5Dj<*CI{Y6%1y9 zZwXTVa&6&m@Zmj*Jt#+-@if{+T8mV{H~4fQ%|M#*41WKSDL#4M#`IaxA>HsC+QFOe zb<YQbB}m)4g24?)oA#nUP6V6qeLcKX(e-jLxE^W7YtRd>b8Ywlaw9GIFc>_5)QvYu z{H5C-zEMKI_<id99>5-?8;~9_Q4e1wa^T}&kY54S)D3<}Yd=Bz?0*mHLBHzfkPj)J z5mur^T|HNcYD|hcZP?)Wwy42L#Pe3<4R;2EV<iof@brq3f-=%~+=#wFH%emCti<H= zTtk<~H!0_yI&JJ(DI<Xu_4APKyEzy<k%*kcq<M+)0)GX-SAjoO;+c%YKOg=Xkc0I| zzdHK-#N_6fyhOD!Au%~GF)1f8J_r5hLm3{xzj)Zv1|mS;93A~Inh+29?Zm&eDE}e+ zCV}>!_-Fhr7~F_dnVFb$b97!}^6HqJM723KFEJy!HX%{XNlZpdGed%;fOH&ez4l3r z8S4o-W+$pu3H{_HT@mOugH9aI*mS=BbY9S{1fB8}PN|qej$5N=CMMq!Gc!>Q#Li4i zy*Vx?F=O?hoW!i=!Fh>IQ85KkiCIA9B&LD{q^JS`Q5O&SJqtc6=7a|}|3mn7L??t} zNK+K!bUNl#8^&o4U`tQX%IcVW>WQLS3Syx(PE4b&F7yh^8yr)c>Q-Vb`?L5_H(i*U z1xRC`UGKKW9=dOinVpzX+;8m5L320S?*YAuyrVy}!s^Ez{af?~!#6-ZGlv$0#m}+~ zj5Znzef8cE3=WawaC5ZiC+x8~7J^;d-yEVb;(g|!4ZY9_<_^sRh%Q;W$sfvol!JdV ze9=hs$I*}Q<6IkaOH|ll$r*Ze1U$GZwCiWl&VlX0;DbbjvFpW7N#z9N{E3alS558+ z26=tOsy90{A8PEbunZ6BCxLzm=+7fUw9`kwasWN&Z#n4KfPNwA#e9f{(&Fa~#XLU; zs1}s(LpeWiKz;!l*UhjAb#*L8vpH^dqC0wRLSiarC`QII``Om`vHnifU%T@^SD*7H z7XEEM)`qM4&qFO_XRxZ!JS>Sm5Ox?G@)UvJ8muur-`Age2lPkUBjxd89J4xu!FM4~ z>|2;8=<%)5`61tsPv797Oq%5z3Zvf-YvXxCg<rTE{Ptn36Z~TT_<zNZ`ksU}un}wH ze`dVOQQwF9Sy)r^`|CUWd?<{5EUdoshZX@p>zMg2?Rq8Jc?<2RShMGb=ADN+g#S9m zzs^o9q=kiXuen2`uaBVI=$f!Kge4TA{SjO@5hGlWwK;a2t@nsNes*G4evBRip1H^L z>O9NZ73B)B+=y5s^{rs=UgRy?5IhI;E6=hGcQmGb7{90u1;McmaeSYJ+>PM>BJ#1f zTXNqLjX56R+H^D4$kf$bo0<n*XBjb?w<hKJL-SB)81%mRwqWot(0lBYc6|^9sC#Gq zS&6kVe}ucE)5hcu@yQv=e%_6>t?R$3pPXkKA!pa^!Qdv&Go#=52wjSK=A8SrF-s%) z<`LlMWlV>4WEk@HLwq^#S->M?3qADLlNdvyf0rU3oAf8S9>PYs9;yf!xQf6>dN5SU zxgsn4??&+3gL?ep1jibR^(y+ZP$fAJd?0zB?Y|uienfiVU(v?RjCWRZEa_h}qGt~9 zM>#`b-Tgf7`9onz{V-y+OT~H|^2)MRD;~Kb0Wm7t{UX#ZeAvy5^H#@t*lr&D`c2>o zOo&7N(6Rxoi1{=QJRSnSl6QW1zuy9V7wG>d`%SwY27PP$gTWbCmtsd?T%q$@xTZPh zkH+~kE3qvq`h#Qb|NNmbRS3?Gofb+F_*a2{?=OSF^dEtLn#Etnjsxsg_<-{gH)O~B 
zNBCGTG+yMp6MWsPaGwon?6e2vIsv(5Ogdmb%uYNI74rlq#IbG!E06VzKvIqgk#$or zxR)qe>2rzk^b;$AFL^f@tRTK`jqB?JbR^fff<$*!IPWa>89Tve4fxQ_JD*@MYS=LN zvXOC_iiR{8Gdk+j3CPThI}jc9Np#%j(J{Y{iQ5&Ubj1J*FL14>{)6d)Q44oR#qB#y z`7A2#)zg$4qi248s`66I+UOn0%6IXP$02z&@&2esPf<QjdV<KuPK<l|H08OIPC)*S zljA@gNRGSt6lG(wi?qp*;)PRwgzSs_NtNEy$U(|o0Yp<&+(S`O_eLT6WK`U?C`A0o zaupR1`kU&Gdn#7BAnt=$#kVC2+0D^$H^nJyV}3{ARYp1oVoAb2p7wUsMT+uFblj~m zQHP>YbZ<=D?_-olr7~=uxJMi{d<|RLgYtOg@wgiiPyiW-&IPhC?v`w2LENFUl`G?3 z&r+JBPkcE``7HXxo3fO*V&ZPdQa+Da6MN58MUT(;AWQjse90|Sm8PNh9=9Pw`6_uV zQ16`bt7rf>o<0i1Pn<pq`7ch716ANh=f0b%9Q@H^QGcDNtUn{}`H9L0YTQ?6D=j1A zK0aGnKPv8nvz0xg;vPR+d1X{w&lKhJQR9s%%Ht_<f19klHahO*vy|XZRnR|`8V9By zq>h3Zccq;LroR|F4w&c0#)1Bgu_KWGdTd<hS&BX`?(50Qz2oBcPFCI!`9tI4KAEWe zZoCToO%s|a%7YnXw>1-;-<iqmrYWOQ7aBQlLrh21bBW5gv2nk5DQgFfLjIY-%-<A$ zCGt%P%x_G%0aHM^={N!pA2%BL4-(1DMMO(d2TT9|^ZzyQU#J0fl3Q?Vn`otqgzYA5 zCo(WCk&fKPHw=JplQ3Mcc-D_{i`0&F^86NjiojwY^5+tfJ59n#G8Jb;$Q!c!bcsg@ zD4xUL1#6gD8|`MIV7iE>X_#B22<5~RALAA&mVWX4hZM|SkLXYNh_fmyEB?ZxS<i+N zk=kp?9>i#TxQ_9WpmMSWDhEz;i=2(`IMsq}xmBq+EUDtL+y5?T_2<AT7JX2{2pPnK zqdn2zlVrI>|F<W-QvU-R`cRJR5t$B3aSM`@WI9HsQ)D_zrWea}kxZA#bhS+Hkm>Jb zx=E(HW%{a2Ka}YanGPB)+n4DWnNE@EESX*`(?v2}Cezh2y+fwIm+2;%?w0ARGW}4d zM`Su^jBH<~V`Mr-rn6*vu}l}qbeT+7%k&PJ{$8e=WV&0Xugdg8nI4hppr6Y2WjaQt zQ)D_zrWea}kxZA#bhS+Hkm>Jbx=E(HW%{a2Ka}YanGQ;o?aOqGOsB|nmP{{}=^~jf zlj&-i-XYW9%XE`WcgyrunSLnKBQhP7X0@;2nVp+ERZX2e=b|)qQhHYUWR)+_GNw#C zOHG}NcfuOpg9`lk%qeM#l3u@JsjqY)QeT~wF0pgs^>VF}j_Uyx={Yk!<9(%zCA7G@ zAzi#z8edtiq!U_FTE9d|FJDm&epZUt#G*vL>+S~>qYN+nOIbnYYO8!oI$uvI>As3a z{Keg_N|e>`LWh!Gv7~rW9WHtmFDXY|8v@g^vf_%yvWi+?F~qN`uy_=g*434+uz1+{ zE6Y$3?UgRAEJM8-G=V?qqFt+vh4uAHI^Ga1t*G`<li&ZuAk@r?b{|6&qeLr3Vev7F zFDyPb<j0-NV&K(n|J@%yNU{BVfBax2oPUhLOBma)_NO1B*#5acJ|X1C`{Rcq$aFhV zA|Im+Q@X<9k5iHyc4=3PQ4;Yvk#KxmN=aCJlA``99Q|;m<JaNv$16NeANa&5Cxq5p zXITtZGkcxwk3UiA3FH4`l#`Tj>p~1x!QMwooGe5>M!_m4*HLF-jB<*y0bgJ~@QG1Q zRXW1r?I@wIMijEK%IEx&*?tecW&>8(r+<5Wzfj`G$Z@gbj8(u#DJ}#QZshO_mEtdc 
zCj%+@6w3H0PvZF{;>53$_ze>OD)3Q|KWWVpfw9;9{gS?LvRmM7KL0>qLHePR9%SPA zRN!4o$6am#^BoBv{%$Gd<hP^pxmf&Fl-q!(Jj&T_f#F#jpJyaJ@7(jT`F|kk*G;qV zJUif%5o^hlFhHJsiB~q42s-{uGM~8;A3uP;SmK8az*h=9>|E-(L~v1*KT14*1%!_w z{^Fw;ah5!a<YVjeQsCKNJAM^s38-`2Ad5bEK)a^{AI?sGD(H{3bDn7n{%5(RBv#^) z_>KYiD<pnk99<{)pQ>zlyhJdt^}kBskDYhxB_BIJw;Si#WK@{>8>wF}3Hss6o&kQ~ z9|C`Df7wnr{TvkZ$IjmvG5jYS<2Ob~ydBru^8Xb0s4#vb19&xzAD9L_=kUPivIzLU zMZkX@0q=q#jfDMlEOCn__%4LcNCf7`1Mil8*pB-r0#Ev$YPX;h?+}oCRnjjUU=MEt z{}YEFYZA5aUNj+mzjA;lz5B5eL8(f5Uj+Iafu}qN%G`o-p`^bf0{!|3c;2T7r{^yu z|F%ml{!1kPRBW8W(R+YbC5<zg9|2zlJmp`v)Gf%xyC3AbBk&JKz@LfzTsV2AN5IdI zfY*Vi{!0ev|5-tQ?0UU70-sMJ;D^ICv%g*Qt^RHly~cY};D@7Ke%hZ;lC;C?fe+WO zUqrz3rx?T04@#8nE_Mt4J>oCUqa}XBl@{K%|6(kG;rQGg0lz5%z6*HjbKS-g(PESM zi+dYM;rU-00p9{V^;WXVEjZjO`7{r=@B{60Gw_tZX|zQz$(6T(S8c$Nekti4D3P53 zCTusO*e&1!NzefNaIC*}oQ7pfJS~!5X>ki!yyHM_=?UTcy9;>ouf5Eo7k55DKWKzS zzhQv<KLVcg2QIhh#hnh&pCjp!?e`P{ulB=Oz?Hx!h4Ht`f#-Ose=QM&g|hC32>jmy zUdVrgTTuQ&(qD`~i})mozf|ILPYSPxGT=#HSm73wUrPFiBtEIx!ha<3=i{J-{&}5@ zqvlBbQsBv_Yk-|UBJqV+S$z2JmCuV3KhS?3kocx1w?!xZDhdzYg`A5lyto^HvTtmB znT4M&>5ESZ-`~0j_|?FZf1BiUp5zmKYIr^~BH&j>z^{#f-wgcl=u?!<e<_hITFK4` z^zW0N{z}@3?XSL$Kz}wucG0iZZo!!E!1#E8A0F*emdJJ1!XloP^o8YafwJx4K8at~ zYT<2rNX5Y*=lj6t9D#>D+~gKT6_UUkfqo6}!hWu|=<k*E&4`P`@xNE#5kF*G^n7Q` zXV#BNk8zyl7BJkhc-9EK1RTkp2>8)wgm3rf5%5g{f1J`J+r=^@p6wCn_W>Wyo`<X9 z+Z`PNzZ`h-Sy$y&_}Jssu3Gqk{_qb!q5nBm**L)79s@p{o})*~ahdNXdu5~et0<QO zACBG^0lzx}{=*3P#8Kh-kBoqy0=z0^b0+5lFV?|LB?7I=!j=epUI1Q<%K(3voDyE1 z>A(y60rN$d_<`fn6@kzDl3tPX%l7B_qr<m*3-BCARqnH7x0GK3Pyf*Ic!@w|$byG} zrye?_9-1WnL*Rw|EOZOZgA(60CVaa?e;OWtRs?))1pHqG9`XESt6g!|4{d)Cf&RGE z@a=jc;432F{lHT{ZmAzt_IFDJ`oq9eZzYo6UYE1d!pqYX0e=th)UzVvjau36|LN{( zV%$iouoYlgi9KvCh=t2scUM?$+w*sGGSk`gj3*h{-c^zVZ|!zZ))U+9b+_k-(;m1W zBu+@IfCJ*R2M`fL+z`2fI3N%b5)ukGKuC+kB^M;VS5>e7+v7<#h)rue?yj!a_4n#~ zKh>}N2a`T~x200lJBc`Z?G3FbWasG`(s|zT%bdyM8q)dsACOM}ySe#elRuf4&wo{t z{^yePf0v|ReY3b9-Y7}GQIdYAB>lT89robsE!6~m7lMx;sPw!)@JA)jNd>^DKyA9$ 
z4ab4g4K0h`^`~&&ABBPA+I9zN4IOo4qBk73M|O9e4)@(E$M99xD|GMHzST?zWmvU1 zD8ugTcY|Y{+p29N|4Ib5wM~Rr?_TdN-FW-f4u`!EacQVjjc&rpt-Q=mH|U1VS${Nl zP|l%MLtKK1F|fORr`seO6k9|(ng^s^Z;YrDD+q0*d+#s$V+wsd2A56to%89f?gm*~ zbl?wZ-k;uwuNyZIRHAOwYqYDQW%bRWx+}L1=ZKM`xyKYV^T#9iTt99zuC(-Vdtlk^ zK@A_v#(^du5F!+P7a@f-Shx?oFt8`pQ84dUt<4RzzP+wu*G6!TBh&>!OS5X-YVeIF zM=)L7Kbetw(fWw_vAq=qb1}0eS;DF(!%pnu<H6y+x^Dt?O1X}CmRZZ>vubg86Q+p| z9uGHkFq3nboej)}4btOV$kJ`Afe83fh?pSahwzNS+>0Kli#A+snA%1|yJ6efo{c=$ zI~@fcDVCTo+>ragswbn3M#q@3UGTp8+qj_u47r$rRo5{{I_Y?zb7+>UtLfUd*7e+> zM7N@k!JtP+-duHf6eq?Us^jOfUB6>#B)CMMx2?@&BUH##XG7N)?O}LoYIu@hDaS2> zG2NY#*Ef+<$h!_O+adm|(*Pg*9rI_<(LiEGyE7@}i}vO_7=K_>h2Co4zJ9GuguqO4 z2tZ|OYHjw1!!$l}Pb?W;W|j^HqL`Jo)m<R-Vv0~lZ|?E78#BhiZAw5`7TmyuF-{^Y z_))VEu#va=z;d69=<g7NT6HT#dpjL!JKY{YmkYU%2x||*VL~oKuf3pBkOYwR>HgRs z08)pD>kFvAIE$hpht-Wnvxz4cWFlT-)XRf}CBON3MK3W97n8|3ND^0y3sPB$Zgj34 z*Z}%nmJq7u=G=^l-UEbs*RHp1&cUsX9)Xb2WP;j?phCNMZ(TcZb}GCcNhS@lC)ns? zkC5rm2LDZ3t+(3F^_@F+s8L13FdsMXdm(S)QN)LsxM`V+9_0W(r$@aS7;%$2-2n?M zuw~CeLX)xuu<Tof<JU1pfoGqj=_lR<5!NP+>eafI-Y>2^ty_?%*zm)fhqRcE-d}h$ z^@pB4nH(wsuhj^Z#{I6PFt}kn8o1;J3&oWfSEWF729VmGm(e%kX}sm|CM}3GkKkT) zY@}^C7+_X0uyF^JEod8<kVvuXEcC%PQc<WGQEd?=4U&3;OyL^hiKEW@m|yAvovsCz z77p#bgHqR%k>Wt<a&AmB2y?KtK@>uTI;BB>NNzs}Gh9UXxQuOsyFL^}5M?#T^m7$A z=J?LQ<E~|&R9tT~&Se0Cjr;)h2)vzomYI1}M$qOC@Xi4=RyTBx88k<oUV7G4S>xn+ zvwElOqA*1iKxXkg2U_#oO*e|@{=}c^j%V1}J0TVqjFLNJsK0>7=gm3IH0Kx=XQt6i zac?+2%#9u}tcjY=RNt`{!$?usLho#pMGNAFVKyE03&smFZQTveXFxyOYM^eXqoFrd z*qRx=s%4>bdOZw_FcPp3`n5sC0+!O~nHd80VhWLa)`!BaxVF^-9ODfcvZ|~V^UM_< zYC1rDjA%iR6bRFlC9$$t1#H{4;%J^rO&41@q^k_TA7&U`!;PB25agq2=C)#91NsTo zB|qnyyj7IavCypzaxisnBV_`_OVy#O>;?3U=$9$vf+W&*h==X3`Yz0nzWadXxU&b1 z1g$&So!#z%-&MVq7-FAxL*prh2Seyqgx5pQoqPNe(_t!^rI5n4)WE1dWz1p4Jab3! 
zIIP0_lvHpmW;$=kQ3Ci`_)_C%tovnTnyuJ;SjttXJS0_3ga`JnBL7I+$Dqx$2G$gw z_5w4HvgCnILmGJ4Jw@~mhUJNwI+u_~EFlZ9jBzH@u}<#*jt)`Hdj~tLtM}B~csxE& z;pWK{fQQV+MIvA0GZa{*&9eRqF5ZBn2&Qj^oZAqT*b|1Zn;`UUh08KYoa4wa!C8ZC znPW~;iqf&tlVA#==OEy1s6$B?j?>*ziZUxxIJ4_@RtOAoB{V6=L2bgiRbVVr2(c9| zu-6|lp%fHxlN^W3;=-VnYsD|2b#a-?v@+ZmknD!@LJdiGSHi(|fiaLBhFkk$T|SK` zLXALBz(1In^4x~;aA<QI8iikhbRK(I`NS+&%y5*los?#}it&<bYD!`8PznFMSYdgN ze08C!^`awMlhI7{N~qpq>Dq_xI1lf!a$)>y3FlS4RKws&%6C&bMkGu>QJi=dqYKm9 z=+|lAVngq$cwKLI3g@B*x=X5APFBrI`5i4*AhD+sOKhbla2aAriQN*ryV1-(9Cfgu zGIpzaXwpv(SJ<sCC9xG4SMDA!^Nv%gbDZxK`^E_%8~O8#UE|6$ADyA!>qhmIUWMBW z*dLTDt+;hDKS9i7<O=vOErXabu_8b)y<sr1J{Ozitj{^jx|MLgn96i8E|ws0`_m(X zYDu@tbK&geRRrs5rd-L5RAeln-BG<^+KGD0`dpk|uHQnl)ku=}o&iCFO?J=s=jh*@ z&PdQ^#Ki~w!Pw?Ni~`dfT1wM}3Yr%stNG6gC8L>TnB_w?w`myoSoxU_5$d2MkFaFn z0st!(N-GrYB(|&224y!UEvu+6lc={Mmp+q62~4cO4PrL*vp$*FGp78-s@1|tOKsD_ z4!ytz_|&iDE!F=wc$IJL6-_b0^Z)CGgQ${Qm=AX^y}Xe|p{2Z2?93Klr4L4h6Ww@E zi!2lHGq76wD@|^3xf}aL*jzjEBah+bTvK@#hrfN|pNR7%Mt;$-x##XYMbFXE8N5XT zZt-(trRxHrsN|&xMYx{5so0Uphe`8GT)a(&03`=wOBMs6y$`^%0_=?dGPj_~!nFh) z|5qAendP{*isiT$TaJ5dII=qNY?dP-1TOOG!`j@A?NGg-0l@>i3rlVNsp}0&{7cX! 
zJKeNH+|0b(>fy6EJVo*J*N?`F;Jut%Liw6jvHezeHYGi~?65CjH>*yp`i@+z<G{Li zm18!Y58&ZWW+u%rKc~&k(`iIAxR;@N(*y6Ep!C2MhmKSp>{Z&8E>+9Tc-p}6UE;LU zZh}pyox%Ckg{?KoEjlI~xtFoEn5}u%2@EJawUi!CgY(b}4);&Eu-Kh0tA)`lE_%<Q zT+LzQE~n;1&$<L6C`CJ}H$XNwf~tR?NrSrB+`n)M3}Vac&lw2n$`=oRJV0Bdfvg#0 zDVZl^(j42wW~jWP&zgE>IWNN3y!XF|!(LzqtLa_i7Zvfo%#U5DX?1;e1n4DVXTl4J ze$o12%D9P*o>cNOn<qBNi;ZB57pkXN$fUu8&`wYBs<oBnkc-}8;{2(W4NF@F^jJxO zFX%nBBmw@#B5~-xmqZGbkH>DxS^zI3tGC(;Kw7UwqF1`?whhlA(q<5Q{u~>$7pV}d zjC5WTGZRTP8X)}T*BaTTL~9G3-%C}@%*<5XHo!{pw6NoNKvtIwyEVNupy89(pYdAY zQ@xy4Ay0+gV_Ey`jPNr!0sm6S2qSV*%Zx3%I%9NAzkeC)ps7_w_4*rHmU#QJ?0txo zqj7qgo(zh!Wr-{&3*Vle@{&u(!><)E(C+Nu#hjX=35~r~#2c6^4Qxeii{D1DnY|3n z;1o#Q$*#xoQmYx)q{_SscNUv7yzpC5OXXZ$)xqYC)e^*D3B|wHfC<BIn&O?7bWbsC zd=dx3T`C+>a>YDs>q<+VZJ7jwtE3Ljn>-Lkp!TK_`reh6y4^DgYw9Zg_u2SVomu$* zd3=OEywbuI_UbeV2p32lS^8bv#c$a9LjU2FmdgL3Nf@iEc;B9v&9!`ST-O)y5qjAN z;^oii9B1Y|?3ZxKZ5R44>DwwUp0f(Q>=*HY35=0_fJ}Dv3_0iTui?Xc@Cd!^EAhqG zbgtj%uV~*P$tT|raG&?|k!#t1;<2HBZ0NcE+;>uclU(__Nf&zAr{WRftoT?n1O7RE z8PQAozkzgq?saqj@mE^v>@hPI4%x?u>A6d!e4+m~(zpn&A0M*s#YfCoI7~J41^#nc z$>W>2NYcyx7{4?0EvdL6dNuR?oeaI~qw(tNI%9oc2r_h%`aPT^%a{E&cE6=_81|9u zx%&Um&`bYIyyv@y{*j3zln1QIiTVBr-(0@X%l;i}TrnKHXMXa?_TMjXGf6M|d_4NT zrg}vJ%gjS${N$OBaFV2#{Xl-p6~pmMshGaZwg2}>=O&uz7VR7Ir=Ms7881T~@sC{p z1t$mrj_757k-r)GY<;uW{{Su5U&@z#NdC1V{l}nX-W7V;ujGlLmw^%fmhz=tPcr4p zz9vt8iW@j2zrKpECH+6Rk*t5T|H(s5@i5c>LMPuZfjCJo`=mVlnWoS33+GK9uONY+ zCGD5}RDN#g-~A(_dwi_Lr|RsgDZiFU7y7Se(xrU)zKZ)&b~5W<P5EDzjOJ$MGR3<| odI<o$i8y0E5I$Mi#Yg&0@QdenQTaE2jT<=r`KJDs|Jm~Y3me_1{r~^~ literal 0 HcmV?d00001 diff --git a/bin/cutsite_trimming b/bin/cutsite_trimming new file mode 100755 index 0000000000000000000000000000000000000000..aef62c5802acbb650dc7f60040cfde09f1e9d57f GIT binary patch literal 20080 zcmeHPdwf*Yoj;i*FhrOc@Wo4*YCuql83-7l-A+gXcW{tM0!1s_49NtdNoJh613|3@ zlekXDC~e)Ax_!{Cb*Z+M7S}~<NsyP?&!-}-e(3r_>*Ef=ilUfSZT9;+_ngU@$qeh- z{p?>od@|?$zUTKkzw@}~o_i0sRr?p)^75EWHug`9sKpb#9I}2H+qsyN0xM^un3qju 
zli3I$h4}OG63VSBnGXonGPeu9eB=fAE4Nudxy{QjWIow~VZln$L(0kCQ1XgPEC0nM z7AbSa+#Db|^7ZH}P9m@87c%#gLrBL`%H5nM_egF)$PEZNnKR+P%w_$kZuD0t%Ij=i zP(*tvV^#cjC?{S@HE$Y?bZt)OByummka>#*Gnq@dt06~ze%9m?j4On_TydEp{Fk|G zhs#@<*Uc-tyrpqgOLJSSeO7z<yjk<gN}{2X*}UGQpLmyCwUS$sadd+spU=S`g`CQN z+fZ=orWyYlx_;i-e_V2R%e;qvd20cc72{8~8*{r@AkURY*fji6c&4?EuI#zS``(P| z=l$nw8(q2Ccf+HAv!)l|z_=_W-Ebxw-VfevIMvIYMbFe6ct;NXS`K^<^klPpZw~(N z=io2R!T;SH_|FmOY<BO;!GBqfc+SZY2Ok<VoBriF_;1ZoulsWNxh04Go*aJW<*3)R z9Qt3*;ZJi8|4-)7vowc4J9GG7nFEjJ(7!%Mz4CML_vg@mNe=uY;A7cD=JnL0=8U<Z zm(mYWFTy+K)N|U+e;K=7*exCF6^ND17kcVv*K<JX*(>66GJ~ICzKI%*4VPX@bId%Z z>+4%XZF<xQL=0VL`qGX1is1U@s1b}*wgjTlV3g^;x@CG}Fe2c(WtA<VwqRXgT}#j^ z%U+^~jSZ1tpi#dexLI$&l>zo^ud0of!9v(ms*}>F5eaRsUU6k@z}HYs#aGrEa~q7! z;UH+w9Pn&vZVa~R&27y_^~PcH&Czfu8dT+LLvx#2V$ltpxz_N^({;EJ+N7_8Ck=t9 z!Ii*@XSNP3+T5TwHMfwpE1IKvC>(54Btvj77-)s{XfR^<8X)Cq*bs>5MkLT|L?KkY z0@xf&kKp#z>2;-gb?rR8=E~YRdW+?BrH8zXwzhK8C~po$$$YTPhniJ+WZl44b7g}P z52!B_>YFTYvbrJElQX&xs+Xf8t)WIM;>LVS18im$p4-q83`D9SJevzcfnl{)tueb) z1~3#e^iY!?3AC*b!bhtf(z)i(>l|WI*3{hANYxg>u{sD0*XqI6u(4T>1jFc{p-5nT z5Pd{n-`=i=gOO;cEzr`8;*F*7rPjD&ebB%?6itbWWHb}+UTRyRgHy}Owz@)JQM=Ap zTc+2})$2THyfO?0-dCfW0$~VZ6u?AT1A>67woTNGwMI+SH{6zLMs|sYI_R8>4)MPU zo9GoV8*FQA$yTjwrc%9+S0FSe95brPDtRA_L^%4{`c$e{X7pC{H5x>XEj1fyHfgmc z5?rlsXwaiXd6>SkZ4+7*BRv{x4H8>}#RhkxEF5WWGn&wp4ebFPJ*~YNQPqh_Ohm1L z<~FvZvQnQ@Qp%S2eTypf*(I|}=CaDwt1A}ybWh0~3tqh%EM+D0(<o1Rp~s_^VwSXF z#f0e*iE^R}_-~=Wsf=*z-w5PH7Yf=ga#=o-6k&ap$F9LWL#}ITrf)urF*|z*m@MC@ zl*cAEk0SfKQssqwjYjg6e*f?dSV50uy+S5_+f2AIf;}VXhJo*ZE@Vdq9pH2Udt1=P z%x_`|ZDXfFQyRFup69f-6%eQ8{fCz8#T>DI<o$&3ixphnmk6Jx;Nwf{IYrSBw=zz< z6`bNI(+ma2Ei9Ev6<qbRT*1}#xmUrhmatT53ND`|sH8^0(EzEmTEUA_C}Z^sjs{Jo z4GKOwg)$aa@G%PBuHX&@?@(~3g5Rm&v__X{tAdY}Ao6Vrezt;dS8#e(kZFg4pDRJ+ zI~Dvq1>ddU=PP)hf{$15rxbjGf*)4!NeX^c!R50m6(3XZ3lx4+!DlG=Nd=#x-~$Ry z&lssB)?d_j+$j(vSHZua;4THfP{GG5_(ck?b)K+miNfcnx|!D9XB3*XDAYRl+xIC< zxE!4OBv=Ng?Z>Zksux#8Zy=`Ru|cG1j}uKpD|wjHzapB3RI-oL4-!p7DY=u=|3)+o 
zq2zW>-$yhJ+vHYGf0Jk$O34mRf1PLwLo&?i+li*3ldR|TO+?d>N!D;WLNpDPq?gmz z6HP-TS<2~kMAOhnx;cF<(KIBIE>5o?nubD>ae67yGz5|-{{~>%BBE(JOdjL(0-~wQ zCl7OaHqqmV?&I_&M4wIcPEKD)G<Espc1}+un!0*&E2qa1O<g?M!RaERscR?0oVF27 zT{>CM>Ay|^O<g%z!|C^lo=CKp)5nRXE}bmp^y@@ZS5CS){a-{=7f!l3eT3*KL^Do5 zL-bUlPku`Ef0Af7(Z@j7Xo-<pe!H32<nH`kp1BMh*Y0a7aZc?52V+|+zETk%tXSoX zpRQP0QMa;I+j3$za4j*Sb1+XkR8{QSh9?T`P*?F*E`TCjZ&R+fr>?7t-Nbybs(2LB z;)PGU(IvaiMb~iN9YKfF5^L=p3uigIA3^V`_ziW*{x!Y!>0b62bq6i}%BcsOUBAa= z?@R)zj|p%(`e*Ou1X4%gV62er<W+PY$g7_JuJdjeEchA@_!5&3awE%O<T2`o;O=D* zJ@r8Bo5Zd4)D@ql_1seI1-rlJeqJ$OqU@k~81d42jAAd9y3Ai9yjr4)$aYiPGDx>D z<DSQ$l4(6(acSL8JG-uglbufuo^<ZDb@w^%X+wCmo=1v1z+>JBb;;|1(72SA!=$?g zmMONS6s|NEmI)VXh^^Et5iWpCx!~-cfzimzAEOw0j_etS>Pa=`B~amcN9!CI)Z+Uj z>g@UkShe_G&pQ(8pFn<51xvN~AGMiDZQ%yf`OowqOhG6m3-wz3xY;axs`tm=Hm`G` z7&7%~9%)>t);8o7_etxyA<w+;6U4;wCiV@>+<X|i{e3i0%1~H4v=@<r%g4>XkN`Xg zlf4rd*(8LxDG`X0J?^=R1`sXTAMT;SQ0+ObC5&QMuO9*O^vRl=Z~t|0uy?H*w8D>J zGx0Ot7qo>x;~hL!l`KJko19at(1jnZtaR=<>5spg(z1p8j2}wTuaS<<19mCz?0T1C zo`|ttS`|q?EwRcpe*#`#{D77~x!3e1zE^mXc)MS8c6Fm8ciddgVs~lrSEZ17KU~%l z`I6pajhI&lYf6(r#0^e%K>@~<=;5Qao+pYs0AzOZeo`d5C^eVce45w2i*u_{?%IwD zrgwK2;;wY}+hj|b-O!p06Nz}cd!y;`-t-qLz+9qrwwss}egMC;gf)$5ms(Scws#;O zcRFX+7G8+Cgfkm4S!J!qUp$HiubbR<`QvYydI4kcgS~ffYvx#C1?AQVq{^6A0;P`h zEzBI24{GTwlG(DKraVvISJzZrSFxsIZH1m1H|AZaaaH^y^Bk^x?uV1yte4il(kt_3 z6zo|557e&EMBMRLRH&9iL%rospi@d{eyfNdOkRafn;!AY&;yl-e}_k0OfW8}NOlA1 zJqPy9=LX2=J~1wgVuw|KLJ@mJ#V9V~P&pt9Ne4QS2#GAZfa`eG>{h*d>LaeWf4AzI zu|#o;x4ej}xfwc=Yspc|8BAW5!?L>EiCs|2R|JWgfu5y<acv-exPO-rsh1+0^-f}) z#%Bj!J1<pbOq#YJv78GQMO*T>m=AKg{A*<#m<LJxr-YEpawr(Z&NL&8xOPIRVx)hU zLL}cFsx>Aor57GJ4bgQFwVG-=G)dLwOf+%)kohaHsJecJCTQA9>%iWfxJ~f#$Df+! 
zWqXM1r42n=4<?_1oaY^1{JFJS{B^DKjgvKXzC(RG!H7B4%d<lr<Iq30fboOfm!tEp z)Mm{<hFRI6#oydyTeFV_0AFZ+u%?%a29G(Xb`Z9kD9KMlH2$U*e^T3f{L5P2VePqr z7=#}uMy>mZ@xdAm!;#CsF62AD^w@YeW3jO-A!*)<7Fu(#@Bu)1r}pvtPv@7+abPf} zq+B>>W$#J6<pKZT;Wf#dA>y3Ml2I$K)e;wkA-?A@YQ{&R`Iir=-Dt3xf5zpW?bcv4 zUn4wm!J>5T-*WCl5HS0Q`zS>SqPsxsMoX}-?rYjQv_Qi8XceuDeDOa#POAx2<3la} zw^}X!F_uc(3Q%#HXlZ#9_ZQpguFe-h<|(ox=HSOmAxRUKLx2O|H$7;NE=+i9NeOj2 zQi8jImiS&lm~L=%H+UKg4BW<2^E=%coLzULnC?h45HPSU2b_Bj`+Hi7Ys`UDRB@M< zSYe*7a=8ng-P2VQa^XR~dac!6RdEf!$h1Bm)-8+oDi*Oc$gp?^x0s^}uNO)s7b{9I zh%=N_oJq;IQc502n5gc&uBwvwpUuC$k8zdw8uKswn%&t&&8Z#QhmM1lz#IMN^8Pk| zpXOY0)Yo~)PE3Z!m$-tjT0BRz#8swwf}Fr=#dx{8>y6lRk`MDj#S_#!V$W)w9}haa zt|Q$E8GRH~bRNv(xAtd91KMT6<HV1yg>~wLB6F935j?)G`lIKF>HGk!P*TsULEN-9 zhzrh%ih_wnW`tl&<X~1-LKn*(2x*E8+w&Sav{}t{+=69O9ESV|&5yY_R#j=XE)cdR zDYhmRnInWPFe|o5KY37-hW<gIXi$ZBtw2#-1s%`P?ER$s3%L22*S%-0m&uO31FN9B z)1u4+v;nP55uU6#J-NudnqWi|CegDH3eLPxFkhfBUr=NgbJ*fj;x<O;Kou1oAHgXy zI7QJhrN}%6IN6U$9rF`ktI^>5_#}COg&GKTm3B354JzO7bMQ?=6jZ*4a`5fW(ASxR z?@I9Pc|%T?)&y~g^UmSR6S&FabT%qO>t#`I^Az!jA!;7v0%8#nrY1E{y^HcK`)LNO zil0ndI+04;N2PFF3Zolx(cL5(f7NtTSm_xB0{t&Ph8AO~2P-EAkFvlxgs4ypgw09N zmfSsToiqqxg?~$mzaPeukzxG3>HHLR(qC`>n)uHf#{Y0S|5+*h3F7}G?Y_?kG(vRv z&roWO7o&J8uljEUgux=A2L*&~E1~ZYBpc#-GVpNvsv~J;(B+up)WyR<M!AM^HNeJv zC0I|;0{{>19vDvkoJecwhPj5%7^qA>6_uo$>ECPhr+<HkmNRHY^ZU4hY_brH7LpLi zWfDn_N3c@&GrF^Wf^CP+6S!Z6u;n#XoBy&gKUJ<J&enQvl8@i9X<Fw#J2+x*`x0L) z*189=Gj<JG=-lt3KDbYdzuRx_L1UpSEypwZX=B3UuuM<rW4n*s)<+d`nlC`4|B=Mf zfu0)&<B@^*GyQ)O>lJ>#-$QqL>#lG4HbYA+v1^x(ln*K0FN47!|454)#dfnCq46bJ zitWo11;u!(^Y^sdwYce@*{5|L8}R3uSTPynXlg?JMrd)Hc4!3MZhiAjXV(s{=}CV) z$)7W^t6+@L60^0qM>}NWH|5SJ?ONwv)cSbrHDAwaQmxHAs_ixNm-XCoz}0WJ)=~TH zzJy)t*_?{iDs6thFMc%k#!#qAP7Dq%i(gi}Ebqf*@mt_<yoDah_RmnpFG62M(L0A} z^AB%a;5&2>>-oW?gW4~CsQ*zB+$B7?^Bxnywa1_8?z^pr_hEAr%=B;3cp1vkq|a>n zPhb3?Z|_?L$ph3iD+YSPgYg>&`kxtEYFTzE&<mHij48D3iAbEM#a$v^T4x_Bzr>D( zBTLpo4RUq=$`^mWqR!tlt;6odI<05coycqbGvD;QsO|kUpH|Z5PmZInbiZRvk<ZKi 
z_;G*ybQSi%2G4m3ohuJPiyfzhwGt9#J$Py#VnchgVk<Te?@H7Zi{fqL0ylHdnzbR= z5-wtw7P0C`BouKka90M}+Cqk#UKYFQ<uM>OYgQ9pF5if}0dKgP@vi$aX1#UAo5tq0 z^$Qk6V|ei%0Rz25oHZ-b8f!yID_h`>wcXGb+SCSxMsujm%`UoVG3nyE7r3WIi-6!o z8n_!c1F4TL0JuD6;C-5VF?_K2iY~gS4mw+djc$7N9tGM+I}F&{77n@>SJc*Bz1Zik z1|nQ_SFWt9_0<sq-`vYr*75SM4mr8TZE3-~ok{r%>6<Cq*j=5(QX6@D|KQ*;<lB&! zB8=r&b=--3E#<&9ERP0|??zsZ`vFZHTaoWT{uJ^V=mNB&SdDx;@~4oS$SKKp7I`<V zV0rDXyh)=++PCJ7bP-OQtG~mI{t{voDLcRQZj1Q0nS+CF((Q09c8p)@JZqD^gME48 z!b|29Uq~uQ9>a!hMF%^VfC`7J)?w$it^=u|PK^TZwAt37h$qrbDetBlKlgNGK7D$T zZ~2Sw)lgQ9zjnxfWhCCgotWumiyW?dY>OP@@6KQ3aQ74}a?H4^(B~+<W5g0idFRNb zj>Dt!ZG}aS@+wDZg<}SIzzdm04toXdpHeEr-v{{Xz(a4G0F>Wlt8$FLBfrAo?ks?) z&0FMfS2)I_CPR{bu+pByQOMnhT=7Lq`2yiPMpTe5RgQYwF?doj%#$USClt3^pvR6j z6`%M4vFbe;cs=^c52)V0OGLe^sovEoThJ_QEpeR8%Rf~BYn8*SrRq)gPC)MtEF-3q zp(=-K4H|>mt^n=36F0vg;XKAewyG({8rjmQUdtTbmTa}+{lE`BN1=zhi<$-p^R6i` zJXXY(75=J-?aC`W;9x(o_2lhxuwNFG;`jZ6$sdhjj~1Sf>sJaVKRBAbJA#<LHIg&! zw2udI)V=}5e;73Z*Dn^~vFLus7XUzcAYm)qF`C_HEByHwc6WZ^vuCmI=NG;-nmw1l z3fB)86y7t2y;d*}*FP*QJmO#<6mGFW;C6fAJr4GU{Q)ALIg8}JJsOICJet&f=pf*B zCtd#|{UZYZh`?uxz$7ncb5FGLV<MNbG<8zS%;~k2iqp3tDox*lP?BSDyvxgJ`OUy1 zc+#UZf&bF?*7f{~UNBN}SsA_*w2~>x<$Xxpme_}X8w?T0Hk+3+d@*9B9V5K_0vkg- z9kzLSF7NX;!6@%D<9R7#{GKSTDZ@^Xl}hV*F6YYeqJZXUN|^e2N_{&p#>;7mZy#;Z z((ig+%J6->m85+v4S0Q+;K#C*)6x&z4mkbbSzaOe|C6P@W6v<RO0;W}$Ty1o4v~Lb z<PV7aQIQ`M`3oX{OXR0TJ_3siO6Q4uy2$5=e6h$^iM&bV8%2JH$iFS}2Som;$PbGA z1(Clc^3x(8F;3VQ`E-%b6ZvA1uM#==iVwOLxM$!?^_lLnlJb(d?%AcDxux@F&vDOK zffcJ3Ff4qQXWmT4N}`)vjleqOM#RcD$SZt37L0^h30C335_}If%LuF&)cUqq$+}oG zzNc?)WF<sx2t+rqlE%$#&}ZdXh4Ye)_=pe-?leM2StQsJAOUd|ZZTL1eSpeJ@TnSp z@x5X*%0i6+Bfv_68}z0~pf#v(XoRe!pcG%j1=|~fVMB-e_*Rzd&;yZ3V6&w|USHn; zLD&nlHa9>n1QYlv;dU(>>!MLs(h$PJzRjRWo;l@XHY&GyhcVIDZgpAld}d_D3sUoj zsw^L~$l<K~Bbc05()mZSY~wK>-^a;uoX$Ur$@w51$Lu8Ljdc7hCg;y|d^Gcl@sf^@ zVf9MClx6wM!Q{LvG0O9qll6)DI30JT&XuI&W7+D9y(+}7^YP_AE&DT5K07-#zpG{W zScS@YJsm%n(VLabl+Vt?`I>C_`Kj;!RGEB+S+z79{{-gtWWy(>zE@AzBkxK>DBTYW 
z*n6~{mzL;*8yg#nzr3HU6nLq)-!=-VR^)lO%k7v`&k=cl*a(XBY#03WOhxHO_#<52 zulch9u>Zgx>7jRSl;r(-Kd!U+`7Gxd9_N>Ulm75TFRq5td${EOWZ(rbM0n=9VIpwy zXM2V}^b#>IOS{(yJxtVBTBi3Gd0F~Tu6o_f`NuLdqaVgOKD@qUCtG~}h4T;ZKMw(C z@F%n1J|S?qjv+2e&vHG(`|VNSZmfPX`{AE}kF!M6eygzX$cC>5ej(!T6604sn|_`1 zk74wFm6BYy+$V5(ZjTq@=SLiu)KGp9xO<39T|J)zPv+3`aSq&xg-y14EzE&0%Yom@ z@!|7ZR}TK40?&3nr7s8nt2yx51=;=g=fF1!oL-Bo>3hK4Dj=?R=g{*h@UhCg4U#8% z9(IdsHJc9Hm8HMU0iG@XI&kV&TQBl*HGB^RSKEM(wM}63@?1^#=g_mC_%RR2@2%ze zofmWPpN|FmSeuj42Lx&=2A<8&N{)|V^Z@~-Apd2oHV6Mbz^PtL)Qj&aLFPe$XO5$n za_E^qGJAX?z+I?saYlS@1)k0S?*exVpVjOaIru*k`ge<YRmyw<JlnadOL0TVX7|<{ z_(9;=;`17C>OYzFGKC)63{=x7JlJHjJ2?klo&&GRf!~$`-<AV^gyUma$2iMR+N-1V z7{{}$uf*OljzVF-85`Us4dF09X7V`>#gv3``V0qN^=tie`Nn%Q7Q1axBOD2ZOJ<i5 zUWV)Sbm1u_i#p$J$GIGwznZr(*nrbPzS<~``P4?Mt84WY>zKa8zkE@JU)QnkuKRR; zqJ&Pt_{(*@ImDOq4G|-1#G0B)a6U#~TDd~^`)ccO2yW=Cn{Kq~4RqQq%H$z81=97# zkiNbpv@X!1H^PIc9*DIwEYrg+K_l2$GH*^9e9y*&GiPnh*79G+l0LGTS&tBnvDVhj z5K$<8^ev55*B7s-SXQl9UsXjX_PFQbTGV;@;>ESqI0#g+$X`tyRNIt_#&m$6{9}66 zHCI(E^Hnl>-mnhg@W%W<K2+uP^20pVsU2|)NMPcC51qOi-lCq_I0@DgXb9rWT*D1M z-pbV*tHR6csCxY1P(w7vqskQTvz-1*_uw;}exw6SnJ&`Nrwm0I2OHB5CZ_Y!ai>g; zXFm6sCSQW{OroX~tMrqEbT%uEo3|oA`IP}@9=96i5l*)KJtq&-%$0cNpq++Jdu19s z!#Tq=J*dsFD$_Bw45l+3Sp9s*_A(7DM=xt^ZqsAYAjT8flk-FR@mM`=Je}$AYlemF z!(cuR#Hu5KCLG^Vn)8N6100Z319i%@6Q!9(#F55K7%`S@f5wBISwgQjMnn3B01hnj zv#06DJ2UJI4O~7Q^R|_CLuWcrn#FF`cva07=W74Q86`c2=p!_Bgs|WAcOU;vt1}KN z=9u8Jwrs{chh3o5<aX82LFY6J7-l)nU4PC~z!}<c6qQ%ujE8hHWT<UqyPmN><vPWj zrjO4hbe#2bor+CUm7_7NgReop^_K0pvDd2{L*=J|hij#AXC3&?C`&sSi!yP@72V>@ zhtf0DAg1DM@9=_XnLM?eUS_R<_*k<RcYnt(?fB&3509-pmD{``=UM9io3SX1S6)+o z5&me2Ddpw9WSs!qC{vSsev_C77plD6zchq=v1Al^rhFA}e775t_P5!*oO7E1$~iUk zz68tov~(3HdN(5F58J$;81_YZ#*X57m=Ye3c$}m>c1(C$g<oQk@^asF2MG|4=RY-3 ztktv@1ikx^@^XLmLGi+7g<zIyB%jQ0Ksmijk#uSwR=kkfF67A_@>BXRV0IHOD5g?g z?$>S?^7VpI+L!XO{#`<Tk>E@1>xvg(^8FFXlgdo_yMa+mM7UD>zvEp1k>q<Zi?V!` z*ObTcr^<U>UV^b-%roYe6|+b@+gVBG_kvXA>&JOHL%n#YTXD9<FMp++r0>IpDlhk) 
zADL!x;IS($W%l3gC?$F6rre+I6Y?{tP)Ra=654?aRbKwTNb>(nYLSd0&$Rz&hP>SG zUVX78aIbVs$Y<K`%aE7*;wNTW0#6D#nPpUm9YKKue`1)W_Rr=2zmys8O#81u)~U$L z{Yq185N7gc(#Ii7K1us>KmO!rlz$hpa%|x;wQn!~e=OOsvVTkavR(s_QSHm~{R5Yi zwxRK2QA4!U^C=2c`P6%WQcJ>}5pXFd^ARXl<>fxDn<i$Y%>I(0j*SLDx@7$5EEc6= zA>W?Sf84xu=x403FWgNPNO{o=DO}o@`B;=l+bJeoTrKR={=AwpNM8307dd-)wk1Vp gvM5c#Um5;n+~oX6<2#%ElV3yuQg6ltkZJ$F0UGAUZU6uP literal 0 HcmV?d00001 diff --git a/bin/mapped_2hic_fragments.py b/bin/mapped_2hic_fragments.py new file mode 100755 index 0000000..391a58b --- /dev/null +++ b/bin/mapped_2hic_fragments.py @@ -0,0 +1,837 @@ +#!/usr/bin/env python + +# HiC-Pro +# Copyleft 2015 Institut Curie +# Author(s): Nicolas Servant, Eric Viara +# Contact: nicolas.servant@curie.fr +# This software is distributed without any guarantee under the terms of the +# GNU General +# Public License, either Version 2, June 1991 or Version 3, June 2007. + +""" +Script to keep only valid 3C products - DE and SC are removed +Output is : readname / +""" + +import time +import getopt +import sys +import os +import re +import pysam +from bx.intervals.intersection import Intersecter, Interval + + +def usage(): + """Usage function""" + print "Usage : python mapped_2hic_fragments.py" + print "-f/--fragmentFile <Restriction fragment file GFF3>" + print "-r/--mappedReadsFile <BAM/SAM file of mapped reads>" + print "[-o/--outputDir] <Output directory. Default is current directory>" + print "[-s/--shortestInsertSize] <Shortest insert size of mapped reads to consider>" + print "[-l/--longestInsertSize] <Longest insert size of mapped reads to consider>" + print "[-t/--shortestFragmentLength] <Shortest restriction fragment length to consider>" + print "[-m/--longestFragmentLength] <Longest restriction fragment length to consider>" + print "[-d/--minCisDist] <Minimum distance between intrachromosomal contact to consider>" + print "[-g/--gtag] <Genotype tag. 
If specified, this tag will be reported in the valid pairs output for allele specific classification>" + print "[-a/--all] <Write all additional output files, with information about the discarded reads (self-circle, dangling end, etc.)>" + print "[-S/--sam] <Output an additional SAM file with flag 'CT' for pairs classification>" + print "[-v/--verbose] <Verbose>" + print "[-h/--help] <Help>" + return + + +def get_args(): + """Get argument""" + try: + opts, args = getopt.getopt( + sys.argv[1:], + "f:r:o:s:l:t:m:d:g:Svah", + ["fragmentFile=", + "mappedReadsFile=", + "outputDir=", + "minInsertSize=", "maxInsertSize", + "minFragSize", "maxFragSize", + "minDist", + "gatg", "samOut", "verbose", "all", "help"]) + except getopt.GetoptError: + usage() + sys.exit(-1) + return opts + + +def timing(function, *args): + """ + Run a fonction and eturn the run time and the result of the function + If the function requires arguments, those can be passed in + """ + startTime = time.time() + result = function(*args) + print '%s function took %0.3f ms' % (function.func_name, (time.time() - startTime) * 1000) + return result + + +def get_read_strand(read): + """ + Conversion of read position to naive strand representation + + Parameters + ---------- + read : list + list of aligned reads + """ + strand = "+" + if read.is_reverse: + strand = "-" + return strand + + +def isIntraChrom(read1, read2): + """ + Return true is the reads pair is intrachromosomal + + read1 : [AlignedRead] + read2 : [AlignedRead] + + """ + if read1.tid == read2.tid: + return True + else: + return False + + +def get_cis_dist(read1, read2): + """ + Calculte the contact distance between two intrachromosomal reads + + read1 : [AlignedRead] + read2 : [AlignedRead] + + """ + # Get oriented reads + ##r1, r2 = get_ordered_reads(read1, read2) + dist = None + if not read1.is_unmapped and not read2.is_unmapped: + ## Contact distances can be calculated for intrachromosomal reads only + if isIntraChrom(read1, read2): + r1pos = 
get_read_pos(read1) + r2pos = get_read_pos(read2) + dist = abs(r1pos - r2pos) + return dist + + +def get_read_pos(read, st="start"): + """ + Return the read position (zero-based) used for the intersection with + the restriction fragment + + The 5' end is not a good choice for the reverse reads (which contain part + of the restriction site, and thus overlap the next restriction fragment) + Using the left-most position (ie. start, 5' for forward, 3' for reverse) or the + middle of the read should work but the middle of the reads might be more + safe + + Parameters + ----------- + read : list + list of aligned reads + """ + + if st == "middle": + pos = read.pos + int(read.alen/2) + elif st =="start": + pos = get_read_start(read) + elif st == "left": + pos = read.pos + + return pos + + +def get_read_start(read): + """ + Return the 5' end of the read + """ + if read.is_reverse: + pos = read.pos + read.alen -1 + else: + pos = read.pos + return pos + +def get_ordered_reads(read1, read2): + """ + Reorient reads + + The sequencing is usually not oriented. Reorient the reads so that r1 is + always before r2. + Sequencing is always performed from 5' to 3' end + So in unstranded case, we can have + + 1 2 + ---> ---> + ========== or ========= + <---- <--- + 2 1 + + Reordering the reads allow to always be in the first case + read1 = [AlignedRead] + read2 = [AlignedRead] + """ + if read1.tid == read2.tid: + if get_read_pos(read1) < get_read_pos(read2): + r1 = read1 + r2 = read2 + else: + r1 = read2 + r2 = read1 + else: + if read1.tid < read2.tid: + r1 = read1 + r2 = read2 + else: + r1 = read2 + r2 = read1 + + return r1, r2 + +def load_restriction_fragment(in_file, minfragsize=None, maxfragsize=None, verbose=False): + """ + Read a BED file and store the intervals in a tree + + Intervals are zero-based objects. 
The output object is a hash table with + one search tree per chromosome + + in_file = input file [character] + verbose = verbose mode [logical] + + """ + resFrag = {} + if verbose: + print "## Loading Restriction File Intervals '", in_file, "'..." + + bed_handle = open(in_file) + nline = 0 + nfilt = 0 + for line in bed_handle: + nline +=1 + bedtab = line.split("\t") + try: + chromosome, start, end, name = bedtab[:4] + except ValueError: + print "Warning : wrong input format in line", nline,". Not a BED file !?" + continue + + # BED files are zero-based as Intervals objects + start = int(start) # + 1 + end = int(end) + fragl = abs(end - start) + name = name.strip() + + ## Discard fragments outside the size range + filt=False + if minfragsize != None and int(fragl) < int(minfragsize): + nfilt+=1 + filt=True + elif maxfragsize != None and int(fragl) > int(maxfragsize): + nfilt+=1 + filt=True + + if chromosome in resFrag: + tree = resFrag[chromosome] + tree.add_interval(Interval(start, end, value={'name': name, 'filter': filt})) + else: + tree = Intersecter() + tree.add_interval(Interval(start, end, value={'name': name, 'filter': filt})) + resFrag[chromosome] = tree + + if nfilt > 0: + print "Warning : ", nfilt ,"fragment(s) outside of range and discarded. ", nline - nfilt, " remaining." 
+ + bed_handle.close() + return resFrag + + +def get_overlapping_restriction_fragment(resFrag, chrom, read): + """ + Intersect a given read with the set of restriction fragments + + ## + resFrag = the restriction fragments [hash] + chrom = the chromosome to look at [character] + read = the read to intersect [AlignedRead] + + """ + # Get read position (middle or 5' end) + pos = get_read_pos(read, st="middle") + + if chrom in resFrag: + # Overlap with the position of the read (zero-based) + resfrag = resFrag[chrom].find(pos, pos+1) + if len(resfrag) > 1: + print "Warning : ", len(resfrag), " restriction fragments found for ", read.qname, "- skipped" + return None + elif len(resfrag) == 0: + print "Warning - no restriction fragments for ", read.qname ," at ", chrom, ":", pos + return None + else: + return resfrag[0] + else: + print "Warning - no restriction fragments for ", read.qname," at ", chrom, ":", pos + return None + + +def are_contiguous_fragments(frag1, frag2, chr1, chr2): + ''' + Compare fragment positions to check if they are contiguous + ''' + ret = False + if chr1 == chr2: + if int(frag1.start) < int(frag2.start): + d = int(frag2.start) - int(frag1.end) + else: + d = int(frag1.start) - int(frag2.end) + + if d == 0: + ret = True + + return ret + +def is_religation(read1, read2, frag1, frag2): + """ + Reads are expected to map adjacent fragments + Check the orientation of reads -><- + + """ + ret=False + if are_contiguous_fragments(frag1, frag2, read1.tid, read2.tid): + #r1, r2 = get_ordered_reads(read1, read2) + #if get_read_strand(r1) == "+" and get_read_strand(r2) == "-": + ret=True + return ret + + +def is_self_circle(read1, read2): + """ + Both reads are expected to be on the same restriction fragments + Check the orientation of reads <--> + + read1 : [AlignedRead] + read2 : [AlignedRead] + """ + ret = False + # Get oriented reads + r1, r2 = get_ordered_reads(read1, read2) + # 1<- ->2 or 2<- ->1 + if get_read_strand(r1) == "-" and get_read_strand(r2) 
== "+": + ret = True + return ret + + +def is_dangling_end(read1, read2): + """ + Both reads are expected to be on the same restriction fragments + Check the orientation of reads -><- + + read1 : [AlignedRead] + read2 : [AlignedRead] + """ + ret = False + # Get oriented reads + r1, r2 = get_ordered_reads(read1, read2) + # 1-> <-2 or 2-> <-1 + if get_read_strand(r1) == "+" and get_read_strand(r2) == "-": + ret = True + return ret + + +def get_valid_orientation(read1, read2): + """ + Both reads are expected to be on the different restriction fragments + Check the orientation of reads ->-> / <-<- / -><- / <--> + + read1 : [AlignedRead] + read2 : [AlignedRead] + + """ + # Get oriented reads + r1, r2 = get_ordered_reads(read1, read2) + + direction = None + if get_read_strand(r1) == "+" and get_read_strand(r2) == "+": + direction = "FF" + elif get_read_strand(r1) == "-" and get_read_strand(r2) == "-": + direction = "RR" + elif get_read_strand(r1) == "+" and get_read_strand(r2) == "-": + direction = "FR" + elif get_read_strand(r1) == "-" and get_read_strand(r2) == "+": + direction = "RF" + + return direction + + +def get_PE_fragment_size(read1, read2, resFrag1, resFrag2, interactionType): + """ + Calculte the size of the DNA fragment library + + read1 : [AlignedRead] + read2 : [AlignedRead] + resfrag1 = restrictin fragment overlapping the R1 read [interval] + resfrag1 = restrictin fragment overlapping the R1 read [interval] + interactionType : Type of interaction from get_interaction_type() [str] + + """ + + fragmentsize = None + + # Get oriented reads + r1, r2 = get_ordered_reads(read1, read2) + if not r1.is_unmapped and not r2.is_unmapped: + if r1 == read2: + rfrag1 = resFrag2 + rfrag2 = resFrag1 + else: + rfrag1 = resFrag1 + rfrag2 = resFrag2 + + ## In this case use the read start ! 
+ r1pos = get_read_start(r1) + r2pos = get_read_start(r2) + + if interactionType == "DE" or interactionType == "RE": + fragmentsize = r2pos - r1pos + elif interactionType == "SC": + fragmentsize = (r1pos - rfrag1.start) + (rfrag2.end - r2pos) + elif interactionType == "VI": + if get_read_strand(r1) == "+": + dr1 = rfrag1.end - r1pos + else: + dr1 = r1pos - rfrag1.start + if get_read_strand(r2) == "+": + dr2 = rfrag2.end - r2pos + else: + dr2 = r2pos - rfrag2.start + fragmentsize = dr2 + dr1 + + return fragmentsize + + +def get_interaction_type(read1, read1_chrom, resfrag1, read2, + read2_chrom, resfrag2, verbose): + """ + Returns the interaction type + + For a given reads pair and their related restriction fragment, classify + the 3C products as : + + - Interaction + - Self circle + - Dangling end + - Religation + - Unknown + + ## + read1 = the R1 read of the pair [AlignedRead] + read1_chrom = the chromosome of R1 read [character] + resfrag1 = restrictin fragment overlapping the R1 read [interval] + read2 = the R2 read of the pair [AlignedRead] + read2_chrom = the chromosome of R2 read [character] + resfrag2 = restrictin fragment overlapping the R2 read [interval] + verbose = verbose mode [logical] + + """ + + # If returned InteractionType=None -> Same restriction fragment + # and same strand = Dump + interactionType = None + + if not read1.is_unmapped and not read2.is_unmapped and resfrag1 is not None and resfrag2 is not None: + # same restriction fragment + if resfrag1 == resfrag2: + # Self_circle <- -> + if is_self_circle(read1, read2): + interactionType = "SC" + # Dangling_end -> <- + elif is_dangling_end(read1, read2): + interactionType = "DE" + elif is_religation(read1, read2, resfrag1, resfrag2): + interactionType = "RE" + else: + interactionType = "VI" + elif r1.is_unmapped or r2.is_unmapped: + interactionType = "SI" + + return interactionType + + +def get_read_tag(read, tag): + for t in read.tags: + if t[0] == tag: + return t[1] + return None + + +if 
__name__ == "__main__": + # Read command line arguments + opts = get_args() + samOut = False + verbose = False + allOutput = False + minInsertSize = None + maxInsertSize = None + minFragSize = None + maxFragSize = None + minDist = None + outputDir = "." + gtag = None + + if len(opts) == 0: + usage() + sys.exit() + + for opt, arg in opts: + if opt in ("-h", "--help"): + usage() + sys.exit() + elif opt in ("-f", "--fragmentFile"): + fragmentFile = arg + elif opt in ("-r", "--mappedReadsFile"): + mappedReadsFile = arg + elif opt in ("-o", "--outputDir"): + outputDir = arg + elif opt in ("-s", "--shortestInsertSize"): + minInsertSize = arg + elif opt in ("-l", "--longestInsertSize"): + maxInsertSize = arg + elif opt in ("-t", "--shortestFragmentLength"): + minFragSize = arg + elif opt in ("-m", "--longestFragmentLength"): + maxFragSize = arg + elif opt in ("-d", "--minCisDist"): + minDist = arg + elif opt in ("-g", "--gtag"): + gtag = arg + elif opt in ("-a", "--all"): + allOutput = True + elif opt in ("-S", "--sam"): + samOut = True + elif opt in ("-v", "--verbose"): + verbose = True + else: + assert False, "unhandled option" + + # Verbose mode + if verbose: + print "## overlapMapped2HiCFragments.py" + print "## mappedReadsFile=", mappedReadsFile + print "## fragmentFile=", fragmentFile + print "## minInsertSize=", minInsertSize + print "## maxInsertSize=", maxInsertSize + print "## minFragSize=", minFragSize + print "## maxFragSize=", maxFragSize + print "## allOuput=", allOutput + print "## SAM ouput=", samOut + print "## verbose=", verbose, "\n" + + # Initialize variables + reads_counter = 0 + de_counter = 0 + re_counter = 0 + sc_counter = 0 + valid_counter = 0 + valid_counter_FF = 0 + valid_counter_RR = 0 + valid_counter_FR = 0 + valid_counter_RF = 0 + single_counter = 0 + dump_counter = 0 + filt_counter = 0 + + ## AS counter + G1G1_ascounter = 0 + G2G2_ascounter = 0 + G1U_ascounter = 0 + UG1_ascounter = 0 + G2U_ascounter = 0 + UG2_ascounter = 0 + G1G2_ascounter = 
0 + G2G1_ascounter = 0 + UU_ascounter = 0 + CF_ascounter = 0 + + baseReadsFile = os.path.basename(mappedReadsFile) + baseReadsFile = re.sub(r'\.bam$|\.sam$', '', baseReadsFile) + + # Open handlers for output files + handle_valid = open(outputDir + '/' + baseReadsFile + '.validPairs', 'w') + + if allOutput: + handle_de = open(outputDir + '/' + baseReadsFile + '.DEPairs', 'w') + handle_re = open(outputDir + '/' + baseReadsFile + '.REPairs', 'w') + handle_sc = open(outputDir + '/' + baseReadsFile + '.SCPairs', 'w') + handle_dump = open(outputDir + '/' + baseReadsFile + '.DumpPairs', 'w') + handle_single = open(outputDir + '/' + baseReadsFile + '.SinglePairs', 'w') + handle_filt = open(outputDir + '/' + baseReadsFile + '.FiltPairs', 'w') + + # Read the BED file + resFrag = timing(load_restriction_fragment, fragmentFile, minFragSize, maxFragSize, verbose) + + # Read the SAM/BAM file + if verbose: + print "## Opening SAM/BAM file '", mappedReadsFile, "'..." + samfile = pysam.Samfile(mappedReadsFile, "rb") + + if samOut: + handle_sam = pysam.AlignmentFile(outputDir + '/' + baseReadsFile + '_interaction.bam', "wb", template=samfile) + + # Reads are 0-based too (for both SAM and BAM format) + # Loop on all reads + if verbose: + print "## Classifying Interactions ..." 
+ + for read in samfile.fetch(until_eof=True): + reads_counter += 1 + cur_handler = None + htag = "" + + # First mate + if read.is_read1: + r1 = read + if not r1.is_unmapped: + r1_chrom = samfile.getrname(r1.tid) + r1_resfrag = get_overlapping_restriction_fragment(resFrag, r1_chrom, r1) + else: + r1_resfrag = None + r1_chrom = None + + # Second mate + elif read.is_read2: + r2 = read + if not r2.is_unmapped: + r2_chrom = samfile.getrname(r2.tid) + r2_resfrag = get_overlapping_restriction_fragment(resFrag, r2_chrom, r2) + else: + r2_resfrag = None + r2_chrom = None + + if r1_resfrag is not None or r2_resfrag is not None: + interactionType = get_interaction_type(r1, r1_chrom, r1_resfrag, r2, r2_chrom, r2_resfrag, verbose) + dist = get_PE_fragment_size(r1, r2, r1_resfrag, r2_resfrag, interactionType) + cdist = get_cis_dist(r1, r2) + + ## Filter based on restriction fragments + if (r1_resfrag is not None and r1_resfrag.value['filter'] == True) or (r2_resfrag is not None and r2_resfrag.value['filter']) == True: + interactionType = "FILT" + + # Check Insert size criteria - FILT + if (minInsertSize is not None and dist is not None and + dist < int(minInsertSize)) or \ + (maxInsertSize is not None and dist is not None and dist > int(maxInsertSize)): + interactionType = "FILT" + + # Check Distance criteria - FILT + # Done for VI otherwise this criteria will overwrite all other invalid classification + if (interactionType == "VI" and minDist is not None and cdist is not None and cdist < int(minDist)): + interactionType = "FILT" + + if interactionType == "VI": + valid_counter += 1 + cur_handler = handle_valid + validType = get_valid_orientation(r1, r2) + if validType == "RR": + valid_counter_RR += 1 + elif validType == "FF": + valid_counter_FF += 1 + elif validType == "FR": + valid_counter_FR += 1 + elif validType == "RF": + valid_counter_RF += 1 + + ## Counts valid pairs based on XA tag + if gtag is not None: + r1as = get_read_tag(r1, gtag) + r2as = get_read_tag(r2, gtag) + 
if r1as == 1 and r2as == 1: + G1G1_ascounter += 1 + elif r1as == 2 and r2as == 2: + G2G2_ascounter += 1 + elif r1as == 1 and r2as == 0: + G1U_ascounter += 1 + elif r1as == 0 and r2as == 1: + UG1_ascounter += 1 + elif r1as == 2 and r2as == 0: + G2U_ascounter += 1 + elif r1as == 0 and r2as == 2: + UG2_ascounter += 1 + elif r1as == 1 and r2as == 2: + G1G2_ascounter += 1 + elif r1as == 2 and r2as == 1: + G2G1_ascounter += 1 + elif r1as == 3 or r2as == 3: + CF_ascounter += 1 + else: + UU_ascounter += 1 + + elif interactionType == "DE": + de_counter += 1 + cur_handler = handle_de if allOutput else None + + elif interactionType == "RE": + re_counter += 1 + cur_handler = handle_re if allOutput else None + + elif interactionType == "SC": + sc_counter += 1 + cur_handler = handle_sc if allOutput else None + + elif interactionType == "SI": + single_counter += 1 + cur_handler = handle_single if allOutput else None + + elif interactionType == "FILT": + filt_counter += 1 + cur_handler = handle_filt if allOutput else None + + else: + interactionType = "DUMP" + dump_counter += 1 + cur_handler = handle_dump if allOutput else None + else: + interactionType = "DUMP" + dump_counter += 1 + cur_handler = handle_dump if allOutput else None + dist = None + + ## Write results in right handler + if cur_handler is not None: + if not r1.is_unmapped and not r2.is_unmapped: + ##reorient reads to ease duplicates removal + or1, or2 = get_ordered_reads(r1, r2) + or1_chrom = samfile.getrname(or1.tid) + or2_chrom = samfile.getrname(or2.tid) + + ##reset as tag now that the reads are oriented + r1as = get_read_tag(or1, gtag) + r2as = get_read_tag(or2, gtag) + if gtag is not None: + htag = str(r1as)+"-"+str(r2as) + + ##get fragment name and reorient if necessary + if or1 == r1 and or2 == r2: + or1_resfrag = r1_resfrag + or2_resfrag = r2_resfrag + elif or1 == r2 and or2 == r1: + or1_resfrag = r2_resfrag + or2_resfrag = r1_resfrag + + if or1_resfrag is not None: + or1_fragname = or1_resfrag.value['name'] 
+ + if or2_resfrag is not None: + or2_fragname = or2_resfrag.value['name'] + + cur_handler.write( + or1.qname + "\t" + + or1_chrom + "\t" + + str(get_read_pos(or1)+1) + "\t" + + str(get_read_strand(or1)) + "\t" + + or2_chrom + "\t" + + str(get_read_pos(or2)+1) + "\t" + + str(get_read_strand(or2)) + "\t" + + str(dist) + "\t" + + or1_fragname + "\t" + + or2_fragname + "\t" + + str(or1.mapping_quality) + "\t" + + str(or2.mapping_quality) + "\t" + + str(htag) + "\n") + + elif r2.is_unmapped and not r1.is_unmapped: + if r1_resfrag is not None: + r1_fragname = r1_resfrag.value['name'] + + cur_handler.write( + r1.qname + "\t" + + r1_chrom + "\t" + + str(get_read_pos(r1)+1) + "\t" + + str(get_read_strand(r1)) + "\t" + + "*" + "\t" + + "*" + "\t" + + "*" + "\t" + + "*" + "\t" + + r1_fragname + "\t" + + "*" + "\t" + + str(r1.mapping_quality) + "\t" + + "*" + "\n") + elif r1.is_unmapped and not r2.is_unmapped: + if r2_resfrag is not None: + r2_fragname = r2_resfrag.value['name'] + + cur_handler.write( + r2.qname + "\t" + + "*" + "\t" + + "*" + "\t" + + "*" + "\t" + + r2_chrom + "\t" + + str(get_read_pos(r2)+1) + "\t" + + str(get_read_strand(r2)) + "\t" + + "*" + "\t" + + "*" + "\t" + + r2_fragname + "\t" + + "*" + "\t" + + str(r2.mapping_quality) + "\n") + + ## Keep initial order + if samOut: + r1.tags = r1.tags + [('CT', str(interactionType))] + r2.tags = r2.tags + [('CT', str(interactionType))] + handle_sam.write(r1) + handle_sam.write(r2) + + if (reads_counter % 100000 == 0 and verbose): + print "##", reads_counter + + # Close handler + handle_valid.close() + if allOutput: + handle_de.close() + handle_re.close() + handle_sc.close() + handle_dump.close() + handle_single.close() + handle_filt.close() + + + # Write stats file + handle_stat = open(outputDir + '/' + baseReadsFile + '.RSstat', 'w') + handle_stat.write("## Hi-C processing\n") + handle_stat.write("Valid_interaction_pairs\t" + str(valid_counter) + "\n") + handle_stat.write( + "Valid_interaction_pairs_FF\t" + 
str(valid_counter_FF) + "\n") + handle_stat.write( + "Valid_interaction_pairs_RR\t" + str(valid_counter_RR) + "\n") + handle_stat.write( + "Valid_interaction_pairs_RF\t" + str(valid_counter_RF) + "\n") + handle_stat.write( + "Valid_interaction_pairs_FR\t" + str(valid_counter_FR) + "\n") + handle_stat.write("Dangling_end_pairs\t" + str(de_counter) + "\n") + handle_stat.write("Religation_pairs\t" + str(re_counter) + "\n") + handle_stat.write("Self_Cycle_pairs\t" + str(sc_counter) + "\n") + handle_stat.write("Single-end_pairs\t" + str(single_counter) + "\n") + handle_stat.write("Filtered_pairs\t" + str(filt_counter) + "\n") + handle_stat.write("Dumped_pairs\t" + str(dump_counter) + "\n") + + ## Write AS report + if gtag is not None: + handle_stat.write("## ======================================\n") + handle_stat.write("## Allele specific information\n") + handle_stat.write("Valid_pairs_from_ref_genome_(1-1)\t" + str(G1G1_ascounter) + "\n") + handle_stat.write("Valid_pairs_from_ref_genome_with_one_unassigned_mate_(0-1/1-0)\t" + str(UG1_ascounter+G1U_ascounter) + "\n") + handle_stat.write("Valid_pairs_from_alt_genome_(2-2)\t" + str(G2G2_ascounter) + "\n") + handle_stat.write("Valid_pairs_from_alt_genome_with_one_unassigned_mate_(0-2/2-0)\t" + str(UG2_ascounter+G2U_ascounter) + "\n") + handle_stat.write("Valid_pairs_from_alt_and_ref_genome_(1-2/2-1)\t" + str(G1G2_ascounter+G2G1_ascounter) + "\n") + handle_stat.write("Valid_pairs_with_both_unassigned_mated_(0-0)\t" + str(UU_ascounter) + "\n") + handle_stat.write("Valid_pairs_with_at_least_one_conflicting_mate_(3-)\t" + str(CF_ascounter) + "\n") + + handle_stat.close() + + if samOut: + samfile.close() + diff --git a/bin/mergeSAM.py b/bin/mergeSAM.py new file mode 100755 index 0000000..fdf0c67 --- /dev/null +++ b/bin/mergeSAM.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python + +## HiC-Pro +## Copyright (c) 2015 Institut Curie +## Author(s): Nicolas Servant, Eric Viara +## Contact: nicolas.servant@curie.fr +## This software is 
distributed without any guarantee under the terms of the BSD-3 licence. +## See the LICENCE file for details + + +""" +Script to pair 2 SAM/BAM files into one PE BAM +- On 03/05/16 Ferhat made changes starting from ~/bin/HiC-Pro_2.7.2b/scripts/mergeSAM.py +to make singletons possible to be reported +""" + +import getopt +import sys +import os +import re +import pysam +from itertools import izip + +def usage(): + """Usage function""" + print "Usage : python mergeSAM.py" + print "-f/--forward <forward read mapped file>" + print "-r/--reverse <reverse read mapped file>" + print "[-o/--output] <Output file. Default is stdin>" + print "[-s/--single] <report singleton>" + print "[-m/--multi] <report multiple hits>" + print "[-q/--qual] <minimum reads mapping quality>" + print "[-t/--stat] <generate a stat file>" + print "[-v/--verbose] <Verbose>" + print "[-h/--help] <Help>" + return + + +def get_args(): + """Get argument""" + try: + opts, args = getopt.getopt( + sys.argv[1:], + "f:r:o:q:smtvh", + ["forward=", + "reverse=", + "output=", "qual=", + "single", "multi", "stat", "verbose", "help"]) + except getopt.GetoptError: + usage() + sys.exit(-1) + return opts + + +def is_unique_bowtie2(read): + ret = False + if not read.is_unmapped and read.has_tag('AS'): + if read.has_tag('XS'): + primary = read.get_tag('AS') + secondary = read.get_tag('XS') + if (primary > secondary): + ret = True + else: + ret = True + + return ret + +## Remove everything after "/" or " " in read's name +def get_read_name(read): + name = read.qname + #return name.split("/",1)[0] + return re.split('/| ', name)[0] + +def sam_flag(read1, read2, hr1, hr2): + + f1 = read1.flag + f2 = read2.flag + + if r1.is_unmapped == False: + r1_chrom = hr1.getrname(r1.tid) + else: + r1_chrom="*" + if r2.is_unmapped == False: + r2_chrom = hr2.getrname(r2.tid) + else: + r2_chrom="*" + + + ##Relevant bitwise flags (flag in an 11-bit binary number) + ##1 The read is one of a pair + ##2 The alignment is one end of a proper 
paired-end alignment + ##4 The read has no reported alignments + ##8 The read is one of a pair and has no reported alignments + ##16 The alignment is to the reverse reference strand + ##32 The other mate in the paired-end alignment is aligned to the reverse reference strand + ##64 The read is the first (#1) mate in a pair + ##128 The read is the second (#2) mate in a pair + + ##The reads were mapped as single-end data, so should expect flags of + ##0 (map to the '+' strand) or 16 (map to the '-' strand) + ##Output example: a paired-end read that aligns to the reverse strand + ##and is the first mate in the pair will have flag 83 (= 64 + 16 + 2 + 1) + + if f1 & 0x4: + f1 = f1 | 0x8 + + if f2 & 0x4: + f2 = f2 | 0x8 + + if (not (f1 & 0x4) and not (f2 & 0x4)): + ##The flag should now indicate this is paired-end data + f1 = f1 | 0x1 + f1 = f1 | 0x2 + f2 = f2 | 0x1 + f2 = f2 | 0x2 + + + ##Indicate if the pair is on the reverse strand + if f1 & 0x10: + f2 = f2 | 0x20 + + if f2 & 0x10: + f1 = f1 | 0x20 + + ##Is this first or the second pair? + f1 = f1 | 0x40 + f2 = f2 | 0x80 + + ##Insert the modified bitwise flags into the reads + read1.flag = f1 + read2.flag = f2 + + ##Determine the RNEXT and PNEXT values (i.e. 
the positional values of a read's pair) + #RNEXT + if r1_chrom == r2_chrom: + read1.rnext = r1.tid + read2.rnext = r1.tid + else: + read1.rnext = r2.tid + read2.rnext = r1.tid + + #PNEXT + read1.pnext = read2.pos + read2.pnext = read1.pos + + return(read1, read2) + + + +if __name__ == "__main__": + ## Read command line arguments + opts = get_args() + inputFile = None + outputFile = None + mapq = None + report_single = False + report_multi = False + verbose = False + stat = False + output = "-" + + if len(opts) == 0: + usage() + sys.exit() + + for opt, arg in opts: + if opt in ("-h", "--help"): + usage() + sys.exit() + elif opt in ("-f", "--forward"): + R1file = arg + elif opt in ("-r", "--reverse"): + R2file = arg + elif opt in ("-o", "--output"): + output = arg + elif opt in ("-q", "--qual"): + mapq = arg + elif opt in ("-s", "--single"): + report_single = True + elif opt in ("-m", "--multi"): + report_multi = True + elif opt in ("-t", "--stat"): + stat = True + elif opt in ("-v", "--verbose"): + verbose = True + else: + assert False, "unhandled option" + + ## Verbose mode + if verbose: + print "## mergeBAM.py" + print "## forward=", R1file + print "## reverse=", R2file + print "## output=", output + print "## min mapq=", mapq + print "## report_single=", report_single + print "## report_multi=", report_multi + print "## verbose=", verbose + + ## Initialize variables + tot_pairs_counter = 0 + multi_pairs_counter = 0 + uniq_pairs_counter = 0 + unmapped_pairs_counter = 0 + lowq_pairs_counter = 0 + multi_singles_counter = 0 + uniq_singles_counter = 0 + lowq_singles_counter = 0 + + #local_counter = 0 + paired_reads_counter = 0 + singleton_counter = 0 + reads_counter = 0 + r1 = None + r2 = None + + ## Reads are 0-based too (for both SAM and BAM format) + ## Loop on all reads + if verbose: + print "## Merging forward and reverse tags ..." 
+ + with pysam.Samfile(R1file, "rb") as hr1, pysam.Samfile(R2file, "rb") as hr2: + if output == "-": + outfile = pysam.AlignmentFile(output, "w", template=hr1) + else: + outfile = pysam.AlignmentFile(output, "wb", template=hr1) + for r1, r2 in izip(hr1.fetch(until_eof=True), hr2.fetch(until_eof=True)): + reads_counter +=1 + + #print r1 + #print r2 + #print hr1.getrname(r1.tid) + #print hr2.getrname(r2.tid) + + if (reads_counter % 1000000 == 0 and verbose): + print "##", reads_counter + + if get_read_name(r1) == get_read_name(r2): + + ## both unmapped + if r1.is_unmapped == True and r2.is_unmapped == True: + unmapped_pairs_counter += 1 + continue + + ## both mapped + elif r1.is_unmapped == False and r2.is_unmapped == False: + ## quality + if mapq != None and (r1.mapping_quality < int(mapq) or r2.mapping_quality < int(mapq)): + lowq_pairs_counter += 1 + continue + + ## Unique mapping + if is_unique_bowtie2(r1) == True and is_unique_bowtie2(r2) == True: + uniq_pairs_counter += 1 + else: + multi_pairs_counter += 1 + if report_multi == False: + continue + # one end mapped, other is not + else: + singleton_counter += 1 + if report_single == False: + continue + if r1.is_unmapped == False: ## first end is mapped, second is not + ## quality + if mapq != None and (r1.mapping_quality < int(mapq)): + lowq_singles_counter += 1 + continue + ## Unique mapping + if is_unique_bowtie2(r1) == True: + uniq_singles_counter += 1 + else: + multi_singles_counter += 1 + if report_multi == False: + continue + else: ## second end is mapped, first is not + ## quality + if mapq != None and (r2.mapping_quality < int(mapq)): + lowq_singles_counter += 1 + continue + ## Unique mapping + if is_unique_bowtie2(r2) == True: + uniq_singles_counter += 1 + else: + multi_singles_counter += 1 + if report_multi == False: + continue + + tot_pairs_counter += 1 + (r1, r2) = sam_flag(r1,r2, hr1, hr2) + + #print hr1.getrname(r1.tid) + #print hr2.getrname(r2.tid) + #print r1 + #print r2 + ## Write output + 
outfile.write(r1) + outfile.write(r2) + + else: + print "Forward and reverse reads not paired. Check that BAM files have the same read names and are sorted." + sys.exit(1) + + if stat: + if output == '-': + statfile = "pairing.stat" + else: + statfile = re.sub('\.bam$', '.pairstat', output) + handle_stat = open(statfile, 'w') + + handle_stat.write("Total_pairs_processed\t" + str(reads_counter) + "\t" + str(round(float(reads_counter)/float(reads_counter)*100,3)) + "\n") + handle_stat.write("Unmapped_pairs\t" + str(unmapped_pairs_counter) + "\t" + str(round(float(unmapped_pairs_counter)/float(reads_counter)*100,3)) + "\n") + handle_stat.write("Low_qual_pairs\t" + str(lowq_pairs_counter) + "\t" + str(round(float(lowq_pairs_counter)/float(reads_counter)*100,3)) + "\n") + handle_stat.write("Unique_paired_alignments\t" + str(uniq_pairs_counter) + "\t" + str(round(float(uniq_pairs_counter)/float(reads_counter)*100,3)) + "\n") + handle_stat.write("Multiple_pairs_alignments\t" + str(multi_pairs_counter) + "\t" + str(round(float(multi_pairs_counter)/float(reads_counter)*100,3)) + "\n") + handle_stat.write("Pairs_with_singleton\t" + str(singleton_counter) + "\t" + str(round(float(singleton_counter)/float(reads_counter)*100,3)) + "\n") + handle_stat.write("Low_qual_singleton\t" + str(lowq_singles_counter) + "\t" + str(round(float(lowq_singles_counter)/float(reads_counter)*100,3)) + "\n") + handle_stat.write("Unique_singleton_alignments\t" + str(uniq_singles_counter) + "\t" + str(round(float(uniq_singles_counter)/float(reads_counter)*100,3)) + "\n") + handle_stat.write("Multiple_singleton_alignments\t" + str(multi_singles_counter) + "\t" + str(round(float(multi_singles_counter)/float(reads_counter)*100,3)) + "\n") + handle_stat.write("Reported_pairs\t" + str(tot_pairs_counter) + "\t" + str(round(float(tot_pairs_counter)/float(reads_counter)*100,3)) + "\n") + handle_stat.close() + + hr1.close() + hr2.close() + outfile.close() + diff --git a/bin/scrape_software_versions.py 
b/bin/scrape_software_versions.py index 0e98e07..4a1747d 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -7,14 +7,10 @@ import re regexes = { 'nf-core/hic': ['v_pipeline.txt', r"(\S+)"], 'Nextflow': ['v_nextflow.txt', r"(\S+)"], - 'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"], - 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], } results = OrderedDict() results['nf-core/hic'] = '<span style="color:#999999;\">N/A</span>' results['Nextflow'] = '<span style="color:#999999;\">N/A</span>' -results['FastQC'] = '<span style="color:#999999;\">N/A</span>' -results['MultiQC'] = '<span style="color:#999999;\">N/A</span>' # Search each file using its regex for k, v in regexes.items(): diff --git a/bin/src/build_matrix.cpp b/bin/src/build_matrix.cpp new file mode 100644 index 0000000..e366d5b --- /dev/null +++ b/bin/src/build_matrix.cpp @@ -0,0 +1,1037 @@ +// HiC-Pro +// Copyright 2015 Institut Curie +// Author(s): Eric Viara +// Contact: nicolas.servant@curie.fr +// This software is distributed without any guarantee under the terms of the BSD-3 License + +#include <iostream> +#include <iomanip> +#include <fstream> +#include <sstream> +#include <unordered_map> +#include <map> +#include <vector> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <math.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/stat.h> + + +static const int SPARSE_FMT = 0x1; +static const int BED_FMT = 0x2; +static const char* prog; +static bool progress = false; +static bool detail_progress = false; +static bool quiet = false; + +static bool NO_DICHO = getenv("NO_DICHO") != NULL; + +typedef unsigned int chrsize_t; + +const std::string VERSION = "1.2 [2015-10-20]"; + +const static chrsize_t BIN_NOT_FOUND = (chrsize_t)-1; + +class AxisChromosome; + +static bool is_empty_line(const char* buffer) +{ + while (char c = *buffer++) { + if (c != ' ' || c != '\n' || c != '\t') { + return false; + } + } + return true; +} + +static int 
bed_line_parse(char* buffer, char chr[], chrsize_t& start, chrsize_t& end, const std::string& bedfile, size_t line_num) +{ + if (sscanf(buffer, "%s %u %u", chr, &start, &end) != 3) { + std::cerr << "bed file \"" << bedfile << "\" at line #" << line_num << " format error\n"; + return 1; + } + return 0; +} + +struct Interval { + chrsize_t start; + chrsize_t end; + + Interval(chrsize_t start = 0, chrsize_t end = 0) : start(start), end(end) { } +}; + +class ChrRegions { + + std::vector<std::string> chr_v; + std::map<std::string, std::vector<Interval>* > intervals; + +public: + ChrRegions() { } + + int readBedfile(const std::string& bedfile) { + std::ifstream ifs(bedfile.c_str()); + if (ifs.bad() || ifs.fail()) { + std::cerr << prog << " cannot open bed file: " << bedfile << " for reading\n"; + return 1; + } + char buffer[4096]; + size_t line_num = 0; + chrsize_t lastend = 0; + char lastchr[2048] = {0}; + while (!ifs.eof()) { + ifs.getline(buffer, sizeof(buffer)-1); + line_num++; + if (is_empty_line(buffer)) { + continue; + } + chrsize_t start = 0; + chrsize_t end = 0; + char chr[2048]; + if (bed_line_parse(buffer, chr, start, end, bedfile, line_num)) { + return 1; + } + if (intervals.find(chr) == intervals.end()) { + intervals[chr] = new std::vector<Interval>(); + chr_v.push_back(chr); + } + /* + if (lastend != 0 && !strcmp(lastchr, chr) && start != lastend) { + std::cerr << "warning: discontinuous segment for chromosome " << chr << " at position " << start << " " << end << std::endl; + } + */ + if (*lastchr && strcmp(lastchr, chr)) { + lastend = 0; + } + + if (lastend != 0 && start < lastend) { + std::cerr << "error: bedfile not sorted at line #" << line_num << std::endl; + exit(1); + } + strcpy(lastchr, chr); + lastend = end; + intervals[chr]->push_back(Interval(start, end)); + if (progress && (line_num % 100000) == 0) { + std::cerr << '.' 
<< std::flush; + } + } + if (progress) { + std::cerr << std::endl; + } + return 0; + } + + void displayBed(std::ostream& ofs, const std::vector<AxisChromosome*>& axis_chr) const { + std::vector<std::string>::const_iterator begin = chr_v.begin(); + std::vector<std::string>::const_iterator end = chr_v.end(); + unsigned int num = 1; + while (begin != end) { + const std::string& chrname = *begin; + std::map<std::string, std::vector<Interval>* >::const_iterator iter = intervals.find(chrname); + assert(iter != intervals.end()); + const std::vector<Interval>* itv_vect = (*iter).second; + std::vector<Interval>::const_iterator itv_begin = itv_vect->begin(); + std::vector<Interval>::const_iterator itv_end = itv_vect->end(); + while (itv_begin != itv_end) { + const Interval& itv = (*itv_begin); + ofs << chrname << '\t' << itv.start << '\t' << itv.end << '\t' << num << '\n'; + if (progress && (num % 100000) == 0) { + std::cerr << '.' << std::flush; + } + num++; + ++itv_begin; + } + ++begin; + } + if (progress) { + std::cerr << std::endl; + } + } + + const std::vector<Interval>* getIntervalsFromChr(const std::string& chr) const { + std::map<std::string, std::vector<Interval>* >::const_iterator iter = intervals.find(chr); + if (iter != intervals.end()) { + return (*iter).second; + } + return NULL; + } +}; + +class Dichotomic { + + int min, max; + const std::vector<Interval>& intervals; + +public: + Dichotomic(const std::vector<Interval>& intervals) : intervals(intervals) { + //min = middle(intervals[0]); + //max = middle(intervals[intervals.size()-1]); + min = 0; + max = intervals.size()-1; + } + + static chrsize_t middle(const Interval& itv) { + return (itv.start+1 + itv.end) / 2; + } + + int find(chrsize_t value) { + int l = min; + int r = max; + int n = 0; + while (l <= r) { + n = (l + r) >> 1; + const Interval& itv = intervals[n]; + if (value >= itv.start+1 && value <= itv.end) { + return n; + } + + int x = middle(itv) - value; + + if (x < 0) { + l = n + 1; + } else { + r = 
n - 1; + } + //std::cout << "l: " << l << '\n'; + //std::cout << "r: " << r << '\n'; + } + + return -1; + } +}; + +class Chromosome { + +private: + static std::unordered_map<std::string, Chromosome*> chr_map; + + void computeSizes(chrsize_t ori_binsize, chrsize_t step, bool binadjust, const ChrRegions* chr_regions); + + std::string name; + + chrsize_t chrsize; + + chrsize_t binsize; + chrsize_t stepsize; + chrsize_t bincount; + + const ChrRegions* chr_regions; + +public: + Chromosome(const std::string& name, chrsize_t chrsize, chrsize_t ori_binsize, chrsize_t step, bool binadjust, const ChrRegions* chr_regions) : name(name), chrsize(chrsize), chr_regions(chr_regions) { + computeSizes(ori_binsize, step, binadjust, chr_regions); + assert(chr_map.find(name) == chr_map.end()); + chr_map[name] = this; + } + + void adjustBinsize(chrsize_t ori_binsize, const chrsize_t step); + + const std::string& getName() const {return name;} + chrsize_t getChrsize() const {return chrsize;} + chrsize_t getBinsize() const {return binsize;} + chrsize_t getStepsize() const {return stepsize;} + chrsize_t getBincount() const {return bincount;} + + const ChrRegions* getChrRegions() const {return chr_regions;} + + static chrsize_t getCount() { + return chr_map.size(); + } + + static Chromosome* getByName(const std::string& name) { + return chr_map[name]; + } +}; + +class AxisChromosome { + int idx; // really needed ? 
+ const Chromosome* chr; + chrsize_t binstart; + chrsize_t binend; + +public: + AxisChromosome(int binoffset, const Chromosome* chr, const AxisChromosome* lastAxisChr) : chr(chr) { + if (lastAxisChr != NULL) { + binstart = lastAxisChr->getBinend(); + } else { + binstart = binoffset; + } + binend = binstart + chr->getBincount(); + /* + if (verbose) { + std::cerr << "AxisChromosome: " << chr->getName() << " " << binstart << " " << binend << " " << chr->getBincount() << std::endl; + } + */ + } + + chrsize_t getBinstart() const {return binstart;} + chrsize_t getBinend() const {return binend;} + chrsize_t getChrsize() const {return chr->getChrsize();} + chrsize_t getBinsize() const {return chr->getBinsize();} + chrsize_t getStepsize() const {return chr->getStepsize();} + chrsize_t getBincount() const {return chr->getBincount();} + + const Chromosome* getChromosome() const {return chr;} + + chrsize_t assign_bin(const std::string& org, chrsize_t start) const { + const ChrRegions* chr_regions = chr->getChrRegions(); + if (chr_regions != NULL) { + const std::vector<Interval>* intervals = chr_regions->getIntervalsFromChr(chr->getName()); + assert(intervals != NULL); + + if (!NO_DICHO) { + Dichotomic dicho(*intervals); + int where = dicho.find(start); + if (where < 0) { + if (!quiet) { + std::cerr << "warning: no bin at position " << chr->getName() << ":" << start << std::endl; + } + return BIN_NOT_FOUND; + } + return where + getBinstart(); + } + + std::vector<Interval>::const_iterator begin = intervals->begin(); + std::vector<Interval>::const_iterator end = intervals->end(); + + chrsize_t binidx = 1; + while (begin != end) { + const Interval& itv = *begin; + if (start >= itv.start+1 && start <= itv.end) { + break; + } + ++binidx; + ++begin; + } + + return binidx + getBinstart() - 1; + } + + int loc = (int)start; + int binsize = getBinsize(); + int stepsize = getStepsize(); + int cur_binidx = 1 + ceil((double)(loc-binsize)/stepsize); + int cur_binbeg = stepsize * 
(cur_binidx-1)+1; + int cur_binend = cur_binbeg + binsize-1; + int chrsize = getChrsize(); + if (cur_binend > chrsize) { + cur_binend = chrsize; + } + return cur_binidx + getBinstart() - 1; + } +}; + +class Matrix { + + std::vector<AxisChromosome*> axis_chr_abs; + std::vector<AxisChromosome*> axis_chr_ord; + std::unordered_map<std::string, AxisChromosome*> axis_chr_abs_map; + std::unordered_map<std::string, AxisChromosome*> axis_chr_ord_map; + + std::map<chrsize_t, std::map<chrsize_t, chrsize_t> > mat; + + void addAxisChromosome(const std::vector<const Chromosome*>& chr_v, std::vector<AxisChromosome*>& axis_chr, std::unordered_map<std::string, AxisChromosome*>& axis_chr_map); + + const AxisChromosome* getAxisChromosome(const std::string& chrname, const std::unordered_map<std::string, AxisChromosome*>& axis_chr_map) const { + std::unordered_map<std::string, AxisChromosome*>::const_iterator iter = axis_chr_map.find(chrname); + if (iter == axis_chr_map.end()) { + return NULL; + } + return (*iter).second; + } + + void displayBed(std::ostream& ofs, const std::vector<AxisChromosome*>& axis_chr) const { + std::vector<AxisChromosome*>::const_iterator begin = axis_chr.begin(); + std::vector<AxisChromosome*>::const_iterator end = axis_chr.end(); + while (begin != end) { + const AxisChromosome* axis_chr = *begin; + const std::string& name = axis_chr->getChromosome()->getName(); + chrsize_t binstart = axis_chr->getBinstart(); + chrsize_t binend = axis_chr->getBinend(); + chrsize_t binsize = axis_chr->getBinsize(); + chrsize_t chrsize = axis_chr->getChrsize(); + binend -= binstart; + for (chrsize_t bin = 0; bin < binend; ++bin) { + // bed are 0-based begin, 1-based end + chrsize_t beg = bin * binsize; + chrsize_t end = beg + binsize - 1; + if (end > chrsize) { + end = chrsize-1; + } + ofs << name << '\t' << beg << '\t' << (end+1) << '\t' << (bin+binstart) << '\n'; + } + ++begin; + } + } + + int binoffset; + +public: + Matrix(int binoffset) : binoffset(binoffset) {} + + void 
addXAxisChromosome(const std::vector<const Chromosome*>& chr_v); + void addYAxisChromosome(const std::vector<const Chromosome*>& chr_v); + + const AxisChromosome* getXAxisChromosome(const std::string& chrname) const { + return getAxisChromosome(chrname, axis_chr_abs_map); + } + + const AxisChromosome* getYAxisChromosome(const std::string& chrname) const { + return getAxisChromosome(chrname, axis_chr_ord_map); + } + + void add(chrsize_t abs_bin, chrsize_t ord_bin) { + std::map<chrsize_t, std::map<chrsize_t, chrsize_t> >::iterator iter = mat.find(abs_bin); + if (iter == mat.end()) { + mat[abs_bin] = std::map<chrsize_t, chrsize_t>(); + mat[abs_bin][ord_bin] = 1; + } else { + (*iter).second[ord_bin]++; + } + } + + void displayMatrix(std::ostream& ofs) const { + std::map<chrsize_t, std::map<chrsize_t, chrsize_t> >::const_iterator begin = mat.begin(); + std::map<chrsize_t, std::map<chrsize_t, chrsize_t> >::const_iterator end = mat.end(); + size_t line_total = 0; + if (progress) { + while (begin != end) { + const std::map<chrsize_t, chrsize_t>& line = (*begin).second; + line_total += line.size(); + ++begin; + } + begin = mat.begin(); + } + + size_t line_cnt = 1; + if (progress) { + std::cerr << "\n=================\n"; + std::cerr << " Dumping matrix\n"; + std::cerr << "=================\n\n"; + } + size_t modulo = line_total / 1000; + while (begin != end) { + chrsize_t abs = (*begin).first; + const std::map<chrsize_t, chrsize_t>& line = (*begin).second; + std::map<chrsize_t, chrsize_t>::const_iterator bb = line.begin(); + std::map<chrsize_t, chrsize_t>::const_iterator ee = line.end(); + while (bb != ee) { + if (progress && (line_cnt % modulo) == 0) { + double percent = (double(line_cnt)/line_total)*100; + std::cerr << "" << percent << "% " << line_cnt << " / " << line_total << std::endl; + } + ofs << abs << '\t' << (*bb).first << '\t' << (*bb).second << '\n'; + line_cnt++; + ++bb; + } + ++begin; + } + } + + void displayXBed(std::ostream& ofs) const { + displayBed(ofs, 
axis_chr_abs); + } + + void displayYBed(std::ostream& ofs) const { + displayBed(ofs, axis_chr_ord); + } + + const std::vector<AxisChromosome*>& getXAxisChromosomes() {return axis_chr_abs;} + const std::vector<AxisChromosome*>& getYAxisChromosomes() {return axis_chr_ord;} +}; + +void Matrix::addAxisChromosome(const std::vector<const Chromosome*>& chr_v, std::vector<AxisChromosome*>& axis_chr, std::unordered_map<std::string, AxisChromosome*>& axis_chr_map) +{ + std::vector<const Chromosome*>::const_iterator begin = chr_v.begin(); + std::vector<const Chromosome*>::const_iterator end = chr_v.end(); + + const AxisChromosome* lastAxisChr = NULL; + while (begin != end) { + const Chromosome* chr = *begin; + AxisChromosome* axisChr = new AxisChromosome(binoffset, chr, lastAxisChr); + axis_chr.push_back(axisChr); + axis_chr_map[chr->getName()] = axisChr; + lastAxisChr = axisChr; + ++begin; + } +} + +void Matrix::addXAxisChromosome(const std::vector<const Chromosome*>& chr_v) +{ + addAxisChromosome(chr_v, axis_chr_abs, axis_chr_abs_map); +} + +void Matrix::addYAxisChromosome(const std::vector<const Chromosome*>& chr_v) +{ + addAxisChromosome(chr_v, axis_chr_ord, axis_chr_ord_map); +} + +std::unordered_map<std::string, Chromosome*> Chromosome::chr_map; + +enum Format { + SPARSE_IND_FMT = SPARSE_FMT, + SPARSE_BED_FMT = SPARSE_FMT|BED_FMT, + EXPANDED_FMT = 0x4 +}; + +void Chromosome::adjustBinsize(chrsize_t ori_binsize, const chrsize_t step) +{ + bincount = 1 + (chrsize_t)floor( (double)(chrsize-ori_binsize) / (ori_binsize/step)); + binsize = chrsize / bincount; + stepsize = binsize / step; +} + +void Chromosome::computeSizes(chrsize_t ori_binsize, chrsize_t step, bool binadjust, const ChrRegions* chr_regions) +{ + if (NULL != chr_regions) { + const std::vector<Interval>* intervals = chr_regions->getIntervalsFromChr(name); + assert(intervals != NULL); + bincount = intervals->size(); + /* + if (verbose) { + std::cerr << name << " bincount: " << bincount << std::endl; + } + */ + } 
else { + if (chrsize < ori_binsize) { + binsize = chrsize; + stepsize = chrsize; + bincount = 1; + } else if (binadjust) { + adjustBinsize(ori_binsize, step); + } else { + binsize = ori_binsize; + stepsize = (chrsize_t)floor(ori_binsize/step); + chrsize_t remainder = (chrsize - ori_binsize) % stepsize; + chrsize_t tmp_bincount = 1 + (chrsize_t)floor(chrsize-ori_binsize)/stepsize; + bincount = remainder > 0 ? tmp_bincount+1 : tmp_bincount; + } + /* + if (verbose) { + std::cerr << name << " sizes: " << chrsize << " " << binsize << " " << stepsize << " " << bincount << std::endl; + } + */ + } +} + +static int usage(int ret = 1) +{ + std::cerr << "\nusage: " << prog << " --binsize BINSIZE|--binfile --chrsizes FILE --ifile FILE\n"; + std::cerr << " --oprefix PREFIX [--binadjust] [--step STEP] [--binoffset OFFSET]\n"; + std::cerr << " [--matrix-format asis|upper|lower|complete][--chrA CHR... --chrB CHR...] [--quiet] [--progress] [--detail-progress]\n"; + std::cerr << "\nusage: " << prog << " --version\n"; + std::cerr << "\nusage: " << prog << " --help\n"; + return ret; +} + +static int help() +{ + (void)usage(); + std::cerr << "\nOPTIONS\n\n"; + std::cerr << " --version : display version\n"; + std::cerr << " --binsize BINSIZE : bin size\n"; + std::cerr << " --binfile BEDFILE : bed file containing bins (chr start end)\n"; + std::cerr << " --chrsizes FILE : file containing chromosome sizes\n"; + std::cerr << " --ifile FILE : input interaction file\n"; + std::cerr << " --oprefix PREFIX : output prefix of generated files (matrix and bed)\n"; + std::cerr << " --binadjust : [optional] adjust bin sizes, default is false\n"; + std::cerr << " --step STEP : [optional] step size, default is 1\n"; + std::cerr << " --binoffset OFFSET : [optional] starting bin offset, default is 1\n"; + std::cerr << " --matrix-format FORMAT : [optional] FORMAT may be:\n"; + std::cerr << " - asis: matrix is generated according to input data (default)\n"; + std::cerr << " - upper: only the upper matrix 
is generated\n"; + std::cerr << " - lower: only the lower matrix is generated\n"; + std::cerr << " - complete: generate both parts of the matrix (upper and lower);\n"; + std::cerr << " input data must contain only one part (upper or lower) \n"; + std::cerr << " --chrA CHR : [optional] colon separated list of abscissa chromosomes; default is all chromosomes\n"; + std::cerr << " --chrB CHR : [optional] colon separated list of ordinate chromosomes; default is all chromosomes\n"; + std::cerr << " --quiet : do not display any warning\n"; + std::cerr << " --progress : display progress\n"; + std::cerr << " --detail-progress : display detail progress (needs preliminary steps consuming time)\n"; + return -1; +} + +enum MatrixFormat { + ASIS_MATRIX = 1, + UPPER_MATRIX, + LOWER_MATRIX, + COMPLETE_MATRIX +}; + +static int get_options(int argc, char* argv[], chrsize_t& binsize, const char*& binfile, const char*& chrsize_file, const char*& ifile, const char*& oprefix, Format& format, std::string& bed_prefix, bool& binadjust, MatrixFormat& matrix_format, chrsize_t& step, bool& whole_genome, int& binoffset, const char*& chrA, const char*& chrB) +{ + prog = argv[0]; + for (int ac = 1; ac < argc; ++ac) { + const char* opt = argv[ac]; + if (*opt == '-') { + if (!strcmp(opt, "--binadjust")) { + binadjust = true; + } else if (!strcmp(opt, "--version")) { + std::cout << "build_matrix version " << VERSION << "\n"; + exit(0); + } else if (!strcmp(opt, "--progress")) { + progress = true; + } else if (!strcmp(opt, "--quiet")) { + quiet = true; + } else if (!strcmp(opt, "--detail-progress")) { + progress = true; + detail_progress = true; + } else if (!strcmp(opt, "--matrix-format")) { + if (ac == argc-1) { + return usage(); + } + std::string matrix_format_str = argv[++ac]; + if (matrix_format_str == "asis") { + matrix_format = ASIS_MATRIX; + } else if (matrix_format_str == "upper") { + matrix_format = UPPER_MATRIX; + } else if (matrix_format_str == "lower") { + matrix_format = LOWER_MATRIX; 
+ } else if (matrix_format_str == "complete") { + matrix_format = COMPLETE_MATRIX; + } else { + return usage(); + } + } else if (!strcmp(opt, "--step")) { + if (ac == argc-1) { + return usage(); + } + step = atoi(argv[++ac]); + } else if (!strcmp(opt, "--binfile")) { + if (ac == argc-1) { + return usage(); + } + binfile = argv[++ac]; + } else if (!strcmp(opt, "--binsize")) { + if (ac == argc-1) { + return usage(); + } + binsize = atoi(argv[++ac]); + } else if (!strcmp(opt, "--binoffset")) { + if (ac == argc-1) { + return usage(); + } + binoffset = atoi(argv[++ac]); + } else if (!strcmp(opt, "--ifile")) { + if (ac == argc-1) { + return usage(); + } + ifile = argv[++ac]; + } else if (!strcmp(opt, "--oprefix")) { + if (ac == argc-1) { + return usage(); + } + oprefix = argv[++ac]; + } else if (!strcmp(opt, "--chrsizes")) { + if (ac == argc-1) { + return usage(); + } + chrsize_file = argv[++ac]; + } else if (!strcmp(opt, "--chrA")) { + if (ac == argc-1) { + return usage(); + } + chrA = argv[++ac]; + whole_genome = false; + } else if (!strcmp(opt, "--chrB")) { + if (ac == argc-1) { + return usage(); + } + chrB = argv[++ac]; + whole_genome = false; + } else if (!strcmp(opt, "--help")) { + return help(); + } else { + std::cerr << '\n' << prog << ": unknown option " << opt << std::endl; + return usage(); + } + } + } + + return 0; +} + +static void split_in_vect(const std::string& str, std::vector<const Chromosome*>& vect) +{ + size_t last_pos = 0; + while (size_t pos = str.find(':', last_pos)) { + std::string chrname; + bool last = pos == std::string::npos; + if (last) { + chrname = str.substr(last_pos); + } else { + chrname = str.substr(last_pos, pos-last_pos); + } + const Chromosome* chr = Chromosome::getByName(chrname); + if (!chr) { + std::cerr << prog << ": unknown chromosome " << chrname << std::endl; + exit(1); + } + vect.push_back(chr); + if (last) { + break; + } + last_pos = pos+1; + } +} + +static int interaction_parse(char* buffer, char*& lchr, chrsize_t& lstart, 
char*& rchr, chrsize_t& rstart) +{ + char c; + char* str; + while ((c = *buffer++) != 0) { + if (c == '\t') { + lchr = buffer; + break; + } + } + while ((c = *buffer) != 0) { + if (c == '\t') { + *buffer++ = 0; + str = buffer; + break; + } + buffer++; + } + + while ((c = *buffer) != 0) { + if (c == '\t') { + *buffer++ = 0; + lstart = atoi(str); + break; + } + buffer++; + } + + while ((c = *buffer++) != 0) { + if (c == '\t') { + rchr = buffer; + break; + } + } + + while ((c = *buffer) != 0) { + if (c == '\t') { + *buffer++ = 0; + str = buffer; + break; + } + buffer++; + } + + while ((c = *buffer) != 0) { + if (c == '\t') { + *buffer++ = 0; + rstart = atoi(str); + break; + } + buffer++; + } + + return 0; +} + +static char p_buffer[512000]; + +static int build_matrix_init(Matrix& matrix, const char* ifile, std::ifstream& ifs, const std::string& oprefix, std::ofstream& matfs, std::ofstream& xbedfs, std::ofstream& ybedfs, const char* chrsize_file, bool whole_genome, const char* chrA, const char* chrB, chrsize_t ori_binsize, const char* binfile, chrsize_t step, bool binadjust, ChrRegions*& chr_regions, size_t& line_total) +{ + ifs.open(ifile); + if (ifs.bad() || ifs.fail()) { + std::cerr << prog << " cannot open interaction file: " << ifile << " for reading\n"; + return 1; + } + + if (detail_progress) { + if (progress) { + std::cerr << "\n======================================\n"; + std::cerr << " Getting information for progress bar\n"; + std::cerr << "======================================\n\n"; + } + std::cerr << std::setprecision(2) << std::fixed; + int fd = open(ifile, O_RDONLY); + struct stat st; + assert(fstat(fd, &st) == 0); + assert(fd >= 0); + int nn; + int cnt = 1; + while ((nn = read(fd, p_buffer, sizeof(p_buffer))) > 0) { + const char *p = p_buffer; + while (nn-- > 0) { + if (*p++ == '\n') { + line_total++; + } + } + if ((cnt % 200) == 0) { + std::cerr << '.' 
<< std::flush; + } + cnt++; + } + std::cerr << std::endl; + close(fd); + } + + std::ifstream chrsizefs; + chrsizefs.open(chrsize_file); + if (chrsizefs.bad() || chrsizefs.fail()) { + std::cerr << prog << " cannot open chrsizes file: " << chrsize_file << " for reading\n"; + return 1; + } + + std::string matfile = oprefix + ".matrix"; + matfs.open(matfile); + if (matfs.bad() || matfs.fail()) { + std::cerr << prog << " cannot open file: " << matfile << " for writing\n"; + return 1; + } + + std::string xbedfile = oprefix + "_abs.bed"; + xbedfs.open(xbedfile); + if (xbedfs.bad() || xbedfs.fail()) { + std::cerr << prog << " cannot open file: " << xbedfile << " for writing\n"; + return 1; + } + + std::string ybedfile = oprefix + "_ord.bed"; + if (!whole_genome) { + //std::string xbedlink; + //size_t pos = xbedfile.rfind('/'); + //if (pos != std::string::npos) { + // xbedlink = xbedfile.substr(pos+1); + //} else { + // xbedlink = xbedfile; + //} + //unlink(ybedfile.c_str()); + //if (symlink(xbedlink.c_str(), ybedfile.c_str())) { + // std::cerr << prog << " cannot created link: " << ybedfile << "\n"; + // return 1; + //} + //} else { + ybedfs.open(ybedfile); + if (ybedfs.bad() || ybedfs.fail()) { + std::cerr << prog << " cannot open file: " << ybedfile << " for writing\n"; + return 1; + } + } + + chr_regions = NULL; + if (NULL != binfile) { + chr_regions = new ChrRegions(); + if (progress) { + std::cerr << "\n=================\n"; + std::cerr << " Reading binfile\n"; + std::cerr << "=================\n\n"; + } + if (chr_regions->readBedfile(binfile)) { + return 1; + } + } + + std::vector<const Chromosome*> all_chr_v; + while (!chrsizefs.eof()) { + std::string buffer; + getline(chrsizefs, buffer); + + chrsize_t chrsize; + std::istringstream istr(buffer); + std::string name; + istr >> name >> chrsize; + if (!istr.fail()) { + Chromosome* chromosome = new Chromosome(name, chrsize, ori_binsize, step, binadjust, chr_regions); + all_chr_v.push_back(chromosome); + } + } + + 
chrsizefs.close(); + + if (chrA) { + assert(chrB != NULL); + std::vector<const Chromosome*> chrA_v; + std::vector<const Chromosome*> chrB_v; + split_in_vect(chrA, chrA_v); + split_in_vect(chrB, chrB_v); + matrix.addXAxisChromosome(chrA_v); + matrix.addYAxisChromosome(chrB_v); + } else { + matrix.addXAxisChromosome(all_chr_v); + matrix.addYAxisChromosome(all_chr_v); + } + + return 0; +} + +static int build_matrix(int binoffset, chrsize_t ori_binsize, const char* binfile, const char* chrsize_file, const char* ifile, const char* oprefix, Format _dummy_format, const std::string& _dummy_bed_prefix, bool binadjust, MatrixFormat matrix_format, chrsize_t step, bool whole_genome, const char* chrA, const char* chrB) +{ + std::ifstream ifs; + std::ofstream matfs, xbedfs, ybedfs; + + Matrix matrix(binoffset); + ChrRegions *chr_regions = NULL; + size_t line_total = 0; + if (int ret = build_matrix_init(matrix, ifile, ifs, oprefix, matfs, xbedfs, ybedfs, chrsize_file, whole_genome, chrA, chrB, ori_binsize, binfile, step, binadjust, chr_regions, line_total)) { + return ret; + } + + if (progress) { + std::cerr << "\n=================\n"; + std::cerr << " Building matrix\n"; + std::cerr << "=================\n\n"; + } + size_t line_cnt = 1; + size_t line_num = 0; + char buffer[4096]; + std::string lmark, rmark, lorg, rorg; + while (!ifs.eof()) { + ifs.getline(buffer, sizeof(buffer)-1); + line_num++; + if (is_empty_line(buffer)) { + continue; + } + chrsize_t lstart = 0; + chrsize_t rstart = 0; + char* lchr = NULL; + char* rchr = NULL; + interaction_parse(buffer, lchr, lstart, rchr, rstart); + const AxisChromosome* abs_chr = matrix.getXAxisChromosome(lchr); + if (!abs_chr) { + continue; + } + const AxisChromosome* ord_chr = matrix.getYAxisChromosome(rchr); + if (!ord_chr) { + continue; + } + chrsize_t abs_bin = abs_chr->assign_bin(lorg, lstart); + if (abs_bin == BIN_NOT_FOUND) { + continue; + } + chrsize_t ord_bin = ord_chr->assign_bin(rorg, rstart); + if (ord_bin == BIN_NOT_FOUND) { 
+ continue; + } + switch(matrix_format) { + + case ASIS_MATRIX: + matrix.add(abs_bin, ord_bin); + break; + + case UPPER_MATRIX: + if (abs_bin < ord_bin) { + matrix.add(abs_bin, ord_bin); + } else { + matrix.add(ord_bin, abs_bin); + } + break; + + case LOWER_MATRIX: + if (abs_bin > ord_bin) { + matrix.add(abs_bin, ord_bin); + } else { + matrix.add(ord_bin, abs_bin); + } + break; + + case COMPLETE_MATRIX: + matrix.add(abs_bin, ord_bin); + if (abs_bin != ord_bin) { + matrix.add(ord_bin, abs_bin); + } + break; + } + line_cnt++; + if (progress && (line_cnt % 100000) == 0) { + if (detail_progress) { + double percent = (double(line_cnt)/line_total)*100; + std::cerr << "" << percent << "% " << line_cnt << " / " << line_total << std::endl; + } else { + std::cerr << line_cnt << std::endl; + } + } + } + + if (progress) { + std::cerr << "\n==================\n"; + std::cerr << " Dumping bedfiles\n"; + std::cerr << "==================\n\n"; + } + + if (NULL != chr_regions) { + chr_regions->displayBed(xbedfs, matrix.getXAxisChromosomes()); + if (!whole_genome) { + chr_regions->displayBed(ybedfs, matrix.getYAxisChromosomes()); + } + } else { + matrix.displayXBed(xbedfs); + if (!whole_genome) { + matrix.displayYBed(ybedfs); + } + } + matrix.displayMatrix(matfs); + xbedfs.close(); + ybedfs.close(); + matfs.close(); + return 0; +} + +int main(int argc, char* argv[]) +{ + chrsize_t step = 1; + bool binadjust = false; + MatrixFormat matrix_format = ASIS_MATRIX; + chrsize_t binsize = 0; + const char* ifile = NULL; + const char* oprefix = NULL; + const char* chrA = NULL; + const char* chrB = NULL; + const char* chrsize_file = NULL; + const char* binfile = NULL; + bool whole_genome = true; + int binoffset = 1; + std::string bed_prefix; + Format format = SPARSE_BED_FMT; + + if (int ret = get_options(argc, argv, binsize, binfile, chrsize_file, ifile, oprefix, format, bed_prefix, binadjust, matrix_format, step, whole_genome, binoffset, chrA, chrB)) { + if (ret < 0) { + return 0; + } + 
return ret; + } + + if (!binsize && !binfile) { + std::cerr << '\n'; + std::cerr << prog << ": missing --binsize or --binfile option\n"; + return usage(); + } + + if (!chrsize_file) { + std::cerr << '\n'; + std::cerr << prog << ": missing --chrsizes option\n"; + return usage(); + } + + if (!ifile) { + std::cerr << '\n'; + std::cerr << prog << ": missing --ifile option\n"; + return usage(); + } + + if (!oprefix) { + std::cerr << '\n'; + std::cerr << prog << ": missing --oprefix option\n"; + return usage(); + } + + if ((chrA && !chrB) || (!chrA && chrB)) { + std::cerr << '\n'; + std::cerr << prog << ": options --chrA and --chrB must be set simultanously\n"; + return usage(); + } + + if (binfile && binsize) { + std::cerr << '\n'; + std::cerr << prog << ": options --binfile and --binsize cannot be set simultanously\n"; + return usage(); + } + + return build_matrix(binoffset, binsize, binfile, chrsize_file, ifile, oprefix, format, bed_prefix, binadjust, matrix_format, step, whole_genome, chrA, chrB); +} diff --git a/bin/src/cutsite_trimming.cpp b/bin/src/cutsite_trimming.cpp new file mode 100644 index 0000000..ef3fa86 --- /dev/null +++ b/bin/src/cutsite_trimming.cpp @@ -0,0 +1,153 @@ +// HiC-Pro +// Copyright 2015 Institut Curie +// Author(s): Nicolas Servant +// Contact: nicolas.servant@curie.fr +// This software is distributed without any guarantee under the terms of the BSD-3 licence + +// g++ -std=c++0x -o cutsite_trimming cutsite_trimming.cpp +//./cutsite_trimming -fastq fastq -cutsite AGCTT + + +#include <iostream> // std::cout +#include <stdlib.h> +#include <string.h> +#include <vector> +#include <fstream> + +static const char* prog; + +static int usage(int ret=1) +{ + std::cerr << "usage: " << prog << " --fastq FASTQFILE --cutsite CUTSITE --out OUTFILE [--rmuntrim] \n"; + std::cerr << "usage: " << prog << " --help\n"; + return ret; +} + +static int get_options(int argc, char* argv[], std::string& fastqFile, + std::vector<std::string>& cutSites, std::string& 
output, bool& rmuntrim) +{ + prog = argv[0]; + if (argc == 1){ + exit(usage()); + } + for (int ac = 1; ac < argc; ++ac) { + const char* opt = argv[ac]; + if (*opt == '-') { + if (!strcmp(opt, "--fastq")) { + fastqFile = std::string(argv[++ac]); + } else if (!strcmp(opt, "--cutsite")) { + + std::string cutSitesSequence; + cutSitesSequence = std::string(argv[++ac]); + size_t pos = cutSitesSequence.find(","); + size_t begin = 0; + while(pos != std::string::npos){ + cutSites.push_back(cutSitesSequence.substr(begin, pos - begin)); + begin = pos + 1; + pos = cutSitesSequence.find(",", begin + 1); + } + cutSites.push_back(cutSitesSequence.substr(begin, pos)); + + } + else if (!strcmp(opt, "--out")) { + output = std::string(argv[++ac]); + } + else if (!strcmp(opt, "--rmuntrim")) { + rmuntrim = true; + } + }else { + std::cerr << prog << ": unknown option " << opt << std::endl; + return usage(); + } + } + return 0; +} + +static int trim_fastq(std::string& fastqFile, + std::vector<std::string>& cutSites, + std::string& outFile, bool& rmuntrim) +{ + + int trim_count=0; + std::string ID; + std::ifstream ifs (fastqFile); + std::ofstream ofs (outFile); + + if (ifs.is_open()){ + while (getline(ifs, ID)) { + std::string seq; + std::string dummy; + std::string qual; + + getline(ifs, seq); + getline(ifs, dummy); + getline(ifs, qual); + + bool find_pos = false; + size_t pos = std::string::npos; + for (std::vector<std::string>::iterator it = cutSites.begin(); it != cutSites.end(); ++it){ + size_t tmp_pos = seq.find(*it); + if (tmp_pos != std::string::npos) { + // If find_pos is alread True, there is a problem (there are two cut + // sites in the same read).) 
+ if (find_pos == true){ + if(tmp_pos < pos) { + pos = tmp_pos; + } + } else { + find_pos = true; + pos = tmp_pos; + } + } + } + + if (pos != std::string::npos) { + trim_count++; + ofs << ID << '\n'; + ofs << seq.substr(0, pos) << '\n'; + ofs << "+\n"; + ofs << qual.substr(0, pos) << '\n'; + } else { + if (!rmuntrim){ + ofs << ID << '\n'; + ofs << seq << '\n'; + ofs << "+\n"; + ofs << qual << '\n'; + } + } + find_pos = false; + } + }else{ + std::cerr << "Error : Cannot open file : " << fastqFile; + } + return trim_count; +} + +int main(int argc, char* argv[]) +{ + + std::string fastqFile; + std::vector<std::string> cutSites; + std::string outFile; + bool rmuntrim = false; + + int ret = get_options(argc, argv, fastqFile, cutSites, outFile, rmuntrim); + printf("##Fastq file: %s\n", fastqFile.c_str()); + printf("##Restriction sites:\n"); + for(std::vector<std::string>::iterator it = cutSites.begin(); it != cutSites.end(); ++it){ + std::cout << *it << std::endl; + } + printf("##Output File: %s\n", outFile.c_str()); + + if (fastqFile.empty() || cutSites.size() == 0 || outFile.empty()){ + usage(); + exit(ret); + } + + int trim_count=trim_fastq(fastqFile, cutSites, outFile, rmuntrim); + printf("\n##Trimmed reads: %d\n", trim_count); + return(0); + } + + + diff --git a/conf/base.config b/conf/base.config index 23c9e4a..a8413de 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,6 +1,6 @@ /* * ------------------------------------------------- - * nf-core/hic Nextflow base config file + * Nextflow base config file * ------------------------------------------------- * A 'blank slate' config file, appropriate for general * use on most high performace compute environments. 
@@ -13,29 +13,40 @@ process { container = params.container - // TODO nf-core: Check the defaults for all processes - cpus = { check_max( 1 * task.attempt, 'cpus' ) } + cpus = { check_max( 2, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'terminate' } maxRetries = 1 maxErrors = '-1' // Process-specific resource requirements - // TODO nf-core: Customise requirements for specific processes - withName: fastqc { - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + withName:bowtie2_end_to_end { + cpus = { check_max( 2, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } } - withName: multiqc { - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + withName:bowtie2_on_trimmed_reads { + cpus = { check_max( 2, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:merge_mapping_steps { + cpus = { check_max( 1, 'cpus' ) } + memory = { check_max( 20.GB * task.attempt, 'memory' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } + } + withName:trim_reads { + cpus = { check_max (1, 'cpus')} + memory = { check_max( 10.GB * task.attempt, 'memory' ) } + time = { check_max( 5.h * task.attempt, 'time' ) } } } params { // Defaults only, expecting to be overwritten - max_memory = 128.GB - max_cpus = 16 - max_time = 240.h - igenomes_base = 's3://ngi-igenomes/igenomes/' + max_memory = 20.GB + max_cpus = 1 + max_time = 24.h } diff --git a/conf/curie.config b/conf/curie.config new file mode 100644 index 0000000..ab85a2d --- /dev/null +++ b/conf/curie.config @@ -0,0 +1,16 @@ +singularity { + enabled = false +} + +process { + executor = 'pbs' + queue = params.queue + 
//beforeScript = 'export PATH=/bioinfo/pipelines/sandbox/dev/nfcore/rnaseq/modules/conda/envs/nf-core-rnaseq-1.2/bin:$PATH' +} + +params { + clusterOptions = false + max_memory = 128.GB + max_cpus = 4 + max_time = 240.h +} diff --git a/conf/hicpro.config b/conf/hicpro.config new file mode 100644 index 0000000..3eafbd1 --- /dev/null +++ b/conf/hicpro.config @@ -0,0 +1,17 @@ +/* + * ------------------------------------------------- + * Nextflow config file for Genomes paths + * ------------------------------------------------- + * Defines reference genomes + * Can be used by any config that customises the base + * path using $params.genomes_base / --genomes_base + */ + +params { + bwt2_index = '/data/annotations/pipelines/Human/hg19/indexes/bowtie2/hg19' + bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder' + bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder' + restriction_fragment_bed = '/data/users/nservant/Apps/HiC-Pro_annotation/HindIII_resfrag_hg19.bed' + chromosome_size = '/data/users/nservant/Apps/HiC-Pro_annotation/chrom_hg19.sizes' +} + diff --git a/conf/test.config b/conf/test.config index a03678b..e8a8e0e 100644 --- a/conf/test.config +++ b/conf/test.config @@ -13,11 +13,8 @@ params { max_memory = 6.GB max_time = 48.h // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - singleEnd = false readPaths = [ - ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']], - ['SRR389222', ['https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz']] + ['Testdata2', 
['/bioinfo/users/nservant/GIT/HiCPro/test-op/hSRR400264_00_R1.fastq.gz', '/bioinfo/users/nservant/GIT/HiCPro/test-op/hSRR400264_00_R1.fastq.gz']], + ['Testdata1', ['/bioinfo/users/nservant/GIT/HiCPro/test-op/hSRR400264_01_R1.fastq.gz', '/bioinfo/users/nservant/GIT/HiCPro/test-op/hSRR400264_01_R1.fastq.gz']], ] } diff --git a/main.nf b/main.nf index 97a07de..49b3d61 100644 --- a/main.nf +++ b/main.nf @@ -23,6 +23,9 @@ def helpMessage() { nf-core/hic v${workflow.manifest.version} ======================================================= + This pipeline is a Nextflow version of the HiC-Pro pipeline for Hi-C data processing. + See https://github.com/nservant/HiC-Pro for details. + Usage: The typical command for running the pipeline is as follows: @@ -30,13 +33,13 @@ def helpMessage() { nextflow run nf-core/hic --reads '*_R{1,2}.fastq.gz' -profile docker Mandatory arguments: - --reads Path to input data (must be surrounded with quotes) + --readsPath Path to input data (must be surrounded with quotes) --genome Name of iGenomes reference -profile Configuration profile to use. Can use multiple (comma separated) Available: conda, docker, singularity, awsbatch, test and more. Options: - --singleEnd Specifies that the input is single end reads + References If not specified in the configuration file or you wish to overwrite any of the references. --fasta Path to Fasta reference @@ -52,7 +55,7 @@ def helpMessage() { """.stripIndent() } -/* +/********************************************************** * SET UP CONFIGURATION VARIABLES */ @@ -64,17 +67,11 @@ if (params.help){ // TODO nf-core: Add any reference files that are needed // Configurable reference genomes -fasta = params.genome ? 
params.genomes[ params.genome ].fasta ?: false : false -if ( params.fasta ){ - fasta = file(params.fasta) - if( !fasta.exists() ) exit 1, "Fasta file not found: ${params.fasta}" -} -// -// NOTE - THIS IS NOT USED IN THIS PIPELINE, EXAMPLE ONLY -// If you want to use the above in a process, define the following: -// input: -// file fasta from fasta -// +//fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false +//if ( params.fasta ){ +// fasta = file(params.fasta) +// if( !fasta.exists() ) exit 1, "Fasta file not found: ${params.fasta}" +//} // Has the run name been specified by the user? @@ -98,30 +95,51 @@ if( workflow.profile == 'awsbatch') { ch_multiqc_config = Channel.fromPath(params.multiqc_config) ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") + + + +/********************************************************** + * SET UP CHANNELS + */ + +/* + * input read files + */ +Channel + .fromFilePairs( params.readPaths ) + .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } + .set { raw_reads_pairs } + +raw_reads = Channel.create() +raw_reads_2 = Channel.create() +Channel + .fromFilePairs( params.readPaths ) + .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0], a[1][0]), tuple(a[0], a[1][1])] } + + +// SPlit fastq files +// https://www.nextflow.io/docs/latest/operator.html#splitfastq + /* - * Create a channel for input read files + * Other input channels */ - if(params.readPaths){ - if(params.singleEnd){ - Channel - .from(params.readPaths) - .map { row -> [ row[0], [file(row[1][0])]] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .into { read_files_fastqc; read_files_trimming } - } else { - Channel - .from(params.readPaths) - .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .into { read_files_fastqc; read_files_trimming } - } - } else { - Channel - .fromFilePairs( params.reads, 
size: params.singleEnd ? 1 : 2 ) - .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nIf this is single-end data, please specify --singleEnd on the command line." } - .into { read_files_fastqc; read_files_trimming } - } +// Bowtie2 Index +bwt2_file = file("${params.bwt2_index}.1.bt2") +if( !bwt2_file.exists() ) exit 1, "Reference genome Bowtie 2 not found: ${params.bwt2_index}" +bwt2_index = Channel.value( "${params.bwt2_index}" ) + +// Restriction fragment +res_frag_file = Channel.value( "${params.restriction_fragment_bed}" ) + +// Chromosome size +chr_size = Channel.value( "${params.chromosome_size}" ) + + + +/********************************************************** + * SET UP LOGS + */ // Header log info log.info """======================================================= @@ -140,7 +158,7 @@ summary['Run Name'] = custom_runName ?: workflow.runName // TODO nf-core: Report custom parameters here summary['Reads'] = params.reads summary['Fasta Ref'] = params.fasta -summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' +//summary['Data Type'] = params.singleEnd ? 
// Trim the reads left unmapped by the end-to-end alignment at the ligation
// site, so that the part of the read before the site can be re-aligned by
// bowtie2_on_trimmed_reads.
//
// The cut site(s) are taken from params.ligation_motifs. The parameter has
// to be interpolated with ${...}: written bare inside the script string, the
// literal text "params.ligation_motifs" was passed to cutsite_trimming as
// the motif.
// NOTE(review): params.ligation_motifs is not defined in the config files of
// this change; it must be set in a config or on the command line.
process trim_reads {
   tag "$prefix"
   input:
      set val(prefix), file(reads) from unmapped_end_to_end

   output:
      set val(prefix), file("${prefix}_trimmed.fastq") into trimmed_reads

   script:
   """
   cutsite_trimming --fastq $reads \\
                    --cutsite ${params.ligation_motifs} \\
                    --out ${prefix}_trimmed.fastq
   """
}
file("*.validPairs") into valid_pairs + + script: + """ + mapped_2hic_fragments.py -f ${frag_file} -r ${pe_bam} + """ +} + + +/* + * STEP3 - BUILD MATRIX +*/ + +process build_contact_maps{ + tag "$sample" + input: + set val(sample), file(vpairs) from valid_pairs + val chrsize from chr_size + + output: + set val(sample), file("*.matrix") into matrix_file + + script: + """ + build_matrix --matrix-format upper --binsize 1000000 --chrsizes ${chrsize} --ifile ${vpairs} --oprefix ${sample}_1000000 + """ + +} + + +/* + // STEP 2 - MultiQC + process multiqc { publishDir "${params.outdir}/MultiQC", mode: 'copy' @@ -252,10 +401,8 @@ process multiqc { } +// STEP 3 - Output Description HTML -/* - * STEP 3 - Output Description HTML - */ process output_documentation { publishDir "${params.outdir}/Documentation", mode: 'copy' @@ -270,7 +417,7 @@ process output_documentation { markdown_to_html.r $output_docs results_description.html """ } - +*/ /* diff --git a/nextflow.config b/nextflow.config index 5f363c6..0e1bb10 100644 --- a/nextflow.config +++ b/nextflow.config @@ -18,7 +18,6 @@ params { // Workflow flags // TODO nf-core: Specify your pipeline's command line flags reads = "data/*{1,2}.fastq.gz" - singleEnd = false outdir = './results' // Boilerplate options @@ -42,6 +41,8 @@ includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions includeConfig "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}/nfcore_custom.config" +includeConfig 'conf/hicpro.config' + profiles { awsbatch { includeConfig 'conf/awsbatch.config' } conda { process.conda = "$baseDir/environment.yml" } -- GitLab