From 09c573f2f2bdc05af30cf548870f47aadcd73134 Mon Sep 17 00:00:00 2001 From: Paul-Christian Volkmer Date: Mon, 11 Aug 2025 15:36:20 +0200 Subject: [PATCH] feat: implement info subcommand --- Cargo.toml | 2 + README.md | 16 +++- docs/info_subcommand.jpg | Bin 0 -> 16769 bytes src/fastq.rs | 30 ++++++- src/main.rs | 176 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 218 insertions(+), 6 deletions(-) create mode 100644 docs/info_subcommand.jpg diff --git a/Cargo.toml b/Cargo.toml index 7445e32..cf65ad9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,5 @@ readme = "README.md" [dependencies] regex = "1.11" clap = { version = "4.5", features = ["color", "derive"]} +console = "0.16" +itertools = "0.14" diff --git a/README.md b/README.md index 0803fdf..5070713 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,24 @@ Application to show information about and scramble FASTQ files to provide non-se This application provides the following subcommands +### Info + +To show information about compressed FASTQ files use: + +```shell +cat file_fastq.gz | gzip -d | fastq-tools info +``` + +This will result in output like + +![Info subcommand](docs/info_subcommand.jpg) + ### Scramble To scramble compressed FASTQ files use: ```shell cat file_fastq.gz | gzip -d | fastq-tools scramble | gzip > scrambled_fastq.gz -``` \ No newline at end of file +``` + +This will scramble headers and sequences and write the output into `scrambled_fastq.gz`. \ No newline at end of file diff --git a/docs/info_subcommand.jpg b/docs/info_subcommand.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cee2ce6f06a4247b1a15436dad71db83571fbfd0 GIT binary patch literal 16769 zcmb`u2{fDEw?7<3QB$?lR8whHO*Ky`RYg%0HJ7v$C5^P^F{)~wilQ|ZRccC&F{`<% z#vmj}LQ!+fArWuB|M#x@-uJHi`@i?T>;EKst*nzLC(q}cea_kYv-dumKEnYn8tNJ7 z0cdDw08gnez!?Q_@g>yF-TB!|8+Bnhc@-6E3;Vyd*s2SwD$7wD*u0&+TwFZWh3`L- zmOH}$bO5yH&i(VIUUbwqJtI9m9Uc7z28Q#DEEiZf$jZjS#>{;2%0-UL zoLpR7EbQDoS2%etadL6~(+LeN^*40%O!V|joUF{Ood4tN><55@5%8PlA}!4|z&Q>Y zS`M1C9)J)4KtoRj_z%W^yJ*f)pYc2cBhv+D>J9Z50q1CFY0uHo{`1t-yF;np1L!#D zFNw%$pTBJOjN#gAPWkYZ0!Gn$)ty}CV+1h;C$G0m7p`#g@bX>1A%0WhmZFlfimIBr z&V5}y{Raky7LP5htZi)VoS(b6y1j7s@b>Zb^A89Nig*_p6&({Bmzwq=JtOnur>w%F zFU4O=O23uW)YjEEAR3#RySjUN`}%(l{2E72OirPuXVCbC#Xn2SD}Ptlc6Rsn4-Sur z$0z^bq5;tU-?082*?+>tLB(~Bj*ga&;U8Qy=lrQc%RxskB6t3hwi&~-*O#x!hcj~C zODU-CWD->{CvZ7=ja|4Rrij13^AEKDh3vlv?Ct+AWd9EA|G|X@u+Y*_Cy$l`pameR zbUCt)6(H;4Hv`>To2ER0i5kX;%wH_D@8k_hHf39?G}=IOBD^z8X(+1R|8sF~;#RCy zdZ~Nu;EK!~2{@fomfF%N@&-7$&9s)~7E^ZyV2-WlH&(GH{2>)3Kgt| zXyeXASukX78({wszZBp%>puxg4Tz8WhS_-b zzL97vKJIP-?;b&srm^~t=uEzf!Giv;gKCqygLNq}#h_H+6|{fKcy+E_wO38EqbW2P zq){($1y#ER&yY`?+J>$dw;wsk=YvyzTR8J6ue8L)=@&sut_A4GC2Reut1M`F`1qUtema&fgE@yh!7M zb4%+DV(6Ala%3H?7n+%e=-VoP!-Fdw9bSZHxsinP{9aJj1V|r}n_2{R?8E3`av=)U z4Mgp4#Q46;#NbB+>E*9!?%hUtIEQ|3(PqOMJw{7@!jHGJuUL%@>r>M+tum5Z>-X9+ zv7zC8e5D;kttBXtPhpW1(;6S^G`yK$t-2oJuo9tp`3%sdP1x%dRGzFi-H@i$XNEu| z5PSKLI$-8gO0yl^8Lk7I~nf1d$f_PK`G4L}H-p$SD z72qKI+wofTP;GwMplmJMlUXFWl|?D`!9XVNv(jgwERC@O5x?%PiLyGniLgp? zj8tO-_?xIm?Iko{~4$F)$o$$dDR^kmeR*y|Pf=qsh;k}ZAF&W61HK+gzam+uTx zG>5Y9!gyhV?r8C@Q?TQX(pE}E2r$z;jl@mTw2xJK222!#JsI2;8H7NHt638S_Du%d z9dT{KOO+GsjHeXD$EP`d!yEBobluzGp}eT@X5?h!uy=S`@3UL!flcNlgO~e`hwZ`Q zOTl*xm*V36XH?d<>L`f0mjRlQBn7h3j!JapL7PI??wmV{>9v@Y%xBx{yS%9`Hd%K! z)-2oawj{~ZT%G);$w~~`##x>LlI!w$jY~a(9D0kZiVFsd>Lj6JON1-l64=D(_Ynm6 zm;EpvqEI&8_>nDt7OqIe)qz$FR5!cH}+R$D;D=~E?(nx$?C^0hT%)iQEESN)?tLB z#gcdAI|O8x@WNx0ebYHIJ=8u&ELAqAnSokZPsH=ZHEK^x*}~FCd83$t#M{oy^8v*=%q?l7{r{ad~elj>Voai8klU5gHp|; zWaY-x>jaBrm0ZdSwC@+`&Y@L^TjUwQ?F=A9M0W432gycXZ$}oon>~4QFInThf^d}b zt3Al}QdDo?Dvv6V?*xg@?9XR;k$<6;+x-l{-GjDuhv#Oy-qTyEKqAsO~{CckSTWCp4oD$y?Uo%@HLWVbOBNS zFn}0I9a#MZR`%}7;IGEZr@4u<9E09VXMl>`XMuF7o1MeGo_e>0m_lfGj*?ncZnPB; z{lJ@Y56hBQbg!m;?0bH(XG_qj2%b?Hceovny`m|!ie;^1743%!k1GkNJOp=WXit(A zqoo-ish^f{ku2`@ zUccHnQy(s*IeR!1s!ft5c613UPqj15m@cU8Z(kwW9hpavIBhiI@vp`W#cZQEIM3QTxk+FA*V-N@O0 z&L77~0_N}yVK|r2(mAFQSJ5iEAj?#L%rP5aB4D@mXHe3J-|mSz2NQJ*`W?}slusC0 z411?2t#cTZZ!wkFNT~0tlxctsDujM8RZw)h5DmlW&(0+gOVrcTQsdUYCfC&;&-jiX z1|{}oJsH4;QtI&&$UJo-0A3sbCFdETn!zIm7va`lD8!j*+0i$x&)UW=Ou z$#z&GWakXf&F9v&&2TTo9*j4=NHm83v>&;kQ;u~Atf{NMQ`uc`5iwMckY^*0sn?sQ z#Zl*?su>!pXji_b`Cv{G3+f=ZIVKnE8vL{B_MwcF5uuB%{`eTOb^*~B?laJet*vf( ztL7Qfagg^=Ryft6QEj^NXK&!2|G3=XWfX=G8)8^Vi1MgnvH&sS!GzD8Torkf!J6Vu^1x#j{C4pnwKl3*MiiFIQy#tnQq{epZlr(Pcu z^m9XMitp~u>0JHuO0ty*-r4Iyn|5K(V@l#GfudE!``C@7^{!WID8w~eJDaR2{Ai5* zG8*%Vfp@u;gm;St@-GNQz-|+?;>dRGtSuc=5T$s?y-?-gpT!*gw}38B&40mOdFe?q zO;oz?mfKB@wW-w~Anm2io9Bnzc{)|+j>x&KM-46iY!RAf=WN{^l9w_2Au1Y7-2(#{%)aY4*Nu9%wi%-7z6GoSF+5MT(usU|+79jU1nWMSNM`5G>WKm!=aQ?)q1k30G0kHR7LgwLNBz!FlW2cvUn!@58 znnYRu)z~(R_p|@X{;^vl=I4*)_Icv+)Du=HKbFI`3tJO%k15#!?a8BLvuIKYE zO`OTRcd{cm<20Ylx&!V5)mryp*=O9dq6%;%(m9~ki*``D^v`xv6*u7(#^c%7ofX$( zJmevE^P$3Sc|>aot>Y#KP!G_GkdBr_6uK>8-OD^ zTz?)oxZ#$g3(@v#?E+&N5JQd1F#U|Kb)%=hXVmNK)*B%_mIF(#^gWg}X~??7rUk8; z!Jhf6#NaB2HNk76_3;ljUT!2@d*HL178uF(;}r?HtPAO;);c??$?f) z*QKUywYzJ;KqdC@B)HO?(Iezb#r4X2Yb&P6?syG@61TW%4bL!XrY=uU6TrZqNveHh zM4VX=dW)seWcTnpC;8D+$>F6(P-xE!bBc(ReL+fG>cuTJ+IIG5Nh>(3eDT=2A~3=Mt26h8~wU>JC-P9CW1llg2?>?P`ihSbTe> zJ<2%wT`7DlIf5eA(Lhk?ZUKY8+AG(NUDbc1zg@rn@i{PgA@&n{S+&5BIT7N^9!=qM zhvMO^MpdiICsB1kGk0+L^tAip<)0i%^jQq9AaU2<^5IEp6)W+tmXG|r?9TwQu)%}T zF>~WD`MVutCY6%VUQb4Tho%%72Kh8e&CPQbK^+F)pySC8F~Jo z&{*0K+c5Vc*bs#m5rFER7Y+48J^r4`nIiLK2P*CH^dA- z5}h@)go>$?b{*H?XV!aX3@!PP{TnS*2F0We3_aetWR@LA1X-VMS%1I&=#9Y=n( zmrs$K7@2tbHK4Il_jf?){6~7fmGhNkAry3a11hd)pXYcHb?o9LIJjQ!KQM1p0o7R5kxf>lx|#8hrXWBC zeCT;4m7wAU#q8yfW2+s#*489#+$(@sKBxx9@1R57jH9ybiQsPY^=9Mtf=YHyJG-?* zWxjx(T{>fJ*YKIlftgyC&(2BU6(Kg8p@rlb+CHlwrIy**RZZ!JNhsv$V5?OkL<}rK zn_55=Z}7G_$W%4xwVUloNNwxjJPh6dRWD^3AAq-vDbluwO*qTH>atxDA*O_KtDUIC z3N&c$Z*q&(sGq!p9p=h~%v+{|V!=uLP|3akrGxy9A4-s5Ty(Tz{0k)zyF32@eG6(o z_N7c6V%8>Ova-aX`^AqZdDe7(aK_V|{lP@>ZXw=QU9U)>7W+H6VqeECD_$A5RHdlv zcbMgPX-6%GTO7wF^|``8vbmL53ss%$l=dntEI;ZDAZ>cCuuTJhL>AW~l)}WQEQ6-h z834~Q<3e?17;^svHtw0GEo~oD4!vty`7XohB&YQx zws!litS|c;^nB)}=|GqY_^*SfFLQt!v>LNryIYvjle*RH)X`$qHxK5k=ep&Uy?P4M zd!GzkWZO8_i#s9V^2!twMRC~Zg()$l&l!NjK>RQ%THs?UtM~7T>xgtud*Npyx|l{~ zxjy7jU`IEU7duFBTNsISHUc{cqIB&ui~_ixH)WDcCM1W%=e^2$A7757!#z*+_I3D^ zirM!IBWT?#Vy?@cD?Zvh127x_`|;5|cl7c4$%{;?X$($?;;E3o`g=*)d9MotSp#2&c%^rOz_!zREAH~O=e_DM`Hyl26+bJXUyqKvZ z%Pj)ki(+D*Ny*00&gCAoXRa@r>k@NG0Urck9)Z;!enl3rl zTZRdjvm$WoZB#j>5`notCmu2m1)hs zx0rd)j+|h@PQ;de&~Edj!Cqtg!08bv>v?g&USK^@{3Mdo%<92EYW*L@3Ka=-uOjlB zrINZ&>gQ8@rDUOE88sjFu04y=6W}#1_*r2mp_o<6T7LnZ-WY?qbhMI-Qo8&e_r zub8p3_iM6kLaAu`7@k(C=Nt)t3}eVzOx~p>49^%d`A$9?F1A6wuzu zHCq)Et?68S{m4xf2$0Nz*R=DJFOF_gJpwFUKpqmeoV~;Fcp{SIVU425q#4KsTC=1? zCd}&bFEMPv33~%o>XV{|S9j4Ahx}masvwT|>?H@&9H7?DVBh?eF_-bWHWx_6%yKGl z2GiOwbhGE{yj4Oge>6xUQ%NgA^G-nj4?&?!-7&W@Jo2k~+I3WP9jWPtIsR}!Y*eFp z>%v`EVJ@CNdVjeWl=C~Fjn0M2pphSh7kLtdt%4Fe0Y_<*HD{mNx|YHFTh5Lb*{)>b zGdF4>i`&crj$224Q_bu#Ax}_7_#xi?V>kP5np^WP*M8_^((CC2O_`^Qm5lJ)e%Wu^ zU$%EV(KuaBG@A3D-$q*HZa0*B$B%ZrFy#*j?zUi1x!*3^PDih$*_B!JS1&|U7>DT#jhAhkw&j)PfytlGr+|~=Bc9k(~smkkRpsD zF?s=(kN-Nk4d2*jZ*U*yfm%&jTs8t09AYF@e+-jdV@=+A#T%6cL7^iQp<24K7pTld z_~1#?qT`jFDg>_03S%}_&-=1!^C<8kz}MlIWxF1|iH=}Vo0Ij}8Q?;g)28!6*xR~9 zh?0GaeeUZmi-$?b`+0{WM^cZn%NvIWpALVK5UEWdNZw+l?SzS(egs;ScjFp;lDUa) zkB}A5r-~-3T)-V_#e2MkZo9}+8=+g1hIQBG>fOIkJO{U+j^*Erm<+L6ouC~flY~7G zEm=3SOWn1;XwE&Oc`Lj^uiU@HA@kAohaXE4N23fb);>*QbfvrKQ4&dZuOup1Hca?H z?%7MWgw9(41gyQ%p()`O@B2%)9ZQ^x0>pc@YOt}sgV6IjUx>BcnP^Bh*lbNjz8?2+ zur2FKm^AxORZDVtPx<{t2@`3LI#|$+j`OW2KH`=|A}LD*!B`CBN5{ThP9 z)OF{H;%jxC#-3?@ZTi6eN|#$@*_qATKFuD-_DILUpYDzl2p9O^$VrQ)ZejadV(?Ok zIFWe~#6N9|b|8L*PBV6)3|2BSLdOWTFXw}`Pg2iM{8HPrnvnO37BYz*gIe=`WZn)$GiWRhFzzQl6)+eXeD zzT12SfZH$Lu}=1xshczG)<(3YGm}Gi|YLcAgXe72mi*Tlwm-RT89JA^q96 zZ+m2N3nl1ez0um68fl9?0}#?fD|_)LGW&WaTVG%4RoSOaz-4PyE(|+}@<}H$c_UX4$ z$LY|tU;XKazTt@Nx#4ZCujiEy)E0w(lqSKy?o>bn-pV z<2JUYaLmX$+S;7~21(EyheEnQq}s>N@NGdx;8=*$ zM80X3#Ahk93`_mRm8Xy4B}(7$>~HL};zCsLDq-A1&I`$$r{8?qk^(3A0<-`*`H>n5M+?Y?*9GJ9IT^XFeF zIK~qeaZ}NY7~?DBru?1G3zdEb&Pc@osrK=*pAXD@Z@%Xb-C)C}5WBCQV|5@x2$@}Z zN`#F1^0hqK$vWOTuz%ih)SJvJk8)C2U}5N#Pk&Tt?mz#NS75-oWRm?McOM&Onms;u z!sKl@<^amv&%}9z$P$-u@N;D89fo&G&}eQ=;d~HBRZ(gG0(ae|m|}UI202;?`oS>& za_>vx<-?M(*2}mNx*3)3rAeR}ZY;nj)ML@cCrU!*y1PnSO`po<>}}@5p^!btL>!3a zuW~+ntIi^b18rQ^3HD}7FL6&Xd1hNxY;biz5Lq+` zeV#bh+<=To=@i6(=+6M|i5j@&l#MFjs4!e}z@2*t^6chu}x-qfd){d1fc94K-+*UUPY{6VAEyWYA$>;j4ajq(xV zCu{B~Qqj0944of42-vpr8lA^T77jA^!4^M=f>R#+k~c?PRhgR3=SWPRcLHH@>+pp+qfE3{%GfDC%(T z2?*=SqFPhzm)x_WBk_~GUF0;=1^g>9FKos#ku{oju4f)b4sG!1eRRFDGy6nRV?=_&SajDG zlh>8m8^%@z)Nn#47Vq=*Wnaa*Wxe293=RxylJcY#!5x&pO0|B`jX`? z!-R<@{XJ;GaLDUI5HEfGP+zq(Wcj66^1BQdPacl(rt+Jru$`H(A8rrrP3LAu$5CCH zN<>-w9D~`;VLIN}+Gxs`ze)N)ga0_>XNl3%gi0v%+Ck(l6GJi8A7zEzCW`*xlMN>; z_`5u-v>jCbj9#wkIuPvi+yjee^3_On{Qa4{M}6vGT83+_{kBB<#avoEI$-(eK{!xm z`)U{@-n>3NJ?DLRp~1uW)RMBK^p(l0?@NohB&*7o5HQ3U;Elr)zlQC8n9A-}av6n( z>_FuWqk%NIMCI*;r{72SqdFXdyO`aAWehS;Dfgj$+djyDfYtpo}HxT2CUcl~5@PQRHi%ZsIPjg+p-eEpMzF0{8 zGI{dej19oj6Az-J(r)a4Zs*Fu#gAQ03zdLIsTrK?6tvCqr1|Q$o!4P7ABY|7&Lt$8p2!~okORfla^%wbG z#n-j(zv{9jajQM$=M`n(HYf5fI*8(11@APCKLcHO=*%1{SO5{F6%nDmU&G6pBh*?{ z{K~ST(p>&+FpO34;JDa#Iz6Cq7)!8d4vQQqgt3LZB;1I1!zIf0@VQoh9mMa12~SAN zA~(!U)qet4D|>4nY}WM)U1H-1VnOi}#(7%#38cQfTN7~UXUUf1Uq|a&MAVgfCw5z5 z&WxS2)qUZyet=7lSyEpwaXwZz*i$-0kfBF&i3U}mtrqwYOhh+`yBqXQp15~!hdZX8 z)fL&T$vx_9;a}z`YS8XBk`Cnr5B~fk$N)HJaIl9)eNt0B0KpGVpJZ!*KASm46q2uH zQXQiU^-!J{=%M1%`500?gu(gxWM%c!Z(d)>HTsSChTT@T2*=QSsgSBtsm)h~akt+6 ztKBCk>HkEGXt(lzLVSVU9~(oeIuhZ>A<1gbo|m*|cH|{z@2z7-%!@;9%ZdG+^H)wY zHCe@DDT0^)bnv?=-8$qN%1RH(9fG^}L(_m^nX=s$QljhixMcHEe6R+EH3V@-bufMQ zL<1)qQ6EM%Uu#jlASR;Y2+6+3drjT=kKGgV!XYuay3RQrj`uB@s;eoEOCQNB#y^Q!xQhS)}01!e}gGVsf&4tl}-SIccW~aH5f7xF_G_kzwd=Jn2 zX)@h>@W+!@Rdtxv4JhyRAqU}Ki{1`CYR~dF$mDAp;)$c%(U zRCKF`cud_i_OYfUqIT+Kh0m0xGsNNUle=3#pJBdlJ)!AWTF9pb3YC!`5ovdvyV^Oc zDaakVlf3lc^XV+5Hv%|H*naUOcKXY_UXxrtk<^c#@;n1@bufARV+t`QU&=D-n?XCq zp(m%O?8TZ)WGxigmmt2d6232er=Fc#ro4FM^nL2JVRP|Zm!~`lkz%w-pSbomPZLgD z4tjGZT=}yD`FNc@OV?igIAv7IQJ*YB-}dlI3&w04$Ra6BtW}o$kl-L??D`o1a3YD* z`bQD+Yq7mT7XqA+|LB3oKYH-e{MwM%O;@%x>OV*|^*@X|xqSGLkW^GelAaU(9CxFLSU7chV~^f=tHDw*TbA z?O#x}PrtdX9n#SOYALbUHhrrpKB2fNdH9-8ajBo`K%D_%#84FnM!X{WMvbsBVUBj3 zfmzPtoxl<@OS>^Q&Mi`$a1<@R!}f_YtAu>rY7~4$S(z@D-(&4sfWS;hZVRJJpXzcu zV_{){rPB3KOjts0UUP9+XAoa7C}`FFILPni5%#~u%{y+BZnN4u5=HPs*U+ri-Dtg2 zZO?<#Yxc~Q(nnXW*Mr?ruh6%-#nU?l$`?Qv0+0>_<)Z6H(FfqRYil-+^9Ir`63h3y zSrXLc^L0{e>V03R$GvV(ttDm3j)32n+4de-lkLOGj|E92wbe{M(7Ktb^9?&vjXg`sQopBm*~ zUcJ)K3rw1fw^N5SsZ3r!(pcYvBDE!o!T!+g8swk%X1ZKWAO9nV`5!*h7{=ZFe6Fn@ z4@#`(&ksKXaFS)pA&@Vazh-mlm>=J+|L!-hJ7^7cJ%485LQxb5Tk96nv`-C_9+B=E4! z{y~Nj`%H9CvPYjv`;Kv9Jw}`!PxYWU_)l}s0LphB2U%wrD?Bk4gCl~go$K!|hnDoZl!NkTDha zUnvVqtX%bsS^QfVx2nMDTW4Qp(kuGr!B2O@;D-yp zyretp9X{3UcC&FCyN{Sdb^P9$yp@MOoFW~Ac9+JngcW)agv>z=aSAO8MdY&`Kuq!7 z9)5P?YvMhcN_>B&@~6joH7dL|+<`@r0vEgy6R4i_Eg0izNq*;v{+sty2H2uB8lsWzM{S^q}F zVf~orpULHh2bcV0tOAN?N6>%1cZ-)8{%z-ip_GN#eL&|rONp0B-WG(AOs$!t6tFz{ z{@C!wu;9gn3S4z}8j{x{t}D#OFr-7e_22-#x;i~h7&|#4wQ1NpF2gt0NpeI`Pf&h$ zrBT12@VjKr`MKi!$lkie&G+_O2E0|y&z6*Tx)yWW^uWtTKc^&{#`L%x-Uvo0?Q?fw z(Ss(0E4jG($q*X$n0gSS!`#r9H7#_FJx@WS)41N)I~Tv%ezu@6AJmBdrQ@SZgNZHE z4GPE`^>4pzimf0_FdgHl4ttEYQHpLUHMm1Pf@lvWj*_FoY9IKIt&}+-7XI@0pW49h zw^Ed^W(dijSmzVr3uyr*RJw>PeBz%<5o+#e22=AX{L#jPMI5y+W)N;iN!}RFtcsQE z!IH<2s(HWqZ4`uF(8MzI41l{shJ)?xJ%d3TX8@86(ajbG6PhEg_gHO(7?nEexZ^({ za^j~U@%^Iwr5^*`f^v(5ZVTvn+*V=s{G`B&-4?gSYR$bhSRs-*X?2e82zDd{mB(B`}nh{Rtqs}K(M>O|CqNFc(k5D9t1IS!4gLbg{n{^!(>l3_lE2rIdiX?jm0_i&E3LAXUgG_x)TDN~*UF>z$jdSM^vo*5`zxI}v<$xEkw{60@ zr+5aqmh}YTfSoHLUmgR};}g##@kcSOkOd>PdK+n(XX8F|@)Ex#m1e(*9;Y^wF`^F! z(2Y{Ri-cWkLKLXuPjc&oiR~Cu0e@Sk-s&a9p@i}`FloqEiSQb%x+r#H8|It z4AACIY-c5Ab~d2=8s>!&lhOz48sMgJj$l3=DGPmmwF}Q5{l$A{&{U`14hBVl_*)qY zZ3Bdis0MMxYFa?><}Rz=@Tq36*j`1%aMrXvFbVU}Y)2T+{dvgE+&n)2R?GN&hH*#x zI;z>~>Lrh>^ZulLvh;5IP@XTYgXM*$Dlu>&K(sb%u(z4MXr05cVyU3L!CE(iJK?nd zE@|?^F6}vWzP-4E{HRC0)Fg^_r5(rOKrl;CRl0IS@eI>gmFG(O;@(v~wxuLR7NhTg z)X&e;l5bR69HH%PLri*7~$32Z{n0P&&Fb1CY>>{`8+*p*TA@(cqw9%3o&yEz zr(gKhfYID!yqfe*PLozzWI6mB^QuRJV|npnwj_}vt1}2-~Z#I z%NUgjPRi)-QGs!_Y7t^%S z>1`l>>Vqw*{6r9y(7W}coc1ldnE~jt7M)V;28=Plky=_onCp*uwpVWpbOE!$olB-a zJ*?baGXk>ph7`8k{&ik;xlZdj>p5QnceqHntWwwo;@fRA2gmteU`8L3xzRQ4AULzJ z*G7S1;_4e~&%g@txK|dC5Y6E2BXNk3g{f;Ds>39HFBGMN%cl=eu%$+ybG5qLg=#6I zQ_$Id2xR#S%o$*lID9XOW=;3G0l%)OD2se5{2iupmQORFCj`1|2txhHI$!lxwE;yhg##bKrI)Mb327E7}N{g*p-%-;BZalK|kFp zi23O-nB(&k{)))67hp~GIWHSh)83otbDAq)ay9X`)z#IPq!P|~+Leh zy5q5z4B&Qa8SEDxWIhTl*x}DGH%wR&Y(?piB=r{<$qIzmz-tXUT}6*4_2U5~P2EvH zlFFx!6>P2RQ0v6VCES-|KL~VY0E~gCOn%XFTRF8i^2ZDsn?_W&UaoWuP(JE?H3J=$ zs#^S>w>_j?6Ug=H@-s-k2+yB$=hYNFC`?mzLacw)PY*IvzN{DeA*+BBP~bhvll+@b zgHemiw0CMG*Wuw>926O1{$aq5bu>i?Y>mhh8&fKOTvJ{D;{K@nMp|IDPLc&|HqQk7 znaID$>Nu!-n$s#XejlwBE96)|^>P`Jr0p++7W<{%;oOKo z0V4JKhw$N1@@0(DR(>sDvVQxm;MH;I&yyi{zCb6gY=g$He{UYgzIL*=YRL6xEu!%X ze8Pb)EA}@xYBP2uy<(XGaWu*gvA*{pot5jT1>~E*4oz8BG}HCrrG`)L{wrZY z`hw^9{ycF%%DkcicxnWZ)SEte%PLb+js;t5y_k^olr8B4Vm{gF^?x@`sBB-Z8eYzY8_V(519F|lQ znex_Ddsxy4h2ihxXMSnHb?v7eI7-Pow)z%4h#m}6X=!R0Q(T4qsYe8tN8RHRj)d)} zYFsQW3eONtWiXdy=Yd;~Uk?_ViSL1WJcNcW#Pfq2OqjbeP?xT$kTiCu56o8WAA8>R zas8vA#JO{ep=A9;r6^N!Zvfvg~ zmG)&jN8aWMw-YU1piOei#M(8u<>muYQN|ey7{l?+!Hn2HJeGmW@hr?N*JO`&9Ki%- z>+A=_+>)QYb;o3JZ^bc{S$SP4Q)Z5}s`Knk0_zf%jwc{D>s{M4aLz4YVz&XYBjN=FNJXho{#{GzO3!=dS2O9t#XG|Zu73gT?4R$sr(I| z=g-DIoLr6TEsV3WZVcqNAR--_2*rq~=aW^K?kQybgq>T~eP)GM&!<(TljH*h%CE%T zG?0En&C7WDuNd`z+8+y)e!l7y2ap_X<=t876JjT8$;1{K7j>6BEWTj`XLmm`mCs~v zFt`yuM~#8d$^lCIZcC21dt2Q6CTZYsZfs5f72U2Hfa=N;%QDHVkgog`_&omf!rW55 zyPlZxZk@~K5nH>3j7Qk{kf5F9H$W|!_`(rt@4x@8cj$l7d+nxy0WzX(e4@&2HiTLQ z1pIe~X_}2;)Y=vo7E?pie>k-tTq%!o7*&dy^xR*sS)==w#<|mcap2mOmmHleKi;x> zosaXBsIvZEwLLU}As`ZcJLuOM&`F7#kX&H@f--OTbmH$Wn!sz zqB&vs(^tEhp>kEeyej7Uq2Cue$7i2!KIp#bT;~$=B!|UXNS94wD_pt$UQ)eB8CCwb NHu`_Ip%~6){x5rUsyzSz literal 0 HcmV?d00001 diff --git a/src/fastq.rs b/src/fastq.rs index deb2ab9..4d25d62 100644 --- a/src/fastq.rs +++ b/src/fastq.rs @@ -32,6 +32,34 @@ pub struct IlluminaHeader { } impl Header { + pub fn instrument_name(&self) -> String { + match self { + Header::Casava18(h) => h.instrument_name.clone(), + Header::Illumina(h) => h.instrument_name.clone(), + } + } + + pub fn flowcell_id(&self) -> Option { + match self { + Header::Casava18(h) => Some(h.flowcell_id.clone()), + Header::Illumina(_) => None, + } + } + + pub fn flowcell_lane(&self) -> u32 { + match self { + Header::Casava18(h) => h.flowcell_lane, + Header::Illumina(h) => h.flowcell_lane, + } + } + + pub fn pair_member(&self) -> Pair { + match self { + Header::Casava18(h) => h.pair_member.clone(), + Header::Illumina(h) => h.pair_member.clone(), + } + } + pub fn scramble(self) -> Self { fn number(value: u32) -> u32 { value % 3 + value % 17 + value % 271 + value % 911 @@ -213,7 +241,7 @@ impl FromStr for Header { } } -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub enum Pair { PairedEnd = 1, MatePair = 2, diff --git a/src/main.rs b/src/main.rs index 769560a..4c98b95 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,8 +2,10 @@ mod cli; mod fastq; use crate::cli::{Args, Command}; -use crate::fastq::Header; +use crate::fastq::{Header, Pair}; use clap::Parser; +use console::Style; +use itertools::Itertools; use regex::Regex; fn scramble_sequence(value: &str, seed: u32) -> String { @@ -53,9 +55,7 @@ fn main() { let args = Args::parse(); match &args.command { - Command::Info => { - println!("Not implemented yet"); - } + Command::Info => info(), Command::Scramble => scramble(), } @@ -87,3 +87,171 @@ fn scramble() { buf.clear(); } } + +fn info() { + let stdin = std::io::stdin(); + let mut buf = String::new(); + + let mut headers = vec![]; + let mut read_lens = vec![]; + let mut quality_lens = vec![]; + + let headline_style = Style::new().bold(); + let info_style = Style::new().bold().blue(); + let error_style = Style::new().bold().red(); + + let mut line = 1; + while let Ok(n) = stdin.read_line(&mut buf) { + if n == 0 { + break; + } + + if buf.starts_with("@") { + if let Ok(header) = buf.parse::
() { + headers.push(header) + } else { + println!( + "{}", + error_style.apply_to(format!("🔥 Invalid header at line {}", line)) + ); + } + } else if buf.starts_with("+") { + // ignore optional description + } else if line % 4 == 0 { + // check if quality values differs from sequence values + if Some(&buf.trim().len()) != read_lens.last() { + println!( + "{}", + error_style + .apply_to(format!("🔥 Invalid quality string length at line {}", line)) + ); + return; + } + quality_lens.push(buf.trim().len()); + } else if line % 4 == 2 { + read_lens.push(buf.trim().len()); + } + + line += 1; + buf.clear(); + } + + if line % 4 != 1 { + println!( + "{}", + error_style.apply_to("🔥 File contains invalid or incomplete sequences") + ); + return; + } + + println!( + "{} {}", + info_style.apply_to("🛈 "), + headline_style.apply_to(format!("Found {} complete sequence sets", headers.len())) + ); + + // Instruments + + println!( + "{} {}", + info_style.apply_to("🛈 "), + headline_style.apply_to("Unique instrument name(s):") + ); + println!( + "{}", + headers + .iter() + .map(|header| header.instrument_name()) + .sorted() + .chunk_by(|value| value.clone()) + .into_iter() + .map(|g| format!(" {} ({})", g.0, g.1.count())) + .collect::>() + .join("\n") + ); + + // Flowcell IDs + + println!( + "{} {}", + info_style.apply_to("🛈 "), + headline_style.apply_to("Flowcell ID(s):") + ); + println!( + "{}", + headers + .iter() + .filter_map(|header| header.flowcell_id()) + .sorted() + .chunk_by(|value| value.clone()) + .into_iter() + .map(|g| format!(" {} ({})", g.0, g.1.count())) + .collect::>() + .join("\n") + ); + + // Flowcell Lanes + + println!( + "{} {}", + info_style.apply_to("🛈 "), + headline_style.apply_to("Flowcell lane(s):") + ); + + println!( + "{}", + headers + .iter() + .map(|header| header.flowcell_lane()) + .sorted() + .chunk_by(|value| value.to_string()) + .into_iter() + .map(|g| format!(" {} ({})", g.0, g.1.count())) + .collect::>() + .join("\n") + ); + + // Read Orders + + println!( + "{} {}", + info_style.apply_to("🛈 "), + headline_style.apply_to("Read order(s):") + ); + + println!( + "{}", + headers + .iter() + .map(|header| match header.pair_member() { + Pair::PairedEnd => "R1", + Pair::MatePair => "R2", + }) + .sorted() + .chunk_by(|value| value.to_string()) + .into_iter() + .map(|g| format!(" {} ({})", g.0, g.1.count())) + .collect::>() + .join("\n") + ); + + // Read Lengths + + println!( + "{} {}", + info_style.apply_to("🛈 "), + headline_style.apply_to("Read length(s):") + ); + + println!( + "{}", + read_lens + .iter() + .sorted() + .chunk_by(|value| value.to_string()) + .into_iter() + .map(|g| format!(" {} ({})", g.0, g.1.count())) + .collect::>() + .join("\n") + ) +}