1
0
mirror of https://github.com/pcvolkmer/fastq-tools.git synced 2025-09-13 05:02:53 +00:00

feat: add built-in gzip decompression

This commit is contained in:
2025-08-11 20:15:26 +02:00
parent 09c573f2f2
commit 8f2e74a9c2
4 changed files with 66 additions and 10 deletions

View File

@@ -12,3 +12,4 @@ regex = "1.11"
clap = { version = "4.5", features = ["color", "derive"]} clap = { version = "4.5", features = ["color", "derive"]}
console = "0.16" console = "0.16"
itertools = "0.14" itertools = "0.14"
flate2 = "1.1"

View File

@@ -6,6 +6,20 @@ Application to show information about and scramble FASTQ files to provide non-se
This application provides the following subcommands This application provides the following subcommands
```
Usage: fastq-tools [OPTIONS] <COMMAND>
Commands:
info Show information about input
scramble Scramble input data
help Print this message or the help of the given subcommand(s)
Options:
-d, --decompress decompress input as gzip compressed data
-h, --help Print help
-V, --version Print version
```
### Info ### Info
To show information about compressed FASTQ files use: To show information about compressed FASTQ files use:
@@ -14,6 +28,12 @@ To show information about compressed FASTQ files use:
cat file_fastq.gz | gzip -d | fastq-tools info cat file_fastq.gz | gzip -d | fastq-tools info
``` ```
To use built-in decompression of input data, use the `--decompress`/`-d` option:
```shell
cat file_fastq.gz | fastq-tools --decompress info
```
This will result in output like This will result in output like
![Info subcommand](docs/info_subcommand.jpg) ![Info subcommand](docs/info_subcommand.jpg)
@@ -26,4 +46,10 @@ To scramble compressed FASTQ files use:
cat file_fastq.gz | gzip -d | fastq-tools scramble | gzip > scrambled_fastq.gz cat file_fastq.gz | gzip -d | fastq-tools scramble | gzip > scrambled_fastq.gz
``` ```
This will scramble headers and sequences and write the output into `scrambled_fastq.gz`. This will scramble headers and sequences and write the output into `scrambled_fastq.gz`.
To use built-in decompression of input data, use the `--decompress`/`-d` option:
```shell
cat file_fastq.gz | fastq-tools -d scramble | gzip > scrambled_fastq.gz
```

View File

@@ -6,10 +6,18 @@ use clap::{Parser, Subcommand};
pub struct Args { pub struct Args {
#[command(subcommand)] #[command(subcommand)]
pub(crate) command: Command, pub(crate) command: Command,
#[arg(
short = 'd',
long = "decompress",
help = "decompress input as gzip compressed data"
)]
pub(crate) decompress: bool,
} }
#[derive(Subcommand)] #[derive(Subcommand)]
pub enum Command { pub enum Command {
#[command(about = "Show information about input")]
Info, Info,
#[command(about = "Scramble input data")]
Scramble, Scramble,
} }

View File

@@ -5,8 +5,10 @@ use crate::cli::{Args, Command};
use crate::fastq::{Header, Pair}; use crate::fastq::{Header, Pair};
use clap::Parser; use clap::Parser;
use console::Style; use console::Style;
use flate2::read::GzDecoder;
use itertools::Itertools; use itertools::Itertools;
use regex::Regex; use regex::Regex;
use std::io::{BufRead, BufReader};
fn scramble_sequence(value: &str, seed: u32) -> String { fn scramble_sequence(value: &str, seed: u32) -> String {
let ahead_1 = Regex::new(r"T([ACG])").unwrap(); let ahead_1 = Regex::new(r"T([ACG])").unwrap();
@@ -53,22 +55,37 @@ fn scramble_sequence(value: &str, seed: u32) -> String {
fn main() { fn main() {
let args = Args::parse(); let args = Args::parse();
let stdin = std::io::stdin();
match &args.command { match &args.command {
Command::Info => info(), Command::Info => {
Command::Scramble => scramble(), if args.decompress {
let gz_decoder = GzDecoder::new(stdin);
let buf_reader = BufReader::new(gz_decoder);
info(buf_reader)
} else {
info(BufReader::new(stdin))
}
}
Command::Scramble => {
if args.decompress {
let gz_decoder = GzDecoder::new(stdin);
let buf_reader = BufReader::new(gz_decoder);
scramble(buf_reader)
} else {
scramble(BufReader::new(stdin))
}
}
} }
println!() println!()
} }
fn scramble() { fn scramble(mut reader: impl BufRead) {
let stdin = std::io::stdin();
let mut buf = String::new(); let mut buf = String::new();
let mut line = 1; let mut line = 1;
while let Ok(n) = reader.read_line(&mut buf) {
while let Ok(n) = stdin.read_line(&mut buf) {
if n == 0 { if n == 0 {
break; break;
} }
@@ -88,8 +105,7 @@ fn scramble() {
} }
} }
fn info() { fn info(mut reader: impl BufRead) {
let stdin = std::io::stdin();
let mut buf = String::new(); let mut buf = String::new();
let mut headers = vec![]; let mut headers = vec![];
@@ -101,7 +117,7 @@ fn info() {
let error_style = Style::new().bold().red(); let error_style = Style::new().bold().red();
let mut line = 1; let mut line = 1;
while let Ok(n) = stdin.read_line(&mut buf) { while let Ok(n) = reader.read_line(&mut buf) {
if n == 0 { if n == 0 {
break; break;
} }
@@ -136,6 +152,11 @@ fn info() {
buf.clear(); buf.clear();
} }
if line == 1 {
println!("{}", error_style.apply_to("🔥 No valid input"));
return;
}
if line % 4 != 1 { if line % 4 != 1 {
println!( println!(
"{}", "{}",