diff --git a/Cargo.toml b/Cargo.toml index cf65ad9..8b7754d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,3 +12,4 @@ regex = "1.11" clap = { version = "4.5", features = ["color", "derive"]} console = "0.16" itertools = "0.14" +flate2 = "1.1" diff --git a/README.md b/README.md index 5070713..4dcd62e 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,20 @@ Application to show information about and scramble FASTQ files to provide non-se This application provides the following subcommands +``` +Usage: fastq-tools [OPTIONS] + +Commands: + info Show information about input + scramble Scramble input data + help Print this message or the help of the given subcommand(s) + +Options: + -d, --decompress decompress input as gzip compressed data + -h, --help Print help + -V, --version Print version +``` + ### Info To show information about compressed FASTQ files use: @@ -14,6 +28,12 @@ To show information about compressed FASTQ files use: cat file_fastq.gz | gzip -d | fastq-tools info ``` +To use built-in decompression of input data, use the `--decompress`/`-d` option: + +```shell +cat file_fastq.gz | fastq-tools --decompress info +``` + This will result in output like ![Info subcommand](docs/info_subcommand.jpg) @@ -26,4 +46,10 @@ To scramble compressed FASTQ files use: cat file_fastq.gz | gzip -d | fastq-tools scramble | gzip > scrambled_fastq.gz ``` -This will scramble headers and sequences and write the output into `scrambled_fastq.gz`. \ No newline at end of file +This will scramble headers and sequences and write the output into `scrambled_fastq.gz`. 
+ +To use built-in decompression of input data, use the `--decompress`/`-d` option: + +```shell +cat file_fastq.gz | fastq-tools -d scramble | gzip > scrambled_fastq.gz +``` \ No newline at end of file diff --git a/src/cli.rs b/src/cli.rs index add105d..cf554da 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -6,10 +6,18 @@ use clap::{Parser, Subcommand}; pub struct Args { #[command(subcommand)] pub(crate) command: Command, + #[arg( + short = 'd', + long = "decompress", + help = "decompress input as gzip compressed data" + )] + pub(crate) decompress: bool, } #[derive(Subcommand)] pub enum Command { + #[command(about = "Show information about input")] Info, + #[command(about = "Scramble input data")] Scramble, } diff --git a/src/main.rs b/src/main.rs index 4c98b95..35326fa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,8 +5,10 @@ use crate::cli::{Args, Command}; use crate::fastq::{Header, Pair}; use clap::Parser; use console::Style; +use flate2::read::GzDecoder; use itertools::Itertools; use regex::Regex; +use std::io::{BufRead, BufReader}; fn scramble_sequence(value: &str, seed: u32) -> String { let ahead_1 = Regex::new(r"T([ACG])").unwrap(); @@ -53,22 +55,37 @@ fn scramble_sequence(value: &str, seed: u32) -> String { fn main() { let args = Args::parse(); + let stdin = std::io::stdin(); match &args.command { - Command::Info => info(), - Command::Scramble => scramble(), + Command::Info => { + if args.decompress { + let gz_decoder = GzDecoder::new(stdin); + let buf_reader = BufReader::new(gz_decoder); + info(buf_reader) + } else { + info(BufReader::new(stdin)) + } + } + Command::Scramble => { + if args.decompress { + let gz_decoder = GzDecoder::new(stdin); + let buf_reader = BufReader::new(gz_decoder); + scramble(buf_reader) + } else { + scramble(BufReader::new(stdin)) + } + } } println!() } -fn scramble() { - let stdin = std::io::stdin(); +fn scramble(mut reader: impl BufRead) { let mut buf = String::new(); let mut line = 1; - - while let Ok(n) = stdin.read_line(&mut 
buf) { + while let Ok(n) = reader.read_line(&mut buf) { if n == 0 { break; } @@ -88,8 +105,7 @@ fn scramble() { } } -fn info() { - let stdin = std::io::stdin(); +fn info(mut reader: impl BufRead) { let mut buf = String::new(); let mut headers = vec![]; @@ -101,7 +117,7 @@ fn info() { let error_style = Style::new().bold().red(); let mut line = 1; - while let Ok(n) = stdin.read_line(&mut buf) { + while let Ok(n) = reader.read_line(&mut buf) { if n == 0 { break; } @@ -136,6 +152,11 @@ fn info() { buf.clear(); } + if line == 1 { + println!("{}", error_style.apply_to("🔥 No valid input")); + return; + } + if line % 4 != 1 { println!( "{}",