mirror of
https://github.com/pcvolkmer/fastq-tools.git
synced 2025-09-13 13:12:52 +00:00
feat: add build-in gzip decompression
This commit is contained in:
@@ -12,3 +12,4 @@ regex = "1.11"
|
|||||||
clap = { version = "4.5", features = ["color", "derive"]}
|
clap = { version = "4.5", features = ["color", "derive"]}
|
||||||
console = "0.16"
|
console = "0.16"
|
||||||
itertools = "0.14"
|
itertools = "0.14"
|
||||||
|
flate2 = "1.1"
|
||||||
|
26
README.md
26
README.md
@@ -6,6 +6,20 @@ Application to show information about and scramble FASTQ files to provide non-se
|
|||||||
|
|
||||||
This application provides the following subcommands
|
This application provides the following subcommands
|
||||||
|
|
||||||
|
```
|
||||||
|
Usage: fastq-tools [OPTIONS] <COMMAND>
|
||||||
|
|
||||||
|
Commands:
|
||||||
|
info Show information about input
|
||||||
|
scramble Scramble input data
|
||||||
|
help Print this message or the help of the given subcommand(s)
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-d, --decompress decompress input as gzip compressed data
|
||||||
|
-h, --help Print help
|
||||||
|
-V, --version Print version
|
||||||
|
```
|
||||||
|
|
||||||
### Info
|
### Info
|
||||||
|
|
||||||
To show information about compressed FASTQ files use:
|
To show information about compressed FASTQ files use:
|
||||||
@@ -14,6 +28,12 @@ To show information about compressed FASTQ files use:
|
|||||||
cat file_fastq.gz | gzip -d | fastq-tools info
|
cat file_fastq.gz | gzip -d | fastq-tools info
|
||||||
```
|
```
|
||||||
|
|
||||||
|
To use built-in decompression of input data, use the `--decompress`/`-d` option:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
cat file_fastq.gz | fastq-tools --decompress info
|
||||||
|
```
|
||||||
|
|
||||||
This will result in output like
|
This will result in output like
|
||||||
|
|
||||||

|

|
||||||
@@ -27,3 +47,9 @@ cat file_fastq.gz | gzip -d | fastq-tools scramble | gzip > scrambled_fastq.gz
|
|||||||
```
|
```
|
||||||
|
|
||||||
This will scramble headers and sequences and write the output into `scrambled_fastq.gz`.
|
This will scramble headers and sequences and write the output into `scrambled_fastq.gz`.
|
||||||
|
|
||||||
|
To use built-in decompression of input data, use the `--decompress`/`-d` option:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
cat file_fastq.gz | fastq-tools -d scramble | gzip > scrambled_fastq.gz
|
||||||
|
```
|
@@ -6,10 +6,18 @@ use clap::{Parser, Subcommand};
|
|||||||
pub struct Args {
|
pub struct Args {
|
||||||
#[command(subcommand)]
|
#[command(subcommand)]
|
||||||
pub(crate) command: Command,
|
pub(crate) command: Command,
|
||||||
|
#[arg(
|
||||||
|
short = 'd',
|
||||||
|
long = "decompress",
|
||||||
|
help = "decompress input as gzip compressed data"
|
||||||
|
)]
|
||||||
|
pub(crate) decompress: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Subcommand)]
|
#[derive(Subcommand)]
|
||||||
pub enum Command {
|
pub enum Command {
|
||||||
|
#[command(about = "Show information about input")]
|
||||||
Info,
|
Info,
|
||||||
|
#[command(about = "Scramble input data")]
|
||||||
Scramble,
|
Scramble,
|
||||||
}
|
}
|
||||||
|
39
src/main.rs
39
src/main.rs
@@ -5,8 +5,10 @@ use crate::cli::{Args, Command};
|
|||||||
use crate::fastq::{Header, Pair};
|
use crate::fastq::{Header, Pair};
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use console::Style;
|
use console::Style;
|
||||||
|
use flate2::read::GzDecoder;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
use std::io::{BufRead, BufReader};
|
||||||
|
|
||||||
fn scramble_sequence(value: &str, seed: u32) -> String {
|
fn scramble_sequence(value: &str, seed: u32) -> String {
|
||||||
let ahead_1 = Regex::new(r"T([ACG])").unwrap();
|
let ahead_1 = Regex::new(r"T([ACG])").unwrap();
|
||||||
@@ -53,22 +55,37 @@ fn scramble_sequence(value: &str, seed: u32) -> String {
|
|||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
|
let stdin = std::io::stdin();
|
||||||
|
|
||||||
match &args.command {
|
match &args.command {
|
||||||
Command::Info => info(),
|
Command::Info => {
|
||||||
Command::Scramble => scramble(),
|
if args.decompress {
|
||||||
|
let gz_decoder = GzDecoder::new(stdin);
|
||||||
|
let buf_reader = BufReader::new(gz_decoder);
|
||||||
|
info(buf_reader)
|
||||||
|
} else {
|
||||||
|
info(BufReader::new(stdin))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Command::Scramble => {
|
||||||
|
if args.decompress {
|
||||||
|
let gz_decoder = GzDecoder::new(stdin);
|
||||||
|
let buf_reader = BufReader::new(gz_decoder);
|
||||||
|
scramble(buf_reader)
|
||||||
|
} else {
|
||||||
|
scramble(BufReader::new(stdin))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
println!()
|
println!()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scramble() {
|
fn scramble(mut reader: impl BufRead) {
|
||||||
let stdin = std::io::stdin();
|
|
||||||
let mut buf = String::new();
|
let mut buf = String::new();
|
||||||
|
|
||||||
let mut line = 1;
|
let mut line = 1;
|
||||||
|
while let Ok(n) = reader.read_line(&mut buf) {
|
||||||
while let Ok(n) = stdin.read_line(&mut buf) {
|
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -88,8 +105,7 @@ fn scramble() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn info() {
|
fn info(mut reader: impl BufRead) {
|
||||||
let stdin = std::io::stdin();
|
|
||||||
let mut buf = String::new();
|
let mut buf = String::new();
|
||||||
|
|
||||||
let mut headers = vec![];
|
let mut headers = vec![];
|
||||||
@@ -101,7 +117,7 @@ fn info() {
|
|||||||
let error_style = Style::new().bold().red();
|
let error_style = Style::new().bold().red();
|
||||||
|
|
||||||
let mut line = 1;
|
let mut line = 1;
|
||||||
while let Ok(n) = stdin.read_line(&mut buf) {
|
while let Ok(n) = reader.read_line(&mut buf) {
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -136,6 +152,11 @@ fn info() {
|
|||||||
buf.clear();
|
buf.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if line == 1 {
|
||||||
|
println!("{}", error_style.apply_to("🔥 No valid input"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if line % 4 != 1 {
|
if line % 4 != 1 {
|
||||||
println!(
|
println!(
|
||||||
"{}",
|
"{}",
|
||||||
|
Reference in New Issue
Block a user