1
0
mirror of https://github.com/pcvolkmer/fastq-tools.git synced 2025-10-29 08:12:15 +00:00

refactor: extract grouped/count using sort,chunk,map,collect

This commit is contained in:
2025-09-13 23:34:42 +02:00
parent abd587d50f
commit 94c72778e2

View File

@@ -10,6 +10,7 @@ use console::Style;
use flate2::read::GzDecoder; use flate2::read::GzDecoder;
use itertools::Itertools; use itertools::Itertools;
use regex::Regex; use regex::Regex;
use std::fmt::Display;
use std::fs::File; use std::fs::File;
use std::io::{BufRead, BufReader}; use std::io::{BufRead, BufReader};
use std::path::PathBuf; use std::path::PathBuf;
@@ -221,6 +222,18 @@ fn info(mut reader: impl BufRead) {
headline_style.apply_to(format!("Found {} complete sequence sets", headers.len())) headline_style.apply_to(format!("Found {} complete sequence sets", headers.len()))
); );
/// Counts how often each distinct value occurs and renders one line per value.
///
/// The values are sorted ascending by their `Ord` implementation, runs of equal
/// values are collapsed, and each run is rendered as ` <value> (<count>)` using
/// the `Display` output of the first value in the run. Lines are joined with
/// `\n`; an empty iterator yields an empty string.
///
/// Group boundaries are found by comparing the values themselves, i.e. with the
/// same relation that is used for sorting, so no `String` has to be allocated
/// per element merely to detect where a group ends.
fn grouped_count<T>(it: impl Iterator<Item = T>) -> String
where
    T: Display + Ord,
{
    let mut values: Vec<T> = it.collect();
    // Sorting makes equal values adjacent, so each distinct value forms exactly one run.
    values.sort();

    values
        .chunk_by(|left, right| left == right)
        // Runs produced by `chunk_by` are never empty; `first()` just avoids a panicking index.
        .filter_map(|run| {
            run.first()
                .map(|value| format!(" {} ({})", value, run.len()))
        })
        .collect::<Vec<String>>()
        .join("\n")
}
// Instruments // Instruments
println!( println!(
@@ -230,15 +243,7 @@ fn info(mut reader: impl BufRead) {
); );
println!( println!(
"{}", "{}",
headers grouped_count(headers.iter().map(|header| header.instrument_name()))
.iter()
.map(|header| header.instrument_name())
.sorted()
.chunk_by(|value| value.clone())
.into_iter()
.map(|g| format!(" {} ({})", g.0, g.1.count()))
.collect::<Vec<String>>()
.join("\n")
); );
// Flowcell IDs // Flowcell IDs
@@ -250,15 +255,7 @@ fn info(mut reader: impl BufRead) {
); );
println!( println!(
"{}", "{}",
headers grouped_count(headers.iter().filter_map(|header| header.flowcell_id()))
.iter()
.filter_map(|header| header.flowcell_id())
.sorted()
.chunk_by(|value| value.clone())
.into_iter()
.map(|g| format!(" {} ({})", g.0, g.1.count()))
.collect::<Vec<String>>()
.join("\n")
); );
// Flowcell Lanes // Flowcell Lanes
@@ -268,18 +265,9 @@ fn info(mut reader: impl BufRead) {
info_style.apply_to("🛈 "), info_style.apply_to("🛈 "),
headline_style.apply_to("Flowcell lane(s):") headline_style.apply_to("Flowcell lane(s):")
); );
println!( println!(
"{}", "{}",
headers grouped_count(headers.iter().map(|header| header.flowcell_lane()))
.iter()
.map(|header| header.flowcell_lane())
.sorted()
.chunk_by(|value| value.to_string())
.into_iter()
.map(|g| format!(" {} ({})", g.0, g.1.count()))
.collect::<Vec<String>>()
.join("\n")
); );
// Read Orders // Read Orders
@@ -289,21 +277,12 @@ fn info(mut reader: impl BufRead) {
info_style.apply_to("🛈 "), info_style.apply_to("🛈 "),
headline_style.apply_to("Read order(s):") headline_style.apply_to("Read order(s):")
); );
println!( println!(
"{}", "{}",
headers grouped_count(headers.iter().map(|header| match header.pair_member() {
.iter() Pair::PairedEnd => "R1",
.map(|header| match header.pair_member() { Pair::MatePair => "R2",
Pair::PairedEnd => "R1", }))
Pair::MatePair => "R2",
})
.sorted()
.chunk_by(|value| value.to_string())
.into_iter()
.map(|g| format!(" {} ({})", g.0, g.1.count()))
.collect::<Vec<String>>()
.join("\n")
); );
// Read Lengths // Read Lengths
@@ -313,16 +292,5 @@ fn info(mut reader: impl BufRead) {
info_style.apply_to("🛈 "), info_style.apply_to("🛈 "),
headline_style.apply_to("Read length(s):") headline_style.apply_to("Read length(s):")
); );
println!("{}", grouped_count(read_lens.iter()));
println!(
"{}",
read_lens
.iter()
.sorted()
.chunk_by(|value| value.to_string())
.into_iter()
.map(|g| format!(" {} ({})", g.0, g.1.count()))
.collect::<Vec<String>>()
.join("\n")
)
} }