mirror of
https://github.com/pcvolkmer/fastq-tools.git
synced 2025-09-13 13:12:52 +00:00
refactor: add FASTQ header subtypes
This commit is contained in:
82
src/fastq.rs
82
src/fastq.rs
@@ -2,7 +2,11 @@ use crate::scramble_sequence;
|
|||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
pub struct Header {
|
pub enum Header {
|
||||||
|
Casava18(Casava18Header),
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Casava18Header {
|
||||||
instrument_name: String,
|
instrument_name: String,
|
||||||
run_id: u32,
|
run_id: u32,
|
||||||
flowcell_id: String,
|
flowcell_id: String,
|
||||||
@@ -33,46 +37,55 @@ impl Header {
|
|||||||
((value.len() as u8) + value.chars().map(|c| c as u8 & 2).sum::<u8>()) % 97
|
((value.len() as u8) + value.chars().map(|c| c as u8 & 2).sum::<u8>()) % 97
|
||||||
}
|
}
|
||||||
|
|
||||||
Header {
|
match self {
|
||||||
instrument_name: format!("TEST{:0<2}", (string_sum(&self.instrument_name) * 17) % 97),
|
Header::Casava18(header) => Header::Casava18(Casava18Header {
|
||||||
run_id: number(self.run_id),
|
instrument_name: format!(
|
||||||
flowcell_id: string(&self.flowcell_id),
|
"TEST{:0<2}",
|
||||||
flowcell_lane: number(self.flowcell_lane),
|
(string_sum(&header.instrument_name) * 17) % 97
|
||||||
tile_number: number(self.tile_number),
|
),
|
||||||
x: self.x + string_sum(&self.instrument_name) as u32,
|
run_id: number(header.run_id),
|
||||||
y: self.y + string_sum(&self.instrument_name) as u32,
|
flowcell_id: string(&header.flowcell_id),
|
||||||
pair_member: self.pair_member,
|
flowcell_lane: number(header.flowcell_lane),
|
||||||
filtered: self.filtered,
|
tile_number: number(header.tile_number),
|
||||||
control_bits: self.control_bits,
|
x: header.x + string_sum(&header.instrument_name) as u32,
|
||||||
index_sequence: scramble_sequence(&self.index_sequence, 1),
|
y: header.y + string_sum(&header.instrument_name) as u32,
|
||||||
|
pair_member: header.pair_member,
|
||||||
|
filtered: header.filtered,
|
||||||
|
control_bits: header.control_bits,
|
||||||
|
index_sequence: scramble_sequence(&header.index_sequence, 1),
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for Header {
|
impl Display for Header {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
Header::Casava18(header) => {
|
||||||
write!(
|
write!(
|
||||||
f,
|
f,
|
||||||
"@{}:{}:{}:{}:{}:{}:{} {}:{}:{}:{}",
|
"@{}:{}:{}:{}:{}:{}:{} {}:{}:{}:{}",
|
||||||
self.instrument_name,
|
header.instrument_name,
|
||||||
self.run_id,
|
header.run_id,
|
||||||
self.flowcell_id,
|
header.flowcell_id,
|
||||||
self.flowcell_lane,
|
header.flowcell_lane,
|
||||||
self.tile_number,
|
header.tile_number,
|
||||||
self.x,
|
header.x,
|
||||||
self.y,
|
header.y,
|
||||||
match self.pair_member {
|
match header.pair_member {
|
||||||
Pair::PairedEnd => "1",
|
Pair::PairedEnd => "1",
|
||||||
Pair::MatePair => "2",
|
Pair::MatePair => "2",
|
||||||
},
|
},
|
||||||
match self.filtered {
|
match header.filtered {
|
||||||
Filtered::Y => "Y",
|
Filtered::Y => "Y",
|
||||||
Filtered::N => "N",
|
Filtered::N => "N",
|
||||||
},
|
},
|
||||||
self.control_bits,
|
header.control_bits,
|
||||||
self.index_sequence
|
header.index_sequence
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FromStr for Header {
|
impl FromStr for Header {
|
||||||
@@ -80,18 +93,16 @@ impl FromStr for Header {
|
|||||||
|
|
||||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
if !s.starts_with("@") {
|
if !s.starts_with("@") {
|
||||||
return Err("Invalid Casava 1.8+ header".to_string());
|
return Err("Cannot parse FASTQ header".to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
let parts = s
|
let parts = s
|
||||||
.split(" ")
|
.split(" ")
|
||||||
.flat_map(|main_part| main_part.split(":").collect::<Vec<_>>())
|
.flat_map(|main_part| main_part.split(":").collect::<Vec<_>>())
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
if parts.len() != 11 {
|
|
||||||
return Err("Invalid Casava 1.8+ header".to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Header {
|
if parts.len() == 11 {
|
||||||
|
return Ok(Header::Casava18(Casava18Header {
|
||||||
instrument_name: parts[0][1..].to_string(),
|
instrument_name: parts[0][1..].to_string(),
|
||||||
run_id: parts[1]
|
run_id: parts[1]
|
||||||
.parse()
|
.parse()
|
||||||
@@ -130,7 +141,10 @@ impl FromStr for Header {
|
|||||||
return Err("Invalid Casava 1.8+ header".to_string());
|
return Err("Invalid Casava 1.8+ header".to_string());
|
||||||
},
|
},
|
||||||
index_sequence: parts[10].into(),
|
index_sequence: parts[10].into(),
|
||||||
})
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
Err("Cannot parse FASTQ header".to_string())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -154,11 +168,8 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn should_return_parsed_header() {
|
fn should_return_parsed_header() {
|
||||||
let given = "@EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG";
|
let given = "@EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG";
|
||||||
let actual = given.parse::<Header>();
|
|
||||||
|
|
||||||
assert!(actual.is_ok());
|
if let Ok(Header::Casava18(actual)) = given.parse::<Header>() {
|
||||||
|
|
||||||
let actual = actual.unwrap();
|
|
||||||
assert_eq!(actual.instrument_name, "EAS139");
|
assert_eq!(actual.instrument_name, "EAS139");
|
||||||
assert_eq!(actual.run_id, 136);
|
assert_eq!(actual.run_id, 136);
|
||||||
assert_eq!(actual.flowcell_id, "FC706VJ");
|
assert_eq!(actual.flowcell_id, "FC706VJ");
|
||||||
@@ -170,6 +181,9 @@ mod tests {
|
|||||||
assert_eq!(actual.filtered, Filtered::Y);
|
assert_eq!(actual.filtered, Filtered::Y);
|
||||||
assert_eq!(actual.control_bits, 18);
|
assert_eq!(actual.control_bits, 18);
|
||||||
assert_eq!(actual.index_sequence, "ATCACG");
|
assert_eq!(actual.index_sequence, "ATCACG");
|
||||||
|
} else {
|
||||||
|
panic!("Failed to parse FASTQ header");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
Reference in New Issue
Block a user