diff --git a/src/fastq.rs b/src/fastq.rs index 2ec90e0..0fa48c4 100644 --- a/src/fastq.rs +++ b/src/fastq.rs @@ -2,7 +2,11 @@ use crate::scramble_sequence; use std::fmt::Display; use std::str::FromStr; -pub struct Header { +pub enum Header { + Casava18(Casava18Header), +} + +pub struct Casava18Header { instrument_name: String, run_id: u32, flowcell_id: String, @@ -33,45 +37,54 @@ impl Header { ((value.len() as u8) + value.chars().map(|c| c as u8 & 2).sum::()) % 97 } - Header { - instrument_name: format!("TEST{:0<2}", (string_sum(&self.instrument_name) * 17) % 97), - run_id: number(self.run_id), - flowcell_id: string(&self.flowcell_id), - flowcell_lane: number(self.flowcell_lane), - tile_number: number(self.tile_number), - x: self.x + string_sum(&self.instrument_name) as u32, - y: self.y + string_sum(&self.instrument_name) as u32, - pair_member: self.pair_member, - filtered: self.filtered, - control_bits: self.control_bits, - index_sequence: scramble_sequence(&self.index_sequence, 1), + match self { + Header::Casava18(header) => Header::Casava18(Casava18Header { + instrument_name: format!( + "TEST{:0<2}", + (string_sum(&header.instrument_name) * 17) % 97 + ), + run_id: number(header.run_id), + flowcell_id: string(&header.flowcell_id), + flowcell_lane: number(header.flowcell_lane), + tile_number: number(header.tile_number), + x: header.x + string_sum(&header.instrument_name) as u32, + y: header.y + string_sum(&header.instrument_name) as u32, + pair_member: header.pair_member, + filtered: header.filtered, + control_bits: header.control_bits, + index_sequence: scramble_sequence(&header.index_sequence, 1), + }), } } } impl Display for Header { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "@{}:{}:{}:{}:{}:{}:{} {}:{}:{}:{}", - self.instrument_name, - self.run_id, - self.flowcell_id, - self.flowcell_lane, - self.tile_number, - self.x, - self.y, - match self.pair_member { - Pair::PairedEnd => "1", - Pair::MatePair => "2", - }, - match self.filtered { - Filtered::Y => "Y", - Filtered::N => "N", - }, - self.control_bits, - self.index_sequence - ) + match self { + Header::Casava18(header) => { + write!( + f, + "@{}:{}:{}:{}:{}:{}:{} {}:{}:{}:{}", + header.instrument_name, + header.run_id, + header.flowcell_id, + header.flowcell_lane, + header.tile_number, + header.x, + header.y, + match header.pair_member { + Pair::PairedEnd => "1", + Pair::MatePair => "2", + }, + match header.filtered { + Filtered::Y => "Y", + Filtered::N => "N", + }, + header.control_bits, + header.index_sequence + ) + } + } } } @@ -80,57 +93,58 @@ impl FromStr for Header { fn from_str(s: &str) -> Result { if !s.starts_with("@") { - return Err("Invalid Casava 1.8+ header".to_string()); + return Err("Cannot parse FASTQ header".to_string()); } let parts = s .split(" ") .flat_map(|main_part| main_part.split(":").collect::>()) .collect::>(); - if parts.len() != 11 { - return Err("Invalid Casava 1.8+ header".to_string()); + + if parts.len() == 11 { + return Ok(Header::Casava18(Casava18Header { + instrument_name: parts[0][1..].to_string(), + run_id: parts[1] + .parse() + .expect("Valid Casava 1.8+ header: Number value required"), + flowcell_id: parts[2].into(), + flowcell_lane: parts[3] + .parse() + .expect("Valid Casava 1.8+ header: Number value required"), + tile_number: parts[4] + .parse() + .expect("Valid Casava 1.8+ header: Number value required"), + x: parts[5] + .parse() + .expect("Valid Casava 1.8+ header: Number value required"), + y: parts[6] + .parse() + .expect("Valid Casava 1.8+ header: Number value required"), + pair_member: match parts[7] { + "1" => Pair::PairedEnd, + "2" => Pair::MatePair, + _ => return Err("Invalid Casava 1.8+ header".to_string()), + }, + filtered: match parts[8] { + "Y" => Filtered::Y, + "N" => Filtered::N, + _ => return Err("Invalid Casava 1.8+ header".to_string()), + }, + control_bits: if parts[9] + .parse::() + .expect("Valid Casava 1.8+ header: Even value for control bits required") + % 2 + == 0 + { + parts[9].parse().expect("Number") + } else { + return Err("Invalid Casava 1.8+ header".to_string()); + }, + index_sequence: parts[10].into(), + })); } - Ok(Header { - instrument_name: parts[0][1..].to_string(), - run_id: parts[1] - .parse() - .expect("Valid Casava 1.8+ header: Number value required"), - flowcell_id: parts[2].into(), - flowcell_lane: parts[3] - .parse() - .expect("Valid Casava 1.8+ header: Number value required"), - tile_number: parts[4] - .parse() - .expect("Valid Casava 1.8+ header: Number value required"), - x: parts[5] - .parse() - .expect("Valid Casava 1.8+ header: Number value required"), - y: parts[6] - .parse() - .expect("Valid Casava 1.8+ header: Number value required"), - pair_member: match parts[7] { - "1" => Pair::PairedEnd, - "2" => Pair::MatePair, - _ => return Err("Invalid Casava 1.8+ header".to_string()), - }, - filtered: match parts[8] { - "Y" => Filtered::Y, - "N" => Filtered::N, - _ => return Err("Invalid Casava 1.8+ header".to_string()), - }, - control_bits: if parts[9] - .parse::() - .expect("Valid Casava 1.8+ header: Even value for control bits required") - % 2 - == 0 - { - parts[9].parse().expect("Number") - } else { - return Err("Invalid Casava 1.8+ header".to_string()); - }, - index_sequence: parts[10].into(), - }) + Err("Cannot parse FASTQ header".to_string()) } } @@ -154,22 +168,22 @@ mod tests { #[test] fn should_return_parsed_header() { let given = "@EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG"; - let actual = given.parse::
(); - assert!(actual.is_ok()); - - let actual = actual.unwrap(); - assert_eq!(actual.instrument_name, "EAS139"); - assert_eq!(actual.run_id, 136); - assert_eq!(actual.flowcell_id, "FC706VJ"); - assert_eq!(actual.flowcell_lane, 2); - assert_eq!(actual.tile_number, 2104); - assert_eq!(actual.x, 15343); - assert_eq!(actual.y, 197393); - assert_eq!(actual.pair_member, Pair::PairedEnd); - assert_eq!(actual.filtered, Filtered::Y); - assert_eq!(actual.control_bits, 18); - assert_eq!(actual.index_sequence, "ATCACG"); + if let Ok(Header::Casava18(actual)) = given.parse::
() { + assert_eq!(actual.instrument_name, "EAS139"); + assert_eq!(actual.run_id, 136); + assert_eq!(actual.flowcell_id, "FC706VJ"); + assert_eq!(actual.flowcell_lane, 2); + assert_eq!(actual.tile_number, 2104); + assert_eq!(actual.x, 15343); + assert_eq!(actual.y, 197393); + assert_eq!(actual.pair_member, Pair::PairedEnd); + assert_eq!(actual.filtered, Filtered::Y); + assert_eq!(actual.control_bits, 18); + assert_eq!(actual.index_sequence, "ATCACG"); + } else { + panic!("Failed to parse FASTQ header"); + } } #[test]