diff --git a/src/fastq.rs b/src/fastq.rs index 0fa48c4..c6df866 100644 --- a/src/fastq.rs +++ b/src/fastq.rs @@ -4,6 +4,7 @@ use std::str::FromStr; pub enum Header { Casava18(Casava18Header), + Illumina(IlluminaHeader), } pub struct Casava18Header { @@ -20,6 +21,16 @@ pub struct Casava18Header { index_sequence: String, } +pub struct IlluminaHeader { + instrument_name: String, + flowcell_lane: u32, + tile_number: u32, + x: u32, + y: u32, + index_number: String, + pair_member: Pair, +} + impl Header { pub fn scramble(self) -> Self { fn number(value: u32) -> u32 { @@ -54,6 +65,18 @@ impl Header { control_bits: header.control_bits, index_sequence: scramble_sequence(&header.index_sequence, 1), }), + Header::Illumina(header) => Header::Illumina(IlluminaHeader { + instrument_name: format!( + "TEST{:0<2}", + (string_sum(&header.instrument_name) * 17) % 97 + ), + flowcell_lane: number(header.flowcell_lane), + tile_number: number(header.tile_number), + x: header.x + string_sum(&header.instrument_name) as u32, + y: header.y + string_sum(&header.instrument_name) as u32, + index_number: header.index_number, + pair_member: header.pair_member, + }), } } } @@ -84,6 +107,22 @@ impl Display for Header { header.index_sequence ) } + Header::Illumina(header) => { + write!( + f, + "@{}:{}:{}:{}:{}#{}/{}", + header.instrument_name, + header.flowcell_lane, + header.tile_number, + header.x, + header.y, + header.index_number, + match header.pair_member { + Pair::PairedEnd => "1", + Pair::MatePair => "2", + }, + ) + } } } } @@ -98,6 +137,8 @@ impl FromStr for Header { let parts = s .split(" ") + .flat_map(|s| s.split("#").collect::>()) + .flat_map(|s| s.split("/").collect::>()) .flat_map(|main_part| main_part.split(":").collect::>()) .collect::>(); @@ -142,6 +183,30 @@ impl FromStr for Header { }, index_sequence: parts[10].into(), })); + } else if parts.len() == 7 { + return Ok(Header::Illumina(IlluminaHeader { + instrument_name: parts[0][1..].to_string(), + flowcell_lane: parts[1] + .parse() + .expect("Valid Illumina header: Number value required"), + tile_number: parts[2] + .parse() + .expect("Valid Illumina header: Number value required"), + x: parts[3] + .parse() + .expect("Valid Illumina header: Number value required"), + y: parts[4] + .parse() + .expect("Valid Illumina header: Number value required"), + index_number: parts[5] + .parse() + .expect("Valid Illumina header: Number value required"), + pair_member: match parts[6] { + "1" => Pair::PairedEnd, + "2" => Pair::MatePair, + _ => return Err("Invalid Illumina header".to_string()), + }, + })); } Err("Cannot parse FASTQ header".to_string()) @@ -166,7 +231,7 @@ mod tests { use crate::{scramble_sequence, Header}; #[test] - fn should_return_parsed_header() { + fn should_return_parsed_casava18_header() { let given = "@EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG"; if let Ok(Header::Casava18(actual)) = given.parse::
() { @@ -197,18 +262,6 @@ mod tests { assert_eq!(given, actual.to_string()); } - #[test] - fn should_return_scrambled_header_string() { - let given = "@EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG"; - let actual = given.parse::
(); - let expected = "@TEST73:273:CQEAACM:8:503:15353:197403 1:Y:18:GAGCGC"; - - assert!(actual.is_ok()); - - let actual = actual.unwrap().scramble(); - assert_eq!(expected, actual.to_string().as_str()); - } - #[test] fn should_return_scrambled_sequence_string_seed1() { let given = "GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT"; @@ -226,4 +279,33 @@ mod tests { assert_eq!(expected, actual); } + + #[test] + fn should_return_parsed_illumna_header() { + let given = "@HWUSI-EAS100R:6:73:941:1973#0/1"; + let actual = given.parse::
(); + + if let Ok(Header::Illumina(actual)) = actual { + assert_eq!(actual.instrument_name, "HWUSI-EAS100R"); + assert_eq!(actual.flowcell_lane, 6); + assert_eq!(actual.tile_number, 73); + assert_eq!(actual.x, 941); + assert_eq!(actual.y, 1973); + assert_eq!(actual.index_number, "0"); + assert_eq!(actual.pair_member, Pair::PairedEnd); + } else { + panic!("Failed to parse FASTQ header"); + } + } + + #[test] + fn should_return_illumina_header_string() { + let given = "@HWUSI-EAS100R:6:73:941:1973#0/1"; + let actual = given.parse::
(); + + assert!(actual.is_ok()); + + let actual = actual.unwrap(); + assert_eq!(given, actual.to_string()); + } }