diff --git a/src/lkrexport.rs b/src/lkrexport.rs index 66e0945..811ce45 100644 --- a/src/lkrexport.rs +++ b/src/lkrexport.rs @@ -125,9 +125,22 @@ impl Meldung { } } - pub fn no_linebreak(&self) -> String { + pub fn sanitized_xml_string(&self) -> String { let re = Regex::new(r"\n\s*").unwrap(); - re.replace_all(&self.raw_value, "").trim().to_string() + let content = re.replace_all(&self.raw_value, "").trim().to_string(); + + let re = Regex::new(r"<[^>]+/>").unwrap(); + if re.is_match(&content) { + let mut c = content.to_string(); + re.find_iter(&content) + .map(|m| m.as_str().to_string().replace('<', "").replace("/>", "")) + .for_each(|tag| { + c = c.replace(&format!("<{}/>", tag), &format!("<{}>", tag, tag)); + }); + return c; + } + + content } } @@ -235,8 +248,23 @@ mod tests { }; assert_eq!( - meldung.no_linebreak(), + meldung.sanitized_xml_string(), "TestInhalt 3".to_string() ); } + + #[test] + fn should_get_meldung_without_self_closing_tags() { + let meldung = Meldung { + raw_value: + " \n \n Test\n \n \n\n" + .into(), + }; + + assert_eq!( + meldung.sanitized_xml_string(), + "Test" + .to_string() + ); + } } diff --git a/src/main.rs b/src/main.rs index 7225609..cabfb6f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -572,8 +572,8 @@ fn main() -> Result<(), Box> { .unwrap_or(&Meldung { raw_value: String::new(), }) - .no_linebreak() - != meldung.no_linebreak() + .sanitized_xml_string() + != meldung.sanitized_xml_string() }) .map(|(_, meldung)| meldung.id().unwrap_or("?".into())) .collect_vec();