diff --git a/src/lkrexport.rs b/src/lkrexport.rs
index 66e0945..811ce45 100644
--- a/src/lkrexport.rs
+++ b/src/lkrexport.rs
@@ -125,9 +125,22 @@ impl Meldung {
}
}
-    pub fn no_linebreak(&self) -> String {
+    /// Returns `raw_value` normalized into a single-line XML string.
+    ///
+    /// Every line break together with the whitespace following it is removed and
+    /// the result is trimmed. Self-closing elements are then expanded into an
+    /// explicit open/close pair, e.g. `<a x="1"/>` becomes `<a x="1"></a>`, so
+    /// both spellings of an empty element yield the same string.
+    pub fn sanitized_xml_string(&self) -> String {
         let re = Regex::new(r"\n\s*").unwrap();
-        re.replace_all(&self.raw_value, "").trim().to_string()
+        let content = re.replace_all(&self.raw_value, "");
+
+        // Group 1 captures the element name and group 2 its attributes. Only the
+        // name goes into the closing tag, which keeps the output well-formed
+        // even when the element carries attributes.
+        let re = Regex::new(r"<([^\s<>/]+)([^<>]*?)\s*/>").unwrap();
+        re.replace_all(content.trim(), "<${1}${2}></${1}>")
+            .into_owned()
     }
}
@@ -235,8 +248,23 @@ mod tests {
};
assert_eq!(
- meldung.no_linebreak(),
+ meldung.sanitized_xml_string(),
"TestInhalt 3".to_string()
);
}
+
+    #[test]
+    fn should_get_meldung_without_self_closing_tags() {
+        // NOTE(review): the fixture below contains no self-closing element, so
+        // only line-break removal and trimming are exercised here - confirm the
+        // intended fixture before relying on this test for tag expansion.
+        let raw_value = String::from(" \n \n Test\n \n \n\n");
+        let meldung = Meldung { raw_value };
+
+        let sanitized = meldung.sanitized_xml_string();
+
+        // Line breaks plus the whitespace after them vanish; the rest is trimmed.
+        let expected = String::from("Test");
+        assert_eq!(sanitized, expected);
+    }
}
diff --git a/src/main.rs b/src/main.rs
index 7225609..cabfb6f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -572,8 +572,8 @@ fn main() -> Result<(), Box> {
.unwrap_or(&Meldung {
raw_value: String::new(),
})
- .no_linebreak()
- != meldung.no_linebreak()
+ .sanitized_xml_string()
+ != meldung.sanitized_xml_string()
})
.map(|(_, meldung)| meldung.id().unwrap_or("?".into()))
.collect_vec();