feat: convert self-closing tags when sanitizing xml

This commit is contained in:
Paul-Christian Volkmer 2024-06-13 09:24:16 +02:00
parent 3520806e79
commit 07478cf6a3
2 changed files with 33 additions and 5 deletions

View File

@ -125,9 +125,22 @@ impl Meldung {
} }
} }
pub fn no_linebreak(&self) -> String { pub fn sanitized_xml_string(&self) -> String {
let re = Regex::new(r"\n\s*").unwrap(); let re = Regex::new(r"\n\s*").unwrap();
re.replace_all(&self.raw_value, "").trim().to_string() let content = re.replace_all(&self.raw_value, "").trim().to_string();
let re = Regex::new(r"<[^>]+/>").unwrap();
if re.is_match(&content) {
let mut c = content.to_string();
re.find_iter(&content)
.map(|m| m.as_str().to_string().replace('<', "").replace("/>", ""))
.for_each(|tag| {
c = c.replace(&format!("<{}/>", tag), &format!("<{}></{}>", tag, tag));
});
return c;
}
content
} }
} }
@ -235,8 +248,23 @@ mod tests {
}; };
assert_eq!( assert_eq!(
meldung.no_linebreak(), meldung.sanitized_xml_string(),
"<Test><Test2>TestInhalt 3</Test2></Test>".to_string() "<Test><Test2>TestInhalt 3</Test2></Test>".to_string()
); );
} }
/// Self-closing tags, including one that occurs twice, must come back as
/// explicit start/end tag pairs once the line breaks have been stripped.
#[test]
fn should_get_meldung_without_self_closing_tags() {
    let raw_value = String::from(
        " <Test>\n <Test2/>\n <Content>Test</Content>\n <Test3/>\n <Test2/>\n</Test>\n",
    );
    let meldung = Meldung { raw_value };

    let expected = String::from(
        "<Test><Test2></Test2><Content>Test</Content><Test3></Test3><Test2></Test2></Test>",
    );

    assert_eq!(meldung.sanitized_xml_string(), expected);
}
} }

View File

@ -572,8 +572,8 @@ fn main() -> Result<(), Box<dyn Error>> {
.unwrap_or(&Meldung { .unwrap_or(&Meldung {
raw_value: String::new(), raw_value: String::new(),
}) })
.no_linebreak() .sanitized_xml_string()
!= meldung.no_linebreak() != meldung.sanitized_xml_string()
}) })
.map(|(_, meldung)| meldung.id().unwrap_or("?".into())) .map(|(_, meldung)| meldung.id().unwrap_or("?".into()))
.collect_vec(); .collect_vec();