feat: add compare sub command for csv file and database

2025-07-02 00:02:55 +00:00 · 2024-02-28 15:15:23 +01:00
parent 560e93bcb7
commit 7d4b0ac085
6 changed files with 282 additions and 30 deletions
--- a/README.md
+++ b/README.md
@ -60,7 +60,7 @@ Options:

 ## Export aus der Onkostar-Datenbank

-Die Anwendung ist in der Lage, die Spalten
+Die Anwendung ist in der Lage, mit dem Befehl `export` die Spalten

 * `pat_id`: Patienten-ID (optional über Parameter `--pat-id`)
 * `cond_id`: Condition-ID
@ -77,3 +77,10 @@ Options:
      --pat-id               Export mit Klartext-Patienten-ID
  -o, --output <OUTPUT>      Ausgabedatei
 ```
+
+## Vergleich CSV-Datei für OPAL und Onkostar-Datenbank
+
+Die Anwendung kann auch die Conditions in der CSV-Datei mit der Onkostar-Datenbank direkt vergleichen.
+
+Hierzu kann der Befehl `compare` genutzt werden. Dieser verwendet alle Optionen für die Datenbank, die Option `--year`
+für das Diagnosejahr sowie die Option `--file` für die CSV-Datei und gibt eine Übersicht auf der Konsole aus.
--- a/src/cli.rs
+++ b/src/cli.rs
@ -89,4 +89,30 @@ pub enum SubCommand {
        #[arg(short = 'y', long, help = "Jahr der Diagnose")]
        year: String,
    },
+    // Sub command comparing the OPAL CSV file with the Onkostar database.
+    // Takes the database connection options, the CSV file and the diagnosis year.
+    #[command(about = "Abgleich zwischen CSV-Datei für OPAL und Onkostar-Datenbank")]
+    Compare {
+        #[arg(short = 'D', long, help = "Datenbank-Name", default_value = "onkostar")]
+        database: String,
+        // NOTE(review): `-h` is normally claimed by clap's auto-generated help flag;
+        // confirm the parser disables or remaps it, otherwise this short option conflicts.
+        #[arg(
+            short = 'h',
+            long,
+            help = "Datenbank-Host",
+            default_value = "localhost"
+        )]
+        host: String,
+        // Help text fixed: this option is the port, not the host.
+        #[arg(short = 'P', long, help = "Datenbank-Port", default_value = "3306")]
+        port: u16,
+        // Optional: when absent, `main` asks for the password interactively.
+        #[arg(
+            short = 'p',
+            long,
+            help = "Passwort. Wenn nicht angegeben, wird danach gefragt"
+        )]
+        password: Option<String>,
+        #[arg(short = 'u', long, help = "Benutzername")]
+        user: String,
+        // Path of the CSV file whose conditions are compared with the database.
+        #[arg(short, long, help = "CSV-File für Opal")]
+        file: String,
+        // Year of diagnosis; `main` expands values that are not four characters long.
+        #[arg(short = 'y', long, help = "Jahr der Diagnose")]
+        year: String,
+    },
 }
--- a/src/common.rs
+++ b/src/common.rs
@ -19,10 +19,11 @@
 */

 use itertools::Itertools;
+use serde::{Deserialize, Serialize};

 pub struct Icd10GroupSize {
    pub name: String,
-    pub size: usize
+    pub size: usize,
 }

 pub struct Record {
@ -30,16 +31,34 @@ pub struct Record {
    pub icd10_code: String,
 }

+/// A condition that exists in both the CSV file and the database but whose
+/// ICD-10 code differs between the two sources.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct DiffRecord {
+    /// Condition ID shared by the CSV row and the database row.
+    pub condition_id: String,
+    /// ICD-10 code found in the CSV file.
+    pub csv_icd10_code: String,
+    /// ICD-10 code found in the database.
+    pub db_icd10_code: String,
+}
+
+/// One condition row, deserialized from the OPAL CSV file and shared with the
+/// database module. The serde renames give the column names used on the wire.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct ExportData {
+    /// Patient ID; optional.
+    #[serde(rename = "pat_id")]
+    pub pat_id: Option<String>,
+    /// Condition ID.
+    #[serde(rename = "cond_id")]
+    pub condition_id: String,
+    /// Diagnosis date, kept as a string.
+    // NOTE(review): the definition removed from database.rs used `condition_date`
+    // here; confirm `conditiondate` matches the CSV header and the export output.
+    #[serde(rename = "conditiondate")]
+    pub diagnosis_date: String,
+    /// ICD-10 code of the condition.
+    #[serde(rename = "condcodingcode")]
+    pub icd_10_code: String,
+}
+
 pub struct Check;

 impl Check {
-
    /// Collects all records by grouping by ICD10 group
    pub fn collect(records: &[Record]) -> Result<Vec<Icd10GroupSize>, ()> {
-        let items = records.iter()
+        let items = records
+            .iter()
            .map(|record| Record {
                condition_id: record.condition_id.to_string(),
-                icd10_code: Self::map_icd_code(&record.icd10_code)
+                icd10_code: Self::map_icd_code(&record.icd10_code),
            })
            .sorted_by_key(|record| record.icd10_code.to_string())
            .group_by(|record| record.icd10_code.to_string())
@ -47,20 +66,29 @@ impl Check {
            .map(|(icd10, group)| (icd10, group.collect::<Vec<_>>()))
            .map(|record| Icd10GroupSize {
                name: record.0,
-                size: record.1.iter().count()
-            }).collect::<Vec<_>>();
+                size: record.1.iter().count(),
+            })
+            .collect::<Vec<_>>();

        Ok(items)
    }

+    /// Tells whether `code` falls into one of the named ICD-10 groups.
+    ///
+    /// Returns `false` only when the code maps to the catch-all group `"Other"`.
+    pub fn is_relevant(code: &str) -> bool {
+        let group = Self::map_icd_code(code);
+        group != "Other"
+    }
+
    fn map_icd_code(code: &str) -> String {
        let icd10 = match code {
            "D39.1" | "D09.0" | "D41.4" => code,
-            _ => code.split('.').collect::<Vec<_>>().first().unwrap()
+            _ => code.split('.').collect::<Vec<_>>().first().unwrap(),
        };

        match icd10 {
-            "C00" | "C01" | "C02" | "C03" | "C04" | "C05" | "C06" | "C07" | "C08" | "C09" | "C10" | "C11" | "C12" | "C13" | "C14" => "C00-C14",
+            "C00" | "C01" | "C02" | "C03" | "C04" | "C05" | "C06" | "C07" | "C08" | "C09"
+            | "C10" | "C11" | "C12" | "C13" | "C14" => "C00-C14",
            "C15" => "C15",
            "C16" => "C16",
            "C18" | "C19" | "C20" | "C21" => "C18-C21",
@ -84,7 +112,8 @@ impl Check {
            "C82" | "C83" | "C84" | "C85" | "C86" | "C87" | "C88" | "C96" => "C82-C88, C96",
            "C90" => "C90",
            "C91" | "C92" | "C93" | "C94" | "C95" => "C91-C95",
-            _ => "Other"
-        }.to_string()
+            _ => "Other",
+        }
+        .to_string()
    }
 }
--- a/src/database.rs
+++ b/src/database.rs
@ -20,25 +20,12 @@

 use mysql::prelude::Queryable;
 use mysql::{params, Pool};
-use serde::Serialize;

-use crate::common::Icd10GroupSize;
+use crate::common::{ExportData, Icd10GroupSize};
 use crate::resources::{EXPORT_QUERY, SQL_QUERY};

 pub struct DatabaseSource(String);

-#[derive(Serialize, Debug)]
-pub struct ExportData {
-    #[serde(rename = "pat_id")]
-    pat_id: Option<String>,
-    #[serde(rename = "cond_id")]
-    condition_id: String,
-    #[serde(rename = "condition_date")]
-    diagnosis_date: String,
-    #[serde(rename = "condcodingcode")]
-    icd_10_code: String,
-}
-
 impl DatabaseSource {
    pub fn new(database: &str, host: &str, password: &str, port: u16, user: &str) -> Self {
        let password = urlencoding::encode(password);
--- a/src/main.rs
+++ b/src/main.rs
@ -24,9 +24,10 @@ use std::path::Path;
 use clap::Parser;
 use console::{style, Term};
 use csv::Writer;
+use itertools::Itertools;

 use crate::cli::{Cli, SubCommand};
-use crate::common::Icd10GroupSize;
+use crate::common::{Check, DiffRecord, Icd10GroupSize};
 use crate::database::DatabaseSource;

 mod cli;
@ -154,6 +155,196 @@ fn main() -> Result<(), Box<dyn Error>> {
                .to_string(),
            );
        }
+        SubCommand::Compare {
+            database,
+            host,
+            password,
+            port,
+            user,
+            file,
+            year,
+        } => {
+            let password = if let Some(password) = password {
+                password
+            } else {
+                let password = dialoguer::Password::new()
+                    .with_prompt("Password")
+                    .interact()
+                    .unwrap_or_default();
+                let _ = term.clear_last_lines(1);
+                password
+            };
+
+            let year = if year.len() == 4 {
+                year
+            } else {
+                format!("2{:0>3}", year)
+            };
+
+            let _ = term.write_line(
+                &style(format!("Warte auf Daten für das Diagnosejahr {}...", year))
+                    .blue()
+                    .to_string(),
+            );
+
+            let db = DatabaseSource::new(&database, &host, &password, port, &user);
+            let db_items = db
+                .export(&year, false)
+                .map_err(|_e| "Fehler bei Zugriff auf die Datenbank")?;
+
+            let _ = term.clear_last_lines(1);
+
+            let csv_items = opal::OpalCsvFile::export(Path::new(&file))
+                .map_err(|_e| "Kann Datei nicht lesen")?;
+
+            let mut not_in_csv = db_items
+                .iter()
+                .filter(|db_item| {
+                    !csv_items
+                        .iter()
+                        .map(|csv_item| &csv_item.condition_id)
+                        .contains(&db_item.condition_id)
+                })
+                .collect::<Vec<_>>();
+
+            let _ = term.write_line(
+                &style(format!(
+                    "{} Conditions aus der Datenbank für das Jahr {} - aber nicht in Datei '{}'",
+                    not_in_csv.len(),
+                    year,
+                    file
+                ))
+                .green()
+                .to_string(),
+            );
+
+            let _ = term.write_line(&format!("{:<64}   {:<5}", "Condition-ID", "ICD10"));
+
+            not_in_csv.sort_by_key(|item| item.condition_id.to_string());
+
+            not_in_csv
+                .iter()
+                .for_each(|item| match Check::is_relevant(&item.icd_10_code) {
+                    true => {
+                        let _ = term.write_line(&format!(
+                            "{}   {:<5}",
+                            item.condition_id,
+                            style(&item.icd_10_code).bold().red()
+                        ));
+                    }
+                    false => {
+                        let _ = term.write_line(&format!(
+                            "{}   {:<5}",
+                            item.condition_id, item.icd_10_code
+                        ));
+                    }
+                });
+
+            let mut not_in_db = csv_items
+                .iter()
+                .filter(|csv_item| {
+                    !db_items
+                        .iter()
+                        .map(|db_item| &db_item.condition_id)
+                        .contains(&csv_item.condition_id)
+                })
+                .collect::<Vec<_>>();
+
+            let _ = term.write_line(
+                &style(format!(
+                    "{} Conditions aus Datei '{}' - aber nicht in der Datenbank für das Jahr {}",
+                    not_in_db.len(),
+                    file,
+                    year
+                ))
+                .green()
+                .to_string(),
+            );
+
+            let _ = term.write_line(&format!("{:<64}   {:<5}", "Condition-ID", "ICD10"));
+
+            not_in_db.sort_by_key(|item| item.condition_id.to_string());
+
+            not_in_db
+                .iter()
+                .for_each(|item| match Check::is_relevant(&item.icd_10_code) {
+                    true => {
+                        let _ = term.write_line(&format!(
+                            "{}   {:<5}",
+                            item.condition_id,
+                            style(&item.icd_10_code).bold().red()
+                        ));
+                    }
+                    false => {
+                        let _ = term.write_line(&format!(
+                            "{}   {:<5}",
+                            item.condition_id, item.icd_10_code
+                        ));
+                    }
+                });
+
+            let mut icd10diff = csv_items
+                .iter()
+                .filter(|csv_item| {
+                    db_items
+                        .iter()
+                        .map(|db_item| &db_item.condition_id)
+                        .contains(&csv_item.condition_id)
+                })
+                .filter(|csv_item| {
+                    !db_items
+                        .iter()
+                        .map(|db_item| format!("{}-{}", db_item.condition_id, db_item.icd_10_code))
+                        .contains(&format!(
+                            "{}-{}",
+                            csv_item.condition_id, csv_item.icd_10_code
+                        ))
+                })
+                .map(|csv_item| DiffRecord {
+                    condition_id: csv_item.condition_id.to_string(),
+                    csv_icd10_code: csv_item.icd_10_code.to_string(),
+                    db_icd10_code: db_items
+                        .iter()
+                        .filter(|db_item| db_item.condition_id == csv_item.condition_id)
+                        .collect_vec()
+                        .first()
+                        .unwrap()
+                        .icd_10_code
+                        .to_string(),
+                })
+                .collect::<Vec<_>>();
+
+            let _ = term.write_line(
+                &style(format!(
+                    "{} Conditions mit Unterschied im ICD10-Code",
+                    icd10diff.len()
+                ))
+                .green()
+                .to_string(),
+            );
+
+            icd10diff.sort_by_key(|item| item.condition_id.to_string());
+
+            let _ = term.write_line(&format!(
+                "{:<64}   {:<5}   {:<5}",
+                "Condition-ID", "CSV", "DB"
+            ));
+
+            icd10diff.iter().for_each(|item| {
+                let _ = term.write_line(&format!(
+                    "{}   {}   {}",
+                    item.condition_id,
+                    match Check::is_relevant(&item.csv_icd10_code) {
+                        true => style(format!("{:<5}", item.csv_icd10_code)).bold().red(),
+                        _ => style(format!("{:<5}", item.csv_icd10_code)),
+                    },
+                    match Check::is_relevant(&item.db_icd10_code) {
+                        true => style(format!("{:<5}", item.db_icd10_code)).bold().red(),
+                        _ => style(format!("{:<5}", item.db_icd10_code)),
+                    }
+                ));
+            });
+        }
    }

    Ok(())
--- a/src/opal.rs
+++ b/src/opal.rs
@ -23,7 +23,7 @@ use std::path::Path;
 use csv::Reader;
 use serde::Deserialize;

-use crate::common::{Check, Icd10GroupSize, Record};
+use crate::common::{Check, ExportData, Icd10GroupSize, Record};

 #[derive(Deserialize)]
 pub struct OpalRecord {
@ -39,16 +39,28 @@ impl OpalCsvFile {
    pub fn check(path: &Path) -> Result<Vec<Icd10GroupSize>, ()> {
        let mut reader = Reader::from_path(path).expect("open file");

-        let items = reader.deserialize::<OpalRecord>()
+        let items = reader
+            .deserialize::<OpalRecord>()
            .filter(|record| record.is_ok())
            .map(|record| record.unwrap())
            .map(|record| Record {
                condition_id: record.cond_id,
-                icd10_code: record.cond_coding_code
+                icd10_code: record.cond_coding_code,
            })
            .collect::<Vec<_>>();

        Check::collect(&items)
    }

+    /// Reads all rows of the CSV file at `path` as [`ExportData`].
+    ///
+    /// Rows that cannot be deserialized are skipped silently.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err(())` when the file cannot be opened, so the caller can
+    /// report the problem instead of the process panicking.
+    pub fn export(path: &Path) -> Result<Vec<ExportData>, ()> {
+        let mut reader = Reader::from_path(path).map_err(|_| ())?;
+
+        let items = reader
+            .deserialize::<ExportData>()
+            .filter_map(Result::ok)
+            .collect::<Vec<_>>();
+
+        Ok(items)
+    }
 }