1
0
mirror of https://github.com/pcvolkmer/etl-processor.git synced 2025-04-19 17:26:51 +00:00

feat: add pseudonymization for patient IDs (#107)

This commit is contained in:
Paul-Christian Volkmer 2025-04-06 14:42:09 +02:00 committed by GitHub
parent c5c553f817
commit 8e3de6a220
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 328 additions and 156 deletions

View File

@ -21,12 +21,12 @@ package dev.dnpm.etl.processor.pseudonym
import de.ukw.ccc.bwhc.dto.MtbFile import de.ukw.ccc.bwhc.dto.MtbFile
import dev.dnpm.etl.processor.PatientId import dev.dnpm.etl.processor.PatientId
import dev.pcvolkmer.mv64e.mtb.Mtb
import org.apache.commons.codec.digest.DigestUtils import org.apache.commons.codec.digest.DigestUtils
/** Replaces patient ID with generated patient pseudonym /** Replaces patient ID with generated patient pseudonym
* *
* @param pseudonymizeService The pseudonymizeService to be used * @param pseudonymizeService The pseudonymizeService to be used
*
* @return The MTB file containing patient pseudonymes * @return The MTB file containing patient pseudonymes
*/ */
infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) { infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
@ -49,7 +49,11 @@ infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
} }
this.lastGuidelineTherapies?.forEach { it.patient = patientPseudonym } this.lastGuidelineTherapies?.forEach { it.patient = patientPseudonym }
this.molecularPathologyFindings?.forEach { it.patient = patientPseudonym } this.molecularPathologyFindings?.forEach { it.patient = patientPseudonym }
this.molecularTherapies?.forEach { molecularTherapy -> molecularTherapy.history.forEach { it.patient = patientPseudonym } } this.molecularTherapies?.forEach { molecularTherapy ->
molecularTherapy.history.forEach {
it.patient = patientPseudonym
}
}
this.ngsReports?.forEach { it.patient = patientPseudonym } this.ngsReports?.forEach { it.patient = patientPseudonym }
this.previousGuidelineTherapies?.forEach { it.patient = patientPseudonym } this.previousGuidelineTherapies?.forEach { it.patient = patientPseudonym }
this.rebiopsyRequests?.forEach { it.patient = patientPseudonym } this.rebiopsyRequests?.forEach { it.patient = patientPseudonym }
@ -63,7 +67,6 @@ infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
* Creates new hash of content IDs with given prefix except for patient IDs * Creates new hash of content IDs with given prefix except for patient IDs
* *
* @param pseudonymizeService The pseudonymizeService to be used * @param pseudonymizeService The pseudonymizeService to be used
*
* @return The MTB file containing rehashed content IDs * @return The MTB file containing rehashed content IDs
*/ */
infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService) { infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService) {
@ -224,3 +227,89 @@ infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService)
} }
} }
} }
/** Replaces patient ID with generated patient pseudonym
*
* @since 0.11.0
*
* @param pseudonymizeService The pseudonymizeService to be used
* @return The MTB file containing patient pseudonymes
*/
infix fun Mtb.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
val patientPseudonym = pseudonymizeService.patientPseudonym(PatientId(this.patient.id)).value
this.episodesOfCare?.forEach { it.patient.id = patientPseudonym }
this.carePlans?.forEach {
it.patient.id = patientPseudonym
it.rebiopsyRequests?.forEach { it.patient.id = patientPseudonym }
it.histologyReevaluationRequests?.forEach { it.patient.id = patientPseudonym }
it.medicationRecommendations.forEach { it.patient.id = patientPseudonym }
it.studyEnrollmentRecommendations?.forEach { it.patient.id = patientPseudonym }
it.procedureRecommendations?.forEach { it.patient.id = patientPseudonym }
it.geneticCounselingRecommendation.patient.id = patientPseudonym
}
this.diagnoses?.forEach { it.patient.id = patientPseudonym }
this.guidelineTherapies?.forEach { it.patient.id = patientPseudonym }
this.guidelineProcedures?.forEach { it.patient.id = patientPseudonym }
this.patient.id = patientPseudonym
this.claims?.forEach { it.patient.id = patientPseudonym }
this.claimResponses?.forEach { it.patient.id = patientPseudonym }
this.diagnoses?.forEach { it.patient.id = patientPseudonym }
this.histologyReports?.forEach {
it.patient.id = patientPseudonym
it.results.tumorMorphology?.patient?.id = patientPseudonym
it.results.tumorCellContent?.patient?.id = patientPseudonym
}
this.ngsReports?.forEach {
it.patient.id = patientPseudonym
it.results.simpleVariants?.forEach { it.patient.id = patientPseudonym }
it.results.copyNumberVariants?.forEach { it.patient.id = patientPseudonym }
it.results.dnaFusions?.forEach { it.patient.id = patientPseudonym }
it.results.rnaFusions?.forEach { it.patient.id = patientPseudonym }
it.results.tumorCellContent?.patient?.id = patientPseudonym
it.results.brcaness?.patient?.id = patientPseudonym
it.results.tmb?.patient?.id = patientPseudonym
it.results.hrdScore?.patient?.id = patientPseudonym
}
this.ihcReports?.forEach {
it.patient.id = patientPseudonym
it.results.msiMmr?.forEach { it.patient.id = patientPseudonym }
it.results.proteinExpression?.forEach { it.patient.id = patientPseudonym }
}
this.responses?.forEach { it.patient.id = patientPseudonym }
this.specimens?.forEach { it.patient.id = patientPseudonym }
this.priorDiagnosticReports?.forEach { it.patient.id = patientPseudonym }
this.performanceStatus.forEach { it.patient.id = patientPseudonym }
this.systemicTherapies.forEach {
it.history?.forEach {
it.patient.id = patientPseudonym
}
}
}
/**
* Creates new hash of content IDs with given prefix except for patient IDs
*
* @since 0.11.0
*
* @param pseudonymizeService The pseudonymizeService to be used
* @return The MTB file containing rehashed content IDs
*/
infix fun Mtb.anonymizeContentWith(pseudonymizeService: PseudonymizeService) {
val prefix = pseudonymizeService.prefix()
fun anonymize(id: String): String {
val hash = DigestUtils.sha256Hex("$prefix-$id").substring(0, 41).lowercase()
return "$prefix$hash"
}
this.episodesOfCare?.forEach {
it?.apply {
id = id?.let {
anonymize(it)
}
}
}
// TODO all other properties
}

View File

@ -1,7 +1,7 @@
/* /*
* This file is part of ETL-Processor * This file is part of ETL-Processor
* *
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors * Copyright (c) 2025 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published * it under the terms of the GNU Affero General Public License as published
@ -21,7 +21,12 @@ package dev.dnpm.etl.processor.pseudonym
import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.ObjectMapper
import de.ukw.ccc.bwhc.dto.* import de.ukw.ccc.bwhc.dto.*
import dev.pcvolkmer.mv64e.mtb.MTBEpisodeOfCare
import dev.pcvolkmer.mv64e.mtb.Mtb
import dev.pcvolkmer.mv64e.mtb.PeriodDate
import dev.pcvolkmer.mv64e.mtb.Reference
import org.assertj.core.api.Assertions.assertThat import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Nested
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows import org.junit.jupiter.api.assertThrows
import org.junit.jupiter.api.extension.ExtendWith import org.junit.jupiter.api.extension.ExtendWith
@ -32,12 +37,15 @@ import org.mockito.kotlin.doAnswer
import org.mockito.kotlin.whenever import org.mockito.kotlin.whenever
import org.springframework.core.io.ClassPathResource import org.springframework.core.io.ClassPathResource
const val FAKE_MTB_FILE_PATH = "fake_MTBFile.json"
const val CLEAN_PATIENT_ID = "5dad2f0b-49c6-47d8-a952-7b9e9e0f7549"
@ExtendWith(MockitoExtension::class) @ExtendWith(MockitoExtension::class)
class ExtensionsTest { class ExtensionsTest {
@Nested
inner class UsingBwhcDatamodel {
val FAKE_MTB_FILE_PATH = "fake_MTBFile.json"
val CLEAN_PATIENT_ID = "5dad2f0b-49c6-47d8-a952-7b9e9e0f7549"
private fun fakeMtbFile(): MtbFile { private fun fakeMtbFile(): MtbFile {
val mtbFile = ClassPathResource(FAKE_MTB_FILE_PATH).inputStream val mtbFile = ClassPathResource(FAKE_MTB_FILE_PATH).inputStream
return ObjectMapper().readValue(mtbFile, MtbFile::class.java) return ObjectMapper().readValue(mtbFile, MtbFile::class.java)
@ -191,8 +199,83 @@ class ExtensionsTest {
mtbFile.pseudonymizeWith(pseudonymizeService) mtbFile.pseudonymizeWith(pseudonymizeService)
mtbFile.anonymizeContentWith(pseudonymizeService) mtbFile.anonymizeContentWith(pseudonymizeService)
assertThat(mtbFile.episode.id).isNotNull() assertThat(mtbFile.episode.id).isNotNull()
} }
}
@Nested
inner class UsingDnpmV2Datamodel {
val FAKE_MTB_FILE_PATH = "mv64e-mtb-fake-patient.json"
val CLEAN_PATIENT_ID = "63f8fd7b-8127-4f3c-8843-aa9199e21c29"
private fun fakeMtbFile(): Mtb {
val mtbFile = ClassPathResource(FAKE_MTB_FILE_PATH).inputStream
return ObjectMapper().readValue(mtbFile, Mtb::class.java)
}
private fun Mtb.serialized(): String {
return ObjectMapper().writeValueAsString(this)
}
@Test
fun shouldNotContainCleanPatientId(@Mock pseudonymizeService: PseudonymizeService) {
doAnswer {
it.arguments[0]
"PSEUDO-ID"
}.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
val mtbFile = fakeMtbFile()
mtbFile.pseudonymizeWith(pseudonymizeService)
assertThat(mtbFile.patient.id).isEqualTo("PSEUDO-ID")
assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID)
}
@Test
fun shouldNotThrowExceptionOnNullValues(@Mock pseudonymizeService: PseudonymizeService) {
doAnswer {
it.arguments[0]
"PSEUDO-ID"
}.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
doAnswer {
"TESTDOMAIN"
}.whenever(pseudonymizeService).prefix()
val mtbFile = Mtb.builder()
.withPatient(
dev.pcvolkmer.mv64e.mtb.Patient.builder()
.withId("1")
.withBirthDate("2000-08-08")
.withGender(null)
.build()
)
.withEpisodesOfCare(
listOf(
MTBEpisodeOfCare.builder()
.withId("1")
.withPatient(Reference("1"))
.withPeriod(PeriodDate.builder().withStart("2023-08-08").build())
.build()
)
)
.withClaims(null)
.withDiagnoses(null)
.withCarePlans(null)
.withClaimResponses(null)
.withHistologyReports(null)
.withNgsReports(null)
.withResponses(null)
.withSpecimens(null)
.build()
mtbFile.pseudonymizeWith(pseudonymizeService)
mtbFile.anonymizeContentWith(pseudonymizeService)
assertThat(mtbFile.episodesOfCare).hasSize(1)
assertThat(mtbFile.episodesOfCare.map { it.id }).isNotNull
}
}
} }