mirror of
https://github.com/pcvolkmer/etl-processor.git
synced 2025-04-19 17:26:51 +00:00
feat: add pseudonymization for patient IDs (#107)
This commit is contained in:
parent
c5c553f817
commit
8e3de6a220
@ -21,12 +21,12 @@ package dev.dnpm.etl.processor.pseudonym
|
|||||||
|
|
||||||
import de.ukw.ccc.bwhc.dto.MtbFile
|
import de.ukw.ccc.bwhc.dto.MtbFile
|
||||||
import dev.dnpm.etl.processor.PatientId
|
import dev.dnpm.etl.processor.PatientId
|
||||||
|
import dev.pcvolkmer.mv64e.mtb.Mtb
|
||||||
import org.apache.commons.codec.digest.DigestUtils
|
import org.apache.commons.codec.digest.DigestUtils
|
||||||
|
|
||||||
/** Replaces patient ID with generated patient pseudonym
|
/** Replaces patient ID with generated patient pseudonym
|
||||||
*
|
*
|
||||||
* @param pseudonymizeService The pseudonymizeService to be used
|
* @param pseudonymizeService The pseudonymizeService to be used
|
||||||
*
|
|
||||||
* @return The MTB file containing patient pseudonymes
|
* @return The MTB file containing patient pseudonymes
|
||||||
*/
|
*/
|
||||||
infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
|
infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
|
||||||
@ -49,7 +49,11 @@ infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
|
|||||||
}
|
}
|
||||||
this.lastGuidelineTherapies?.forEach { it.patient = patientPseudonym }
|
this.lastGuidelineTherapies?.forEach { it.patient = patientPseudonym }
|
||||||
this.molecularPathologyFindings?.forEach { it.patient = patientPseudonym }
|
this.molecularPathologyFindings?.forEach { it.patient = patientPseudonym }
|
||||||
this.molecularTherapies?.forEach { molecularTherapy -> molecularTherapy.history.forEach { it.patient = patientPseudonym } }
|
this.molecularTherapies?.forEach { molecularTherapy ->
|
||||||
|
molecularTherapy.history.forEach {
|
||||||
|
it.patient = patientPseudonym
|
||||||
|
}
|
||||||
|
}
|
||||||
this.ngsReports?.forEach { it.patient = patientPseudonym }
|
this.ngsReports?.forEach { it.patient = patientPseudonym }
|
||||||
this.previousGuidelineTherapies?.forEach { it.patient = patientPseudonym }
|
this.previousGuidelineTherapies?.forEach { it.patient = patientPseudonym }
|
||||||
this.rebiopsyRequests?.forEach { it.patient = patientPseudonym }
|
this.rebiopsyRequests?.forEach { it.patient = patientPseudonym }
|
||||||
@ -63,7 +67,6 @@ infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
|
|||||||
* Creates new hash of content IDs with given prefix except for patient IDs
|
* Creates new hash of content IDs with given prefix except for patient IDs
|
||||||
*
|
*
|
||||||
* @param pseudonymizeService The pseudonymizeService to be used
|
* @param pseudonymizeService The pseudonymizeService to be used
|
||||||
*
|
|
||||||
* @return The MTB file containing rehashed content IDs
|
* @return The MTB file containing rehashed content IDs
|
||||||
*/
|
*/
|
||||||
infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService) {
|
infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService) {
|
||||||
@ -224,3 +227,89 @@ infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Replaces patient IDs with the generated patient pseudonym.
 *
 * The pseudonym is requested once from [pseudonymizeService] using the current
 * value of `patient.id`. It is then written to the patient itself and to the
 * patient reference of every nested item handled below. The receiver is
 * modified in place; this function does not return a value.
 *
 * @since 0.11.0
 *
 * @param pseudonymizeService The pseudonymizeService to be used
 */
infix fun Mtb.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
    // Resolve the pseudonym before any ID is overwritten, as it is derived from the original patient ID.
    val patientPseudonym = pseudonymizeService.patientPseudonym(PatientId(this.patient.id)).value

    this.episodesOfCare?.forEach { it.patient.id = patientPseudonym }
    this.carePlans?.forEach { carePlan ->
        carePlan.patient.id = patientPseudonym
        carePlan.rebiopsyRequests?.forEach { it.patient.id = patientPseudonym }
        carePlan.histologyReevaluationRequests?.forEach { it.patient.id = patientPseudonym }
        // Safe call added for consistency with the sibling collections.
        // NOTE(review): presumably this list can be absent like the others - confirm against the DTO.
        carePlan.medicationRecommendations?.forEach { it.patient.id = patientPseudonym }
        carePlan.studyEnrollmentRecommendations?.forEach { it.patient.id = patientPseudonym }
        carePlan.procedureRecommendations?.forEach { it.patient.id = patientPseudonym }
        // A care plan without a genetic counseling recommendation must not abort pseudonymization.
        carePlan.geneticCounselingRecommendation?.patient?.id = patientPseudonym
    }
    // Diagnoses are handled exactly once here; the original processed them a second time further down.
    this.diagnoses?.forEach { it.patient.id = patientPseudonym }
    this.guidelineTherapies?.forEach { it.patient.id = patientPseudonym }
    this.guidelineProcedures?.forEach { it.patient.id = patientPseudonym }
    this.patient.id = patientPseudonym
    this.claims?.forEach { it.patient.id = patientPseudonym }
    this.claimResponses?.forEach { it.patient.id = patientPseudonym }
    this.histologyReports?.forEach { report ->
        report.patient.id = patientPseudonym
        report.results.tumorMorphology?.patient?.id = patientPseudonym
        report.results.tumorCellContent?.patient?.id = patientPseudonym
    }
    this.ngsReports?.forEach { report ->
        report.patient.id = patientPseudonym
        report.results.simpleVariants?.forEach { it.patient.id = patientPseudonym }
        report.results.copyNumberVariants?.forEach { it.patient.id = patientPseudonym }
        report.results.dnaFusions?.forEach { it.patient.id = patientPseudonym }
        report.results.rnaFusions?.forEach { it.patient.id = patientPseudonym }
        report.results.tumorCellContent?.patient?.id = patientPseudonym
        report.results.brcaness?.patient?.id = patientPseudonym
        report.results.tmb?.patient?.id = patientPseudonym
        report.results.hrdScore?.patient?.id = patientPseudonym
    }
    this.ihcReports?.forEach { report ->
        report.patient.id = patientPseudonym
        report.results.msiMmr?.forEach { it.patient.id = patientPseudonym }
        report.results.proteinExpression?.forEach { it.patient.id = patientPseudonym }
    }
    this.responses?.forEach { it.patient.id = patientPseudonym }
    this.specimens?.forEach { it.patient.id = patientPseudonym }
    this.priorDiagnosticReports?.forEach { it.patient.id = patientPseudonym }
    // Safe calls added so a record without performance status or systemic therapies is still processed.
    this.performanceStatus?.forEach { it.patient.id = patientPseudonym }
    this.systemicTherapies?.forEach { therapy ->
        therapy.history?.forEach { it.patient.id = patientPseudonym }
    }
}
|
||||||
|
|
||||||
|
/**
 * Creates new hashes of content IDs with given prefix except for patient IDs.
 *
 * Each handled ID is replaced by the prefix obtained from [pseudonymizeService]
 * followed by the first 41 characters of the SHA-256 hex digest of
 * `"<prefix>-<id>"`. The receiver is modified in place; this function does not
 * return a value. Currently only the IDs of `episodesOfCare` are rehashed.
 *
 * @since 0.11.0
 *
 * @param pseudonymizeService The pseudonymizeService to be used
 */
infix fun Mtb.anonymizeContentWith(pseudonymizeService: PseudonymizeService) {
    val prefix = pseudonymizeService.prefix()

    // Builds the replacement ID: prefix followed by the truncated, lower-cased digest.
    fun anonymize(id: String): String {
        val digest = DigestUtils.sha256Hex("$prefix-$id")
        val shortened = digest.substring(0, 41).lowercase()
        return "$prefix$shortened"
    }

    // Null list entries are skipped; an episode without an ID keeps its null ID.
    this.episodesOfCare?.filterNotNull()?.forEach { episode ->
        episode.id = episode.id?.let { currentId -> anonymize(currentId) }
    }

    // TODO all other properties
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* This file is part of ETL-Processor
|
* This file is part of ETL-Processor
|
||||||
*
|
*
|
||||||
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
|
* Copyright (c) 2025 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU Affero General Public License as published
|
* it under the terms of the GNU Affero General Public License as published
|
||||||
@ -21,7 +21,12 @@ package dev.dnpm.etl.processor.pseudonym
|
|||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
import de.ukw.ccc.bwhc.dto.*
|
import de.ukw.ccc.bwhc.dto.*
|
||||||
|
import dev.pcvolkmer.mv64e.mtb.MTBEpisodeOfCare
|
||||||
|
import dev.pcvolkmer.mv64e.mtb.Mtb
|
||||||
|
import dev.pcvolkmer.mv64e.mtb.PeriodDate
|
||||||
|
import dev.pcvolkmer.mv64e.mtb.Reference
|
||||||
import org.assertj.core.api.Assertions.assertThat
|
import org.assertj.core.api.Assertions.assertThat
|
||||||
|
import org.junit.jupiter.api.Nested
|
||||||
import org.junit.jupiter.api.Test
|
import org.junit.jupiter.api.Test
|
||||||
import org.junit.jupiter.api.assertThrows
|
import org.junit.jupiter.api.assertThrows
|
||||||
import org.junit.jupiter.api.extension.ExtendWith
|
import org.junit.jupiter.api.extension.ExtendWith
|
||||||
@ -32,12 +37,15 @@ import org.mockito.kotlin.doAnswer
|
|||||||
import org.mockito.kotlin.whenever
|
import org.mockito.kotlin.whenever
|
||||||
import org.springframework.core.io.ClassPathResource
|
import org.springframework.core.io.ClassPathResource
|
||||||
|
|
||||||
const val FAKE_MTB_FILE_PATH = "fake_MTBFile.json"
|
|
||||||
const val CLEAN_PATIENT_ID = "5dad2f0b-49c6-47d8-a952-7b9e9e0f7549"
|
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension::class)
|
@ExtendWith(MockitoExtension::class)
|
||||||
class ExtensionsTest {
|
class ExtensionsTest {
|
||||||
|
|
||||||
|
@Nested
|
||||||
|
inner class UsingBwhcDatamodel {
|
||||||
|
|
||||||
|
val FAKE_MTB_FILE_PATH = "fake_MTBFile.json"
|
||||||
|
val CLEAN_PATIENT_ID = "5dad2f0b-49c6-47d8-a952-7b9e9e0f7549"
|
||||||
|
|
||||||
private fun fakeMtbFile(): MtbFile {
|
private fun fakeMtbFile(): MtbFile {
|
||||||
val mtbFile = ClassPathResource(FAKE_MTB_FILE_PATH).inputStream
|
val mtbFile = ClassPathResource(FAKE_MTB_FILE_PATH).inputStream
|
||||||
return ObjectMapper().readValue(mtbFile, MtbFile::class.java)
|
return ObjectMapper().readValue(mtbFile, MtbFile::class.java)
|
||||||
@ -191,8 +199,83 @@ class ExtensionsTest {
|
|||||||
mtbFile.pseudonymizeWith(pseudonymizeService)
|
mtbFile.pseudonymizeWith(pseudonymizeService)
|
||||||
mtbFile.anonymizeContentWith(pseudonymizeService)
|
mtbFile.anonymizeContentWith(pseudonymizeService)
|
||||||
|
|
||||||
|
|
||||||
assertThat(mtbFile.episode.id).isNotNull()
|
assertThat(mtbFile.episode.id).isNotNull()
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nested
inner class UsingDnpmV2Datamodel {

    // Classpath resource deserialized by fakeMtbFile().
    val FAKE_MTB_FILE_PATH = "mv64e-mtb-fake-patient.json"

    // Patient ID that must no longer appear in the serialized record after pseudonymization.
    val CLEAN_PATIENT_ID = "63f8fd7b-8127-4f3c-8843-aa9199e21c29"

    /** Reads the fake MTB record from the classpath resource. */
    private fun fakeMtbFile(): Mtb {
        val mtbFile = ClassPathResource(FAKE_MTB_FILE_PATH).inputStream
        return ObjectMapper().readValue(mtbFile, Mtb::class.java)
    }

    /** Serializes the record to JSON so the whole content can be searched for leaked IDs. */
    private fun Mtb.serialized(): String {
        return ObjectMapper().writeValueAsString(this)
    }

    @Test
    fun shouldNotContainCleanPatientId(@Mock pseudonymizeService: PseudonymizeService) {
        // The stub ignores the requested patient ID and always answers with the same pseudonym.
        doAnswer {
            "PSEUDO-ID"
        }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())

        val mtbFile = fakeMtbFile()

        mtbFile.pseudonymizeWith(pseudonymizeService)

        assertThat(mtbFile.patient.id).isEqualTo("PSEUDO-ID")
        assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID)
    }

    @Test
    fun shouldNotThrowExceptionOnNullValues(@Mock pseudonymizeService: PseudonymizeService) {
        doAnswer {
            "PSEUDO-ID"
        }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())

        doAnswer {
            "TESTDOMAIN"
        }.whenever(pseudonymizeService).prefix()

        // Minimal record: only patient and one episode of care are present, the listed collections are null.
        val mtbFile = Mtb.builder()
            .withPatient(
                dev.pcvolkmer.mv64e.mtb.Patient.builder()
                    .withId("1")
                    .withBirthDate("2000-08-08")
                    .withGender(null)
                    .build()
            )
            .withEpisodesOfCare(
                listOf(
                    MTBEpisodeOfCare.builder()
                        .withId("1")
                        .withPatient(Reference("1"))
                        .withPeriod(PeriodDate.builder().withStart("2023-08-08").build())
                        .build()
                )
            )
            .withClaims(null)
            .withDiagnoses(null)
            .withCarePlans(null)
            .withClaimResponses(null)
            .withHistologyReports(null)
            .withNgsReports(null)
            .withResponses(null)
            .withSpecimens(null)
            .build()

        mtbFile.pseudonymizeWith(pseudonymizeService)
        mtbFile.anonymizeContentWith(pseudonymizeService)

        assertThat(mtbFile.episodesOfCare).hasSize(1)
        // Check the IDs themselves: the mapped list is never null, so isNotNull on it could not fail.
        assertThat(mtbFile.episodesOfCare.map { it.id }).doesNotContainNull()
    }
}
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user