1
0
mirror of https://github.com/pcvolkmer/etl-processor.git synced 2025-04-19 17:26:51 +00:00

Merge pull request #60 from CCC-MF/issue_44

feat: salted re-hash IDs within MTB file except patient ID
This commit is contained in:
Paul-Christian Volkmer 2024-03-12 13:18:32 +01:00 committed by GitHub
commit 40b89dd4f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 254 additions and 5 deletions

View File

@ -33,4 +33,8 @@ class PseudonymizeService(
}
}
/**
 * Returns the prefix taken from the configuration properties.
 *
 * @return The configured prefix
 */
fun prefix(): String = configProperties.prefix
}

View File

@ -20,7 +20,14 @@
package dev.dnpm.etl.processor.pseudonym
import de.ukw.ccc.bwhc.dto.MtbFile
import org.apache.commons.codec.digest.DigestUtils
/**
 * Replaces patient ID with generated patient pseudonym.
 *
 * The MTB file is modified in place; nothing is returned.
 *
 * @param pseudonymizeService The pseudonymizeService to be used
 */
infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
val patientPseudonym = pseudonymizeService.patientPseudonym(this.patient.id)
@ -46,8 +53,171 @@ infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
this.previousGuidelineTherapies.forEach { it.patient = patientPseudonym }
this.rebiopsyRequests.forEach { it.patient = patientPseudonym }
this.recommendations.forEach { it.patient = patientPseudonym }
this.recommendations.forEach { it.patient = patientPseudonym }
this.responses.forEach { it.patient = patientPseudonym }
this.studyInclusionRequests.forEach { it.patient = patientPseudonym }
this.specimens.forEach { it.patient = patientPseudonym }
}
/**
 * Replaces content IDs, and the references to them, with a salted hash.
 *
 * Each value is replaced by the prefix followed by the first 41 hex characters of
 * `sha256("<prefix>-<original id>")`. Patient references are not modified here.
 *
 * The MTB file is modified in place; nothing is returned. Missing (null) sections,
 * entries, IDs and references are skipped and stay null instead of raising an exception.
 *
 * @param pseudonymizeService The pseudonymizeService providing the prefix used as salt
 */
infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService) {
    val prefix = pseudonymizeService.prefix()

    // Null-tolerant on purpose: optional references are simply absent in many files.
    fun anonymize(id: String?): String? = id?.let {
        val hash = DigestUtils.sha256Hex("$prefix-$it").substring(0, 41).lowercase()
        "$prefix$hash"
    }

    // Re-hashes every entry of a reference list, keeping a missing list missing.
    fun anonymizeAll(ids: List<String?>?): List<String?>? = ids?.map { anonymize(it) }

    episode?.apply {
        id = anonymize(id)
    }

    carePlans?.forEach { carePlan ->
        carePlan?.apply {
            id = anonymize(id)
            diagnosis = anonymize(diagnosis)
            geneticCounsellingRequest = anonymize(geneticCounsellingRequest)
            rebiopsyRequests = anonymizeAll(rebiopsyRequests)
            recommendations = anonymizeAll(recommendations)
            studyInclusionRequests = anonymizeAll(studyInclusionRequests)
        }
    }

    claims?.forEach { claim ->
        claim?.apply {
            id = anonymize(id)
            therapy = anonymize(therapy)
        }
    }

    claimResponses?.forEach { claimResponse ->
        claimResponse?.apply {
            id = anonymize(id)
            claim = anonymize(claim)
        }
    }

    consent?.apply {
        id = anonymize(id)
    }

    diagnoses?.forEach { diagnosis ->
        diagnosis?.apply {
            id = anonymize(id)
            histologyResults = anonymizeAll(histologyResults)
        }
    }

    ecogStatus?.forEach { status ->
        status?.apply {
            id = anonymize(id)
        }
    }

    familyMemberDiagnoses?.forEach { familyMemberDiagnosis ->
        familyMemberDiagnosis?.apply {
            id = anonymize(id)
        }
    }

    geneticCounsellingRequests?.forEach { request ->
        request?.apply {
            id = anonymize(id)
        }
    }

    histologyReevaluationRequests?.forEach { request ->
        request?.apply {
            id = anonymize(id)
            specimen = anonymize(specimen)
        }
    }

    histologyReports?.forEach { report ->
        report?.apply {
            id = anonymize(id)
            specimen = anonymize(specimen)
            // Nested findings carry their own ID and specimen reference.
            tumorMorphology?.apply {
                id = anonymize(id)
                specimen = anonymize(specimen)
            }
            tumorCellContent?.apply {
                id = anonymize(id)
                specimen = anonymize(specimen)
            }
        }
    }

    lastGuidelineTherapies?.forEach { therapy ->
        therapy?.apply {
            id = anonymize(id)
            diagnosis = anonymize(diagnosis)
        }
    }

    molecularPathologyFindings?.forEach { finding ->
        finding?.apply {
            id = anonymize(id)
            specimen = anonymize(specimen)
        }
    }

    molecularTherapies?.forEach { molecularTherapy ->
        // Only the history entries are rewritten for molecular therapies.
        molecularTherapy?.history?.forEach { entry ->
            entry?.apply {
                id = anonymize(id)
                basedOn = anonymize(basedOn)
            }
        }
    }

    ngsReports?.forEach { report ->
        report?.apply {
            id = anonymize(id)
            specimen = anonymize(specimen)
            tumorCellContent?.apply {
                id = anonymize(id)
                specimen = anonymize(specimen)
            }
            simpleVariants?.forEach { variant ->
                variant?.apply {
                    id = anonymize(id)
                }
            }
        }
    }

    previousGuidelineTherapies?.forEach { therapy ->
        therapy?.apply {
            id = anonymize(id)
            diagnosis = anonymize(diagnosis)
            medication?.forEach { entry ->
                entry?.apply {
                    id = anonymize(id)
                }
            }
        }
    }

    rebiopsyRequests?.forEach { request ->
        request?.apply {
            id = anonymize(id)
            specimen = anonymize(specimen)
        }
    }

    recommendations?.forEach { recommendation ->
        recommendation?.apply {
            id = anonymize(id)
            diagnosis = anonymize(diagnosis)
            ngsReport = anonymize(ngsReport)
        }
    }

    responses?.forEach { response ->
        response?.apply {
            id = anonymize(id)
            therapy = anonymize(therapy)
        }
    }

    studyInclusionRequests?.forEach { request ->
        request?.apply {
            id = anonymize(id)
            reason = anonymize(reason)
        }
    }

    specimens?.forEach { specimen ->
        specimen?.apply {
            id = anonymize(id)
        }
    }
}

View File

@ -28,6 +28,7 @@ import dev.dnpm.etl.processor.monitoring.RequestStatus
import dev.dnpm.etl.processor.monitoring.RequestType
import dev.dnpm.etl.processor.output.MtbFileSender
import dev.dnpm.etl.processor.pseudonym.PseudonymizeService
import dev.dnpm.etl.processor.pseudonym.anonymizeContentWith
import dev.dnpm.etl.processor.pseudonym.pseudonymizeWith
import org.apache.commons.codec.binary.Base32
import org.apache.commons.codec.digest.DigestUtils
@ -55,6 +56,7 @@ class RequestProcessor(
val pid = mtbFile.patient.id
mtbFile pseudonymizeWith pseudonymizeService
mtbFile anonymizeContentWith pseudonymizeService
val request = MtbFileSender.MtbFileRequest(requestId, transformationService.transform(mtbFile))

View File

@ -20,9 +20,10 @@
package dev.dnpm.etl.processor.pseudonym
import com.fasterxml.jackson.databind.ObjectMapper
import de.ukw.ccc.bwhc.dto.MtbFile
import de.ukw.ccc.bwhc.dto.*
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows
import org.junit.jupiter.api.extension.ExtendWith
import org.mockito.ArgumentMatchers
import org.mockito.Mock
@ -61,4 +62,76 @@ class ExtensionsTest {
assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID)
}
/**
 * After pseudonymization and re-hashing, the serialized MTB file must not contain
 * any quoted, lowercase UUID-shaped value.
 */
@Test
fun shouldNotContainAnyUuidAfterRehashingOfIds(@Mock pseudonymizeService: PseudonymizeService) {
    doAnswer {
        "PSEUDO-ID"
    }.whenever(pseudonymizeService).patientPseudonym(ArgumentMatchers.anyString())

    doAnswer {
        "TESTDOMAIN"
    }.whenever(pseudonymizeService).prefix()

    val mtbFile = fakeMtbFile()

    mtbFile.pseudonymizeWith(pseudonymizeService)
    mtbFile.anonymizeContentWith(pseudonymizeService)

    val uuidPattern = "\"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\"".toRegex().toPattern()

    // Assert directly on the serialized content instead of provoking an exception from
    // Matcher.group() and comparing its JDK-internal message text.
    assertThat(mtbFile.serialized()).doesNotContainPattern(uuidPattern)
}
/**
 * Verifies that a content ID is replaced by the prefix followed by the truncated
 * SHA-256 hash of "<prefix>-<id>".
 */
@Test
fun shouldRehashIdsWithPrefix(@Mock pseudonymizeService: PseudonymizeService) {
    doAnswer { "PSEUDO-ID" }
        .whenever(pseudonymizeService)
        .patientPseudonym(ArgumentMatchers.anyString())
    doAnswer { "TESTDOMAIN" }
        .whenever(pseudonymizeService)
        .prefix()

    val patient = Patient.builder()
        .withId("1")
        .withBirthDate("2000-08-08")
        .withGender(Patient.Gender.MALE)
        .build()

    val consent = Consent.builder()
        .withId("1")
        .withStatus(Consent.Status.ACTIVE)
        .withPatient("123")
        .build()

    val episode = Episode.builder()
        .withId("1")
        .withPatient("1")
        .withPeriod(PeriodStart("2023-08-08"))
        .build()

    val mtbFile = MtbFile.builder()
        .withPatient(patient)
        .withConsent(consent)
        .withEpisode(episode)
        .build()

    mtbFile pseudonymizeWith pseudonymizeService
    mtbFile anonymizeContentWith pseudonymizeService

    // Expected value: TESTDOMAIN + first 41 hex characters of sha256("TESTDOMAIN-1")
    val expectedEpisodeId = "TESTDOMAIN44e20a53bbbf9f3ae39626d05df7014dcd77d6098"
    assertThat(mtbFile.episode.id).isEqualTo(expectedEpisodeId)
}
}

View File

@ -92,7 +92,7 @@ class RequestProcessorTest {
uuid = UUID.randomUUID().toString(),
patientId = "TEST_12345678901",
pid = "P1",
fingerprint = "xrysxpozhbs2lnrjgf3yq4fzj33kxr7xr5c2cbuskmelfdmckl3a",
fingerprint = "zdlzv5s5ydmd4ktw2v5piohegc4jcyrm6j66bq6tv2uxuerndmga",
type = RequestType.MTB_FILE,
status = RequestStatus.SUCCESS,
processedAt = Instant.parse("2023-08-08T02:00:00Z")
@ -151,7 +151,7 @@ class RequestProcessorTest {
uuid = UUID.randomUUID().toString(),
patientId = "TEST_12345678901",
pid = "P1",
fingerprint = "xrysxpozhbs2lnrjgf3yq4fzj33kxr7xr5c2cbuskmelfdmckl3a",
fingerprint = "zdlzv5s5ydmd4ktw2v5piohegc4jcyrm6j66bq6tv2uxuerndmga",
type = RequestType.MTB_FILE,
status = RequestStatus.SUCCESS,
processedAt = Instant.parse("2023-08-08T02:00:00Z")

File diff suppressed because one or more lines are too long