1
0
mirror of https://github.com/pcvolkmer/etl-processor.git synced 2025-07-04 07:22:55 +00:00

feat: salted re-hash IDs within MTB file except patient ID

This commit is contained in:
2024-03-12 13:13:31 +01:00
parent 07e59f9b02
commit e3aeee61de
6 changed files with 254 additions and 5 deletions

View File

@ -33,4 +33,8 @@ class PseudonymizeService(
}
}
/** Returns the prefix held by the configuration properties of this service. */
fun prefix(): String = configProperties.prefix
}

View File

@ -20,7 +20,14 @@
package dev.dnpm.etl.processor.pseudonym
import de.ukw.ccc.bwhc.dto.MtbFile
import org.apache.commons.codec.digest.DigestUtils
/** Replaces patient ID with generated patient pseudonym
*
* @param pseudonymizeService The pseudonymizeService to be used
*
* @return The MTB file containing patient pseudonyms
*/
infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
val patientPseudonym = pseudonymizeService.patientPseudonym(this.patient.id)
@ -46,8 +53,171 @@ infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
this.previousGuidelineTherapies.forEach { it.patient = patientPseudonym }
this.rebiopsyRequests.forEach { it.patient = patientPseudonym }
this.recommendations.forEach { it.patient = patientPseudonym }
this.recommendations.forEach { it.patient = patientPseudonym }
this.responses.forEach { it.patient = patientPseudonym }
this.studyInclusionRequests.forEach { it.patient = patientPseudonym }
this.specimens.forEach { it.patient = patientPseudonym }
}
/**
 * Replaces content IDs, and the references pointing to them, with salted hashes.
 * Patient IDs are not touched by this function.
 *
 * Every ID is replaced by the prefix obtained from [PseudonymizeService.prefix] followed by the
 * first 41 characters of the SHA-256 hex digest of `"<prefix>-<id>"`. The same input always
 * yields the same output, so an entry's ID and the references to it are rewritten consistently.
 *
 * The MTB file is modified in place; nothing is returned. Missing (`null`) entries, IDs and
 * references are skipped and stay `null` instead of aborting the whole run.
 *
 * @param pseudonymizeService The pseudonymizeService providing the prefix used as salt
 */
infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService) {
    val prefix = pseudonymizeService.prefix()

    // NOTE(review): MtbFile appears to be a Java DTO, so its fields presumably reach Kotlin as
    // platform types and optional ones may be null - confirm against the DTO library.
    // Salted re-hash of a single ID; null is passed through unchanged.
    fun anonymize(id: String?): String? = id?.let {
        val hash = DigestUtils.sha256Hex("$prefix-$it").substring(0, 41).lowercase()
        "$prefix$hash"
    }

    // Salted re-hash of a list of ID references; a missing list stays null.
    fun anonymizeAll(ids: List<String?>?): List<String?>? = ids?.map { anonymize(it) }

    this.episode?.apply {
        id = anonymize(id)
    }

    this.carePlans?.forEach { carePlan ->
        carePlan?.apply {
            id = anonymize(id)
            diagnosis = anonymize(diagnosis)
            geneticCounsellingRequest = anonymize(geneticCounsellingRequest)
            rebiopsyRequests = anonymizeAll(rebiopsyRequests)
            recommendations = anonymizeAll(recommendations)
            studyInclusionRequests = anonymizeAll(studyInclusionRequests)
        }
    }

    this.claims?.forEach { claim ->
        claim?.apply {
            id = anonymize(id)
            therapy = anonymize(therapy)
        }
    }

    this.claimResponses?.forEach { claimResponse ->
        claimResponse?.apply {
            id = anonymize(id)
            claim = anonymize(claim)
        }
    }

    this.consent?.apply {
        id = anonymize(id)
    }

    this.diagnoses?.forEach { diagnosis ->
        diagnosis?.apply {
            id = anonymize(id)
            histologyResults = anonymizeAll(histologyResults)
        }
    }

    this.ecogStatus?.forEach { ecogStatus ->
        ecogStatus?.apply {
            id = anonymize(id)
        }
    }

    this.familyMemberDiagnoses?.forEach { familyMemberDiagnosis ->
        familyMemberDiagnosis?.apply {
            id = anonymize(id)
        }
    }

    this.geneticCounsellingRequests?.forEach { geneticCounsellingRequest ->
        geneticCounsellingRequest?.apply {
            id = anonymize(id)
        }
    }

    this.histologyReevaluationRequests?.forEach { histologyReevaluationRequest ->
        histologyReevaluationRequest?.apply {
            id = anonymize(id)
            specimen = anonymize(specimen)
        }
    }

    this.histologyReports?.forEach { histologyReport ->
        histologyReport?.apply {
            id = anonymize(id)
            specimen = anonymize(specimen)
            // Nested results carry their own ID and specimen reference.
            tumorMorphology?.apply {
                id = anonymize(id)
                specimen = anonymize(specimen)
            }
            tumorCellContent?.apply {
                id = anonymize(id)
                specimen = anonymize(specimen)
            }
        }
    }

    this.lastGuidelineTherapies?.forEach { lastGuidelineTherapy ->
        lastGuidelineTherapy?.apply {
            id = anonymize(id)
            diagnosis = anonymize(diagnosis)
        }
    }

    this.molecularPathologyFindings?.forEach { molecularPathologyFinding ->
        molecularPathologyFinding?.apply {
            id = anonymize(id)
            specimen = anonymize(specimen)
        }
    }

    // Molecular therapies are rewritten through their history entries only.
    this.molecularTherapies?.forEach { molecularTherapy ->
        molecularTherapy?.history?.forEach { historyEntry ->
            historyEntry?.apply {
                id = anonymize(id)
                basedOn = anonymize(basedOn)
            }
        }
    }

    this.ngsReports?.forEach { ngsReport ->
        ngsReport?.apply {
            id = anonymize(id)
            specimen = anonymize(specimen)
            tumorCellContent?.apply {
                id = anonymize(id)
                specimen = anonymize(specimen)
            }
            simpleVariants?.forEach { simpleVariant ->
                simpleVariant?.apply {
                    id = anonymize(id)
                }
            }
        }
    }

    this.previousGuidelineTherapies?.forEach { previousGuidelineTherapy ->
        previousGuidelineTherapy?.apply {
            id = anonymize(id)
            diagnosis = anonymize(diagnosis)
            this.medication?.forEach { medication ->
                medication?.apply {
                    id = anonymize(id)
                }
            }
        }
    }

    this.rebiopsyRequests?.forEach { rebiopsyRequest ->
        rebiopsyRequest?.apply {
            id = anonymize(id)
            specimen = anonymize(specimen)
        }
    }

    this.recommendations?.forEach { recommendation ->
        recommendation?.apply {
            id = anonymize(id)
            diagnosis = anonymize(diagnosis)
            ngsReport = anonymize(ngsReport)
        }
    }

    this.responses?.forEach { response ->
        response?.apply {
            id = anonymize(id)
            therapy = anonymize(therapy)
        }
    }

    this.studyInclusionRequests?.forEach { studyInclusionRequest ->
        studyInclusionRequest?.apply {
            id = anonymize(id)
            reason = anonymize(reason)
        }
    }

    this.specimens?.forEach { specimen ->
        specimen?.apply {
            id = anonymize(id)
        }
    }
}

View File

@ -28,6 +28,7 @@ import dev.dnpm.etl.processor.monitoring.RequestStatus
import dev.dnpm.etl.processor.monitoring.RequestType
import dev.dnpm.etl.processor.output.MtbFileSender
import dev.dnpm.etl.processor.pseudonym.PseudonymizeService
import dev.dnpm.etl.processor.pseudonym.anonymizeContentWith
import dev.dnpm.etl.processor.pseudonym.pseudonymizeWith
import org.apache.commons.codec.binary.Base32
import org.apache.commons.codec.digest.DigestUtils
@ -55,6 +56,7 @@ class RequestProcessor(
val pid = mtbFile.patient.id
mtbFile pseudonymizeWith pseudonymizeService
mtbFile anonymizeContentWith pseudonymizeService
val request = MtbFileSender.MtbFileRequest(requestId, transformationService.transform(mtbFile))