mirror of
https://github.com/pcvolkmer/etl-processor.git
synced 2025-04-19 17:26:51 +00:00
Merge pull request #60 from CCC-MF/issue_44
feat: salted re-hash IDs within MTB file except patient ID
This commit is contained in:
commit
40b89dd4f1
@ -33,4 +33,8 @@ class PseudonymizeService(
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns the prefix held by the configuration properties of this service. */
fun prefix(): String = configProperties.prefix
|
||||
|
||||
}
|
@ -20,7 +20,14 @@
|
||||
package dev.dnpm.etl.processor.pseudonym
|
||||
|
||||
import de.ukw.ccc.bwhc.dto.MtbFile
|
||||
import org.apache.commons.codec.digest.DigestUtils
|
||||
|
||||
/** Replaces patient ID with generated patient pseudonym
|
||||
*
|
||||
* @param pseudonymizeService The pseudonymizeService to be used
|
||||
*
|
||||
* @return The MTB file containing patient pseudonyms
|
||||
*/
|
||||
infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
|
||||
val patientPseudonym = pseudonymizeService.patientPseudonym(this.patient.id)
|
||||
|
||||
@ -46,8 +53,171 @@ infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
|
||||
this.previousGuidelineTherapies.forEach { it.patient = patientPseudonym }
|
||||
this.rebiopsyRequests.forEach { it.patient = patientPseudonym }
|
||||
this.recommendations.forEach { it.patient = patientPseudonym }
|
||||
this.recommendations.forEach { it.patient = patientPseudonym }
|
||||
this.responses.forEach { it.patient = patientPseudonym }
|
||||
this.studyInclusionRequests.forEach { it.patient = patientPseudonym }
|
||||
this.specimens.forEach { it.patient = patientPseudonym }
|
||||
}
|
||||
|
||||
/**
 * Replaces the content IDs of this MTB file with salted hashes, leaving patient IDs untouched.
 *
 * Every ID, and every reference to such an ID, is replaced with the prefix returned by
 * [PseudonymizeService.prefix] followed by the first 41 characters of the lowercase hex
 * SHA-256 digest of `"<prefix>-<original id>"`.
 *
 * The MTB file is modified in place and nothing is returned. Absent (`null`) entries, lists,
 * IDs and references are skipped and left unchanged instead of raising an exception.
 *
 * @param pseudonymizeService The pseudonymizeService providing the prefix
 */
infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService) {
    val prefix = pseudonymizeService.prefix()

    fun anonymize(id: String): String {
        val hash = DigestUtils.sha256Hex("$prefix-$id").substring(0, 41).lowercase()
        return "$prefix$hash"
    }

    // The DTOs are Java types, so any value may be null at runtime: pass nulls through untouched.
    fun anonymizeOrNull(id: String?): String? = id?.let { anonymize(it) }

    this.episode?.apply {
        id = anonymizeOrNull(id)
    }

    this.carePlans?.forEach { carePlan ->
        carePlan?.apply {
            id = anonymizeOrNull(id)
            diagnosis = anonymizeOrNull(diagnosis)
            geneticCounsellingRequest = anonymizeOrNull(geneticCounsellingRequest)
            // These are lists of referenced IDs belonging to the care plan, not the MTB file's own lists
            rebiopsyRequests = rebiopsyRequests?.map { anonymize(it) }
            recommendations = recommendations?.map { anonymize(it) }
            studyInclusionRequests = studyInclusionRequests?.map { anonymize(it) }
        }
    }

    this.claims?.forEach { claim ->
        claim?.apply {
            id = anonymizeOrNull(id)
            therapy = anonymizeOrNull(therapy)
        }
    }

    this.claimResponses?.forEach { claimResponse ->
        claimResponse?.apply {
            id = anonymizeOrNull(id)
            claim = anonymizeOrNull(claim)
        }
    }

    this.consent?.apply {
        id = anonymizeOrNull(id)
    }

    this.diagnoses?.forEach { diagnosis ->
        diagnosis?.apply {
            id = anonymizeOrNull(id)
            histologyResults = histologyResults?.map { anonymize(it) }
        }
    }

    this.ecogStatus?.forEach { ecogStatus ->
        ecogStatus?.apply {
            id = anonymizeOrNull(id)
        }
    }

    this.familyMemberDiagnoses?.forEach { familyMemberDiagnosis ->
        familyMemberDiagnosis?.apply {
            id = anonymizeOrNull(id)
        }
    }

    this.geneticCounsellingRequests?.forEach { geneticCounsellingRequest ->
        geneticCounsellingRequest?.apply {
            id = anonymizeOrNull(id)
        }
    }

    this.histologyReevaluationRequests?.forEach { histologyReevaluationRequest ->
        histologyReevaluationRequest?.apply {
            id = anonymizeOrNull(id)
            specimen = anonymizeOrNull(specimen)
        }
    }

    this.histologyReports?.forEach { histologyReport ->
        histologyReport?.apply {
            id = anonymizeOrNull(id)
            specimen = anonymizeOrNull(specimen)
            tumorMorphology?.apply {
                id = anonymizeOrNull(id)
                specimen = anonymizeOrNull(specimen)
            }
            tumorCellContent?.apply {
                id = anonymizeOrNull(id)
                specimen = anonymizeOrNull(specimen)
            }
        }
    }

    this.lastGuidelineTherapies?.forEach { lastGuidelineTherapy ->
        lastGuidelineTherapy?.apply {
            id = anonymizeOrNull(id)
            diagnosis = anonymizeOrNull(diagnosis)
        }
    }

    this.molecularPathologyFindings?.forEach { molecularPathologyFinding ->
        molecularPathologyFinding?.apply {
            id = anonymizeOrNull(id)
            specimen = anonymizeOrNull(specimen)
        }
    }

    this.molecularTherapies?.forEach { molecularTherapy ->
        molecularTherapy?.history?.forEach { entry ->
            entry?.apply {
                id = anonymizeOrNull(id)
                basedOn = anonymizeOrNull(basedOn)
            }
        }
    }

    this.ngsReports?.forEach { ngsReport ->
        ngsReport?.apply {
            id = anonymizeOrNull(id)
            specimen = anonymizeOrNull(specimen)
            tumorCellContent?.apply {
                id = anonymizeOrNull(id)
                specimen = anonymizeOrNull(specimen)
            }
            simpleVariants?.forEach { simpleVariant ->
                simpleVariant?.apply {
                    id = anonymizeOrNull(id)
                }
            }
        }
    }

    this.previousGuidelineTherapies?.forEach { previousGuidelineTherapy ->
        previousGuidelineTherapy?.apply {
            id = anonymizeOrNull(id)
            diagnosis = anonymizeOrNull(diagnosis)
            // NOTE(review): assumes the medication entries declare their own `id`; otherwise `id`
            // below would resolve to the enclosing therapy's ID - confirm against the DTO.
            this.medication?.forEach { medication ->
                medication?.apply {
                    id = anonymizeOrNull(id)
                }
            }
        }
    }

    this.rebiopsyRequests?.forEach { rebiopsyRequest ->
        rebiopsyRequest?.apply {
            id = anonymizeOrNull(id)
            specimen = anonymizeOrNull(specimen)
        }
    }

    this.recommendations?.forEach { recommendation ->
        recommendation?.apply {
            id = anonymizeOrNull(id)
            diagnosis = anonymizeOrNull(diagnosis)
            ngsReport = anonymizeOrNull(ngsReport)
        }
    }

    this.responses?.forEach { response ->
        response?.apply {
            id = anonymizeOrNull(id)
            therapy = anonymizeOrNull(therapy)
        }
    }

    this.studyInclusionRequests?.forEach { studyInclusionRequest ->
        studyInclusionRequest?.apply {
            id = anonymizeOrNull(id)
            reason = anonymizeOrNull(reason)
        }
    }

    this.specimens?.forEach { specimen ->
        specimen?.apply {
            id = anonymizeOrNull(id)
        }
    }
}
|
@ -28,6 +28,7 @@ import dev.dnpm.etl.processor.monitoring.RequestStatus
|
||||
import dev.dnpm.etl.processor.monitoring.RequestType
|
||||
import dev.dnpm.etl.processor.output.MtbFileSender
|
||||
import dev.dnpm.etl.processor.pseudonym.PseudonymizeService
|
||||
import dev.dnpm.etl.processor.pseudonym.anonymizeContentWith
|
||||
import dev.dnpm.etl.processor.pseudonym.pseudonymizeWith
|
||||
import org.apache.commons.codec.binary.Base32
|
||||
import org.apache.commons.codec.digest.DigestUtils
|
||||
@ -55,6 +56,7 @@ class RequestProcessor(
|
||||
val pid = mtbFile.patient.id
|
||||
|
||||
mtbFile pseudonymizeWith pseudonymizeService
|
||||
mtbFile anonymizeContentWith pseudonymizeService
|
||||
|
||||
val request = MtbFileSender.MtbFileRequest(requestId, transformationService.transform(mtbFile))
|
||||
|
||||
|
@ -20,9 +20,10 @@
|
||||
package dev.dnpm.etl.processor.pseudonym
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import de.ukw.ccc.bwhc.dto.MtbFile
|
||||
import de.ukw.ccc.bwhc.dto.*
|
||||
import org.assertj.core.api.Assertions.assertThat
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.junit.jupiter.api.assertThrows
|
||||
import org.junit.jupiter.api.extension.ExtendWith
|
||||
import org.mockito.ArgumentMatchers
|
||||
import org.mockito.Mock
|
||||
@ -61,4 +62,76 @@ class ExtensionsTest {
|
||||
assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID)
|
||||
}
|
||||
|
||||
/**
 * Verifies that the serialized MTB file contains no quoted UUID after the patient ID
 * has been pseudonymized and the content IDs have been rehashed.
 */
@Test
fun shouldNotContainAnyUuidAfterRehashingOfIds(@Mock pseudonymizeService: PseudonymizeService) {
    doAnswer { "PSEUDO-ID" }
        .whenever(pseudonymizeService).patientPseudonym(ArgumentMatchers.anyString())

    doAnswer { "TESTDOMAIN" }
        .whenever(pseudonymizeService).prefix()

    val mtbFile = fakeMtbFile()

    mtbFile.pseudonymizeWith(pseudonymizeService)
    mtbFile.anonymizeContentWith(pseudonymizeService)

    // A UUID in lowercase hex notation enclosed in double quotes
    val pattern = "\"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\"".toRegex().toPattern()

    // Assert the absence of a match directly instead of provoking the
    // IllegalStateException that Matcher.group() throws when nothing was found.
    assertThat(mtbFile.serialized()).doesNotContainPattern(pattern)
}
|
||||
|
||||
/**
 * Checks that rehashing replaces the episode ID with the prefix followed by the
 * truncated SHA-256 hash of the prefixed original ID.
 */
@Test
fun shouldRehashIdsWithPrefix(@Mock pseudonymizeService: PseudonymizeService) {
    doAnswer { "PSEUDO-ID" }
        .whenever(pseudonymizeService).patientPseudonym(ArgumentMatchers.anyString())
    doAnswer { "TESTDOMAIN" }
        .whenever(pseudonymizeService).prefix()

    val patient = Patient.builder()
        .withId("1")
        .withBirthDate("2000-08-08")
        .withGender(Patient.Gender.MALE)
        .build()
    val consent = Consent.builder()
        .withId("1")
        .withStatus(Consent.Status.ACTIVE)
        .withPatient("123")
        .build()
    val episode = Episode.builder()
        .withId("1")
        .withPatient("1")
        .withPeriod(PeriodStart("2023-08-08"))
        .build()
    val mtbFile = MtbFile.builder()
        .withPatient(patient)
        .withConsent(consent)
        .withEpisode(episode)
        .build()

    mtbFile.pseudonymizeWith(pseudonymizeService)
    mtbFile.anonymizeContentWith(pseudonymizeService)

    // TESTDOMAIN<sha256(TESTDOMAIN-1)[0-41]>
    val expectedEpisodeId = "TESTDOMAIN44e20a53bbbf9f3ae39626d05df7014dcd77d6098"

    assertThat(mtbFile.episode.id).isEqualTo(expectedEpisodeId)
}
|
||||
|
||||
}
|
@ -92,7 +92,7 @@ class RequestProcessorTest {
|
||||
uuid = UUID.randomUUID().toString(),
|
||||
patientId = "TEST_12345678901",
|
||||
pid = "P1",
|
||||
fingerprint = "xrysxpozhbs2lnrjgf3yq4fzj33kxr7xr5c2cbuskmelfdmckl3a",
|
||||
fingerprint = "zdlzv5s5ydmd4ktw2v5piohegc4jcyrm6j66bq6tv2uxuerndmga",
|
||||
type = RequestType.MTB_FILE,
|
||||
status = RequestStatus.SUCCESS,
|
||||
processedAt = Instant.parse("2023-08-08T02:00:00Z")
|
||||
@ -151,7 +151,7 @@ class RequestProcessorTest {
|
||||
uuid = UUID.randomUUID().toString(),
|
||||
patientId = "TEST_12345678901",
|
||||
pid = "P1",
|
||||
fingerprint = "xrysxpozhbs2lnrjgf3yq4fzj33kxr7xr5c2cbuskmelfdmckl3a",
|
||||
fingerprint = "zdlzv5s5ydmd4ktw2v5piohegc4jcyrm6j66bq6tv2uxuerndmga",
|
||||
type = RequestType.MTB_FILE,
|
||||
status = RequestStatus.SUCCESS,
|
||||
processedAt = Instant.parse("2023-08-08T02:00:00Z")
|
||||
|
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user