mirror of
https://github.com/pcvolkmer/etl-processor.git
synced 2025-04-20 17:56:50 +00:00
Merge pull request #60 from CCC-MF/issue_44
feat: salted re-hash IDs within MTB file except patient ID
This commit is contained in:
commit
40b89dd4f1
@ -33,4 +33,8 @@ class PseudonymizeService(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the prefix held by the configuration properties of this service.
 *
 * @return The value of `configProperties.prefix`
 */
fun prefix(): String = configProperties.prefix
|
||||||
|
|
||||||
}
|
}
|
@ -20,7 +20,14 @@
|
|||||||
package dev.dnpm.etl.processor.pseudonym
|
package dev.dnpm.etl.processor.pseudonym
|
||||||
|
|
||||||
import de.ukw.ccc.bwhc.dto.MtbFile
|
import de.ukw.ccc.bwhc.dto.MtbFile
|
||||||
|
import org.apache.commons.codec.digest.DigestUtils
|
||||||
|
|
||||||
|
/** Replaces patient ID with generated patient pseudonym
|
||||||
|
*
|
||||||
|
* @param pseudonymizeService The pseudonymizeService to be used
|
||||||
|
*
|
||||||
|
 * @return The MTB file containing patient pseudonyms
|
||||||
|
*/
|
||||||
infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
|
infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
|
||||||
val patientPseudonym = pseudonymizeService.patientPseudonym(this.patient.id)
|
val patientPseudonym = pseudonymizeService.patientPseudonym(this.patient.id)
|
||||||
|
|
||||||
@ -46,8 +53,171 @@ infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
|
|||||||
this.previousGuidelineTherapies.forEach { it.patient = patientPseudonym }
|
this.previousGuidelineTherapies.forEach { it.patient = patientPseudonym }
|
||||||
this.rebiopsyRequests.forEach { it.patient = patientPseudonym }
|
this.rebiopsyRequests.forEach { it.patient = patientPseudonym }
|
||||||
this.recommendations.forEach { it.patient = patientPseudonym }
|
this.recommendations.forEach { it.patient = patientPseudonym }
|
||||||
this.recommendations.forEach { it.patient = patientPseudonym }
|
|
||||||
this.responses.forEach { it.patient = patientPseudonym }
|
this.responses.forEach { it.patient = patientPseudonym }
|
||||||
this.studyInclusionRequests.forEach { it.patient = patientPseudonym }
|
this.studyInclusionRequests.forEach { it.patient = patientPseudonym }
|
||||||
this.specimens.forEach { it.patient = patientPseudonym }
|
this.specimens.forEach { it.patient = patientPseudonym }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Re-hashes the content IDs of this MTB file in place; patient IDs are not touched here.
 *
 * Every ID, and every field that refers to another entry by its ID, is replaced by the prefix
 * obtained from the given service followed by the first 41 hex characters of
 * `sha256("<prefix>-<id>")`. IDs and references go through the same function, so equal
 * input values produce equal output values.
 *
 * @param pseudonymizeService The pseudonymizeService providing the prefix
 */
infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService) {
    val prefix = pseudonymizeService.prefix()

    // Salted re-hash: the prefix is part of the hashed value and is also prepended to the result.
    fun anonymize(id: String): String {
        val digest = DigestUtils.sha256Hex("$prefix-$id")
        return "$prefix${digest.substring(0, 41).lowercase()}"
    }

    episode.let { currentEpisode ->
        currentEpisode.id = anonymize(currentEpisode.id)
    }

    carePlans.forEach { plan ->
        plan.id = anonymize(plan.id)
        plan.diagnosis = anonymize(plan.diagnosis)
        plan.geneticCounsellingRequest = anonymize(plan.geneticCounsellingRequest)
        plan.rebiopsyRequests = plan.rebiopsyRequests.map { ref -> anonymize(ref) }
        plan.recommendations = plan.recommendations.map { ref -> anonymize(ref) }
        plan.studyInclusionRequests = plan.studyInclusionRequests.map { ref -> anonymize(ref) }
    }

    claims.forEach { entry ->
        entry.id = anonymize(entry.id)
        entry.therapy = anonymize(entry.therapy)
    }

    claimResponses.forEach { entry ->
        entry.id = anonymize(entry.id)
        entry.claim = anonymize(entry.claim)
    }

    consent.let { currentConsent ->
        currentConsent.id = anonymize(currentConsent.id)
    }

    diagnoses.forEach { entry ->
        entry.id = anonymize(entry.id)
        entry.histologyResults = entry.histologyResults.map { ref -> anonymize(ref) }
    }

    ecogStatus.forEach { entry ->
        entry.id = anonymize(entry.id)
    }

    familyMemberDiagnoses.forEach { entry ->
        entry.id = anonymize(entry.id)
    }

    geneticCounsellingRequests.forEach { entry ->
        entry.id = anonymize(entry.id)
    }

    histologyReevaluationRequests.forEach { entry ->
        entry.id = anonymize(entry.id)
        entry.specimen = anonymize(entry.specimen)
    }

    histologyReports.forEach { report ->
        report.id = anonymize(report.id)
        report.specimen = anonymize(report.specimen)
        report.tumorMorphology.let { morphology ->
            morphology.id = anonymize(morphology.id)
            morphology.specimen = anonymize(morphology.specimen)
        }
        report.tumorCellContent.let { cellContent ->
            cellContent.id = anonymize(cellContent.id)
            cellContent.specimen = anonymize(cellContent.specimen)
        }
    }

    lastGuidelineTherapies.forEach { entry ->
        entry.id = anonymize(entry.id)
        entry.diagnosis = anonymize(entry.diagnosis)
    }

    molecularPathologyFindings.forEach { entry ->
        entry.id = anonymize(entry.id)
        entry.specimen = anonymize(entry.specimen)
    }

    // Molecular therapies carry their IDs on the history entries only.
    molecularTherapies.forEach { therapy ->
        therapy.history.forEach { historyEntry ->
            historyEntry.id = anonymize(historyEntry.id)
            historyEntry.basedOn = anonymize(historyEntry.basedOn)
        }
    }

    ngsReports.forEach { report ->
        report.id = anonymize(report.id)
        report.specimen = anonymize(report.specimen)
        report.tumorCellContent.let { cellContent ->
            cellContent.id = anonymize(cellContent.id)
            cellContent.specimen = anonymize(cellContent.specimen)
        }
        report.simpleVariants.forEach { variant ->
            variant.id = anonymize(variant.id)
        }
    }

    previousGuidelineTherapies.forEach { entry ->
        entry.id = anonymize(entry.id)
        entry.diagnosis = anonymize(entry.diagnosis)
        entry.medication.forEach { drug ->
            drug.id = anonymize(drug.id)
        }
    }

    rebiopsyRequests.forEach { entry ->
        entry.id = anonymize(entry.id)
        entry.specimen = anonymize(entry.specimen)
    }

    recommendations.forEach { entry ->
        entry.id = anonymize(entry.id)
        entry.diagnosis = anonymize(entry.diagnosis)
        entry.ngsReport = anonymize(entry.ngsReport)
    }

    responses.forEach { entry ->
        entry.id = anonymize(entry.id)
        entry.therapy = anonymize(entry.therapy)
    }

    studyInclusionRequests.forEach { entry ->
        entry.id = anonymize(entry.id)
        entry.reason = anonymize(entry.reason)
    }

    specimens.forEach { entry ->
        entry.id = anonymize(entry.id)
    }
}
|
@ -28,6 +28,7 @@ import dev.dnpm.etl.processor.monitoring.RequestStatus
|
|||||||
import dev.dnpm.etl.processor.monitoring.RequestType
|
import dev.dnpm.etl.processor.monitoring.RequestType
|
||||||
import dev.dnpm.etl.processor.output.MtbFileSender
|
import dev.dnpm.etl.processor.output.MtbFileSender
|
||||||
import dev.dnpm.etl.processor.pseudonym.PseudonymizeService
|
import dev.dnpm.etl.processor.pseudonym.PseudonymizeService
|
||||||
|
import dev.dnpm.etl.processor.pseudonym.anonymizeContentWith
|
||||||
import dev.dnpm.etl.processor.pseudonym.pseudonymizeWith
|
import dev.dnpm.etl.processor.pseudonym.pseudonymizeWith
|
||||||
import org.apache.commons.codec.binary.Base32
|
import org.apache.commons.codec.binary.Base32
|
||||||
import org.apache.commons.codec.digest.DigestUtils
|
import org.apache.commons.codec.digest.DigestUtils
|
||||||
@ -55,6 +56,7 @@ class RequestProcessor(
|
|||||||
val pid = mtbFile.patient.id
|
val pid = mtbFile.patient.id
|
||||||
|
|
||||||
mtbFile pseudonymizeWith pseudonymizeService
|
mtbFile pseudonymizeWith pseudonymizeService
|
||||||
|
mtbFile anonymizeContentWith pseudonymizeService
|
||||||
|
|
||||||
val request = MtbFileSender.MtbFileRequest(requestId, transformationService.transform(mtbFile))
|
val request = MtbFileSender.MtbFileRequest(requestId, transformationService.transform(mtbFile))
|
||||||
|
|
||||||
|
@ -20,9 +20,10 @@
|
|||||||
package dev.dnpm.etl.processor.pseudonym
|
package dev.dnpm.etl.processor.pseudonym
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper
|
import com.fasterxml.jackson.databind.ObjectMapper
|
||||||
import de.ukw.ccc.bwhc.dto.MtbFile
|
import de.ukw.ccc.bwhc.dto.*
|
||||||
import org.assertj.core.api.Assertions.assertThat
|
import org.assertj.core.api.Assertions.assertThat
|
||||||
import org.junit.jupiter.api.Test
|
import org.junit.jupiter.api.Test
|
||||||
|
import org.junit.jupiter.api.assertThrows
|
||||||
import org.junit.jupiter.api.extension.ExtendWith
|
import org.junit.jupiter.api.extension.ExtendWith
|
||||||
import org.mockito.ArgumentMatchers
|
import org.mockito.ArgumentMatchers
|
||||||
import org.mockito.Mock
|
import org.mockito.Mock
|
||||||
@ -61,4 +62,76 @@ class ExtensionsTest {
|
|||||||
assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID)
|
assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Checks that the serialized MTB file contains no quoted, lowercase UUID once the patient ID
 * has been pseudonymized and the content IDs have been re-hashed.
 */
@Test
fun shouldNotContainAnyUuidAfterRehashingOfIds(@Mock pseudonymizeService: PseudonymizeService) {
    doAnswer { "PSEUDO-ID" }.whenever(pseudonymizeService).patientPseudonym(ArgumentMatchers.anyString())
    doAnswer { "TESTDOMAIN" }.whenever(pseudonymizeService).prefix()

    val mtbFile = fakeMtbFile()

    mtbFile.pseudonymizeWith(pseudonymizeService)
    mtbFile.anonymizeContentWith(pseudonymizeService)

    val quotedUuid = "\"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\"".toRegex()

    // `find` yields null when nothing matches; asserting on the matched text means a failure
    // reports the first offending value instead of relying on an exception message.
    assertThat(quotedUuid.find(mtbFile.serialized())?.value).isNull()
}
|
||||||
|
|
||||||
|
/**
 * Checks that content IDs are replaced by the prefix followed by the truncated salted hash.
 * Episode and consent are both built with ID "1", so both must end up with the same value.
 */
@Test
fun shouldRehashIdsWithPrefix(@Mock pseudonymizeService: PseudonymizeService) {
    doAnswer { "PSEUDO-ID" }.whenever(pseudonymizeService).patientPseudonym(ArgumentMatchers.anyString())
    doAnswer { "TESTDOMAIN" }.whenever(pseudonymizeService).prefix()

    val mtbFile = MtbFile.builder()
        .withPatient(
            Patient.builder()
                .withId("1")
                .withBirthDate("2000-08-08")
                .withGender(Patient.Gender.MALE)
                .build()
        )
        .withConsent(
            Consent.builder()
                .withId("1")
                .withStatus(Consent.Status.ACTIVE)
                .withPatient("123")
                .build()
        )
        .withEpisode(
            Episode.builder()
                .withId("1")
                .withPatient("1")
                .withPeriod(PeriodStart("2023-08-08"))
                .build()
        )
        .build()

    mtbFile.pseudonymizeWith(pseudonymizeService)
    mtbFile.anonymizeContentWith(pseudonymizeService)

    // TESTDOMAIN<first 41 hex characters of sha256("TESTDOMAIN-1")>
    val expectedId = "TESTDOMAIN44e20a53bbbf9f3ae39626d05df7014dcd77d6098"

    assertThat(mtbFile.episode.id).isEqualTo(expectedId)
    assertThat(mtbFile.consent.id).isEqualTo(expectedId)
}
|
||||||
|
|
||||||
}
|
}
|
@ -92,7 +92,7 @@ class RequestProcessorTest {
|
|||||||
uuid = UUID.randomUUID().toString(),
|
uuid = UUID.randomUUID().toString(),
|
||||||
patientId = "TEST_12345678901",
|
patientId = "TEST_12345678901",
|
||||||
pid = "P1",
|
pid = "P1",
|
||||||
fingerprint = "xrysxpozhbs2lnrjgf3yq4fzj33kxr7xr5c2cbuskmelfdmckl3a",
|
fingerprint = "zdlzv5s5ydmd4ktw2v5piohegc4jcyrm6j66bq6tv2uxuerndmga",
|
||||||
type = RequestType.MTB_FILE,
|
type = RequestType.MTB_FILE,
|
||||||
status = RequestStatus.SUCCESS,
|
status = RequestStatus.SUCCESS,
|
||||||
processedAt = Instant.parse("2023-08-08T02:00:00Z")
|
processedAt = Instant.parse("2023-08-08T02:00:00Z")
|
||||||
@ -151,7 +151,7 @@ class RequestProcessorTest {
|
|||||||
uuid = UUID.randomUUID().toString(),
|
uuid = UUID.randomUUID().toString(),
|
||||||
patientId = "TEST_12345678901",
|
patientId = "TEST_12345678901",
|
||||||
pid = "P1",
|
pid = "P1",
|
||||||
fingerprint = "xrysxpozhbs2lnrjgf3yq4fzj33kxr7xr5c2cbuskmelfdmckl3a",
|
fingerprint = "zdlzv5s5ydmd4ktw2v5piohegc4jcyrm6j66bq6tv2uxuerndmga",
|
||||||
type = RequestType.MTB_FILE,
|
type = RequestType.MTB_FILE,
|
||||||
status = RequestStatus.SUCCESS,
|
status = RequestStatus.SUCCESS,
|
||||||
processedAt = Instant.parse("2023-08-08T02:00:00Z")
|
processedAt = Instant.parse("2023-08-08T02:00:00Z")
|
||||||
|
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user