1
0
mirror of https://github.com/pcvolkmer/etl-processor.git synced 2025-07-01 22:22:53 +00:00

Initial commit

This commit is contained in:
2023-07-24 18:50:12 +02:00
commit 05149bac0b
22 changed files with 1703 additions and 0 deletions

View File

@ -0,0 +1,26 @@
/*
* This file is part of ETL-Processor
*
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package dev.dnpm.etl.processor.pseudonym;
/**
 * Contract for components that turn an identifier into a pseudonym.
 */
public interface Generator {

    /**
     * Generates the pseudonym that belongs to the given identifier.
     *
     * @param id the identifier to create a pseudonym for
     * @return the pseudonym created by the implementation
     */
    String generate(String id);

}

View File

@ -0,0 +1,41 @@
/*
* This file is part of ETL-Processor
*
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package dev.dnpm.etl.processor.pseudonym;
import java.net.URI;
/**
 * {@link Generator} placeholder for pseudonyms obtained via gPAS.
 *
 * <p>The actual lookup is not implemented yet. Instead of handing out one
 * constant value for every identifier (which would give all patients the same
 * pseudonym), {@link #generate(String)} fails fast.
 */
public class GpasPseudonymGenerator implements Generator {

    // NOTE(review): presumably the gPAS endpoint to call - confirm once the lookup is implemented.
    private final URI uri;

    // NOTE(review): presumably the gPAS target/domain the pseudonyms belong to - confirm.
    private final String target;

    /**
     * Creates the generator.
     *
     * @param uri    URI stored for the future gPAS request
     * @param target target name stored for the future gPAS request
     */
    public GpasPseudonymGenerator(URI uri, String target) {
        this.uri = uri;
        this.target = target;
    }

    /**
     * Not implemented yet.
     *
     * @param id the identifier to create a pseudonym for
     * @return never returns normally
     * @throws UnsupportedOperationException always, until the gPAS request is implemented
     */
    @Override
    public String generate(String id) {
        // TODO Implement this
        // A constant return value would map every identifier onto the same pseudonym,
        // so refuse to produce one rather than silently mixing up records.
        throw new UnsupportedOperationException(
            "gPAS pseudonym generation is not implemented yet (uri=" + uri + ", target=" + target + ")"
        );
    }
}

View File

@ -0,0 +1,31 @@
/*
* This file is part of ETL-Processor
*
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package dev.dnpm.etl.processor
import org.springframework.boot.autoconfigure.SpringBootApplication
import org.springframework.boot.runApplication
/**
 * Spring Boot application class of the ETL-Processor.
 *
 * It has no members of its own; it carries the [SpringBootApplication] annotation
 * and is the type passed to `runApplication` in [main].
 */
@SpringBootApplication
class EtlProcessorApplication
/**
 * Program entry point: starts [EtlProcessorApplication] and forwards the
 * command line arguments unchanged.
 *
 * @param args command line arguments passed on to Spring Boot
 */
fun main(args: Array<String>) {
    runApplication<EtlProcessorApplication>(*args)
}

View File

@ -0,0 +1,31 @@
/*
* This file is part of ETL-Processor
*
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package dev.dnpm.etl.processor
import org.springframework.boot.builder.SpringApplicationBuilder
import org.springframework.boot.web.servlet.support.SpringBootServletInitializer
/**
 * [SpringBootServletInitializer] that registers [EtlProcessorApplication] as
 * application source.
 *
 * NOTE(review): presumably present for deployment as a WAR into a servlet
 * container - confirm against the build configuration.
 */
class ServletInitializer : SpringBootServletInitializer() {

    /**
     * Adds [EtlProcessorApplication] to the sources of the given builder.
     *
     * @param application the builder handed in by Spring Boot
     * @return the builder returned by `sources(...)`
     */
    override fun configure(application: SpringApplicationBuilder): SpringApplicationBuilder =
        application.sources(EtlProcessorApplication::class.java)
}

View File

@ -0,0 +1,77 @@
/*
* This file is part of ETL-Processor
*
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package dev.dnpm.etl.processor.config
import org.springframework.boot.context.properties.ConfigurationProperties
import java.net.URI
import java.net.URL
/**
 * General application settings bound from the `app` property prefix.
 *
 * @property bwhc_uri optional URI string without default value.
 *   NOTE(review): not read by any of the code visible here - confirm its purpose.
 * @property pseudonymizer selects the pseudonym generator; defaults to [Pseudonymizer.BUILDIN]
 */
@ConfigurationProperties(prefix = AppConfigProperties.NAME)
data class AppConfigProperties(
    var bwhc_uri: String?,
    var pseudonymizer: Pseudonymizer = Pseudonymizer.BUILDIN,
) {
    companion object {
        /** Property prefix these settings are bound from. */
        const val NAME = "app"
    }
}
/**
 * Pseudonymization settings bound from the `app.pseudonymize` property prefix.
 *
 * @property prefix text placed in front of generated pseudonyms; defaults to `UNKNOWN`
 */
@ConfigurationProperties(prefix = PseudonymizeConfigProperties.NAME)
data class PseudonymizeConfigProperties(
    val prefix: String = "UNKNOWN",
) {
    companion object {
        /** Property prefix these settings are bound from. */
        const val NAME = "app.pseudonymize"
    }
}
/**
 * gPAS settings bound from the `app.pseudonymize.gpas` property prefix.
 *
 * @property uri URI string of the gPAS service; has no default and may be absent
 * @property target target name handed to the gPAS generator; defaults to `etl-processor`
 */
@ConfigurationProperties(prefix = GPasConfigProperties.NAME)
data class GPasConfigProperties(
    val uri: String?,
    val target: String = "etl-processor",
) {
    companion object {
        /** Property prefix these settings are bound from. */
        const val NAME = "app.pseudonymize.gpas"
    }
}
/**
 * Settings of the REST export target, bound from the `app.rest` property prefix.
 *
 * @property uri URI string the REST sender posts to; has no default and may be absent
 */
@ConfigurationProperties(prefix = RestTargetProperties.NAME)
data class RestTargetProperties(
    val uri: String?,
) {
    companion object {
        /** Property prefix these settings are bound from. */
        const val NAME = "app.rest"
    }
}
/**
 * Settings of the Kafka export target, bound from the `app.kafka` property prefix.
 *
 * @property topic topic name; defaults to `etl-processor`
 * @property servers server list as a single string; defaults to the empty string
 */
@ConfigurationProperties(prefix = KafkaTargetProperties.NAME)
data class KafkaTargetProperties(
    val topic: String = "etl-processor",
    val servers: String = "",
) {
    companion object {
        /** Property prefix these settings are bound from. */
        const val NAME = "app.kafka"
    }
}
/**
 * Available pseudonym generator variants, selectable through the
 * `app.pseudonymizer` property.
 */
enum class Pseudonymizer {
    /** The generator shipped with this application. */
    BUILDIN,

    /** The gPAS based generator. */
    GPAS,
}

View File

@ -0,0 +1,82 @@
/*
* This file is part of ETL-Processor
*
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package dev.dnpm.etl.processor.config
import com.fasterxml.jackson.databind.ObjectMapper
import dev.dnpm.etl.processor.output.KafkaMtbFileSender
import dev.dnpm.etl.processor.output.MtbFileSender
import dev.dnpm.etl.processor.output.RestMtbFileSender
import dev.dnpm.etl.processor.pseudonym.AnonymizingGenerator
import dev.dnpm.etl.processor.pseudonym.Generator
import dev.dnpm.etl.processor.pseudonym.GpasPseudonymGenerator
import dev.dnpm.etl.processor.pseudonym.PseudonymizeService
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty
import org.springframework.boot.context.properties.EnableConfigurationProperties
import org.springframework.context.annotation.Bean
import org.springframework.context.annotation.Configuration
import org.springframework.kafka.core.KafkaTemplate
import java.net.URI
/**
 * Bean definitions of the application: pseudonym generator, pseudonymize
 * service and the senders used to export files.
 *
 * Which beans get created depends on the `app.*` configuration properties,
 * see the conditions on the individual bean methods.
 */
@Configuration
@EnableConfigurationProperties(
    value = [
        AppConfigProperties::class,
        PseudonymizeConfigProperties::class,
        GPasConfigProperties::class,
        RestTargetProperties::class,
        KafkaTargetProperties::class
    ]
)
class AppConfiguration {

    /**
     * Generator used when `app.pseudonymizer` is `GPAS`.
     *
     * @param configProperties gPAS settings providing URI and target
     * @return the gPAS based [Generator]
     * @throws IllegalArgumentException if no gPAS URI is configured or it is not a valid URI
     */
    @ConditionalOnProperty(value = ["app.pseudonymizer"], havingValue = "GPAS")
    @Bean
    fun gpasPseudonymGenerator(configProperties: GPasConfigProperties): Generator {
        // Fail with a message naming the missing property instead of a bare NullPointerException.
        val gpasUri = requireNotNull(configProperties.uri) {
            "Property '${GPasConfigProperties.NAME}.uri' must be set if 'app.pseudonymizer' is 'GPAS'"
        }
        return GpasPseudonymGenerator(URI.create(gpasUri), configProperties.target)
    }

    /**
     * Generator used when `app.pseudonymizer` is `BUILDIN` or not set at all.
     *
     * @return the [AnonymizingGenerator]
     */
    @ConditionalOnProperty(value = ["app.pseudonymizer"], havingValue = "BUILDIN", matchIfMissing = true)
    @Bean
    fun buildinPseudonymGenerator(): Generator {
        return AnonymizingGenerator()
    }

    /**
     * Service combining the active [Generator] with the pseudonymize settings.
     *
     * @param generator the generator bean selected by configuration
     * @param pseudonymizeConfigProperties settings such as the pseudonym prefix
     */
    @Bean
    fun pseudonymizeService(
        generator: Generator,
        pseudonymizeConfigProperties: PseudonymizeConfigProperties
    ): PseudonymizeService {
        return PseudonymizeService(generator, pseudonymizeConfigProperties)
    }

    /**
     * REST sender, only created if `app.rest.uri` is configured.
     *
     * @param restTargetProperties settings of the REST target
     */
    @ConditionalOnProperty(value = ["app.rest.uri"])
    @Bean
    fun restMtbFileSender(restTargetProperties: RestTargetProperties): MtbFileSender {
        return RestMtbFileSender(restTargetProperties)
    }

    /**
     * Kafka sender, only created if both `app.kafka.topic` and
     * `app.kafka.servers` are configured.
     *
     * @param kafkaTemplate template used to publish the serialized file
     * @param objectMapper mapper used to serialize the file
     */
    @ConditionalOnProperty(value = ["app.kafka.topic", "app.kafka.servers"])
    @Bean
    fun kafkaMtbFileSender(
        kafkaTemplate: KafkaTemplate<String, String>,
        objectMapper: ObjectMapper
    ): MtbFileSender {
        return KafkaMtbFileSender(kafkaTemplate, objectMapper)
    }
}

View File

@ -0,0 +1,46 @@
/*
* This file is part of ETL-Processor
*
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package dev.dnpm.etl.processor.output
import com.fasterxml.jackson.databind.ObjectMapper
import de.ukw.ccc.bwhc.dto.MtbFile
import dev.dnpm.etl.processor.config.KafkaTargetProperties
import org.slf4j.LoggerFactory
import org.springframework.kafka.core.KafkaTemplate
/**
 * [MtbFileSender] publishing files as JSON strings to the default topic of the
 * given [KafkaTemplate].
 *
 * @param kafkaTemplate template used for publishing
 * @param objectMapper mapper used to serialize the file to a JSON string
 */
class KafkaMtbFileSender(
    private val kafkaTemplate: KafkaTemplate<String, String>,
    private val objectMapper: ObjectMapper
) : MtbFileSender {

    private val logger = LoggerFactory.getLogger(KafkaMtbFileSender::class.java)

    /**
     * Serializes [mtbFile] and sends it to the template's default topic.
     *
     * The call waits for the outcome of the send so that the return value
     * reflects whether the record was actually handed over successfully.
     *
     * @param mtbFile the file to send
     * @return `true` if the send completed without error, `false` otherwise
     */
    override fun send(mtbFile: MtbFile): Boolean {
        return try {
            // sendDefault() is asynchronous and only returns a future. Without waiting
            // for it, failures reported later would go unnoticed and success would be
            // claimed too early.
            // NOTE(review): the wait has no explicit timeout and relies on the
            // producer's own timeouts - confirm this is acceptable.
            kafkaTemplate.sendDefault(objectMapper.writeValueAsString(mtbFile)).get()
            logger.debug("Sent file via KafkaMtbFileSender")
            true
        } catch (e: InterruptedException) {
            // Keep the interrupt visible to the caller's thread handling.
            Thread.currentThread().interrupt()
            logger.error("An error occured sending to kafka", e)
            false
        } catch (e: Exception) {
            logger.error("An error occured sending to kafka", e)
            false
        }
    }
}

View File

@ -0,0 +1,28 @@
/*
* This file is part of ETL-Processor
*
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package dev.dnpm.etl.processor.output
import de.ukw.ccc.bwhc.dto.MtbFile
/**
 * Contract for components that hand an [MtbFile] over to a target system.
 */
interface MtbFileSender {

    /**
     * Sends the given file.
     *
     * @param mtbFile the file to send
     * @return `true` if sending succeeded, `false` otherwise
     */
    fun send(mtbFile: MtbFile): Boolean

}

View File

@ -0,0 +1,62 @@
/*
* This file is part of ETL-Processor
*
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package dev.dnpm.etl.processor.output
import de.ukw.ccc.bwhc.dto.MtbFile
import dev.dnpm.etl.processor.config.RestTargetProperties
import org.slf4j.LoggerFactory
import org.springframework.http.HttpEntity
import org.springframework.http.HttpHeaders
import org.springframework.http.MediaType
import org.springframework.web.client.RestClientException
import org.springframework.web.client.RestTemplate
/**
 * [MtbFileSender] posting files as JSON to the URI configured in
 * [RestTargetProperties].
 *
 * @param restTargetProperties settings providing the target URI
 */
class RestMtbFileSender(private val restTargetProperties: RestTargetProperties) : MtbFileSender {

    private val logger = LoggerFactory.getLogger(RestMtbFileSender::class.java)

    private val restTemplate = RestTemplate()

    /**
     * Posts [mtbFile] with content type `application/json` to the configured URI.
     *
     * @param mtbFile the file to send
     * @return `true` if the remote system answered with a 2xx status; `false` if no
     *   URI is configured, the URI is invalid, the request failed or another
     *   status was returned
     */
    override fun send(mtbFile: MtbFile): Boolean {
        // Read the nullable setting once; a missing URI is a failed send, not a crash.
        val targetUri = restTargetProperties.uri
        if (targetUri == null) {
            logger.error("No URI configured to export to")
            return false
        }

        try {
            val headers = HttpHeaders()
            headers.contentType = MediaType.APPLICATION_JSON
            val entityReq = HttpEntity(mtbFile, headers)
            val response = restTemplate.postForEntity(
                targetUri,
                entityReq,
                String::class.java
            )
            if (!response.statusCode.is2xxSuccessful) {
                logger.warn("Error sending to remote system: {}", response.body)
                return false
            }
            logger.debug("Sent file via RestMtbFileSender")
            return true
        } catch (e: IllegalArgumentException) {
            logger.error("Not a valid URI to export to: '{}'", targetUri)
        } catch (e: RestClientException) {
            logger.info(targetUri)
            logger.error("Cannot send data to remote system", e)
        }
        return false
    }
}

View File

@ -0,0 +1,34 @@
/*
* This file is part of ETL-Processor
*
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package dev.dnpm.etl.processor.pseudonym
import org.apache.commons.codec.binary.Base32
import org.apache.commons.codec.digest.DigestUtils
/**
 * [Generator] deriving the pseudonym from a hash of the identifier.
 */
class AnonymizingGenerator : Generator {

    /**
     * Builds the pseudonym as the lowercased first 42 characters of the
     * Base32 encoded SHA-256 digest of [id].
     *
     * @param id the identifier to create a pseudonym for
     * @return the 42 character pseudonym
     */
    override fun generate(id: String): String {
        val digest = DigestUtils.sha256(id)
        val encoded = Base32().encodeAsString(digest)
        // The encoded 32 byte digest is longer than 42 characters, so this equals substring(0..41).
        return encoded.take(42).lowercase()
    }
}

View File

@ -0,0 +1,60 @@
/*
* This file is part of ETL-Processor
*
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package dev.dnpm.etl.processor.pseudonym
import de.ukw.ccc.bwhc.dto.MtbFile
import dev.dnpm.etl.processor.config.PseudonymizeConfigProperties
/**
 * Replaces the patient reference throughout an [MtbFile] with a pseudonym.
 *
 * @param generator creates the pseudonym from the original patient id
 * @param configProperties provides the prefix placed in front of the pseudonym
 */
class PseudonymizeService(
    private val generator: Generator,
    private val configProperties: PseudonymizeConfigProperties
) {

    /**
     * Pseudonymizes the given file in place.
     *
     * The pseudonym is `<prefix>_<generated value>` where the generated value is
     * created from the original `patient.id`. It is written to the patient
     * itself and to the patient reference of every section handled below.
     * Sections that are absent (`null`) are skipped.
     *
     * NOTE(review): the original listed `recommendations` and `specimens` twice,
     * which suggests other sections were meant. Verify against the MtbFile DTO
     * that every section carrying a patient reference is covered.
     *
     * @param mtbFile the file to pseudonymize; it is modified
     * @return the same, modified instance
     */
    fun pseudonymize(mtbFile: MtbFile): MtbFile {
        val patientPseudonym = "${configProperties.prefix}_${generator.generate(mtbFile.patient.id)}"

        mtbFile.episode?.patient = patientPseudonym
        mtbFile.carePlans?.forEach { it.patient = patientPseudonym }
        mtbFile.patient.id = patientPseudonym
        mtbFile.claims?.forEach { it.patient = patientPseudonym }
        mtbFile.consent?.patient = patientPseudonym
        mtbFile.claimResponses?.forEach { it.patient = patientPseudonym }
        mtbFile.diagnoses?.forEach { it.patient = patientPseudonym }
        mtbFile.ecogStatus?.forEach { it.patient = patientPseudonym }
        mtbFile.familyMemberDiagnoses?.forEach { it.patient = patientPseudonym }
        mtbFile.geneticCounsellingRequests?.forEach { it.patient = patientPseudonym }
        mtbFile.histologyReevaluationRequests?.forEach { it.patient = patientPseudonym }
        mtbFile.histologyReports?.forEach { it.patient = patientPseudonym }
        mtbFile.lastGuidelineTherapies?.forEach { it.patient = patientPseudonym }
        mtbFile.molecularPathologyFindings?.forEach { it.patient = patientPseudonym }
        // Molecular therapies carry the patient reference on their history entries.
        mtbFile.molecularTherapies?.forEach { therapy ->
            therapy.history?.forEach { entry -> entry.patient = patientPseudonym }
        }
        mtbFile.ngsReports?.forEach { it.patient = patientPseudonym }
        mtbFile.previousGuidelineTherapies?.forEach { it.patient = patientPseudonym }
        mtbFile.rebiopsyRequests?.forEach { it.patient = patientPseudonym }
        mtbFile.recommendations?.forEach { it.patient = patientPseudonym }
        mtbFile.responses?.forEach { it.patient = patientPseudonym }
        mtbFile.specimens?.forEach { it.patient = patientPseudonym }

        return mtbFile
    }
}

View File

@ -0,0 +1,52 @@
/*
* This file is part of ETL-Processor
*
* Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package dev.dnpm.etl.processor.web
import de.ukw.ccc.bwhc.dto.MtbFile
import dev.dnpm.etl.processor.output.MtbFileSender
import dev.dnpm.etl.processor.output.RestMtbFileSender
import dev.dnpm.etl.processor.pseudonym.PseudonymizeService
import org.slf4j.LoggerFactory
import org.springframework.web.bind.annotation.PostMapping
import org.springframework.web.bind.annotation.RequestBody
import org.springframework.web.bind.annotation.RestController
/**
 * REST endpoint accepting files, pseudonymizing them and passing them on to
 * all available senders.
 *
 * @param pseudonymizeService service used to pseudonymize incoming files
 * @param senders all senders the pseudonymized file is handed to
 */
@RestController
class MtbFileController(
    private val pseudonymizeService: PseudonymizeService,
    private val senders: List<MtbFileSender>
) {

    private val logger = LoggerFactory.getLogger(MtbFileController::class.java)

    /**
     * Handles `POST /mtbfile`: pseudonymizes the request body and sends the
     * result with every sender, logging the outcome per sender.
     *
     * @param mtbFile the file taken from the request body
     */
    @PostMapping(path = ["/mtbfile"])
    fun mtbFile(@RequestBody mtbFile: MtbFile) {
        val pseudonymized = pseudonymizeService.pseudonymize(mtbFile)

        for (sender in senders) {
            val senderName = sender.javaClass.simpleName
            if (!sender.send(pseudonymized)) {
                // A failing sender does not stop the remaining senders.
                logger.error("Error sending file for Patient '{}' using '{}'", pseudonymized.patient.id, senderName)
                continue
            }
            logger.info("Sent file for Patient '{}' using '{}'", pseudonymized.patient.id, senderName)
        }
    }
}

View File

@ -0,0 +1,5 @@
# Kafka client settings are taken from the application's own `app.kafka.*` properties.
spring:
  kafka:
    # Server list from `app.kafka.servers`
    bootstrap-servers: ${app.kafka.servers}
    template:
      # Topic used when sending to the template's default topic, from `app.kafka.topic`
      default-topic: ${app.kafka.topic}