diff --git a/build.gradle.kts b/build.gradle.kts
index 71a3559..97c02b9 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -32,6 +32,7 @@ dependencies {
     implementation("org.jetbrains.kotlin:kotlin-reflect")
     implementation("org.flywaydb:flyway-database-postgresql")
     implementation("org.flywaydb:flyway-mysql")
+    implementation("org.springframework.integration:spring-integration-file")
     developmentOnly("org.springframework.boot:spring-boot-devtools")
     developmentOnly("org.springframework.boot:spring-boot-docker-compose")
     runtimeOnly("org.mariadb.jdbc:mariadb-java-client")
diff --git a/src/main/kotlin/dev/pcvolkmer/onco/grzmetadataprocessor/config/AppIntegrationConfig.kt b/src/main/kotlin/dev/pcvolkmer/onco/grzmetadataprocessor/config/AppIntegrationConfig.kt
new file mode 100644
index 0000000..78e2edf
--- /dev/null
+++ b/src/main/kotlin/dev/pcvolkmer/onco/grzmetadataprocessor/config/AppIntegrationConfig.kt
@@ -0,0 +1,129 @@
+package dev.pcvolkmer.onco.grzmetadataprocessor.config
+
+import dev.pcvolkmer.onco.grzmetadataprocessor.data.File
+import dev.pcvolkmer.onco.grzmetadataprocessor.data.FileRepository
+import dev.pcvolkmer.onco.grzmetadataprocessor.data.FileType
+import org.apache.tomcat.util.buf.HexUtils
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty
+import org.springframework.boot.context.properties.ConfigurationProperties
+import org.springframework.boot.context.properties.EnableConfigurationProperties
+import org.springframework.context.annotation.Bean
+import org.springframework.context.annotation.Configuration
+import org.springframework.integration.dsl.IntegrationFlow
+import org.springframework.integration.dsl.Pollers
+import org.springframework.integration.file.dsl.Files
+import org.springframework.util.Assert
+import java.nio.file.Path
+import java.security.DigestInputStream
+import java.security.MessageDigest
+import java.time.Duration
+import kotlin.io.path.*
+import kotlin.time.Duration.Companion.minutes
+import kotlin.time.toJavaDuration
+
+/**
+ * Settings for picking up files from a local directory, bound from the `app.source.fs` prefix.
+ *
+ * @property directory directory to watch for incoming files; `null` when not configured
+ * @property pollDelay delay between two polls of the source directory, one minute by default
+ */
+@ConfigurationProperties(AppSourceFsProperties.NAME)
+data class AppSourceFsProperties(
+    val directory: Path? = null,
+    val pollDelay: Duration = 1.minutes.toJavaDuration(),
+) {
+    companion object {
+        const val NAME = "app.source.fs"
+    }
+}
+
+/**
+ * Spring Integration wiring that registers files found in the configured source directory.
+ */
+@Configuration
+@EnableConfigurationProperties(AppSourceFsProperties::class)
+class AppIntegrationConfig {
+
+    /**
+     * Watches the configured source directory and stores a [File] record for every detected file
+     * whose path, relative to that directory, is not yet known to the [FileRepository].
+     *
+     * The bean is only created when `app.source.fs.directory` is set.
+     *
+     * @throws IllegalStateException if the configured path is not an existing directory
+     */
+    @Bean
+    @ConditionalOnProperty(
+        name = ["app.source.fs.directory"]
+    )
+    fun fileInputFlow(
+        applicationFsProperties: AppSourceFsProperties,
+        fileRepository: FileRepository
+    ): IntegrationFlow {
+        val configuredDirectory = applicationFsProperties.directory
+        Assert.state(configuredDirectory != null && configuredDirectory.isDirectory()) {
+            "Property 'app.source.fs.directory' is set but source directory is not available"
+        }
+        // Assert.state carries no Kotlin contract, so non-nullness is restated once for the compiler.
+        val sourceDirectory = checkNotNull(configuredDirectory)
+        return IntegrationFlow
+            .from(
+                Files.inboundAdapter(sourceDirectory.toFile()).useWatchService(true)
+            ) { endpoint ->
+                // Apply the configured delay; otherwise 'pollDelay' would have no effect.
+                endpoint.poller(Pollers.fixedDelay(applicationFsProperties.pollDelay))
+            }
+            .log()
+            .handle { msg ->
+                val path = Path(msg.payload.toString())
+                val relativePath = sourceDirectory.relativize(path).pathString
+                // Files already registered under the same relative path are left untouched.
+                if (fileRepository.findByFilePath(relativePath).isEmpty) {
+                    fileRepository.save(
+                        File(
+                            filePath = relativePath,
+                            labDataId = null,
+                            fileChecksum = calcFileChecksum(path),
+                            fileSizeInBytes = path.fileSize(),
+                            fileType = getFileType(path),
+                        )
+                    )
+                }
+            }
+            .get()
+    }
+
+    /**
+     * Computes the SHA-256 checksum of the file at [path] as a hex string.
+     *
+     * The file is streamed in chunks so that large files are never held in memory as a whole,
+     * and the stream is closed even if reading fails.
+     */
+    private fun calcFileChecksum(path: Path): String {
+        val messageDigest = MessageDigest.getInstance("SHA-256")
+        DigestInputStream(path.inputStream(), messageDigest).use { stream ->
+            val buffer = ByteArray(DEFAULT_BUFFER_SIZE)
+            while (stream.read(buffer) != -1) {
+                // Reading through the stream updates the digest; the bytes are not needed.
+            }
+        }
+        return HexUtils.toHexString(messageDigest.digest())
+    }
+
+    /**
+     * Derives the [FileType] from the file name suffix, ignoring case.
+     *
+     * @return the matching type, or `null` if the suffix is not recognised
+     */
+    private fun getFileType(path: Path): FileType? {
+        val name = path.toString().lowercase()
+        return when {
+            name.endsWith(".fastq.gz") -> FileType.FASTQ
+            name.endsWith(".bed") -> FileType.BED
+            name.endsWith(".bam") -> FileType.BAM
+            name.endsWith(".vcf") -> FileType.VCF
+            else -> null
+        }
+    }
+
+}
diff --git a/src/main/kotlin/dev/pcvolkmer/onco/grzmetadataprocessor/data/File.kt b/src/main/kotlin/dev/pcvolkmer/onco/grzmetadataprocessor/data/File.kt
index 7b4b799..b361678 100644
--- a/src/main/kotlin/dev/pcvolkmer/onco/grzmetadataprocessor/data/File.kt
+++ b/src/main/kotlin/dev/pcvolkmer/onco/grzmetadataprocessor/data/File.kt
@@ -17,8 +17,8 @@ data class File(
     val labDataId: Long?,
     val filePath: String? = null,
     val fileType: FileType? = null,
-    var fileChecksum: String? = null,
-    var fileSizeInBytes: Long? = null,
+    var fileChecksum: String = "",
+    var fileSizeInBytes: Long = 0,
 ) {
     fun calcFileChecksum(): String {
         if (filePath == null) {
@@ -26,7 +26,6 @@ data class File(
         }
         val path = Path.of(filePath)
         val messageDigest = MessageDigest.getInstance("SHA-256")
-
         val digestInputStream = DigestInputStream(path.inputStream(), messageDigest)
         digestInputStream.readAllBytes()
         return HexUtils.toHexString(messageDigest.digest())
@@ -50,4 +49,5 @@ enum class FileType(val value: String) {
 interface FileRepository : CrudRepository {
     fun findByLabDataId(labDataId: Long): MutableList
     fun findByLabDataIdIsNull(): List
+    fun findByFilePath(filePath: String): Optional
 }
diff --git a/src/main/resources/db/migrations/mariadb/V0_1_0_2__FileChecksums.sql b/src/main/resources/db/migrations/mariadb/V0_1_0_2__FileChecksums.sql
new file mode 100644
index 0000000..85bd183
--- /dev/null
+++ b/src/main/resources/db/migrations/mariadb/V0_1_0_2__FileChecksums.sql
@@ -0,0 +1 @@
+ALTER TABLE tbl_file DROP INDEX file_checksum;
diff --git a/src/main/resources/db/migrations/postgresql/V0_1_0_2__FileChecksums.sql b/src/main/resources/db/migrations/postgresql/V0_1_0_2__FileChecksums.sql
new file mode 100644
index 0000000..e5306cc
--- /dev/null
+++ b/src/main/resources/db/migrations/postgresql/V0_1_0_2__FileChecksums.sql
@@ -0,0 +1 @@
+ALTER TABLE tbl_file DROP CONSTRAINT tbl_file_file_checksum_key;
diff --git a/src/main/resources/templates/unusedfiles.html b/src/main/resources/templates/unusedfiles.html
index 1b00775..ecca840 100644
--- a/src/main/resources/templates/unusedfiles.html
+++ b/src/main/resources/templates/unusedfiles.html
@@ -40,7 +40,7 @@
@@ -50,10 +50,10 @@