commit 6ebf969d35c115300a2092987a89669639eceb4b Author: Gleb Koval Date: Fri Dec 29 18:43:54 2023 +0600 Initial implementations diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b63da45 --- /dev/null +++ b/.gitignore @@ -0,0 +1,42 @@ +.gradle +build/ +!gradle/wrapper/gradle-wrapper.jar +!**/src/main/**/build/ +!**/src/test/**/build/ + +### IntelliJ IDEA ### +.idea/modules.xml +.idea/jarRepositories.xml +.idea/compiler.xml +.idea/libraries/ +*.iws +*.iml +*.ipr +out/ +!**/src/main/**/out/ +!**/src/test/**/out/ + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache +bin/ +!**/src/main/**/bin/ +!**/src/test/**/bin/ + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ + +### VS Code ### +.vscode/ + +### Mac OS ### +.DS_Store \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/gradle.xml b/.idea/gradle.xml new file mode 100644 index 0000000..2a65317 --- /dev/null +++ b/.idea/gradle.xml @@ -0,0 +1,17 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/kotlinc.xml b/.idea/kotlinc.xml new file mode 100644 index 0000000..ae3f30a --- /dev/null +++ b/.idea/kotlinc.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..f6589e3 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 
index 0000000..97ba7d0 --- /dev/null +++ b/README.md @@ -0,0 +1,36 @@ +# TeamCity Executors - A new way to execute builds - Test Task +This is a small backup utility for uploading/restoring a local directory to/from +an AWS S3 bucket. + +## Assumptions +1. This test task is not interested in re-implementations of common libraries (AWS SDK, Clikt, Gradle Shadow, ...) +2. The last part (restoration of a single file) should be optimised so that only the part of the blob required for this + file is downloaded. +3. Only this tool is ever used to create backups, so S3 object keys are in the expected format, and ZIP files do not have + a comment in the *end of central directory* record (making it a predictable length of 22 bytes). + - EOCD64 should similarly not have a comment. + +## Design decisions +- Backups may be large, so we want to use multipart uploads if possible (AWS recommends them once an object reaches about 100 MB). + https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html + - The Java SDK has high-level support for this via [S3TransferManager](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/transfer/s3/S3TransferManager.html), + but unfortunately when the content is too small, the HTTP `Content-Length` is not automatically calculated resulting + in an error response from the API. + - I'm not sure whether this is intended behaviour or a bug, but decided to manually implement multipart uploads using + the Kotlin SDK instead anyway. + - **Note**: I could have just used a temporary file (with a known `Content-Length`), but I wanted to play around with + streams and Kotlin concurrency a bit, which is why I went with the more scalable way using streams. +- Zip files are used so that the backups can be stored in a very common format which also provides compression. + - Java zip specification: https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/zip/package-summary.html + - ZIP64 implementation is optional, but possible, so we'll handle it.
+ - The End of Central Directory record is also useful for locating the exact positions of files in the blob, so that + single files can be downloaded using the HTTP `Range` header. + - End of Central Directory comment must be blank (assumption 3). Otherwise, the EOCD length is unpredictable and so we + cannot use just a single request with the HTTP `Range` header to get the entire EOCD. + - Alternative: use S3 object tags to store the EOCD offset, but this way the blob itself would no longer contain all + the data required by this backup tool. + - Alternative: store the EOCD offset in the EOCD comment or the beginning of the file, but this makes a similar, but + stricter, assumption anyway. + +## Instructions +Create a backup utility that copies files to AWS S3. The utility should take a local directory with files and put it into AWS S3 in the form of one blob file. The reverse behavior should also be possible. We should be able to specify what backup we want to restore and where it should put the files on the local system. The utility should be able to restore one individual file from a backup.
\ No newline at end of file diff --git a/build.gradle.kts b/build.gradle.kts new file mode 100644 index 0000000..d3f475d --- /dev/null +++ b/build.gradle.kts @@ -0,0 +1,29 @@ +plugins { + kotlin("jvm") version "1.9.21" + id("com.github.johnrengelman.shadow") version "8.1.1" +} + +group = "net.koval" +version = "1.0-SNAPSHOT" + +repositories { + mavenCentral() +} + +dependencies { + implementation("aws.sdk.kotlin:s3:1.0.25") + implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3") + testImplementation("org.jetbrains.kotlin:kotlin-test") +} + +tasks.test { + useJUnitPlatform() +} +kotlin { + jvmToolchain(17) +} +tasks.jar { + manifest { + attributes("Main-Class" to "backup.MainKt") + } +} \ No newline at end of file diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 0000000..7fc6f1f --- /dev/null +++ b/gradle.properties @@ -0,0 +1 @@ +kotlin.code.style=official diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..249e583 Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..eef1617 --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Tue Dec 26 09:55:09 GMT 2023 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 0000000..1b6c787 --- /dev/null +++ b/gradlew @@ -0,0 +1,234 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. 
+# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit + +APP_NAME="Gradle" +APP_BASE_NAME=${0##*/} + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! 
-x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. 
+ # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + +# Collect all arguments for the java command; +# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of +# shell script including quotes and variable substitutions, so put them in +# double quotes to make sure that they get re-expanded; and +# * put everything else in single quotes, so that it's not re-expanded. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..ac1b06f --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,89 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. 
+@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. 
+ +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/settings.gradle.kts b/settings.gradle.kts new file mode 100644 index 0000000..b206040 --- /dev/null +++ b/settings.gradle.kts @@ -0,0 +1,5 @@ +plugins { + id("org.gradle.toolchains.foojay-resolver-convention") version "0.5.0" +} +rootProject.name = "teamcity-executors-test-task" + diff --git a/src/main/kotlin/backup/BackupClient.kt b/src/main/kotlin/backup/BackupClient.kt new file mode 100644 index 0000000..713bcdd --- /dev/null +++ b/src/main/kotlin/backup/BackupClient.kt @@ -0,0 +1,269 @@ +package backup + +import aws.sdk.kotlin.services.s3.* +import aws.sdk.kotlin.services.s3.model.CompletedMultipartUpload +import aws.sdk.kotlin.services.s3.model.CompletedPart +import aws.sdk.kotlin.services.s3.model.GetObjectRequest +import aws.sdk.kotlin.services.s3.model.UploadPartResponse +import aws.smithy.kotlin.runtime.content.ByteStream +import aws.smithy.kotlin.runtime.content.toByteArray +import aws.smithy.kotlin.runtime.content.toInputStream +import kotlinx.coroutines.* +import ziputils.* +import ziputils.CentralDirectoryFileHeader +import ziputils.EndOfCentralDirectoryLocator +import ziputils.EndOfCentralDirectoryRecord +import ziputils.EndOfCentralDirectoryRecord64 +import java.io.* +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.attribute.BasicFileAttributeView +import 
java.time.Instant +import java.util.zip.ZipEntry +import java.util.zip.ZipInputStream +import java.util.zip.ZipOutputStream +import kotlin.io.path.createDirectory + +class BackupClient( + private val s3: S3Client, + private val bucketName: String, + private val bufSize: Int = 1024 * 1024 * 100 +) { + suspend fun upload(file: File) = coroutineScope { + val backupKey = "${file.name}/${Instant.now()}.zip" + PipedInputStream().use { inputStream -> + val outputStream = PipedOutputStream(inputStream) + val zipper = launch(Dispatchers.IO) { + file.compressToZip(outputStream) + } + + val data = ByteArray(bufSize) + val initialRead = inputStream.readNBytes(data, 0, bufSize) + if (initialRead == bufSize) { + // Large upload, use multipart + // TODO: multipart uploads can be asynchronous, which would improve + // performance a little bit for big uploads. + val upload = s3.createMultipartUpload { + bucket = bucketName + key = backupKey + } + try { + val uploadParts = mutableListOf() + var number = 1 + var bytesRead = initialRead + while (bytesRead > 0) { + val part = s3.uploadPart { + bucket = bucketName + key = backupKey + partNumber = number + uploadId = upload.uploadId + body = ByteStream.fromBytes(data.take(bytesRead)) + }.asCompletedPart(number) + uploadParts.add(part) + number++ + bytesRead = inputStream.readNBytes(data, 0, bufSize) + } + s3.completeMultipartUpload { + bucket = bucketName + key = backupKey + uploadId = upload.uploadId + multipartUpload = CompletedMultipartUpload { + parts = uploadParts + } + } + } catch (e: Exception) { + s3.abortMultipartUpload { + bucket = bucketName + key = backupKey + uploadId = upload.uploadId + } + throw e + } + } else { + // Small upload, use single request + s3.putObject { + bucket = bucketName + key = backupKey + body = ByteStream.fromBytes(data.take(initialRead)) + } + } + zipper.join() // Should be instant + } + backupKey + } + + suspend fun restore(destination: Path, backupKey: String) = coroutineScope { + val req = 
GetObjectRequest { + bucket = bucketName + key = backupKey + } + s3.getObject(req) { resp -> + ZipInputStream( + resp.body?.toInputStream() + ?: throw IOException("S3 response is missing body") + ).use { zipStream -> + zipStream.decompress { destination.resolve(it) } + } + } + } + + suspend fun restoreFile(destination: Path, backupKey: String, fileName: String) = coroutineScope { + // For byte ranges refer to https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT + val eocdReq = GetObjectRequest { + bucket = bucketName + key = backupKey + // Assumption: EOCD has an empty comment + // Assumption: Backups are at least 22 + 20 (= 42) bytes. Only COMPLETELY empty backups can be smaller, + // in which case this function would error anyway, so it should be fine to have this edge-case. + range = "bytes=-${EndOfCentralDirectoryRecord.SIZE + EndOfCentralDirectoryLocator.SIZE}" + } + val eocdBytes = s3.getObject(eocdReq) { resp -> + val bytes = resp.body?.toByteArray() ?: throw IOException("S3 response is missing body") + bytes + } + val eocd = EndOfCentralDirectoryRecord.fromByteArray(eocdBytes, EndOfCentralDirectoryLocator.SIZE) + val eocd64 = if (eocd.eocd64Required()) { + val locator = EndOfCentralDirectoryLocator.fromByteArray(eocdBytes, 0) + val eocd64Req = GetObjectRequest { + bucket = bucketName + key = backupKey + range = "bytes=${locator.endOfCentralDirectory64Offset}-" + } + s3.getObject(eocd64Req) { resp -> + val bytes = resp.body?.toByteArray() ?: throw IOException("S3 response is missing body") + EndOfCentralDirectoryRecord64.fromByteArray(bytes, 0) + } + } else null + val cenOffset = if (eocd.centralDirectoryOffset == 0xffffffffU && eocd64 != null) { + eocd64.centralDirectoryOffset + } else eocd.centralDirectoryOffset.toULong() + val censReq = GetObjectRequest { + bucket = bucketName + key = backupKey + // We only know where to fetch until if we've also fetched EOCD64 (which isn't always the case). 
+ // So just over-fetch a little bit, these headers aren't that big anyway. + range = "bytes=${cenOffset}-" + } + val cen = s3.getObject(censReq) { resp -> + val bytes = resp.body?.toByteArray() ?: throw IOException("S3 response is missing body") + var p = 0 + while (p < bytes.size) { + try { + val cen = CentralDirectoryFileHeader.fromByteArray(bytes, p) + p += cen.size + if (cen.fileName == fileName) return@getObject cen + } catch (_: InvalidSignatureException) { + return@getObject null + } + } + null + } ?: throw FileNotFoundException("File '${fileName}' not found in backup") + + val localHeaderOffset = cen.extraFieldRecords.firstNotNullOfOrNull { + if (it is Zip64ExtraFieldRecord && it.localHeaderOffset != null) it else null + }?.localHeaderOffset ?: cen.localHeaderOffset.toULong() + val compressedSize = cen.extraFieldRecords.firstNotNullOfOrNull { + if (it is Zip64ExtraFieldRecord && it.compressedSize != null) it else null + }?.compressedSize ?: cen.compressedSize.toULong() + val req = GetObjectRequest { + bucket = bucketName + key = backupKey + range = "bytes=${localHeaderOffset}-${ + // Add CEN min size (46 bytes) so that the next CEN / LOC header is seen by the ZipInputStream + // and so it can see the current entry has stopped. + // Note: yes ZipInputStream should know the exact content length from the LOC, but it was still sending + // EOF errors. Perhaps due to fetching multiples of a power of two, or something else. But this helps. 
+ localHeaderOffset + cen.size.toULong() + compressedSize + CentralDirectoryFileHeader.SIZE.toULong() + }" + } + s3.getObject(req) { resp -> + ZipInputStream( + resp.body?.toInputStream() + ?: throw IOException("S3 response is missing body") + ).use { zipStream -> + zipStream.decompress { name -> destination.resolve(name.takeLastWhile { it != '/' }) } + } + } + } +} + +private fun UploadPartResponse.asCompletedPart(number: Int): CompletedPart { + val part = this + return CompletedPart { + partNumber = number + eTag = part.eTag + checksumSha256 = part.checksumSha256 + checksumSha1 = part.checksumSha1 + checksumCrc32 = part.checksumCrc32 + checksumCrc32C = part.checksumCrc32C + } +} + +private fun ByteArray.take(n: Int) = + if (n == size) this // No copy + else asList().subList(0, n).toByteArray() // TODO: One copy (toByteArray()), not sure how to do 0 copies here + +private fun File.compressToZip(outputStream: OutputStream) = ZipOutputStream(outputStream).use { zipStream -> + val parentDir = this.absoluteFile.parent + "/" + val fileQueue = ArrayDeque() + fileQueue.add(this) + fileQueue.forEach { subFile -> + val path = subFile.absolutePath.removePrefix(parentDir) + val subFiles = subFile.listFiles() + if (subFiles != null) { // Is a directory + val entry = ZipEntry("$path/") + setZipAttributes(entry, subFile.toPath()) + zipStream.putNextEntry(entry) + fileQueue.addAll(subFiles) + } else { // Otherwise, treat it as a file + BufferedInputStream(subFile.inputStream()).use { origin -> + val entry = ZipEntry(path) + setZipAttributes(entry, subFile.toPath()) + zipStream.putNextEntry(entry) + origin.copyTo(zipStream) + } + } + } +} + +private fun ZipInputStream.decompress( + bufSize: Int = 1024 * 1024, + entryNameToPath: (String) -> Path +) { + var entry = this.nextEntry + while (entry != null) { + val path = entryNameToPath(entry.name) + if (entry.isDirectory) { + path.createDirectory() + } else { + val buf = ByteArray(bufSize) + path.toFile().outputStream().use { 
fileStream -> + var bytesRead = this.read(buf) + while (bytesRead > 0) { + fileStream.write(buf, 0, bytesRead) + bytesRead = this.read(buf) + } + } + } + applyZipAttributes(entry, path) + entry = this.nextEntry + } +} + +private fun setZipAttributes(entry: ZipEntry, path: Path) { + try { + val attrs = Files.getFileAttributeView(path, BasicFileAttributeView::class.java).readAttributes() + entry.setCreationTime(attrs.creationTime()) + entry.setLastModifiedTime(attrs.lastModifiedTime()) + entry.setLastAccessTime(attrs.lastAccessTime()) + } catch (_: IOException) { + } +} + +private fun applyZipAttributes(entry: ZipEntry, path: Path) { + try { + val attrs = Files.getFileAttributeView(path, BasicFileAttributeView::class.java) + attrs.setTimes(entry.lastModifiedTime, entry.lastAccessTime, entry.creationTime) + } catch (_: IOException) { + } +} \ No newline at end of file diff --git a/src/main/kotlin/backup/main.kt b/src/main/kotlin/backup/main.kt new file mode 100644 index 0000000..f022828 --- /dev/null +++ b/src/main/kotlin/backup/main.kt @@ -0,0 +1,13 @@ +package backup + +import aws.sdk.kotlin.services.s3.S3Client +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.runBlocking +import java.io.File +import kotlin.io.path.Path + +fun main() = runBlocking { + S3Client.fromEnvironment().use { s3 -> + val backupClient = BackupClient(s3, "teamcity-executors-test-task", 1024 * 1024 * 10) + } +} \ No newline at end of file diff --git a/src/main/kotlin/ziputils/CentralDirectoryFileHeader.kt b/src/main/kotlin/ziputils/CentralDirectoryFileHeader.kt new file mode 100644 index 0000000..6cee6c0 --- /dev/null +++ b/src/main/kotlin/ziputils/CentralDirectoryFileHeader.kt @@ -0,0 +1,93 @@ +package ziputils + +import java.nio.ByteBuffer +import java.nio.ByteOrder + +internal class CentralDirectoryFileHeader( + val compressedSize: UInt, + val uncompressedSize: UInt, + val nameLength: UShort, + val extraFieldLength: UShort, + val commentLength: UShort, + val disk: UShort, + 
val localHeaderOffset: UInt, + val fileName: String, + val extraFieldRecords: List +) { + val size: Int + get() = SIZE + nameLength.toInt() + extraFieldLength.toInt() + commentLength.toInt() + + companion object { + const val SIGNATURE = 0x02014b50U + const val SIZE = 46 + + /** + * Create CentralDirectoryFileHeader from raw byte data. + * @throws InvalidDataException provided ByteArray is not a supported CEN. + */ + @Throws(InvalidDataException::class) + fun fromByteArray(data: ByteArray, offset: Int): CentralDirectoryFileHeader { + if (data.size - offset < SIZE) { + throw InvalidDataException("CEN must be at least 46 bytes") + } + val buf = ByteBuffer.wrap(data, offset, 46).order(ByteOrder.LITTLE_ENDIAN) + if (buf.getInt().toUInt() != SIGNATURE) { + throw InvalidSignatureException("Invalid signature") + } + + val extraFieldRecords = mutableListOf() + val nameLength = buf.getShort(offset + 28).toUShort() + buf.position(offset + 20) + val cen = CentralDirectoryFileHeader( + compressedSize = buf.getInt().toUInt(), + uncompressedSize = buf.getInt().toUInt(), + nameLength = nameLength + .also { buf.position(offset + 30) }, + extraFieldLength = buf.getShort().toUShort(), + commentLength = buf.getShort().toUShort(), + disk = buf.getShort().toUShort() + .also { buf.position(offset + 42) }, + localHeaderOffset = buf.getInt().toUInt(), + fileName = String(data.sliceArray(offset + SIZE.. 
0) { + val id = extraFieldsBuf.getShort().toUShort() + val size = extraFieldsBuf.getShort().toUShort() + extraFieldRecords.add(when (id) { + Zip64ExtraFieldRecord.ID -> { + Zip64ExtraFieldRecord( + size, + if (cen.uncompressedSize == 0xffffffffU) { + extraFieldsBuf.getLong().toULong() + } else null, + if (cen.compressedSize == 0xffffffffU) { + extraFieldsBuf.getLong().toULong() + } else null, + if (cen.localHeaderOffset == 0xffffffffU) { + extraFieldsBuf.getLong().toULong() + } else null, + if (cen.disk == 0xffffU.toUShort()) { + extraFieldsBuf.getInt().toUInt() + } else null + ) + } + else -> { + extraFieldsBuf.position(extraFieldsBuf.position() + size.toInt()) + ExtraFieldRecord(id, size) + } + }) + } + + return cen + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/ziputils/EndOfCentralDirectoryLocator.kt b/src/main/kotlin/ziputils/EndOfCentralDirectoryLocator.kt new file mode 100644 index 0000000..4a6a075 --- /dev/null +++ b/src/main/kotlin/ziputils/EndOfCentralDirectoryLocator.kt @@ -0,0 +1,25 @@ +package ziputils + +import java.nio.ByteBuffer +import java.nio.ByteOrder + +internal class EndOfCentralDirectoryLocator( + val endOfCentralDirectory64Offset: ULong +) { + companion object { + const val SIGNATURE = 0x07064b50U + const val SIZE = 20 + @Throws(InvalidDataException::class) + fun fromByteArray(data: ByteArray, offset: Int): EndOfCentralDirectoryLocator { + if (data.size - offset < SIZE) { + throw InvalidDataException("EOCD64 locator must be at least 20 bytes") + } + val buf = ByteBuffer.wrap(data, offset, SIZE).order(ByteOrder.LITTLE_ENDIAN) + if (buf.getInt().toUInt() != SIGNATURE) { + throw InvalidSignatureException("Invalid signature") + } + buf.position(offset + 8) + return EndOfCentralDirectoryLocator(buf.getLong().toULong()) + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/ziputils/EndOfCentralDirectoryRecord.kt b/src/main/kotlin/ziputils/EndOfCentralDirectoryRecord.kt new file mode 100644 index 0000000..e30154b 
--- /dev/null +++ b/src/main/kotlin/ziputils/EndOfCentralDirectoryRecord.kt @@ -0,0 +1,38 @@ +package ziputils + +import java.nio.ByteBuffer +import java.nio.ByteOrder + +/** + * Partial End of Central Directory record class. + * Only supports data required by the backup tool. + */ +internal class EndOfCentralDirectoryRecord( + val centralDirectoryOffset: UInt +) { + fun eocd64Required(): Boolean = + centralDirectoryOffset == 0xffffffffU + + companion object { + const val SIGNATURE = 0x06054b50U + const val SIZE = 22 + /** + * Create EndOfCentralDirectoryRecord from raw byte data. + * @throws InvalidDataException provided ByteArray is too short to hold an EOCD; a wrong signature raises InvalidSignatureException instead. + */ + @Throws(InvalidDataException::class) + fun fromByteArray(data: ByteArray, offset: Int): EndOfCentralDirectoryRecord { + if (data.size - offset < SIZE) { + throw InvalidDataException("EOCD must be at least 22 bytes") + } + val buf = ByteBuffer.wrap(data, offset, SIZE).order(ByteOrder.LITTLE_ENDIAN) + if (buf.getInt().toUInt() != SIGNATURE) { + throw InvalidSignatureException("Invalid signature") + } + buf.position(offset + 16) + return EndOfCentralDirectoryRecord( + centralDirectoryOffset = buf.getInt().toUInt() + ) + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/ziputils/EndOfCentralDirectoryRecord64.kt b/src/main/kotlin/ziputils/EndOfCentralDirectoryRecord64.kt new file mode 100644 index 0000000..8312492 --- /dev/null +++ b/src/main/kotlin/ziputils/EndOfCentralDirectoryRecord64.kt @@ -0,0 +1,35 @@ +package ziputils + +import java.nio.ByteBuffer +import java.nio.ByteOrder + +/** + * Partial End of Central Directory record (ZIP64) class. + * Only supports data required by the backup tool. + */ +internal class EndOfCentralDirectoryRecord64( + val centralDirectoryOffset: ULong +) { + companion object { + const val SIGNATURE = 0x06064b50U + const val SIZE = 56 + /** + * Create EndOfCentralDirectoryRecord64 from raw byte data.
+ * @throws InvalidDataException provided ByteArray is too short to hold an EOCD64; a wrong signature raises InvalidSignatureException instead. + */ + @Throws(InvalidDataException::class) + fun fromByteArray(data: ByteArray, offset: Int): EndOfCentralDirectoryRecord64 { + if (data.size - offset < SIZE) { + throw InvalidDataException("EOCD64 must be at least 56 bytes") + } + val buf = ByteBuffer.wrap(data, offset, SIZE).order(ByteOrder.LITTLE_ENDIAN) + if (buf.getInt().toUInt() != SIGNATURE) { + throw InvalidSignatureException("Invalid signature") + } + buf.position(offset + 48) + return EndOfCentralDirectoryRecord64( + centralDirectoryOffset = buf.getLong().toULong() + ) + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/ziputils/Exceptions.kt b/src/main/kotlin/ziputils/Exceptions.kt new file mode 100644 index 0000000..8043654 --- /dev/null +++ b/src/main/kotlin/ziputils/Exceptions.kt @@ -0,0 +1,4 @@ +package ziputils + +class InvalidDataException(message: String): Exception(message) +class InvalidSignatureException(message: String): Exception(message) diff --git a/src/main/kotlin/ziputils/ExtraFieldRecord.kt b/src/main/kotlin/ziputils/ExtraFieldRecord.kt new file mode 100644 index 0000000..4c24b07 --- /dev/null +++ b/src/main/kotlin/ziputils/ExtraFieldRecord.kt @@ -0,0 +1,6 @@ +package ziputils + +internal open class ExtraFieldRecord( + val id: UShort, + val size: UShort +) \ No newline at end of file diff --git a/src/main/kotlin/ziputils/README.md b/src/main/kotlin/ziputils/README.md new file mode 100644 index 0000000..d47a9b7 --- /dev/null +++ b/src/main/kotlin/ziputils/README.md @@ -0,0 +1,6 @@ +# ZipUtils + +These are **internal** utility classes for reading zip file metadata. They only implement what is required for the +backup tool, and no more. + +Specifically, we are looking at the [ZIP v6.3.9 specification](https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT).
\ No newline at end of file diff --git a/src/main/kotlin/ziputils/Zip64ExtraFieldRecord.kt b/src/main/kotlin/ziputils/Zip64ExtraFieldRecord.kt new file mode 100644 index 0000000..f437888 --- /dev/null +++ b/src/main/kotlin/ziputils/Zip64ExtraFieldRecord.kt @@ -0,0 +1,13 @@ +package ziputils + +/** + * Extra field record carrying the ZIP64 header ID (0x0001). + * Every value is nullable: CentralDirectoryFileHeader.fromByteArray passes null for a value unless the + * matching field of the central directory header holds its all-ones sentinel (0xffffffff, or 0xffff for disk). + */ +internal class Zip64ExtraFieldRecord( + size: UShort, + val uncompressedSize: ULong?, + val compressedSize: ULong?, + val localHeaderOffset: ULong?, + val disk: UInt? +): ExtraFieldRecord(ID, size) { + companion object { + const val ID: UShort = 0x0001U + } +} \ No newline at end of file