From 6c948c96309f8ea433ec83bcb7b94157ecb1e552 Mon Sep 17 00:00:00 2001
From: Gleb Koval
Date: Fri, 1 Dec 2023 19:24:26 +0000
Subject: [PATCH] Initial library implementations and tests

---
 .github/workflows/main.yaml              |   9 ++
 .idea/gradle.xml                         |   1 +
 .idea/misc.xml                           |   6 +-
 README.md                                |   2 +
 build.gradle.kts                         |   3 +-
 src/main/kotlin/tinyvm/Commit.kt         |  58 ++++++++++
 src/main/kotlin/tinyvm/Extensions.kt     |   7 ++
 src/main/kotlin/tinyvm/Repository.kt     | 117 ++++++++++++++++++++
 src/main/kotlin/tinyvm/Tree.kt           |  23 ++++
 src/test/kotlin/tinyvm/RepositoryTest.kt | 133 +++++++++++++++++++++++
 10 files changed, 357 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/main.yaml
 create mode 100644 src/main/kotlin/tinyvm/Commit.kt
 create mode 100644 src/main/kotlin/tinyvm/Extensions.kt
 create mode 100644 src/main/kotlin/tinyvm/Repository.kt
 create mode 100644 src/main/kotlin/tinyvm/Tree.kt
 create mode 100644 src/test/kotlin/tinyvm/RepositoryTest.kt

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
new file mode 100644
index 0000000..1c9df4c
--- /dev/null
+++ b/.github/workflows/main.yaml
@@ -0,0 +1,9 @@
+name: Main Workflow
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - main
+jobs:
diff --git a/.idea/gradle.xml b/.idea/gradle.xml
index f9163b4..ce1c62c 100644
--- a/.idea/gradle.xml
+++ b/.idea/gradle.xml
@@ -1,5 +1,6 @@
+
\ No newline at end of file
diff --git a/README.md b/README.md
index 2bf8db3..c3d3a0b 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,8 @@
 This is a small project to implement a subset of git's functionality in Kotlin and was created using the instructions
 below as part of my application to the JetBrains internship project "TeamCity support for Gitea".
 
+The package is named `tinyvm` for 'tiny version manager'.
+
 ## Assumptions
 
 Since this is an internship application project, I have assumed that a minimal usage of external libraries is preferred,
diff --git a/build.gradle.kts b/build.gradle.kts
index 58603c7..5f55b92 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -1,5 +1,6 @@
 plugins {
     kotlin("jvm") version "1.9.21"
+    id("org.jmailen.kotlinter") version "4.1.0"
 }
 
 group = "net.koval"
@@ -18,5 +19,5 @@ tasks.test {
 }
 
 kotlin {
-    jvmToolchain(8)
+    jvmToolchain(17)
 }
\ No newline at end of file
diff --git a/src/main/kotlin/tinyvm/Commit.kt b/src/main/kotlin/tinyvm/Commit.kt
new file mode 100644
index 0000000..9d9172a
--- /dev/null
+++ b/src/main/kotlin/tinyvm/Commit.kt
@@ -0,0 +1,58 @@
+package tinyvm
+
+import java.security.MessageDigest
+import java.time.Instant
+
+/**
+ * Base class for everything stored in a repository: commits, trees and blobs.
+ *
+ * @property type kind of object, written into the header that is hashed together with [data].
+ */
+abstract class Object(
+    val type: String,
+) {
+    /** Serialised content of the object. */
+    abstract val data: String
+
+    /**
+     * Returns the hex-encoded SHA-1 of the type, a space, the length, a NUL character and then [data].
+     *
+     * The length is the size of [data] in UTF-8 bytes (not UTF-16 chars), so that it matches the bytes that
+     * are actually hashed even when the content is not plain ASCII.
+     */
+    fun hash(): String {
+        val body = data.toByteArray(Charsets.UTF_8)
+        val header = "$type ${body.size}\u0000".toByteArray(Charsets.UTF_8)
+        return MessageDigest
+            .getInstance("SHA-1")
+            .digest(header + body)
+            .toHex()
+    }
+}
+
+/**
+ * A snapshot of a [tree] plus the metadata describing who created it, when and why.
+ *
+ * Only the whole seconds of [timestamp] are part of [data], and therefore of the hash.
+ */
+class Commit(
+    val tree: Tree,
+    val author: Author,
+    val message: String,
+    val timestamp: Instant,
+) : Object("commit") {
+    // Use \n\n for end of header in case additional metadata is implemented in the future.
+    override val data: String
+        get() = "tree ${tree.hash()}\nauthor $author\ntimestamp ${timestamp.epochSecond}\n\n$message"
+}
+
+/**
+ * Author of a [Commit], rendered as `name <email>` in the commit data.
+ */
+data class Author(
+    val name: String,
+    val email: String,
+) {
+    override fun toString(): String =
+        "$name <$email>"
+}
diff --git a/src/main/kotlin/tinyvm/Extensions.kt b/src/main/kotlin/tinyvm/Extensions.kt
new file mode 100644
index 0000000..b7cc489
--- /dev/null
+++ b/src/main/kotlin/tinyvm/Extensions.kt
@@ -0,0 +1,7 @@
+package tinyvm
+
+import java.util.HexFormat
+
+/** Formats the bytes as a lowercase hexadecimal string, two digits per byte. */
+fun ByteArray.toHex(): String =
+    HexFormat.of().formatHex(this)
diff --git a/src/main/kotlin/tinyvm/Repository.kt b/src/main/kotlin/tinyvm/Repository.kt
new file mode 100644
index 0000000..df7f8a1
--- /dev/null
+++ b/src/main/kotlin/tinyvm/Repository.kt
@@ -0,0 +1,117 @@
+package tinyvm
+
+/**
+ * Thrown when the object stored under a hash is not of the type that was requested for that hash.
+ */
+class HashCollisionException(hash: String) : Exception("Different object types with identical hash '$hash'")
+
+/**
+ * Orders commits by timestamp (whole seconds), oldest first.
+ *
+ * Commits created within the same second are ordered by hash, so only identical commits compare as equal.
+ * Without the tie-break a sorted set would treat such commits as duplicates and silently drop one of them.
+ * Comparing instead of subtracting also avoids overflow when the difference does not fit into an [Int].
+ */
+class CommitTimeComparator : Comparator<Commit> {
+    override fun compare(o1: Commit, o2: Commit): Int =
+        compareValuesBy(o1, o2, { it.timestamp.epochSecond }, { it.hash() })
+}
+
+/**
+ * In-memory store of commits and of the trees and blobs they reference, all addressed by hash.
+ */
+class Repository {
+    private val commits = sortedSetOf<Commit>(CommitTimeComparator())
+
+    // Store all objects in one map like git does. This would simplify the data persistence implementation (if there was
+    // one) and allows for other objects to be added in the future without modifying the data persistence implementation
+    // at all.
+    private val objects = mutableMapOf<String, Object>()
+
+    /**
+     * (Deep) get or put a commit object into the repository.
+     * This will also get or put all child trees and blobs.
+     *
+     * @return the commit already stored under the same hash, or else a new commit built from the stored tree.
+     * @throws HashCollisionException if a non-commit object is already stored under the commit's hash.
+     */
+    fun commit(commit: Commit): Commit {
+        val hash = commit.hash()
+        findObject<Commit>(hash)?.let { return it }
+        val newCommit = Commit(
+            tree = addTree(commit.tree),
+            author = commit.author,
+            message = commit.message,
+            timestamp = commit.timestamp,
+        )
+        objects[hash] = newCommit
+        commits.add(newCommit)
+        return newCommit
+    }
+
+    /**
+     * Get a commit by its hash.
+     *
+     * @return the commit, or `null` if no object is stored under [hash].
+     * @throws HashCollisionException if the object stored under [hash] is not a commit.
+     */
+    fun getCommit(hash: String): Commit? = findObject<Commit>(hash)
+
+    /**
+     * List all commits, oldest first.
+     */
+    fun listCommits(): List<Commit> = commits.toList()
+
+    /**
+     * Find the oldest commit matching [predicate], or `null` if there is none.
+     */
+    fun findCommit(predicate: (Commit) -> Boolean): Commit? =
+        commits.find(predicate)
+
+    /**
+     * Dump repository objects, keyed by hash.
+     *
+     * The result is a copy, so callers cannot reach the internal map through it.
+     */
+    fun dumpObjects(): Map<String, Object> = objects.toMap()
+
+    /**
+     * (Deep) get or put a tree object into the repository.
+     * This will also get or put all child trees and blobs.
+     *
+     * @return the stored tree, whose children are the instances held by this repository.
+     */
+    private fun addTree(tree: Tree): Tree {
+        val hash = tree.hash()
+        findObject<Tree>(hash)?.let { return it }
+        val newTree = Tree(
+            tree.nodes.mapValues { (_, node) ->
+                when (node) {
+                    is Tree -> addTree(node)
+                    is Blob -> addObject<Blob>(node)
+                }
+            },
+        )
+        objects[hash] = newTree
+        return newTree
+    }
+
+    /**
+     * (Shallow) get or put an object into the repository.
+     *
+     * @throws HashCollisionException if an object of another type is already stored under the same hash.
+     */
+    private inline fun <reified T : Object> addObject(obj: T): T {
+        val hash = obj.hash()
+        return objects.getOrPut(hash) { obj } as? T ?: throw HashCollisionException(hash)
+    }
+
+    /**
+     * Find an object in the repository by its hash.
+     *
+     * @return the object, or `null` if nothing is stored under [hash].
+     * @throws HashCollisionException if the object stored under [hash] is not a [T].
+     */
+    private inline fun <reified T : Object> findObject(hash: String): T? =
+        objects[hash]?.let { it as? T ?: throw HashCollisionException(hash) }
+}
diff --git a/src/main/kotlin/tinyvm/Tree.kt b/src/main/kotlin/tinyvm/Tree.kt
new file mode 100644
index 0000000..587f7de
--- /dev/null
+++ b/src/main/kotlin/tinyvm/Tree.kt
@@ -0,0 +1,23 @@
+package tinyvm
+
+/**
+ * An entry of a [Tree]: either a nested [Tree] or a [Blob].
+ */
+sealed class Node(type: String) : Object(type)
+
+/**
+ * A directory-like object mapping entry names to child [nodes].
+ */
+class Tree(val nodes: Map<String, Node>) : Node("tree") {
+    // For simplicity just use the hex-formatted hash, not the actual value like git does.
+    // Entries are sorted so that the result does not depend on the iteration order of the map.
+    override val data: String
+        get() = nodes.map { (name, node) ->
+            "${node.type} $name\u0000${node.hash()}"
+        }.sorted().joinToString()
+}
+
+/**
+ * A file-like object holding its content as text.
+ */
+class Blob(override val data: String) : Node("blob")
diff --git a/src/test/kotlin/tinyvm/RepositoryTest.kt b/src/test/kotlin/tinyvm/RepositoryTest.kt
new file mode 100644
index 0000000..8cef40b
--- /dev/null
+++ b/src/test/kotlin/tinyvm/RepositoryTest.kt
@@ -0,0 +1,133 @@
+package tinyvm
+
+import java.time.Instant
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertNull
+import kotlin.test.assertSame
+
+internal class RepositoryTest {
+    private val repository = Repository()
+    private val commits = listOf(
+        Commit(
+            tree = Tree(mapOf("test1.txt" to Blob("Hello World!"))),
+            author = Author("Gleb Koval", "gleb@koval.net"),
+            message = "Add test1.txt",
+            timestamp = Instant.ofEpochSecond(0)
+        ), Commit(
+            tree = Tree(
+                mapOf(
+                    "dir1" to Tree(
+                        mapOf(
+                            "test1.txt" to Blob("Hello World!")
+                        )
+                    ),
+                    "dir2" to Tree(
+                        mapOf(
+                            "test2.txt" to Blob("This is a second file")
+                        )
+                    )
+                )
+            ),
+            author = Author("Gleb Koval", "gleb@koval.net"),
+            message = "Move test1.txt and add dir2/test2.txt",
+            timestamp = Instant.ofEpochSecond(50)
+        ), Commit(
+            tree = Tree(
+                mapOf(
+                    "dir1" to Tree(
+                        mapOf(
+                            "test1.txt" to Blob("Hello World!")
+                        )
+                    ),
+                    "dir2" to Tree(
+                        mapOf(
+                            "test2.txt" to Blob("This is a second file")
+                        )
+                    ),
+                    "README.md" to Blob("# This is a test repo!")
+                )
+            ),
+            author = Author("Gleb Koval", "gleb@koval.net"),
+            message = "Add README.md",
+            timestamp = Instant.ofEpochSecond(100)
+        )
+    )
+
+    @Test
+    fun `can commit`() {
+        val committed = repository.commit(commits[0])
+        assertEquals(commits[0].hash(), committed.hash())
+        assertEquals(1, repository.listCommits().size)
+        assertEquals(3, repository.dumpObjects().size)
+    }
+
+    @Test
+    fun `can deduplicate commit`() {
+        repository.commit(commits[0])
+        assertEquals(1, repository.listCommits().size)
+        val committed = repository.commit(commits[0])
+        assertEquals(1, repository.listCommits().size)
+        assertEquals(3, repository.dumpObjects().size)
+        assertEquals(commits[0].hash(), committed.hash())
+    }
+
+    @Test
+    fun `can commit twice and deduplicate objects`() {
+        repository.commit(commits[0])
+        val committed = repository.commit(commits[1])
+        assertEquals(2, repository.listCommits().size)
+        assertEquals(7, repository.dumpObjects().size)
+        assertEquals(commits[1].hash(), committed.hash())
+        assertEquals(commits.take(2).map { it.hash() }, repository.listCommits().map { it.hash() })
+    }
+
+    @Test
+    fun `can commit in-between existing commits`() {
+        repository.commit(commits[0])
+        repository.commit(commits[2])
+        val committed = repository.commit(commits[1])
+        assertEquals(3, repository.listCommits().size)
+        assertEquals(10, repository.dumpObjects().size)
+        assertEquals(commits[1].hash(), committed.hash())
+        assertEquals(commits.map { it.hash() }, repository.listCommits().map { it.hash() })
+    }
+
+    @Test
+    fun `keeps distinct commits with equal timestamps`() {
+        val first = repository.commit(commits[0])
+        val second = repository.commit(
+            Commit(
+                tree = commits[0].tree,
+                author = commits[0].author,
+                message = "Same second, different message",
+                timestamp = commits[0].timestamp
+            )
+        )
+        assertEquals(setOf(first.hash(), second.hash()), repository.listCommits().map { it.hash() }.toSet())
+    }
+
+    @Test
+    fun `returns the stored tree`() {
+        val committed = repository.commit(commits[0])
+        assertSame(repository.dumpObjects()[commits[0].tree.hash()], committed.tree)
+    }
+
+    @Test
+    fun `can get commit by hash`() {
+        repository.commit(commits[0])
+        val committed = repository.commit(commits[1])
+        repository.commit(commits[2])
+        assertEquals(committed, repository.getCommit(commits[1].hash()))
+        assertEquals(null, repository.getCommit("00000000000000000000"))
+    }
+
+    @Test
+    fun `can find commit by predicate`() {
+        repository.commit(commits[0])
+        val committed = repository.commit(commits[1])
+        repository.commit(commits[2])
+        assertEquals(committed, repository.findCommit { it.message.matches("Move.*".toRegex()) })
+        assertNull(repository.findCommit { it.message == "No such commit" })
+    }
+}