Initial library implementations and tests #1
|
@ -0,0 +1,9 @@
|
||||||
|
name: Main Workflow
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
jobs:
|
|
@ -1,5 +1,6 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project version="4">
|
<project version="4">
|
||||||
|
<component name="GradleMigrationSettings" migrationVersion="1" />
|
||||||
<component name="GradleSettings">
|
<component name="GradleSettings">
|
||||||
<option name="linkedExternalProjectsSettings">
|
<option name="linkedExternalProjectsSettings">
|
||||||
<GradleProjectSettings>
|
<GradleProjectSettings>
|
||||||
|
|
|
@ -4,7 +4,11 @@
|
||||||
<component name="FrameworkDetectionExcludesConfiguration">
|
<component name="FrameworkDetectionExcludesConfiguration">
|
||||||
<file type="web" url="file://$PROJECT_DIR$" />
|
<file type="web" url="file://$PROJECT_DIR$" />
|
||||||
</component>
|
</component>
|
||||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="corretto-19" project-jdk-type="JavaSDK">
|
<component name="PWA">
|
||||||
|
<option name="enabled" value="true" />
|
||||||
|
<option name="wasEnabledAtLeastOnce" value="true" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" languageLevel="JDK_17" project-jdk-name="openjdk-21" project-jdk-type="JavaSDK">
|
||||||
<output url="file://$PROJECT_DIR$/out" />
|
<output url="file://$PROJECT_DIR$/out" />
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
|
@ -3,6 +3,8 @@
|
||||||
This is a small project to implement a subset of git's functionality in Kotlin and was created using the instructions
|
This is a small project to implement a subset of git's functionality in Kotlin and was created using the instructions
|
||||||
below as part of my application to the JetBrains internship project "TeamCity support for Gitea".
|
below as part of my application to the JetBrains internship project "TeamCity support for Gitea".
|
||||||
|
|
||||||
|
The package is named `tinyvm` for 'tiny version manager'.
|
||||||
|
|
||||||
## Assumptions
|
## Assumptions
|
||||||
|
|
||||||
Since this is an internship application project, I have assumed that a minimal usage of external libraries is preferred,
|
Since this is an internship application project, I have assumed that a minimal usage of external libraries is preferred,
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
plugins {
|
plugins {
|
||||||
kotlin("jvm") version "1.9.21"
|
kotlin("jvm") version "1.9.21"
|
||||||
|
id("org.jmailen.kotlinter") version "4.1.0"
|
||||||
}
|
}
|
||||||
|
|
||||||
group = "net.koval"
|
group = "net.koval"
|
||||||
|
@ -18,5 +19,5 @@ tasks.test {
|
||||||
}
|
}
|
||||||
|
|
||||||
kotlin {
|
kotlin {
|
||||||
jvmToolchain(8)
|
jvmToolchain(17)
|
||||||
}
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
package tinyvm
|
||||||
|
|
||||||
|
import java.security.MessageDigest
|
||||||
|
import java.time.Instant
|
||||||
|
|
||||||
|
/**
 * Base type for everything that is stored in the repository and addressed by its hash.
 *
 * @property type short type tag that is hashed together with [data].
 */
abstract class Object(
    val type: String,
) {
    /** Serialised content of this object; it forms the payload that [hash] digests. */
    abstract val data: String

    /**
     * Hex-encoded SHA-1 of `"<type> <size>\u0000<data>"`.
     *
     * `<size>` is the number of bytes in the encoded payload (as git does), not the number of
     * UTF-16 characters in the string, so the header stays correct for non-ASCII content.
     * For pure ASCII data both counts are equal.
     */
    fun hash(): String {
        // Read `data` once: subclasses may compute it on every access.
        val payload = data.toByteArray()
        val header = "$type ${payload.size}\u0000".toByteArray()
        return MessageDigest
            .getInstance("SHA-1")
            .digest(header + payload)
            .toHex()
    }
}
|
||||||
|
|
||||||
|
/**
 * A [Tree] snapshot together with its author, a message and the time it was made.
 */
class Commit(
    val tree: Tree,
    val author: Author,
    val message: String,
    val timestamp: Instant,
) : Object("commit") {
    /**
     * Header lines (`tree`, `author`, `timestamp`) followed by an empty line and the message.
     *
     * The empty line (`\n\n`) marks the end of the header so that additional metadata lines can
     * be introduced in the future. Only the whole epoch seconds of [timestamp] are included.
     */
    override val data: String
        get() {
            val treeHash = tree.hash()
            val seconds = timestamp.epochSecond
            return "tree $treeHash\nauthor $author\ntimestamp $seconds\n\n$message"
        }
}
|
||||||
|
|
||||||
|
/**
 * Identity of the person who made a commit.
 *
 * @property name display name of the author.
 * @property email e-mail address of the author.
 */
data class Author(
    val name: String,
    val email: String,
) {
    /** Renders the author as `name <email>`. */
    override fun toString(): String {
        return "$name <$email>"
    }
}
|
|
@ -0,0 +1,6 @@
|
||||||
|
package tinyvm
|
||||||
|
|
||||||
|
import java.util.HexFormat
|
||||||
|
|
||||||
|
/**
 * Formats this byte array as a hexadecimal string using the default [HexFormat]
 * (two lowercase digits per byte, no separators).
 */
fun ByteArray.toHex(): String {
    val formatter = HexFormat.of()
    return formatter.formatHex(this)
}
|
|
@ -0,0 +1,90 @@
|
||||||
|
package tinyvm
|
||||||
|
|
||||||
|
/**
 * Signals that the object stored under [hash] is of a different type than the one requested.
 *
 * @param hash the hash under which the conflicting object was found.
 */
class HashCollisionException(hash: String) :
    Exception("Different object types with identical hash '$hash'")
|
||||||
|
|
||||||
|
/**
 * Orders commits chronologically by the whole epoch seconds of their timestamp, oldest first.
 *
 * Commits made within the same second are further ordered by [Commit.hash], so the comparator
 * only returns 0 for commits with identical hashes. This matters for sorted collections, which
 * treat elements that compare as 0 as duplicates and would otherwise drop distinct commits that
 * share a timestamp.
 */
class CommitTimeComparator : Comparator<Commit> {
    override fun compare(o1: Commit, o2: Commit): Int {
        // compareTo instead of subtraction: narrowing a Long difference to Int can wrap around
        // and report the wrong sign for timestamps that are far apart.
        val byTime = o1.timestamp.epochSecond.compareTo(o2.timestamp.epochSecond)
        if (byTime != 0) return byTime
        // Tie-break on the content hash; it is only computed when the timestamps are equal.
        return o1.hash().compareTo(o2.hash())
    }
}
|
||||||
|
|
||||||
|
/**
 * In-memory store for commits and the trees and blobs they reference, addressed by object hash.
 */
class Repository {
    // Commits kept in the order defined by CommitTimeComparator. The comparator also decides
    // which commits the set considers duplicates.
    private val commits = sortedSetOf(CommitTimeComparator())

    // Store all objects in one map like git does. This would simplify the data persistence implementation (if there was
    // one) and allows for other objects to be added in the future without modifying the data persistence implementation
    // at all.
    private val objects = mutableMapOf<String, Object>()

    /**
     * (Deep) get or put a commit object into the repository.
     * This will also get or put all child trees and blobs.
     *
     * @param commit the commit to store.
     * @return the commit already stored under the same hash, or otherwise a new commit whose tree
     * is made of the instances held by this repository.
     * @throws HashCollisionException if an object of another type is stored under one of the hashes involved.
     */
    fun commit(commit: Commit): Commit {
        val hash = commit.hash()
        val existing = findObject<Commit>(hash)
        if (existing != null) return existing

        val newCommit = Commit(
            tree = addTree(commit.tree),
            author = commit.author,
            message = commit.message,
            timestamp = commit.timestamp,
        )
        objects[hash] = newCommit
        commits.add(newCommit)
        return newCommit
    }

    /**
     * Get a commit by its hash.
     *
     * @return the stored commit, or `null` if nothing is stored under [hash].
     * @throws HashCollisionException if the object stored under [hash] is not a commit.
     */
    fun getCommit(hash: String): Commit? = findObject<Commit>(hash)

    /**
     * List all commits as a new list, in the order of the underlying sorted set.
     */
    fun listCommits(): List<Commit> = commits.toList()

    /**
     * Find the first commit (in the order of the underlying sorted set) matching [predicate],
     * or `null` if none matches.
     */
    fun findCommit(predicate: (Commit) -> Boolean): Commit? =
        commits.find(predicate)

    /**
     * Dump repository objects, keyed by hash.
     *
     * The returned map is the repository's backing map itself, not a copy.
     */
    fun dumpObjects(): Map<String, Object> = objects

    /**
     * (Deep) get or put a tree object into the repository.
     * This will also get or put all child trees and blobs.
     *
     * @return the tree instance held by the repository for the hash of [tree].
     * @throws HashCollisionException if an object of another type is stored under one of the hashes involved.
     */
    private fun addTree(tree: Tree): Tree {
        val hash = tree.hash()
        val existing = findObject<Tree>(hash)
        if (existing != null) return existing

        // Rebuild the tree from the repository's own instances of its children.
        val storedNodes: Map<String, Node> = tree.nodes.mapValues { (_, node) ->
            when (node) {
                is Tree -> addTree(node)
                is Blob -> addObject(node)
            }
        }
        val newTree = Tree(storedNodes)
        objects[hash] = newTree
        // Return the stored instance (not the argument) so that callers reference the same
        // objects that are kept in the map.
        return newTree
    }

    /**
     * (Shallow) get or put an object into the repository.
     *
     * @return the object stored under the hash of [obj]; this is [obj] itself if nothing was stored before.
     * @throws HashCollisionException if the object stored under that hash is not a [T].
     */
    private inline fun <reified T : Object> addObject(obj: T): T {
        val hash = obj.hash()
        return objects.getOrPut(hash) { obj } as? T ?: throw HashCollisionException(hash)
    }

    /**
     * Find an object in the repository by its hash.
     *
     * @return the stored object, or `null` if nothing is stored under [hash].
     * @throws HashCollisionException if the object stored under [hash] is not a [T].
     */
    private inline fun <reified T : Object> findObject(hash: String): T? =
        objects[hash]?.let { it as? T ?: throw HashCollisionException(hash) }
}
|
|
@ -0,0 +1,13 @@
|
||||||
|
package tinyvm
|
||||||
|
|
||||||
|
/**
 * An [Object] that can appear as an entry of a [Tree].
 *
 * @param type type tag passed on to [Object].
 */
sealed class Node(
    type: String,
) : Object(type)
|
||||||
|
|
||||||
|
/**
 * A directory-like node mapping entry names to child [Node]s.
 *
 * @property nodes the children of this tree, keyed by name.
 */
class Tree(
    val nodes: Map<String, Node>,
) : Node("tree") {
    /**
     * One `"<type> <name>\u0000<hash>"` entry per child, sorted and joined with the default
     * `joinToString` separator.
     *
     * For simplicity the hex-formatted hash is used, not the actual value like git does.
     */
    override val data: String
        get() {
            val entries = nodes.entries.map { entry ->
                val child = entry.value
                "${child.type} ${entry.key}\u0000${child.hash()}"
            }
            return entries.sorted().joinToString()
        }
}
|
||||||
|
|
||||||
|
/**
 * A leaf node holding content as a string.
 *
 * @property data the content of the blob.
 */
class Blob(
    override val data: String,
) : Node("blob")
|
|
@ -0,0 +1,111 @@
|
||||||
|
package tinyvm
|
||||||
|
|
||||||
|
import java.time.Instant
|
||||||
|
import kotlin.test.Test
|
||||||
|
import kotlin.test.assertEquals
|
||||||
|
|
||||||
|
/**
 * Tests for [Repository], driven by three fixture commits made at epoch seconds 0, 50 and 100.
 */
internal class RepositoryTest {
    private val repository = Repository()

    // The "dir1" tree of the second and third commit has the same content as the root tree of
    // the first commit, which is what the object-count assertions below rely on.
    private val commits = listOf(
        Commit(
            tree = Tree(mapOf("test1.txt" to Blob("Hello World!"))),
            author = Author("Gleb Koval", "gleb@koval.net"),
            message = "Add test1.txt",
            timestamp = Instant.ofEpochSecond(0),
        ),
        Commit(
            tree = Tree(
                mapOf(
                    "dir1" to Tree(
                        mapOf(
                            "test1.txt" to Blob("Hello World!"),
                        ),
                    ),
                    "dir2" to Tree(
                        mapOf(
                            "test2.txt" to Blob("This is a second file"),
                        ),
                    ),
                ),
            ),
            author = Author("Gleb Koval", "gleb@koval.net"),
            message = "Move test1.txt and add dir2/test2.txt",
            timestamp = Instant.ofEpochSecond(50),
        ),
        Commit(
            tree = Tree(
                mapOf(
                    "dir1" to Tree(
                        mapOf(
                            "test1.txt" to Blob("Hello World!"),
                        ),
                    ),
                    "dir2" to Tree(
                        mapOf(
                            "test2.txt" to Blob("This is a second file"),
                        ),
                    ),
                    "README.md" to Blob("# This is a test repo!"),
                ),
            ),
            author = Author("Gleb Koval", "gleb@koval.net"),
            message = "Add README.md",
            timestamp = Instant.ofEpochSecond(100),
        ),
    )

    @Test
    fun `can commit`() {
        val committed = repository.commit(commits[0])
        assertEquals(commits[0].hash(), committed.hash())
        assertEquals(1, repository.listCommits().size)
        // commit + root tree + blob
        assertEquals(3, repository.dumpObjects().size)
    }

    @Test
    fun `can deduplicate commit`() {
        repository.commit(commits[0])
        assertEquals(1, repository.listCommits().size)
        val committed = repository.commit(commits[0])
        assertEquals(1, repository.listCommits().size)
        assertEquals(3, repository.dumpObjects().size)
        assertEquals(commits[0].hash(), committed.hash())
    }

    @Test
    fun `can commit twice and deduplicate objects`() {
        repository.commit(commits[0])
        val committed = repository.commit(commits[1])
        assertEquals(2, repository.listCommits().size)
        assertEquals(7, repository.dumpObjects().size)
        assertEquals(commits[1].hash(), committed.hash())
        assertEquals(commits.take(2).map { it.hash() }, repository.listCommits().map { it.hash() })
    }

    @Test
    fun `can commit in-between existing commits`() {
        repository.commit(commits[0])
        repository.commit(commits[2])
        val committed = repository.commit(commits[1])
        assertEquals(3, repository.listCommits().size)
        assertEquals(10, repository.dumpObjects().size)
        assertEquals(commits[1].hash(), committed.hash())
        assertEquals(commits.map { it.hash() }, repository.listCommits().map { it.hash() })
    }

    @Test
    fun `can get commit by hash`() {
        repository.commit(commits[0])
        val committed = repository.commit(commits[1])
        repository.commit(commits[2])
        assertEquals(committed, repository.getCommit(commits[1].hash()))
        assertEquals(null, repository.getCommit("00000000000000000000"))
    }

    @Test
    fun `can find commit by predicate`() {
        repository.commit(commits[0])
        val committed = repository.commit(commits[1])
        repository.commit(commits[2])
        assertEquals(committed, repository.findCommit { it.message.matches("Move.*".toRegex()) })
        // Negative case goes through findCommit as well, so that this test covers the
        // predicate lookup rather than the hash lookup.
        assertEquals(null, repository.findCommit { it.message == "No such commit" })
    }
}
|
Reference in New Issue
Note: recursion is acceptable here, since the traversal state has to be tracked either way (i.e. without recursion there would simply be a large explicit queue instead of a call stack).