Initial library implementations and tests #1

Merged
cyclane merged 3 commits from initial-setup into main 2023-12-01 20:42:08 +00:00
10 changed files with 272 additions and 2 deletions
Showing only changes of commit 6c948c9630 - Show all commits

9
.github/workflows/main.yaml vendored Normal file
View File

@ -0,0 +1,9 @@
name: Main Workflow
on:
pull_request:
branches:
- main
push:
branches:
- main
jobs:

View File

@ -1,5 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="GradleMigrationSettings" migrationVersion="1" />
<component name="GradleSettings"> <component name="GradleSettings">
<option name="linkedExternalProjectsSettings"> <option name="linkedExternalProjectsSettings">
<GradleProjectSettings> <GradleProjectSettings>

View File

@ -4,7 +4,11 @@
<component name="FrameworkDetectionExcludesConfiguration"> <component name="FrameworkDetectionExcludesConfiguration">
<file type="web" url="file://$PROJECT_DIR$" /> <file type="web" url="file://$PROJECT_DIR$" />
</component> </component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="corretto-19" project-jdk-type="JavaSDK"> <component name="PWA">
<option name="enabled" value="true" />
<option name="wasEnabledAtLeastOnce" value="true" />
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_17" project-jdk-name="openjdk-21" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" /> <output url="file://$PROJECT_DIR$/out" />
</component> </component>
</project> </project>

View File

@ -3,6 +3,8 @@
This is a small project to implement a subset of git's functionality in Kotlin and was created using the instructions
below as part of my application to the JetBrains internship project "TeamCity support for Gitea".

The package is named `tinyvm` for 'tiny version manager'.

## Assumptions

Since this is an internship application project, I have assumed that a minimal usage of external libraries is preferred,

View File

@ -1,5 +1,6 @@
plugins { plugins {
kotlin("jvm") version "1.9.21" kotlin("jvm") version "1.9.21"
id("org.jmailen.kotlinter") version "4.1.0"
} }
group = "net.koval" group = "net.koval"
@ -18,5 +19,5 @@ tasks.test {
} }
kotlin { kotlin {
jvmToolchain(8) jvmToolchain(17)
} }

View File

@ -0,0 +1,33 @@
package tinyvm
import java.security.MessageDigest
import java.time.Instant
/**
 * Base class for every content-addressed object stored in a repository.
 *
 * @property type short type tag that is mixed into the hash (subclasses pass e.g. "commit").
 */
abstract class Object(
    val type: String,
) {
    /** Serialised content of the object; this is what gets hashed together with [type]. */
    abstract val data: String

    /**
     * Computes the hex-encoded SHA-1 of the header `"<type> <length>\u0000"` followed by [data].
     *
     * The length in the header is the character count of [data] (`String.length`), and the
     * whole payload is converted to bytes with the default `toByteArray()` charset.
     */
    fun hash(): String {
        val payload = "$type ${data.length}\u0000$data"
        val sha1 = MessageDigest.getInstance("SHA-1")
        return sha1.digest(payload.toByteArray()).toHex()
    }
}
/**
 * A commit: a root [tree] snapshot plus who created it, why, and when.
 *
 * @property tree root tree of the snapshot.
 * @property author person who created the commit.
 * @property message free-form commit message.
 * @property timestamp creation time; only its epoch-second value is part of [data].
 */
class Commit(
    val tree: Tree,
    val author: Author,
    val message: String,
    val timestamp: Instant
) : Object("commit") {
    /**
     * Header lines (`tree`, `author`, `timestamp`) followed by an empty line and the message.
     *
     * The blank line (\n\n) marks the end of the header in case additional metadata is
     * implemented in the future.
     */
    override val data: String
        get() = buildString {
            append("tree ").append(tree.hash()).append('\n')
            append("author ").append(author).append('\n')
            append("timestamp ").append(timestamp.epochSecond).append('\n')
            append('\n')
            append(message)
        }
}
/**
 * Identity of the person who created a commit.
 *
 * @property name display name.
 * @property email e-mail address.
 */
data class Author(
    val name: String,
    val email: String
) {
    /** Renders the author as `name <email>`. */
    override fun toString(): String = buildString {
        append(name)
        append(" <")
        append(email)
        append('>')
    }
}

View File

@ -0,0 +1,6 @@
package tinyvm
import java.util.HexFormat
/**
 * Formats this byte array as a hexadecimal string using the default [HexFormat]
 * (two lowercase digits per byte, no delimiter).
 */
fun ByteArray.toHex(): String {
    val hexFormat = HexFormat.of()
    return hexFormat.formatHex(this)
}

View File

@ -0,0 +1,90 @@
package tinyvm
/**
 * Thrown when an object is found under a hash but is not of the type the caller expected.
 *
 * @param hash the hash under which the mismatching object was found.
 */
class HashCollisionException(
    hash: String,
) : Exception("Different object types with identical hash '$hash'")
/**
 * Orders commits chronologically by [Commit.timestamp], oldest first.
 *
 * Commits with the same timestamp are ordered by their hash, so two distinct commits never
 * compare as equal. This matters when the comparator backs a sorted set: an element that
 * compares as 0 against an existing one is treated as a duplicate and is not added.
 */
class CommitTimeComparator : Comparator<Commit> {
    override fun compare(o1: Commit, o2: Commit): Int {
        // compareTo instead of subtracting epoch seconds: narrowing a Long difference to Int
        // can truncate and yield the wrong sign for timestamps that are far apart.
        val byTime = o1.timestamp.compareTo(o2.timestamp)
        return if (byTime != 0) byTime else o1.hash().compareTo(o2.hash())
    }
}
/**
 * In-memory repository of commits, trees and blobs, addressed by their hash.
 */
class Repository {
    // Commits kept in the order defined by CommitTimeComparator.
    private val commits = sortedSetOf(CommitTimeComparator())

    // Store all objects in one map like git does. This would simplify the data persistence implementation (if there was
    // one) and allows for other objects to be added in the future without modifying the data persistence implementation
    // at all.
    private val objects = mutableMapOf<String, Object>()

    /**
     * (Deep) get or put a commit object into the repository.
     * This will also get or put all child trees and blobs.
     *
     * @return the commit already stored under the same hash, or the newly stored commit.
     * @throws HashCollisionException if an object of a different type is stored under one of the hashes involved.
     */
    fun commit(commit: Commit): Commit {
        val hash = commit.hash()
        findObject<Commit>(hash)?.let { return it }

        val newCommit = Commit(
            tree = addTree(commit.tree),
            author = commit.author,
            message = commit.message,
            timestamp = commit.timestamp
        )
        objects[hash] = newCommit
        commits.add(newCommit)
        return newCommit
    }

    /**
     * Get a commit by its hash.
     *
     * @return the commit, or `null` if nothing is stored under [hash].
     * @throws HashCollisionException if the object stored under [hash] is not a commit.
     */
    fun getCommit(hash: String): Commit? = findObject<Commit>(hash)

    /**
     * List all commits, in the order of the internal sorted set.
     */
    fun listCommits(): List<Commit> = commits.toList()

    /**
     * Find the first commit (in the order of the internal sorted set) matching [predicate],
     * or `null` if none matches.
     */
    fun findCommit(predicate: (Commit) -> Boolean): Commit? =
        commits.find(predicate)

    /**
     * Dump repository objects, keyed by hash.
     *
     * The returned map is the repository's own backing map, not a copy, so it reflects later commits.
     */
    fun dumpObjects(): Map<String, Object> = objects

    /**
     * (Deep) get or put a tree object into the repository.
     * This will also get or put all child trees and blobs.
     *
     * Recursion is acceptable here: the traversal state has to be tracked either way (without
     * recursion there would just be a big queue instead of a call stack).
     *
     * @return the tree stored in the repository (existing or newly added), never the caller's instance
     *   unless that instance is the stored one.
     */
    private fun addTree(tree: Tree): Tree {
        val hash = tree.hash()
        findObject<Tree>(hash)?.let { return it }

        val newTree = Tree(
            tree.nodes.mapValues { (_, node) ->
                when (node) {
                    is Tree -> addTree(node)
                    is Blob -> addObject(node)
                }
            }
        )
        objects[hash] = newTree
        // Return the stored tree (built from the stored children), not the input tree.
        return newTree
    }

    /**
     * (Shallow) get or put an object into the repository.
     *
     * @throws HashCollisionException if an object of a different type is already stored under the same hash.
     */
    private inline fun <reified T : Object> addObject(obj: T): T {
        val hash = obj.hash()
        return objects.getOrPut(hash) { obj } as? T ?: throw HashCollisionException(hash)
    }

    /**
     * Find an object in the repository by its hash.
     *
     * @return the object, or `null` if nothing is stored under [hash].
     * @throws HashCollisionException if the stored object is not a [T].
     */
    private inline fun <reified T : Object> findObject(hash: String): T? =
        objects[hash]?.let { it as? T ?: throw HashCollisionException(hash) }
}

View File

@ -0,0 +1,13 @@
package tinyvm
/**
 * An [Object] that can appear as an entry of a tree.
 *
 * @param type type tag forwarded to [Object].
 */
sealed class Node(
    type: String,
) : Object(type)
/**
 * A directory-like node mapping entry names to child nodes.
 *
 * @property nodes child nodes keyed by entry name.
 */
class Tree(val nodes: Map<String, Node>) : Node("tree") {
    /**
     * One `"<type> <name>\u0000<hash>"` entry per child, sorted and joined with the default
     * `joinToString()` separator.
     *
     * For simplicity just use the hex-formatted hash, not the actual value like git does.
     */
    override val data: String
        get() {
            val entries = mutableListOf<String>()
            for ((name, node) in nodes) {
                entries += "${node.type} $name\u0000${node.hash()}"
            }
            entries.sort()
            return entries.joinToString()
        }
}
/**
 * A leaf node holding content as a string.
 *
 * @property data the content; used as-is as the object's hashed data.
 */
class Blob(
    override val data: String,
) : Node("blob")

View File

@ -0,0 +1,111 @@
package tinyvm
import java.time.Instant
import kotlin.test.Test
import kotlin.test.assertEquals
/**
 * Tests for [Repository]: committing, de-duplication of commits and objects, ordering of
 * commits by timestamp, and lookup by hash or predicate.
 */
internal class RepositoryTest {
    private val repository = Repository()

    // Three commits with increasing timestamps (0, 50, 100). The second and third share the
    // dir1/dir2 sub-trees, and every "Hello World!" blob is identical across all three.
    private val commits = listOf(
        Commit(
            tree = Tree(mapOf("test1.txt" to Blob("Hello World!"))),
            author = Author("Gleb Koval", "gleb@koval.net"),
            message = "Add test1.txt",
            timestamp = Instant.ofEpochSecond(0)
        ),
        Commit(
            tree = Tree(
                mapOf(
                    "dir1" to Tree(
                        mapOf(
                            "test1.txt" to Blob("Hello World!")
                        )
                    ),
                    "dir2" to Tree(
                        mapOf(
                            "test2.txt" to Blob("This is a second file")
                        )
                    )
                )
            ),
            author = Author("Gleb Koval", "gleb@koval.net"),
            message = "Move test1.txt and add dir2/test2.txt",
            timestamp = Instant.ofEpochSecond(50)
        ),
        Commit(
            tree = Tree(
                mapOf(
                    "dir1" to Tree(
                        mapOf(
                            "test1.txt" to Blob("Hello World!")
                        )
                    ),
                    "dir2" to Tree(
                        mapOf(
                            "test2.txt" to Blob("This is a second file")
                        )
                    ),
                    "README.md" to Blob("# This is a test repo!")
                )
            ),
            author = Author("Gleb Koval", "gleb@koval.net"),
            message = "Add README.md",
            timestamp = Instant.ofEpochSecond(100)
        )
    )

    @Test
    fun `can commit`() {
        val committed = repository.commit(commits[0])
        assertEquals(commits[0].hash(), committed.hash())
        assertEquals(1, repository.listCommits().size)
        // commit + root tree + blob
        assertEquals(3, repository.dumpObjects().size)
    }

    @Test
    fun `can deduplicate commit`() {
        repository.commit(commits[0])
        assertEquals(1, repository.listCommits().size)
        val committed = repository.commit(commits[0])
        assertEquals(1, repository.listCommits().size)
        assertEquals(3, repository.dumpObjects().size)
        assertEquals(commits[0].hash(), committed.hash())
    }

    @Test
    fun `can commit twice and deduplicate objects`() {
        repository.commit(commits[0])
        val committed = repository.commit(commits[1])
        assertEquals(2, repository.listCommits().size)
        assertEquals(7, repository.dumpObjects().size)
        assertEquals(commits[1].hash(), committed.hash())
        assertEquals(commits.take(2).map { it.hash() }, repository.listCommits().map { it.hash() })
    }

    @Test
    fun `can commit in-between existing commits`() {
        repository.commit(commits[0])
        repository.commit(commits[2])
        val committed = repository.commit(commits[1])
        assertEquals(3, repository.listCommits().size)
        assertEquals(10, repository.dumpObjects().size)
        assertEquals(commits[1].hash(), committed.hash())
        assertEquals(commits.map { it.hash() }, repository.listCommits().map { it.hash() })
    }

    @Test
    fun `can get commit by hash`() {
        repository.commit(commits[0])
        val committed = repository.commit(commits[1])
        repository.commit(commits[2])
        assertEquals(committed, repository.getCommit(commits[1].hash()))
        assertEquals(null, repository.getCommit("00000000000000000000"))
    }

    @Test
    fun `can find commit by predicate`() {
        repository.commit(commits[0])
        val committed = repository.commit(commits[1])
        repository.commit(commits[2])
        assertEquals(committed, repository.findCommit { it.message.matches("Move.*".toRegex()) })
        // Exercise findCommit (not getCommit) for the no-match case.
        assertEquals(null, repository.findCommit { it.message == "No such commit" })
    }
}