Пример #1
0
def test_generate_removed_file(fake_hg_repo, tmpdir):
    """A file removed in Mercurial must show up as a deletion in the git mirror."""
    hg, local = fake_hg_repo
    mirror_path = os.path.join(tmpdir.strpath, "repo")

    cpp_source = """#include <iostream>

/* main */
int main() {
    return 0;
}"""

    # Revision 1 adds the file, revision 2 removes it again.
    add_file(hg, local, "file.cpp", cpp_source)
    revision1 = commit(hg)

    remove_file(hg, local, "file.cpp")
    revision2 = commit(hg)

    assert not os.path.exists(os.path.join(local, "file.cpp"))

    generator.generate(
        local,
        mirror_path,
        rev_start=0,
        rev_end="tip",
        limit=None,
        tokenize=True,
        remove_comments=True,
    )

    # Walk the mirror history oldest-first so indices follow commit order.
    repo = pygit2.Repository(mirror_path)
    oldest_first = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
    history = list(repo.walk(repo.head.target, oldest_first))

    added_message = f"""Commit A file.cpp

UltraBlame original commit: {revision1}"""
    assert history[0].message == added_message

    removed_message = f"""Commit R file.cpp

UltraBlame original commit: {revision2}"""
    assert history[1].message == removed_message

    assert not os.path.exists(os.path.join(mirror_path, "file.cpp"))

    # The last mirror commit must be recorded by git as a file deletion.
    shown = subprocess.run(
        ["git", "show", "HEAD"], cwd=mirror_path, capture_output=True, check=True
    )
    assert b"diff --git a/file.cpp b/file.cpp\ndeleted file mode 100644" in shown.stdout
Пример #2
0
def test_generate_tokenized_and_comments_removed_unusupported_extension(
        fake_hg_repo, tmpdir):
    """Tokenize a file with an unknown extension while comment removal is on.

    The expected output still contains the (tokenized) comment.
    """
    hg, local = fake_hg_repo
    mirror_path = os.path.join(tmpdir.strpath, "repo")

    source_text = """#include <iostream>

/* main */
int main() {
    return 0;
}"""
    add_file(hg, local, "file.surely_unsupported", source_text)
    revision1 = commit(hg)

    generator.generate(
        local,
        mirror_path,
        rev_start=0,
        rev_end="tip",
        limit=None,
        tokenize=True,
        remove_comments=True,
    )

    # Walk the mirror history oldest-first.
    repo = pygit2.Repository(mirror_path)
    oldest_first = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
    history = list(repo.walk(repo.head.target, oldest_first))

    expected_message = f"""Commit A file.surely_unsupported

UltraBlame original commit: {revision1}"""
    assert history[0].message == expected_message

    mirrored_file = os.path.join(mirror_path, "file.surely_unsupported")
    with open(mirrored_file, "r") as handle:
        tokenized = handle.read()

    assert tokenized == """#
include
<
iostream
>
/
*
main
*
/
int
main
(
)
{
return
0
;
}
"""
Пример #3
0
def test_generate_binary_remove_comments(fake_hg_repo, tmpdir):
    """A binary file must reach the mirror unchanged when only comment removal is on."""
    hg, local = fake_hg_repo
    mirror_path = os.path.join(tmpdir.strpath, "repo")

    elf_header = b"\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00"
    add_file(hg, local, "an_object_file", elf_header)
    revision = commit(hg)

    generator.generate(
        local,
        mirror_path,
        rev_start=0,
        rev_end="tip",
        limit=None,
        tokenize=False,
        remove_comments=True,
    )

    # Walk the mirror history oldest-first.
    repo = pygit2.Repository(mirror_path)
    oldest_first = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
    history = list(repo.walk(repo.head.target, oldest_first))

    expected_message = f"""Commit A an_object_file

UltraBlame original commit: {revision}"""
    assert history[0].message == expected_message

    with open(os.path.join(mirror_path, "an_object_file"), "rb") as handle:
        mirrored_bytes = handle.read()

    assert mirrored_bytes == b"\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00"
Пример #4
0
def main():
    """Parse the command line and run the microannotate generator."""
    parser = argparse.ArgumentParser(
        description="Generate a mirror git repository where content is split by word"
    )
    parser.add_argument("cache-root", help="Cache for repository clones.")

    # The positional name contains a dash, so it cannot be read as a plain
    # attribute; look it up by key instead.
    options = vars(parser.parse_args())

    MicroannotateGenerator(options["cache-root"]).generate()
Пример #5
0
    def generate(self):
        """Build the git mirror step by step, pushing after every step.

        Registers the mirror's version database, submits the mozilla-central
        clone and the git mirror preparation to an executor, then repeatedly
        calls ``generator.generate`` with ``limit=COMMITS_STEP``. After each
        call the mirror is pushed and the database version file is uploaded;
        the loop ends when ``generator.generate`` returns a truthy value.

        When ``db.is_old_schema`` reports an old schema, the mirror is
        initialised with ``init_git_repo`` instead of cloned, and pushes are
        forced.

        Raises:
            tenacity.RetryError: if a git command still fails after five
                attempts.
        """
        db_path = os.path.join("data", self.git_repo_path)
        db.register(
            db_path,
            "https://s3-us-west-2.amazonaws.com/communitytc-bugbug/data/",
            VERSION,
        )

        is_old_version = db.is_old_schema(db_path)

        with ThreadPoolExecutorResult(max_workers=2) as executor:
            cloner = executor.submit(repository.clone, self.repo_dir)
            cloner.add_done_callback(
                lambda future: logger.info("mozilla-central cloned")
            )

            git_user = get_secret("GIT_USER")
            git_password = get_secret("GIT_PASSWORD")

            # NOTE(review): the credentials are embedded in the push URL, so
            # they are part of the command line passed to git.
            repo_push_url = self.repo_url.replace(
                "https://", f"https://{git_user}:{git_password}@"
            )

            if not is_old_version:
                executor.submit(self.clone_git_repo)
            else:
                executor.submit(self.init_git_repo)

        # tenacity.retry has to be configured first and applied to the
        # callable second. When it receives a callable as its only positional
        # argument it behaves like a bare ``@retry`` and drops the keyword
        # arguments, which would retry forever with no delay.
        with_retries = tenacity.retry(
            wait=tenacity.wait_fixed(30),
            stop=tenacity.stop_after_attempt(5),
        )

        with_retries(
            lambda: subprocess.run(
                ["git", "config", "--global", "http.postBuffer", "12M"], check=True
            )
        )()

        push_args = ["git", "push", repo_push_url, "master"]
        if is_old_version:
            push_args.append("--force")

        done = False
        while not done:
            done = generator.generate(
                self.repo_dir,
                self.git_repo_path,
                limit=COMMITS_STEP,
                tokenize=self.tokenize,
                remove_comments=self.remove_comments,
            )

            with_retries(
                lambda: subprocess.run(push_args, cwd=self.git_repo_path, check=True)
            )()

            # We are not using db.upload as we don't need to upload the git repo.
            upload_s3([f"{db_path}.version"])
    def generate(self):
        """Clone mozilla-central, convert up to 10000 commits and push the mirror.

        Runs ``hg robustcheckout`` for mozilla-central, clones and pulls the
        git mirror, runs ``generator.generate`` once with ``limit=10000``,
        writes ``1`` or ``0`` to a file named ``done`` depending on its
        result, and finally pushes the mirror's ``master`` branch.

        Raises:
            hglib.error.CommandError: if the Mercurial checkout exits with a
                non-zero status.
            subprocess.CalledProcessError: if ``git pull`` fails for any
                reason other than the remote having no ``master`` ref yet.
        """
        shared_dir = self.repo_dir + "-shared"
        cmd = hglib.util.cmdbuilder(
            "robustcheckout",
            "https://hg.mozilla.org/mozilla-central",
            self.repo_dir,
            purge=True,
            sharebase=shared_dir,
            networkattempts=7,
            branch=b"tip",
        )

        cmd.insert(0, hglib.HGPATH)

        proc = hglib.util.popen(cmd)
        out, err = proc.communicate()
        if proc.returncode:
            raise hglib.error.CommandError(cmd, proc.returncode, out, err)

        logger.info("mozilla-central cloned")

        git_user = get_secret("GIT_USER")
        git_password = get_secret("GIT_PASSWORD")

        repo_url = "https://github.com/marco-c/gecko-dev-wordified"
        repo_push_url = (
            f"https://{git_user}:{git_password}@github.com/marco-c/gecko-dev-wordified"
        )
        git_repo_path = os.path.basename(repo_url)

        retry(lambda: subprocess.run(["git", "clone", repo_url, git_repo_path],
                                     check=True))

        try:
            retry(lambda: subprocess.run(
                ["git", "pull", repo_url, "master"],
                cwd=git_repo_path,
                capture_output=True,
                check=True,
            ))
        except subprocess.CalledProcessError as e:
            # Only an empty remote (no master ref yet) is an expected failure.
            # Look at both streams and ignore case, since the diagnostic's
            # stream and capitalisation can differ between git versions; any
            # other failure is re-raised instead of being silently dropped.
            output = (e.stdout or b"") + (e.stderr or b"")
            if b"couldn't find remote ref master" not in output.lower():
                raise

        done = generator.generate(self.repo_dir, git_repo_path, limit=10000)

        with open("done", "w") as f:
            f.write(str(1 if done else 0))

        retry(lambda: subprocess.run(
            ["git", "config", "--global", "http.postBuffer", "12M"],
            check=True))
        retry(lambda: subprocess.run(["git", "push", repo_push_url, "master"],
                                     cwd=git_repo_path,
                                     check=True))
Пример #7
0
    def generate(self):
        """Build the git mirror step by step, pushing after every step.

        Registers the mirror's version database, submits the mozilla-central
        clone and the git mirror preparation to an executor, then repeatedly
        calls ``generator.generate`` with ``limit=COMMITS_STEP`` and pushes
        the mirror after each call, until ``generator.generate`` returns a
        truthy value.

        The schema-version check is currently disabled (see the TODO below),
        so the mirror is always cloned and pushes are never forced.

        Raises:
            tenacity.RetryError: if a git command still fails after five
                attempts.
        """
        db_path = os.path.join("data", self.git_repo_path)
        db.register(
            db_path,
            f"https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug.microannotate_{self.git_repo_path}.latest/artifacts/public/",
            VERSION,
        )

        # TODO: Check the version again once we can run tasks for longer (https://bugzilla.mozilla.org/show_bug.cgi?id=1604175).
        is_old_version = False  # db.is_old_schema(db_path)

        with ThreadPoolExecutorResult(max_workers=2) as executor:
            cloner = executor.submit(repository.clone, self.repo_dir)
            cloner.add_done_callback(
                lambda future: logger.info("mozilla-central cloned"))

            git_user = get_secret("GIT_USER")
            git_password = get_secret("GIT_PASSWORD")

            # NOTE(review): the credentials are embedded in the push URL, so
            # they are part of the command line passed to git.
            repo_push_url = self.repo_url.replace(
                "https://", f"https://{git_user}:{git_password}@")

            if not is_old_version:
                executor.submit(self.clone_git_repo)
            else:
                executor.submit(self.init_git_repo)

        # tenacity.retry has to be configured first and applied to the
        # callable second. When it receives a callable as its only positional
        # argument it behaves like a bare ``@retry`` and drops the keyword
        # arguments, which would retry forever with no delay.
        with_retries = tenacity.retry(
            wait=tenacity.wait_fixed(30),
            stop=tenacity.stop_after_attempt(5),
        )

        with_retries(
            lambda: subprocess.run(
                ["git", "config", "--global", "http.postBuffer", "12M"],
                check=True)
        )()

        push_args = ["git", "push", repo_push_url, "master"]
        if is_old_version:
            push_args.append("--force")

        done = False
        while not done:
            done = generator.generate(
                self.repo_dir,
                self.git_repo_path,
                limit=COMMITS_STEP,
                tokenize=self.tokenize,
                remove_comments=self.remove_comments,
            )

            with_retries(
                lambda: subprocess.run(
                    push_args, cwd=self.git_repo_path, check=True)
            )()
Пример #8
0
    def generate(self):
        """Clone both repositories, then convert and push in up to STEPS steps.

        Clones mozilla-central, clones and pulls the git mirror, then runs
        ``generator.generate`` up to ``STEPS`` times with
        ``limit=TOTAL_COMMITS // STEPS``. After each step the result is
        written as ``1`` or ``0`` to a file named ``done`` and the mirror's
        ``master`` branch is pushed; the loop stops early once a step reports
        completion.

        Raises:
            subprocess.CalledProcessError: if ``git pull`` fails for any
                reason other than the remote having no ``master`` ref yet.
        """
        repository.clone(self.repo_dir)

        logger.info("mozilla-central cloned")

        git_user = get_secret("GIT_USER")
        git_password = get_secret("GIT_PASSWORD")

        repo_push_url = self.repo_url.replace(
            "https://", f"https://{git_user}:{git_password}@")
        git_repo_path = os.path.basename(self.repo_url)

        retry(lambda: subprocess.run(
            ["git", "clone", self.repo_url, git_repo_path], check=True))

        try:
            retry(lambda: subprocess.run(
                ["git", "pull", self.repo_url, "master"],
                cwd=git_repo_path,
                capture_output=True,
                check=True,
            ))
        except subprocess.CalledProcessError as e:
            # Only an empty remote (no master ref yet) is an expected failure.
            # Look at both streams and ignore case, since the diagnostic's
            # stream and capitalisation can differ between git versions; any
            # other failure is re-raised instead of being silently dropped.
            output = (e.stdout or b"") + (e.stderr or b"")
            if b"couldn't find remote ref master" not in output.lower():
                raise

        retry(lambda: subprocess.run(
            ["git", "config", "--global", "http.postBuffer", "12M"],
            check=True))

        for i in range(STEPS):
            logger.info(f"Step {i} out of {STEPS}")

            done = generator.generate(
                self.repo_dir,
                git_repo_path,
                limit=TOTAL_COMMITS // STEPS,
                tokenize=self.tokenize,
                remove_comments=self.remove_comments,
            )

            with open("done", "w") as f:
                f.write(str(1 if done else 0))

            retry(lambda: subprocess.run(
                ["git", "push", repo_push_url, "master"],
                cwd=git_repo_path,
                check=True,
            ))

            if done:
                break
Пример #9
0
def test_generate_comments_removed_bad_chars(fake_hg_repo, tmpdir):
    """Comment removal must leave the non-ASCII bytes of a Java file untouched."""
    hg, local = fake_hg_repo
    mirror_path = os.path.join(tmpdir.strpath, "repo")

    java_source = b"""private static String /* comment */ utf8String = "Non-Ascii 1 byte chars: \x8e\x89\x8a\x88\x8c\x8d, 2 byte chars: \\u1234 \\u1235 \\u1236";"""
    add_file(hg, local, "file.java", java_source)
    revision = commit(hg)

    generator.generate(
        local,
        mirror_path,
        rev_start=0,
        rev_end="tip",
        limit=None,
        tokenize=False,
        remove_comments=True,
    )

    # Walk the mirror history oldest-first.
    repo = pygit2.Repository(mirror_path)
    oldest_first = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
    history = list(repo.walk(repo.head.target, oldest_first))

    expected_message = f"""Commit A file.java

UltraBlame original commit: {revision}"""
    assert history[0].message == expected_message

    with open(os.path.join(mirror_path, "file.java"), "rb") as handle:
        mirrored_bytes = handle.read()

    # Only the block comment is gone; the surrounding spaces both remain.
    expected_bytes = b"""private static String  utf8String = "Non-Ascii 1 byte chars: \x8e\x89\x8a\x88\x8c\x8d, 2 byte chars: \\u1234 \\u1235 \\u1236";"""
    assert mirrored_bytes == expected_bytes
Пример #10
0
def main():
    """Parse the command line and run the microannotate generator."""
    parser = argparse.ArgumentParser(
        description="Generate a mirror git repository where content is split by word"
    )

    parser.add_argument("cache-root", help="Cache for repository clones.")
    parser.add_argument("repo-url", help="Mirror repository URL.")
    parser.add_argument(
        "--tokenize", action="store_true", help="Enable word-level tokenization."
    )
    parser.add_argument(
        "--remove-comments", action="store_true", help="Enable comment removal."
    )

    # The positional names contain dashes, so they cannot be read as plain
    # attributes; look everything up by key instead.
    options = vars(parser.parse_args())

    MicroannotateGenerator(
        options["cache-root"],
        options["repo-url"],
        options["tokenize"],
        options["remove_comments"],
    ).generate()
Пример #11
0
    def generate(self):
        """Clone both repositories in parallel, then convert and push in steps.

        Submits the mozilla-central clone and the git mirror clone to a
        two-worker thread pool, then runs ``generator.generate`` up to
        ``STEPS`` times with ``limit=TOTAL_COMMITS // STEPS``. After each
        step the result is written as ``1`` or ``0`` to a file named ``done``
        and the mirror's ``master`` branch is pushed; the loop stops early
        once a step reports completion.

        Raises:
            Exception: whatever either clone task raised, re-raised here
                before any conversion starts.
        """

        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            cloner = executor.submit(repository.clone, self.repo_dir)
            cloner.add_done_callback(
                lambda future: logger.info("mozilla-central cloned"))

            git_user = get_secret("GIT_USER")
            git_password = get_secret("GIT_PASSWORD")

            repo_push_url = self.repo_url.replace(
                "https://", f"https://{git_user}:{git_password}@")
            git_repo_path = os.path.basename(self.repo_url)

            git_cloner = executor.submit(self.clone_git_repo, git_repo_path)

        # Leaving the ``with`` block waits for both tasks but does not
        # surface their exceptions. Ask for the results explicitly so a
        # failed clone aborts here instead of being silently ignored.
        cloner.result()
        git_cloner.result()

        retry(lambda: subprocess.run(
            ["git", "config", "--global", "http.postBuffer", "12M"],
            check=True))

        for i in range(STEPS):
            logger.info(f"Step {i} out of {STEPS}")

            done = generator.generate(
                self.repo_dir,
                git_repo_path,
                limit=TOTAL_COMMITS // STEPS,
                tokenize=self.tokenize,
                remove_comments=self.remove_comments,
            )

            with open("done", "w") as f:
                f.write(str(1 if done else 0))

            retry(lambda: subprocess.run(
                ["git", "push", repo_push_url, "master"],
                cwd=git_repo_path,
                check=True,
            ))

            if done:
                break
Пример #12
0
def test_generate_progressive(fake_hg_repo, tmpdir):
    """Generate the mirror in two runs and check the state after each one.

    The first run stops at the first revision; the second run continues up
    to tip on the same output repository.
    """
    hg, local = fake_hg_repo
    mirror_path = os.path.join(tmpdir.strpath, "repo")
    oldest_first = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE

    # Revision 1: a single C++ file.
    first_cpp = """#include <iostream>

/* main */
int main() {
    return 0;
}"""
    add_file(hg, local, "file.cpp", first_cpp)
    revision1 = commit(hg)

    # Revision 2: the C++ file is modified and a JS module is added.
    second_cpp = """#include <iostream>

/* main */
int main() {
    cout << "Hello, world!";
    return 0;
}"""
    add_file(hg, local, "file.cpp", second_cpp)
    jsm_source = """function ciao(str) {
  // Comment one
  console.log(str);
}"""
    add_file(hg, local, "file.jsm", jsm_source)
    revision2 = commit(hg)

    # First run: only up to revision 1.
    generator.generate(
        local,
        mirror_path,
        rev_start=0,
        rev_end=revision1,
        limit=None,
        tokenize=True,
        remove_comments=False,
    )

    repo = pygit2.Repository(mirror_path)
    history = list(repo.walk(repo.head.target, oldest_first))

    assert len(history) == 1

    first_message = f"""Commit A file.cpp

UltraBlame original commit: {revision1}"""
    assert history[0].message == first_message

    with open(os.path.join(mirror_path, "file.cpp"), "r") as handle:
        tokenized_cpp = handle.read()

    assert tokenized_cpp == """#
include
<
iostream
>
/
*
main
*
/
int
main
(
)
{
return
0
;
}
"""

    assert not os.path.exists(os.path.join(mirror_path, "file.jsm"))

    # Second run: same output repository, now up to tip.
    generator.generate(
        local,
        mirror_path,
        rev_start=0,
        rev_end="tip",
        limit=None,
        tokenize=True,
        remove_comments=False,
    )

    repo = pygit2.Repository(mirror_path)
    history = list(repo.walk(repo.head.target, oldest_first))

    assert len(history) == 2

    first_message = f"""Commit A file.cpp

UltraBlame original commit: {revision1}"""
    assert history[0].message == first_message

    second_message = f"""Commit M file.cpp A file.jsm

UltraBlame original commit: {revision2}"""
    assert history[1].message == second_message

    with open(os.path.join(mirror_path, "file.cpp"), "r") as handle:
        tokenized_cpp = handle.read()

    assert tokenized_cpp == """#
include
<
iostream
>
/
*
main
*
/
int
main
(
)
{
cout
<
<
"
Hello
world
!
"
;
return
0
;
}
"""

    with open(os.path.join(mirror_path, "file.jsm"), "r") as handle:
        tokenized_js = handle.read()

    assert tokenized_js == """function
ciao
(
str
)
{
/
/
Comment
one
console
.
log
(
str
)
;
}
"""
    )
    parser.add_argument(
        "repository_out_dir", help="Path to the output repository", action="store"
    )
    parser.add_argument(
        "--rev-start",
        help="Which revision to start with (0 by default)",
        action="store",
        default="0",
    )
    parser.add_argument(
        "--rev-end",
        help="Which revision to end with (tip by default)",
        action="store",
        default="tip",
    )
    parser.add_argument("--tokenize", action="store_true", default=True)
    parser.add_argument("--remove-comments", action="store_true", default=False)
    args = parser.parse_args()

    repo_out_dir = os.path.realpath(args.repository_out_dir)

    generator.generate(
        args.repository_dir,
        repo_out_dir,
        args.rev_start,
        args.rev_end,
        args.tokenize,
        args.remove_comments,
    )
Пример #14
0
def test_generate_tokenized_operators(fake_hg_repo, tmpdir):
    """Check how C++ operators are split in the tokenized output.

    The expected output puts every operator character on its own line, so
    multi-character operators such as ``&&`` or ``->`` span two lines.
    """
    hg, local = fake_hg_repo
    mirror_path = os.path.join(tmpdir.strpath, "repo")

    cpp_source = """#include <iostream>

/* main */
int main() {
    if (ciao > 0 && ciao.obj <= 7 && ciao.obj->prova < 42 || !bo) {
      int x = ciao ? 1 : 2;
      return 1 + 1 * 41 + 0 / ~3 + 3 % 5 - x ^ 3;
    }
    return 0;
}"""
    add_file(hg, local, "file.cpp", cpp_source)
    revision = commit(hg)

    generator.generate(
        local,
        mirror_path,
        rev_start=0,
        rev_end="tip",
        limit=None,
        tokenize=True,
        remove_comments=False,
    )

    # Walk the mirror history oldest-first.
    repo = pygit2.Repository(mirror_path)
    oldest_first = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
    history = list(repo.walk(repo.head.target, oldest_first))

    expected_message = f"""Commit A file.cpp

UltraBlame original commit: {revision}"""
    assert history[0].message == expected_message

    with open(os.path.join(mirror_path, "file.cpp"), "r") as handle:
        tokenized_cpp = handle.read()

    assert tokenized_cpp == """#
include
<
iostream
>
/
*
main
*
/
int
main
(
)
{
if
(
ciao
>
0
&
&
ciao
.
obj
<
=
7
&
&
ciao
.
obj
-
>
prova
<
42
|
|
!
bo
)
{
int
x
=
ciao
?
1
:
2
;
return
1
+
1
*
41
+
0
/
~
3
+
3
%
5
-
x
^
3
;
}
return
0
;
}
"""
Пример #15
0
def test_generate_copied_and_moved_file(fake_hg_repo, tmpdir):
    """Check how Mercurial copies and moves appear in the git mirror.

    The expected commit messages record a copy as an added file, and a move
    as an added file plus a removed file.
    """
    hg, local = fake_hg_repo
    mirror_path = os.path.join(tmpdir.strpath, "repo")

    # Revision 1: two independent C++ files.
    first_source = """#include <iostream>

/* main */
int main() {
    return 0;
}"""
    add_file(hg, local, "file.cpp", first_source)
    second_source = """#include <stdio.h>

/* main2 */
void main() {
    return 42;
}"""
    add_file(hg, local, "file2.cpp", second_source)
    revision1 = commit(hg)

    # Revision 2: copy the first file.
    hg.copy(
        bytes(os.path.join(local, "file.cpp"), "ascii"),
        bytes(os.path.join(local, "filecopy.cpp"), "ascii"),
    )
    revision2 = commit(hg)

    # Revision 3: move the second file.
    hg.move(
        bytes(os.path.join(local, "file2.cpp"), "ascii"),
        bytes(os.path.join(local, "file2move.cpp"), "ascii"),
    )
    revision3 = commit(hg)

    assert os.path.exists(os.path.join(local, "file.cpp"))
    assert os.path.exists(os.path.join(local, "filecopy.cpp"))
    assert not os.path.exists(os.path.join(local, "file2.cpp"))
    assert os.path.exists(os.path.join(local, "file2move.cpp"))

    generator.generate(
        local,
        mirror_path,
        rev_start=0,
        rev_end="tip",
        limit=None,
        tokenize=False,
        remove_comments=True,
    )

    # Walk the mirror history oldest-first.
    repo = pygit2.Repository(mirror_path)
    oldest_first = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
    history = list(repo.walk(repo.head.target, oldest_first))

    add_message = f"""Commit A file.cpp A file2.cpp

UltraBlame original commit: {revision1}"""
    assert history[0].message == add_message

    copy_message = f"""Commit A filecopy.cpp

UltraBlame original commit: {revision2}"""
    assert history[1].message == copy_message

    move_message = f"""Commit A file2move.cpp R file2.cpp

UltraBlame original commit: {revision3}"""
    assert history[2].message == move_message

    with open(os.path.join(mirror_path, "file.cpp"), "r") as handle:
        original_content = handle.read()

    assert original_content == """#include <iostream>


int main() {
    return 0;
}"""

    assert not os.path.exists(os.path.join(mirror_path, "file2.cpp"))

    with open(os.path.join(mirror_path, "filecopy.cpp"), "r") as handle:
        copied_content = handle.read()

    assert copied_content == """#include <iostream>


int main() {
    return 0;
}"""

    with open(os.path.join(mirror_path, "file2move.cpp"), "r") as handle:
        moved_content = handle.read()

    assert moved_content == """#include <stdio.h>


void main() {
    return 42;
}"""
Пример #16
0
def test_generate_tokenized_python(fake_hg_repo, tmpdir):
    """Check the tokenized form of a Python file.

    The expected output contains a four-space line after each block header,
    so the whitespace-only lines in the literals below are significant.
    """
    hg, local = fake_hg_repo
    mirror_path = os.path.join(tmpdir.strpath, "repo")

    python_source = """import sys

if sys:

    print("hello")
elif sys != 3:
    print("maybe")
else:
    
    print("nope")
"""
    add_file(hg, local, "file.py", python_source)
    revision = commit(hg)

    generator.generate(
        local,
        mirror_path,
        rev_start=0,
        rev_end="tip",
        limit=None,
        tokenize=True,
        remove_comments=False,
    )

    # Walk the mirror history oldest-first.
    repo = pygit2.Repository(mirror_path)
    oldest_first = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
    history = list(repo.walk(repo.head.target, oldest_first))

    expected_message = f"""Commit A file.py

UltraBlame original commit: {revision}"""
    assert history[0].message == expected_message

    with open(os.path.join(mirror_path, "file.py"), "r") as handle:
        tokenized_py = handle.read()

    assert tokenized_py == """import
sys
if
sys
:
    
print
(
"
hello
"
)
elif
sys
!
=
3
:
    
print
(
"
maybe
"
)
else
:
    
print
(
"
nope
"
)
"""
Пример #17
0
def test_generate_tokenized(fake_hg_repo, tmpdir):
    """Generate a tokenized mirror of two revisions and check files and hash mappings."""
    hg, local = fake_hg_repo
    mirror_path = os.path.join(tmpdir.strpath, "repo")

    # Revision 1: a single C++ file.
    first_cpp = """#include <iostream>

/* main */
int main() {
    return 0;
}"""
    add_file(hg, local, "file.cpp", first_cpp)
    revision1 = commit(hg)

    # Revision 2: the C++ file is modified and a JS module is added.
    second_cpp = """#include <iostream>

/* main */
int main() {
    cout << "Hello, world!";
    return 0;
}"""
    add_file(hg, local, "file.cpp", second_cpp)
    jsm_source = """function ciao(str) {
  // Comment one
  console.log(str);
}"""
    add_file(hg, local, "file.jsm", jsm_source)
    revision2 = commit(hg)

    finished = generator.generate(
        local,
        mirror_path,
        rev_start=0,
        rev_end="tip",
        limit=None,
        tokenize=True,
        remove_comments=False,
    )
    assert finished is True

    # Walk the mirror history oldest-first.
    repo = pygit2.Repository(mirror_path)
    oldest_first = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
    history = list(repo.walk(repo.head.target, oldest_first))

    first_message = f"""Commit A file.cpp

UltraBlame original commit: {revision1}"""
    assert history[0].message == first_message

    second_message = f"""Commit M file.cpp A file.jsm

UltraBlame original commit: {revision2}"""
    assert history[1].message == second_message

    with open(os.path.join(mirror_path, "file.cpp"), "r") as handle:
        tokenized_cpp = handle.read()

    assert tokenized_cpp == """#
include
<
iostream
>
/
*
main
*
/
int
main
(
)
{
cout
<
<
"
Hello
world
!
"
;
return
0
;
}
"""

    with open(os.path.join(mirror_path, "file.jsm"), "r") as handle:
        tokenized_js = handle.read()

    assert tokenized_js == """function
ciao
(
str
)
{
/
/
Comment
one
console
.
log
(
str
)
;
}
"""

    # Mirror commits must map back to the Mercurial revisions they came from.
    assert utils.get_original_hash(repo, "HEAD") == revision2
    assert utils.get_original_hash(repo, history[0].hex) == revision1
    assert utils.get_original_hash(repo, history[1].hex) == revision2

    # The bulk mapping must agree in both directions.
    mapping = utils.get_commit_mapping(mirror_path)
    transformed_to_original, original_to_transformed = mapping
    assert transformed_to_original[history[0].hex] == revision1
    assert transformed_to_original[history[1].hex] == revision2
    assert original_to_transformed[revision1] == history[0].hex
    assert original_to_transformed[revision2] == history[1].hex