Пример #1
0
    def test_exceptional_values(self, temp_workdir):
        """Check that NaN, infinite and ``None`` metrics can be read back.

        Four checkpoints are recorded with the "accuracy" metric set to NaN,
        -inf, +inf and ``None`` (in that order). The experiment is then
        fetched again by id and each metric value is inspected.
        """
        project = Project()

        with open("keepsake.yaml", "w") as config_file:
            config_file.write("repository: file://.keepsake/")

        experiment = project.experiments.create(disable_heartbeat=True)

        # Order matters: the assertions below address checkpoints by position.
        special_values = (float("nan"), float("-inf"), float("+inf"), None)
        for value in special_values:
            experiment.checkpoint(
                path=None,
                metrics={"accuracy": value},
                primary_metric=("accuracy", "maximize"),
            )

        reloaded = project.experiments.get(experiment.id)

        # NaN never compares equal to itself, hence math.isnan.
        assert math.isnan(reloaded.checkpoints[0].metrics["accuracy"])

        # Infinity: check both "is infinite" and the sign.
        assert math.isinf(reloaded.checkpoints[1].metrics["accuracy"])
        assert reloaded.checkpoints[1].metrics["accuracy"] < 0
        assert math.isinf(reloaded.checkpoints[2].metrics["accuracy"])
        assert reloaded.checkpoints[2].metrics["accuracy"] > 0

        assert reloaded.checkpoints[3].metrics["accuracy"] is None
Пример #2
0
    def test_open(self, temp_workdir):
        """Read individual files out of a checkpoint with ``open()``.

        The experiment is created with ``path="."`` (when only ``foo.txt``
        has been written) and the checkpoint with ``path="bar.txt"``. Both
        files are then read through the checkpoint object returned by
        ``checkpoint()`` and through the one obtained via
        ``project.experiments.list()``.
        """
        project = Project()
        with open("foo.txt", "w") as out:
            out.write("foo")

        with open("keepsake.yaml", "w") as out:
            out.write("repository: file://.keepsake/")

        exp = project.experiments.create(
            path=".", params={"foo": "bar"}, disable_heartbeat=True
        )
        with open("bar.txt", "w") as out:
            out.write("bar")
        chk = exp.checkpoint(path="bar.txt", metrics={"accuracy": "awesome"})

        archive = os.path.join(".keepsake/checkpoints", chk.id + ".tar.gz")
        wait(
            lambda: os.path.exists(archive),
            timeout_seconds=5,
            sleep_seconds=0.01,
        )
        time.sleep(0.1)  # wait to finish writing

        expected_contents = (("foo.txt", "foo"), ("bar.txt", "bar"))

        def assert_contents(checkpoint):
            # Each file is opened through the checkpoint and decoded to text.
            for filename, text in expected_contents:
                assert checkpoint.open(filename).read().decode() == text

        # test with already existing checkpoint
        assert_contents(chk)

        # test with checkpoint from keepsake.experiments.list()
        listed_experiment = project.experiments.list()[0]
        assert_contents(listed_experiment.checkpoints[0])
Пример #3
0
    def test_delete(self, temp_workdir):
        """Check which repository entries remain after deleting an experiment.

        The contents of ``.keepsake`` are compared against an expected layout
        once the checkpoint archive has appeared, and again after
        ``experiment.delete()``.
        """
        project = Project()

        with open("keepsake.yaml", "w") as handle:
            handle.write("repository: file://.keepsake/")

        with open("foo.txt", "w") as handle:
            handle.write("hello")

        experiment = project.experiments.create(
            path=".", params={"foo": "bar"}, disable_heartbeat=True
        )
        with open("model.txt", "w") as handle:
            handle.write("i'm a model")
        chk = experiment.checkpoint(
            path="model.txt", metrics={"accuracy": "awesome"}
        )

        def repository_entries():
            # Every file and directory below .keepsake, with the prefix removed.
            return {
                str(entry).replace(".keepsake/", "")
                for entry in Path(".keepsake").rglob("*")
            }

        archive = os.path.join(".keepsake/checkpoints", chk.id + ".tar.gz")
        wait(
            lambda: os.path.exists(archive),
            timeout_seconds=5,
            sleep_seconds=0.01,
        )

        # Entries expected both before and after the deletion.
        skeleton = {
            "repository.json",  # we're not deleting the project spec
            "experiments",
            "metadata",
            "metadata/experiments",
            "checkpoints",
        }
        # Entries that belong to the experiment and its checkpoint.
        experiment_entries = {
            "metadata/experiments/{}.json".format(experiment.id),
            "checkpoints/{}.tar.gz".format(chk.id),
            "experiments/{}.tar.gz".format(experiment.id),
        }

        assert repository_entries() == skeleton | experiment_entries

        experiment.delete()

        assert repository_entries() == skeleton
Пример #4
0
    def test_checkpoints(self, temp_workdir):
        """Check that ``experiment.checkpoints`` lists checkpoints in creation order."""
        project = Project()

        with open("keepsake.yaml", "w") as config_file:
            config_file.write("repository: file://.keepsake/")

        experiment = project.experiments.create(
            path=None, params={"foo": "bar"}, disable_heartbeat=True
        )

        # The generator is consumed left to right, so "ok" is recorded first.
        first, second = (
            experiment.checkpoint(path=None, metrics={"accuracy": label})
            for label in ("ok", "super")
        )

        assert len(experiment.checkpoints) == 2
        assert experiment.checkpoints[0].id == first.id
        assert experiment.checkpoints[1].id == second.id
Пример #5
0
    def test_list_project_options(self, has_repository, has_directory,
                                  has_config, should_error, temp_workdir):
        """List experiments under different ``Project`` configurations.

        ``has_repository`` / ``has_directory`` decide whether the matching
        constructor argument is given or left as ``None``; ``has_config``
        decides whether a ``keepsake.yaml`` is written first. When
        ``should_error`` is truthy, listing must raise ``ValueError`` or
        ``ConfigNotFound``; otherwise it must return an empty
        ``ExperimentList``. The flags are presumably supplied by a
        parametrize decorator outside this view.
        """
        repo = None
        if has_repository:
            repo = "file://.keepsake/"

        directory = None
        if has_directory:
            directory = "."

        if has_config:
            with open("keepsake.yaml", "w") as config_file:
                config_file.write("repository: file://.keepsake/")

        project = Project(repository=repo, directory=directory)

        if should_error:
            with pytest.raises((ValueError, ConfigNotFound)):
                project.experiments.list()
            return

        listed = project.experiments.list()
        assert isinstance(listed, ExperimentList)
        assert len(listed) == 0
Пример #6
0
    def test_checkpoint_auto_increments_step(self, temp_workdir):
        """Check the ``step`` assigned to checkpoints created without one.

        Checkpoints created without a ``step`` are expected to count up from
        0, and to continue from an explicitly given step (10 -> 11).
        """
        project = Project()

        with open("keepsake.yaml", "w") as config_file:
            config_file.write("repository: file://.keepsake/")

        experiment = project.experiments.create(
            path=None, params={"foo": "bar"}, disable_heartbeat=True
        )

        # (keyword arguments for checkpoint(), expected resulting step)
        cases = (
            ({}, 0),
            ({}, 1),
            ({"step": 10}, 10),
            ({}, 11),
        )

        # Create every checkpoint first, then verify, as separate phases.
        created = [experiment.checkpoint(**kwargs) for kwargs, _ in cases]

        for chk, (_, expected_step) in zip(created, cases):
            assert chk.step == expected_step
Пример #7
0
    def test_best_none(self, temp_workdir):
        """Check that ``best()`` is ``None`` when the primary metric is only ``None`` or NaN."""
        project = Project()

        with open("keepsake.yaml", "w") as config_file:
            config_file.write("repository: file://.keepsake/")

        experiment = project.experiments.create(disable_heartbeat=True)

        # One checkpoint per metric value, recorded in this order.
        for accuracy in (None, float("nan")):
            experiment.checkpoint(
                path=None,
                metrics={"accuracy": accuracy},
                primary_metric=("accuracy", "maximize"),
            )

        assert experiment.best() is None
Пример #8
0
    def test_list(self, temp_workdir):
        """Check that ``experiments.list()`` returns both experiments in creation order.

        The first experiment has one checkpoint whose metrics must be
        returned unchanged; the second has none.
        """
        project = Project()

        with open("keepsake.yaml", "w") as config_file:
            config_file.write("repository: file://.keepsake/")

        with_checkpoint = project.experiments.create(
            path=None, params={"foo": "bar"}, disable_heartbeat=True
        )
        with_checkpoint.checkpoint(path=None, metrics={"accuracy": "wicked"})
        without_checkpoint = project.experiments.create(
            path=None, params={"foo": "baz"}, disable_heartbeat=True
        )

        listed = project.experiments.list()
        assert len(listed) == 2

        assert listed[0].id == with_checkpoint.id
        assert len(listed[0].checkpoints) == 1
        assert listed[0].checkpoints[0].metrics == {"accuracy": "wicked"}

        assert listed[1].id == without_checkpoint.id
Пример #9
0
    def test_refresh(self, temp_workdir):
        """Check that a fetched experiment only sees new checkpoints after ``refresh()``.

        One handle creates checkpoints; a second handle, obtained through
        ``experiments.get()``, must keep reporting the old checkpoint count
        until it is refreshed.
        """
        project = Project()

        with open("keepsake.yaml", "w") as config_file:
            config_file.write("repository: file://.keepsake/")

        writer = project.experiments.create(
            params={"foo": "bar"}, disable_heartbeat=True
        )
        writer.checkpoint(metrics={"accuracy": 0})

        reader = project.experiments.get(writer.id)
        assert len(reader.checkpoints) == 1

        # A checkpoint added through the writer is not visible to the reader yet.
        writer.checkpoint(metrics={"accuracy": 1})
        assert len(reader.checkpoints) == 1

        reader.refresh()
        assert len(reader.checkpoints) == 2
        assert reader.checkpoints[-1].metrics["accuracy"] == 1
Пример #10
0
    def test_create_project_options(self, has_repository, has_directory,
                                    has_config, exception, temp_workdir):
        """Create an experiment under different ``Project`` configurations.

        ``has_repository`` / ``has_directory`` decide whether the matching
        constructor argument is given or left as ``None``; ``has_config``
        decides whether a ``keepsake.yaml`` is written first. A truthy
        ``exception`` is passed to ``pytest.raises`` around the creation;
        otherwise creation must succeed. The arguments are presumably
        supplied by a parametrize decorator outside this view.
        """
        repo = None
        if has_repository:
            repo = "file://.keepsake/"

        directory = None
        if has_directory:
            directory = "."

        if has_config:
            with open("keepsake.yaml", "w") as config_file:
                config_file.write("repository: file://.keepsake/")

        with open("foo.txt", "w") as data_file:
            data_file.write("hello world")

        project = Project(repository=repo, directory=directory)

        if exception:
            with pytest.raises(exception):
                project.experiments.create(path=".")
            return

        created = project.experiments.create(path=".")
        # to avoid writing heartbeats that sometimes cause
        # TemporaryDirectory cleanup to fail
        created.stop()
Пример #11
0
    def test_get(self, temp_workdir):
        """Fetch experiments by full id and by id prefix.

        Also checks that an unknown id raises ``DoesNotExist``.
        """
        project = Project()

        with open("keepsake.yaml", "w") as config_file:
            config_file.write("repository: file://.keepsake/")

        first = project.experiments.create(
            path=None, params={"foo": "bar"}, disable_heartbeat=True
        )
        first.checkpoint(path=None, metrics={"accuracy": "wicked"})
        second = project.experiments.create(
            path=None, params={"foo": "baz"}, disable_heartbeat=True
        )

        fetched = project.experiments.get(first.id)
        assert fetched.created == first.created
        assert len(fetched.checkpoints) == 1
        assert fetched.checkpoints[0].metrics == {"accuracy": "wicked"}

        # get by prefix
        short_id = second.id[:7]
        assert project.experiments.get(short_id).created == second.created

        with pytest.raises(DoesNotExist):
            project.experiments.get("doesnotexist")
Пример #12
0
def test_empty_experiment_to_pb():
    """Convert the empty experiment fixture to protobuf and compare with the empty pb fixture."""
    project = Project()
    source_experiment = empty_experiment(project)
    expected_pb = empty_experiment_pb()

    actual_pb = pb_convert.experiment_to_pb(source_experiment)

    assert actual_pb == expected_pb
Пример #13
0
def test_empty_experiment_from_pb():
    """Convert the empty pb fixture to an experiment and compare with the empty experiment fixture."""
    source_pb = empty_experiment_pb()
    project = Project()
    expected_experiment = empty_experiment(project)

    actual_experiment = pb_convert.experiment_from_pb(project, source_pb)

    assert actual_experiment == expected_experiment
Пример #14
0
import tempfile

from keepsake.project import Project
from keepsake.repository import repository_for_url

# Benchmark fixture generator: creates 1000 experiments with 100 checkpoints
# each in a temporary project directory, then uploads the resulting
# ".keepsake/" directory to the repository given by the "bucket" argument.
parser = argparse.ArgumentParser(
    description=(
        "Create two projects: one with lots of metadata, and another which "
        "is the same but with a few new projects and checkpoints to test "
        "incremental updates"
    )
)
parser.add_argument("bucket")
# NOTE(review): "bucket_prime" is parsed but not used in the code visible
# here -- confirm whether the second upload lives elsewhere or is missing.
parser.add_argument("bucket_prime")
args = parser.parse_args()

with tempfile.TemporaryDirectory() as project_dir:
    print("Creating project...")
    # NOTE(review): confirm that `dir` is a keyword Project accepts.
    project = Project(dir=project_dir)

    for experiment_index in range(1000):
        # Progress output for every tenth experiment only.
        if experiment_index % 10 == 0:
            print("Experiment", experiment_index)

        experiment = project.experiments.create(
            path=None,
            params={"foo": "bar"},
            quiet=True,
            disable_heartbeat=True,
        )
        for _ in range(100):
            experiment.checkpoint(
                path=None,
                metrics={"loss": 0.00001},
                quiet=True,
            )

    print("Uploading to bucket...")
    repository = repository_for_url(args.bucket)
    local_repository_dir = os.path.join(project_dir, ".keepsake/")
    repository.put_path(local_repository_dir, "")
Пример #15
0
    def test_checkout(self, temp_workdir, tmpdir_factory):
        """Check out checkpoints into fresh directories and inspect the files.

        Scenarios, in order:
        1. experiment with ``path="."`` and checkpoint with ``path="bar.txt"``,
           checked out from the object returned by ``checkpoint()``;
        2. the same checkpoint obtained through ``experiments.list()``;
        3. neither experiment nor checkpoint has a path -> ``DoesNotExist``;
        4. only the checkpoint has a path -> only ``bar.txt`` is present;
        5. only the experiment has a path -> only ``foo.txt`` is present.
        """

        def wait_for_archive(directory, object_id):
            # Block until "<directory>/<object_id>.tar.gz" exists.
            archive = os.path.join(directory, object_id + ".tar.gz")
            wait(
                lambda: os.path.exists(archive),
                timeout_seconds=5,
                sleep_seconds=0.01,
            )
            time.sleep(0.1)  # wait to finish writing

        def checkout_to_fresh_dir(checkpoint):
            # Check the checkpoint out into a new temp dir and return that dir.
            target = tmpdir_factory.mktemp("checkout")
            checkpoint.checkout(output_directory=str(target))
            return target

        def read_file(path):
            with open(path) as handle:
                return handle.read()

        project = Project()
        with open("foo.txt", "w") as out:
            out.write("foo")

        with open("keepsake.yaml", "w") as out:
            out.write("repository: file://.keepsake/")

        exp = project.experiments.create(
            path=".", params={"foo": "bar"}, disable_heartbeat=True
        )
        with open("bar.txt", "w") as out:
            out.write("bar")
        chk = exp.checkpoint(path="bar.txt", metrics={"accuracy": "awesome"})
        wait_for_archive(".keepsake/checkpoints", chk.id)

        # test with already existing checkpoint
        output = checkout_to_fresh_dir(chk)
        assert read_file(output / "foo.txt") == "foo"
        assert read_file(output / "bar.txt") == "bar"

        # test with checkpoint from keepsake.experiments.list()
        exp = project.experiments.list()[0]
        chk = exp.checkpoints[0]
        output = checkout_to_fresh_dir(chk)
        assert read_file(output / "foo.txt") == "foo"
        assert read_file(output / "bar.txt") == "bar"

        # test with no paths
        exp = project.experiments.create(
            params={"foo": "bar"}, disable_heartbeat=True
        )
        chk = exp.checkpoint(metrics={"accuracy": "awesome"})
        # The target directory is created outside the raises block so that
        # only the checkout call itself is expected to fail.
        output = tmpdir_factory.mktemp("checkout")
        with pytest.raises(DoesNotExist):
            chk.checkout(output_directory=str(output))

        # test experiment with no path
        exp = project.experiments.create(
            params={"foo": "bar"}, disable_heartbeat=True
        )
        chk = exp.checkpoint(path="bar.txt", metrics={"accuracy": "awesome"})
        wait_for_archive(".keepsake/checkpoints", chk.id)

        output = checkout_to_fresh_dir(chk)
        assert not os.path.exists(output / "foo.txt")
        assert read_file(output / "bar.txt") == "bar"

        # test checkpoint with no path
        exp = project.experiments.create(
            path="foo.txt", params={"foo": "bar"}, disable_heartbeat=True
        )
        chk = exp.checkpoint(metrics={"accuracy": "awesome"})
        wait_for_archive(".keepsake/experiments", exp.id)

        output = checkout_to_fresh_dir(chk)
        assert not os.path.exists(output / "bar.txt")
        assert read_file(output / "foo.txt") == "foo"
Пример #16
0
            metrics={"loss": loss.item(), "accuracy": acc},
            primary_metric=("loss", "minimize"),
        )

    experiment.stop()


parser = argparse.ArgumentParser(
    description="Create a project with a bunch of realistic-ish data to test `keepsake ls` output and things"
)
parser.add_argument("repository")
args = parser.parse_args()

with tempfile.TemporaryDirectory() as project_dir:
    print("Creating project...")
    project = Project(directory=".", repository=args.repository)
    train(project, learning_rate=0.01, num_epochs=10)
    train(project, learning_rate=0.05, num_epochs=10)
    train(project, learning_rate=0.01, num_epochs=100)
    train(project, learning_rate=0.05, num_epochs=100)
    train(project, learning_rate=0.001, num_epochs=100)
    train(project, learning_rate=0.1, num_epochs=100)
    train(project, learning_rate=0.01, num_epochs=100, hidden_layer_size=30)
    train(project, learning_rate=0.01, num_epochs=100, hidden_layer_size=10)
    train(project, learning_rate=0.01, num_epochs=50, model="bartnet")
    train(project, learning_rate=0.01, num_epochs=50, dropout_rate=0.5, model="homnet")
    train(project, learning_rate=0.01, num_epochs=50, dropout_rate=0.3, model="homnet")
    train(
        project,
        learning_rate=0.01,
        num_epochs=50,