Exemplo n.º 1
0
    def get_experiment(self, project_name, experiment_id):
        """Retrieve an experiment from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project the experiment with ID
            `experiment_id` is logged to.
        experiment_id : str
            The ID of the experiment to retrieve.

        Returns
        -------
        rubicon.domain.Experiment
            The experiment with ID `experiment_id`.

        Raises
        ------
        RubiconException
            If opening the experiment's metadata file raises
            `FileNotFoundError`.
        """
        experiment_metadata_path = self._get_experiment_metadata_path(
            project_name, experiment_id)

        try:
            open_file = self.filesystem.open(experiment_metadata_path)
        except FileNotFoundError as err:
            # Chain explicitly so the traceback reports the missing file as
            # the direct cause of the domain-level error.
            raise RubiconException(
                f"No experiment with id `{experiment_id}` found.") from err

        with open_file as f:
            experiment = json.load(f)

        return domain.Experiment(**experiment)
Exemplo n.º 2
0
    def get_experiments(self, project_name):
        """Load every experiment logged to the project named `project_name`.

        Parameters
        ----------
        project_name : str
            The name of the project whose experiments are retrieved.

        Returns
        -------
        list of rubicon.domain.Experiment
            One domain object per metadata document read from the
            filesystem, or an empty list if a `FileNotFoundError` is
            raised while listing or reading.
        """
        metadata_root = self._get_experiment_metadata_root(project_name)

        try:
            metadata_paths = self._ls_directories_only(metadata_root)
            raw_documents = self.filesystem.cat(metadata_paths).values()
            loaded = [
                domain.Experiment(**json.loads(raw)) for raw in raw_documents
            ]
        except FileNotFoundError:
            # Nothing could be listed or read for this project.
            loaded = []

        return loaded
Exemplo n.º 3
0
    async def get_experiments(self, project_name):
        """Overrides `rubicon.repository.BaseRepository.get_experiments` to
        asynchronously load every experiment logged to the project named
        `project_name`.

        Parameters
        ----------
        project_name : str
            The name of the project whose experiments are retrieved.

        Returns
        -------
        list of rubicon.domain.Experiment
            One domain object per metadata document read from the
            filesystem, or an empty list if a `FileNotFoundError` is
            raised while listing or reading.
        """
        metadata_root = self._get_experiment_metadata_root(project_name)

        try:
            metadata_paths = await self._ls_directories_only(metadata_root)
            # Issue all reads up front and await them together.
            pending_reads = [
                self.filesystem._cat_file(path) for path in metadata_paths
            ]
            raw_documents = await asyncio.gather(*pending_reads)
            loaded = [
                domain.Experiment(**json.loads(raw)) for raw in raw_documents
            ]
        except FileNotFoundError:
            loaded = []

        return loaded
Exemplo n.º 4
0
    def _create_experiment_domain(
        self,
        name,
        description,
        model_name,
        branch_name,
        commit_hash,
        training_metadata,
        tags,
    ):
        """Build the experiment domain object for this project.

        When automatic git tracking is enabled in the config, a missing
        (`None`) branch name or commit hash is filled in from the
        corresponding `_get_*` helper. Non-`None` training metadata is
        wrapped in `domain.utils.TrainingMetadata`.
        """
        auto_git = self._config.is_auto_git_enabled

        if auto_git and branch_name is None:
            branch_name = self._get_branch_name()
        if auto_git and commit_hash is None:
            commit_hash = self._get_commit_hash()

        wrapped_metadata = training_metadata
        if wrapped_metadata is not None:
            wrapped_metadata = domain.utils.TrainingMetadata(wrapped_metadata)

        experiment_fields = {
            "project_name": self._domain.name,
            "name": name,
            "description": description,
            "model_name": model_name,
            "branch_name": branch_name,
            "commit_hash": commit_hash,
            "training_metadata": wrapped_metadata,
            "tags": tags,
        }
        return domain.Experiment(**experiment_fields)
Exemplo n.º 5
0
def _create_experiment_domain(project=None, tags=None):
    """Build an experiment domain object with a generated, unique name.

    Parameters
    ----------
    project : optional
        Object whose `name` becomes the experiment's project name. When
        omitted, a new `domain.Project` with a generated name is used.
    tags : list, optional
        Tags to set on the experiment. Defaults to no tags.

    Returns
    -------
    domain.Experiment
        The newly constructed experiment domain object.
    """
    if project is None:
        project = domain.Project(f"Test Project {uuid.uuid4()}")

    return domain.Experiment(
        name=f"Test Experiment {uuid.uuid4()}",
        project_name=project.name,
        # Forward the requested tags; a fresh list per call avoids sharing
        # a mutable default between experiments.
        tags=[] if tags is None else list(tags),
    )
Exemplo n.º 6
0
    async def get_experiment(self, project_name, experiment_id):
        """Overrides `rubicon.repository.BaseRepository.get_experiment` to
        asynchronously retrieve an experiment from the configured filesystem.

        Parameters
        ----------
        project_name : str
            The name of the project the experiment with ID
            `experiment_id` is logged to.
        experiment_id : str
            The ID of the experiment to retrieve.

        Returns
        -------
        rubicon.domain.Experiment
            The experiment with ID `experiment_id`.

        Raises
        ------
        RubiconException
            If reading the experiment's metadata file raises
            `FileNotFoundError`.
        """
        experiment_metadata_path = self._get_experiment_metadata_path(
            project_name, experiment_id)

        try:
            experiment = json.loads(
                await self.filesystem._cat_file(experiment_metadata_path))
        except FileNotFoundError as err:
            # Chain explicitly so the traceback reports the missing file as
            # the direct cause of the domain-level error.
            raise RubiconException(
                f"No experiment with id `{experiment_id}` found.") from err

        return domain.Experiment(**experiment)
Exemplo n.º 7
0
def test_properties(project_client):
    """Check that a client `Experiment` exposes its domain object's values."""
    project = project_client

    experiment_domain = domain.Experiment(
        project_name=project.name,
        description="some description",
        name="exp-1",
        model_name="ModelOne model",
        branch_name="branch",
        commit_hash="a-commit-hash",
        training_metadata=domain.utils.TrainingMetadata([
            ("test/path", "SELECT * FROM test")
        ]),
        tags=["x"],
    )
    client_experiment = Experiment(experiment_domain, project)

    # Values that were passed literally to the domain object above.
    literal_expectations = (
        ("name", "exp-1"),
        ("description", "some description"),
        ("model_name", "ModelOne model"),
        ("branch_name", "branch"),
        ("commit_hash", "a-commit-hash"),
    )
    for attribute, value in literal_expectations:
        assert getattr(client_experiment, attribute) == value

    # Values that must mirror the wrapped domain object.
    for attribute in ("name", "commit_hash"):
        assert getattr(client_experiment, attribute) == getattr(
            experiment_domain, attribute)

    # The client exposes the first entry of the wrapped training metadata.
    assert client_experiment.training_metadata == (
        experiment_domain.training_metadata.training_metadata[0])

    for attribute in ("tags", "created_at", "id"):
        assert getattr(client_experiment, attribute) == getattr(
            experiment_domain, attribute)

    assert client_experiment.project == project
Exemplo n.º 8
0
def _create_experiment(repository, project=None, tags=None):
    """Create an experiment with a generated name and persist it.

    Parameters
    ----------
    repository
        Repository whose `create_experiment` is called with the new
        experiment.
    project : optional
        Object whose `name` becomes the experiment's project name. When
        omitted, one is obtained from `_create_project(repository)`.
    tags : list, optional
        Tags to set on the experiment. Defaults to no tags.

    Returns
    -------
    domain.Experiment
        The experiment domain object passed to the repository.
    """
    if project is None:
        project = _create_project(repository)

    experiment = domain.Experiment(
        name=f"Test Experiment {uuid.uuid4()}",
        project_name=project.name,
        # Forward the requested tags; a fresh list per call avoids sharing
        # a mutable default between experiments.
        tags=[] if tags is None else list(tags),
    )
    repository.create_experiment(experiment)

    return experiment
def test_log_unserializable_param_triggers_exception(project_client, fake_estimator_cls):
    """A failure in `Experiment.log_parameter` surfaces as a warning."""
    project = project_client
    experiment_domain = domain.Experiment(project_name=project.name)
    client_experiment = Experiment(experiment_domain, project)
    bad_estimator = fake_estimator_cls(
        params={"unserializable": b"not serializable"},
    )

    logger_under_test = EstimatorLogger(
        estimator=bad_estimator,
        experiment=client_experiment,
        step_name="vect",
    )

    # Force every `log_parameter` call to raise, then expect a warning.
    failing_log = patch.object(
        Experiment, "log_parameter", side_effect=Exception("test"),
    )
    with failing_log, pytest.warns(Warning):
        logger_under_test.log_parameters()
def test_log_parameters_triggers_experiment_log_parameter(project_client, fake_estimator_cls):
    """`log_parameters` forwards each parameter to `Experiment.log_parameter`."""
    project = project_client
    experiment_domain = domain.Experiment(project_name=project.name)
    client_experiment = Experiment(experiment_domain, project)

    logger_under_test = EstimatorLogger(
        estimator=fake_estimator_cls(),
        experiment=client_experiment,
        step_name="vect",
    )

    stubbed_log = patch.object(Experiment, "log_parameter", return_value=None)
    with stubbed_log as log_parameter_mock:
        logger_under_test.log_parameters()

    assert log_parameter_mock.call_count == 3

    # the step name gets prepended to each param
    log_parameter_mock.assert_called_with(
        name="vect__ngram_range", value=(1, 2),
    )
Exemplo n.º 11
0
def test_get_experiment(asyn_client_w_mock_repo):
    """Fetching an experiment by ID delegates to the repository."""
    rubicon = asyn_client_w_mock_repo

    project = asyncio.run(
        rubicon.create_project(f"Test Project {uuid.uuid4()}"))
    stored_domain = domain.Experiment(project_name=project.name)

    rubicon.repository.get_experiment.return_value = stored_domain

    fetched = asyncio.run(project.experiment(stored_domain.id))

    assert fetched.id == stored_domain.id
    # The first recorded repository call is skipped, as in the slice below.
    assert rubicon.repository.mock_calls[1:] == [
        call.get_experiment(project.name, stored_domain.id)
    ]
def test_select_parameters(project_client, fake_estimator_cls):
    """Parameters listed in `ignore` are not logged by the filter logger."""
    project = project_client
    experiment_domain = domain.Experiment(project_name=project.name)
    client_experiment = Experiment(experiment_domain, project)

    filtering_logger = FilterEstimatorLogger(
        estimator=fake_estimator_cls(),
        experiment=client_experiment,
        step_name="vect",
        ignore=["ngram_range"],
    )

    stubbed_log = patch.object(Experiment, "log_parameter", return_value=None)
    with stubbed_log as log_parameter_mock:
        filtering_logger.log_parameters()

    assert log_parameter_mock.call_count == 2

    # the step name gets prepended to each param
    log_parameter_mock.assert_called_with(name="vect__lowercase", value=True)
Exemplo n.º 13
0
def test_get_experiments(asyn_client_w_mock_repo):
    """Listing a project's experiments delegates to the repository."""
    rubicon = asyn_client_w_mock_repo

    project = asyncio.run(
        rubicon.create_project(f"Test Project {uuid.uuid4()}"))
    stored_domains = [
        domain.Experiment(project_name=project.name) for _ in range(3)
    ]

    rubicon.repository.get_experiments.return_value = stored_domains

    fetched = asyncio.run(project.experiments())

    # Tick off each expected ID; nothing may be left over afterwards.
    unmatched_ids = [experiment.id for experiment in fetched]
    for stored in stored_domains:
        assert stored.id in unmatched_ids
        unmatched_ids.remove(stored.id)

    assert len(unmatched_ids) == 0
    assert rubicon.repository.mock_calls[1:] == [
        call.get_experiments(project.name)
    ]
def test_get_projects_as_dask_df(asyn_client_w_mock_repo):
    """`get_project_as_dask_df` yields one row per mocked experiment."""
    rubicon = asyn_client_w_mock_repo
    repository = rubicon.repository

    project_name = f"Test Project {uuid.uuid4()}"
    project_domain = domain.Project(name=project_name)
    experiment_domains = []
    for _ in range(2):
        experiment_domains.append(
            domain.Experiment(
                project_name=project_name,
                name=f"Test Experiment {uuid.uuid4()}",
            )
        )

    # Canned repository responses consumed while building the dataframe.
    repository.get_project.return_value = project_domain
    repository.get_experiments.return_value = experiment_domains
    repository.get_tags.return_value = [
        {"added_tags": [], "removed_tags": []},
    ]
    repository.get_parameters.return_value = []
    repository.get_metrics.return_value = []

    ddf = asyncio.run(rubicon.get_project_as_dask_df(project_name))

    assert isinstance(ddf, dd.core.DataFrame)
    assert len(ddf.compute()) == 2
Exemplo n.º 15
0
def test_to_dask_df(asyn_client_w_mock_repo):
    """`project.to_dask_df` returns a row per experiment with detail columns."""
    rubicon = asyn_client_w_mock_repo
    repository = rubicon.repository

    project_name = f"Test Project {uuid.uuid4()}"
    project = asyncio.run(rubicon.create_project(project_name))

    experiment_domains = []
    for _ in range(2):
        experiment_domains.append(
            domain.Experiment(
                project_name=project_name,
                name=f"Test Experiment {uuid.uuid4()}",
            )
        )

    # Canned repository responses consumed while building the dataframe.
    repository.get_experiments.return_value = experiment_domains
    repository.get_tags.return_value = [
        {"added_tags": [], "removed_tags": []},
    ]
    repository.get_parameters.return_value = [domain.Parameter("n_components")]
    repository.get_metrics.return_value = [domain.Metric("accuracy", 90)]

    df = asyncio.run(project.to_dask_df()).compute()

    # check that all experiments made it into df
    assert len(df) == 2

    # check the cols within the df
    expected_columns = (
        "id",
        "name",
        "description",
        "model_name",
        "commit_hash",
        "tags",
        "created_at",
    )
    missing_columns = [
        column for column in expected_columns if column not in df.columns
    ]
    assert not missing_columns