示例#1
0
def test_trial_to_tuple(space, trial, fixed_suggestion):
    """Check that a trial converts to the expected tuple and bad trials fail."""
    assert trial_to_tuple(trial, space) == fixed_suggestion

    # After renaming the first parameter the conversion is expected to fail.
    first_param = trial.params[0]
    first_param.name = 'lalala'
    with pytest.raises(AssertionError):
        trial_to_tuple(trial, space)

    # Dropping that parameter altogether is expected to fail as well.
    trial.params.pop(0)
    with pytest.raises(AssertionError):
        trial_to_tuple(trial, space)
示例#2
0
def test_trial_to_tuple(space, trial, fixed_suggestion):
    """Check the trial-to-tuple conversion and its error on mismatching params."""
    assert trial_to_tuple(trial, space) == fixed_suggestion

    # After renaming the first parameter, the conversion must raise and the
    # message must list the parameter names of the trial.
    renamed_param = trial._params[0]
    renamed_param.name = 'lalala'
    with pytest.raises(ValueError) as error_info:
        trial_to_tuple(trial, space)

    message = str(error_info.value)
    assert "Trial params: [\'lalala\', \'yolo2\', \'yolo3\']" in message

    # Same expectation once the first parameter is removed.
    trial._params.pop(0)
    with pytest.raises(ValueError) as error_info:
        trial_to_tuple(trial, space)

    message = str(error_info.value)
    assert "Trial params: [\'yolo2\', \'yolo3\']" in message
示例#3
0
def test_trial_to_tuple(space, fixed_suggestion, params_tuple):
    """Check the trial-to-tuple conversion and its error on mismatching params."""
    assert trial_to_tuple(fixed_suggestion, space) == params_tuple

    # After renaming the first parameter, the conversion must raise and the
    # message must list the parameter names of the trial.
    renamed_param = fixed_suggestion._params[0]
    renamed_param.name = "lalala"
    with pytest.raises(ValueError) as error_info:
        trial_to_tuple(fixed_suggestion, space)

    message = str(error_info.value)
    assert "Trial params: ['lalala', 'yolo2', 'yolo3']" in message

    # Same expectation once the first parameter is removed.
    fixed_suggestion._params.pop(0)
    with pytest.raises(ValueError) as error_info:
        trial_to_tuple(fixed_suggestion, space)

    message = str(error_info.value)
    assert "Trial params: ['yolo2', 'yolo3']" in message
示例#4
0
def test_change_trial_params(space, rspace):
    """Check that ``change_trial_params`` swaps params and keeps other attributes."""
    expected_dir = "/new/working/dir"
    expected_status = "interrupted"

    source_trial = rspace.sample()[0]
    # The reference trial is sampled from the original space rather than
    # obtained through reserve().
    reference_trial = space.sample()[0]
    new_point = format_trials.trial_to_tuple(reference_trial, space)

    source_trial.exp_working_dir = expected_dir
    source_trial.status = expected_status

    updated_trial = change_trial_params(source_trial, new_point, space)

    # Attributes other than the params are carried over.
    assert updated_trial.exp_working_dir == expected_dir
    assert updated_trial.status == expected_status

    # Params differ from the source trial and match the reference trial.
    assert updated_trial.params != source_trial.params
    assert updated_trial.params == reference_trial.params

    # The id follows the new params.
    assert updated_trial.id != source_trial.id
    assert updated_trial.id == reference_trial.id
示例#5
0
    def _update_algorithm(self, completed_trials):
        """Feed completed trials that were not seen before to the algorithm.

        Trials already contained in ``self.trials_history`` are skipped. The
        remaining ones are converted to points and results, added to the
        history, observed by both the algorithm and the strategy, and finally
        passed to ``self._update_params_hashes``.
        """
        log.debug("### Fetch completed trials to observe:")

        unseen_trials = [
            trial for trial in completed_trials
            if trial not in self.trials_history
        ]

        log.debug("### %s", unseen_trials)

        if not unseen_trials:
            return

        log.debug("### Convert them to list of points and their results.")
        points = [
            format_trials.trial_to_tuple(trial, self.space)
            for trial in unseen_trials
        ]
        results = [
            format_trials.get_trial_results(trial) for trial in unseen_trials
        ]

        log.debug("### Observe them.")
        self.trials_history.update(unseen_trials)
        self.algorithm.observe(points, results)
        self.strategy.observe(points, results)
        self._update_params_hashes(unseen_trials)
示例#6
0
def test_tuple_to_trial_to_tuple(space, trial, fixed_suggestion):
    """Round-trip a point and a trial through both conversion functions."""
    round_tripped = trial_to_tuple(tuple_to_trial(fixed_suggestion, space), space)
    assert round_tripped == fixed_suggestion

    rebuilt = tuple_to_trial(trial_to_tuple(trial, space), space)

    # Everything but the params is expected to hold its initial value.
    assert rebuilt.experiment is None
    assert rebuilt.status == 'new'
    assert rebuilt.worker is None
    assert rebuilt.submit_time is None
    assert rebuilt.start_time is None
    assert rebuilt.end_time is None
    assert rebuilt.results == []

    # Params must match one by one; lengths are checked first so that the
    # pairwise comparison below cannot silently skip a parameter.
    assert len(rebuilt._params) == len(trial._params)
    for rebuilt_param, original_param in zip(rebuilt._params, trial._params):
        assert rebuilt_param.to_dict() == original_param.to_dict()
示例#7
0
    def reshape(self, trial):
        """Build the trial for this space from a trial of the original space.

        The trial is converted to a point using ``self._original_space``;
        each dimension of this space then transforms the value found at its
        ``index`` in that point.
        """
        original_point = format_trials.trial_to_tuple(trial, self._original_space)
        transformed_point = [
            dim.transform(original_point[dim.index]) for dim in self.values()
        ]
        return change_trial_params(trial, transformed_point, self)
示例#8
0
    def reserve_trial(self, score_handle=None):
        """Find *new* trials that exist currently in database and select one of
        them based on the highest score return from `score_handle` callable.

        Trials with status 'new', 'suspended' or 'interrupted' are candidates.
        When `score_handle` is given and the parameter space is defined, only
        the candidates sharing the highest score are kept; one of the
        remaining candidates is then picked at random and its status is
        switched to 'reserved'. If another process changed the status of the
        picked trial in the meantime, the reservation is attempted again.

        :param score_handle: A way to decide which trial out of the *new* ones
           to pick as *reserved*, defaults to a random choice. It is called
           with the tuple produced by `trial_to_tuple` for each candidate.
        :type score_handle: callable

        :return: selected `Trial` object, None if could not find any.
        :raises ValueError: if `score_handle` is given but is not callable.
        """
        if score_handle is not None and not callable(score_handle):
            raise ValueError(
                "Argument `score_handle` must be callable with a `Trial`.")

        query = dict(experiment=self._id,
                     status={'$in': ['new', 'suspended', 'interrupted']})
        new_trials = Trial.build(self._db.read('trials', query))

        if not new_trials:
            return None

        if score_handle is not None and self.space:
            scores = [
                score_handle(trial_to_tuple(trial, self.space))
                for trial in new_trials
            ]
            # Compute the best score once; it used to be recomputed for every
            # candidate while filtering.
            best_score = max(scores)
            new_trials = [
                trial for score, trial in zip(scores, new_trials)
                if score == best_score
            ]
        elif score_handle is not None:
            log.warning(
                "While reserving trial: `score_handle` was provided, but "
                "parameter space has not been defined yet.")

        selected_trial = random.sample(new_trials, 1)[0]

        # Query on status to ensure atomicity. If another process change the
        # status meanwhile, read_and_write will fail, because query will fail.
        query = {'_id': selected_trial.id, 'status': selected_trial.status}

        update = dict(status='reserved')

        # The start time is only set the first time a trial gets reserved.
        if selected_trial.status == 'new':
            update["start_time"] = datetime.datetime.utcnow()

        selected_trial_dict = self._db.read_and_write('trials',
                                                      query=query,
                                                      data=update)

        if selected_trial_dict is None:
            # Lost the race for this trial: start over with fresh candidates.
            selected_trial = self.reserve_trial(score_handle=score_handle)
        else:
            selected_trial = Trial(**selected_trial_dict)

        return selected_trial
示例#9
0
    def restore_shape(self, transformed_trial):
        """Rebuild a trial of the original space from a trial of this space.

        Only dimensions flagged as ``first`` contribute: each of them reverses
        the transformed point at its own position and writes the outcome at
        the position of its original dimension in the original space.
        """
        flat_point = format_trials.trial_to_tuple(transformed_trial, self)
        keys = self._original_space.keys()
        restored_point = [None] * len(keys)
        for position, dim in enumerate(self.values()):
            if not dim.first:
                continue
            target = keys.index(dim.original_dimension.name)
            restored_point[target] = dim.reverse(flat_point, position)

        return change_trial_params(transformed_trial, restored_point, self._original_space)
示例#10
0
    def test_suggest_initial_points(self, tpe: TPE, monkeypatch):
        """Test that initial points can be sampled correctly

        ``tpe.space.sample`` is replaced by a stub that hands out a fixed list
        of pre-built trials in order, so that the suggested trials can be
        compared against known points.
        """
        _points = [(i, i - 6, "c") for i in range(1, 12)]
        _trials = [
            format_trials.tuple_to_trial(point, space=tpe.space)
            for point in _points
        ]
        # Position of the next pre-built trial handed out by the stub below.
        index = 0

        def sample(num: int = 1, seed=None) -> list[Trial]:
            # Return the next `num` pre-built trials; `seed` is ignored.
            nonlocal index
            result = _trials[index:index + num]
            index += num
            return result

        monkeypatch.setattr(tpe.space, "sample", sample)

        tpe.n_initial_points = 10
        results = numpy.random.random(10)
        for i in range(1, 11):
            trials = tpe.suggest(1)
            assert trials is not None
            trial = trials[0]
            # NOTE(review): this compares `trial.params` with the Trial object
            # at index `i`, whereas the point check below matches
            # `_points[i - 1]` -- confirm this is the intended comparison.
            assert trial.params == _trials[i]
            point = format_trials.trial_to_tuple(trial, space=tpe.space)
            assert point == (i, i - 6, "c")
            # Attach a random objective so that the trial can be observed.
            trial.results = [
                Trial.Result(name="objective",
                             type="objective",
                             value=results[i - 1])
            ]
            tpe.observe([trial])

        # One more suggestion after the `n_initial_points` observed above.
        trials = tpe.suggest(1)
        assert trials is not None
        trial = trials[0]
        assert trial == _trials[-1]
        # BUG: This is failing. We expect this trial to be sampled from the model, not from the
        # search space.
        assert format_trials.trial_to_tuple(trial,
                                            space=tpe.space) != (11, 5, "c")
示例#11
0
def flatten_numpy(trials_array, flattened_space):
    """Transform every row of ``trials_array`` into ``flattened_space``.

    The last column of each row is excluded from the transformation and is
    appended unchanged to the transformed points.
    """
    flattened_rows = []
    for point in trials_array:
        trial = format_trials.tuple_to_trial(point[:-1],
                                             flattened_space.original)
        flattened_trial = flattened_space.transform(trial)
        flattened_rows.append(
            format_trials.trial_to_tuple(flattened_trial, flattened_space))

    flattened_points = numpy.array(flattened_rows)
    last_column = trials_array[:, -1:]
    return numpy.concatenate((flattened_points, last_column), axis=1)
示例#12
0
    def observe(self, trials):
        """Record the point and gradient of the most recent trial.

        Only the last element of `trials` is considered, and only when its
        status is "completed". In that case the current point, the gradient
        and the `has_observed_once` flag are updated; otherwise nothing
        changes. An empty `trials` is ignored.

        :param trials: sequence of trials, the last one being the one to
           observe.
        """
        # Guard against an empty sequence, for which `trials[-1]` would raise.
        if len(trials) == 0:
            return

        last_trial = trials[-1]
        if last_trial.status != "completed":
            return

        self.current_point = numpy.asarray(
            format_trials.trial_to_tuple(last_trial, self.space))
        self.gradient = numpy.asarray(last_trial.gradient.value)
        self.has_observed_once = True
示例#13
0
    def update(self):
        """Let the algorithm observe the completed trials of the experiment.

        The completed trials are fetched from ``self.experiment`` and
        converted to points and results before being observed. Nothing is
        observed when there is no completed trial.
        """
        log.debug("### Fetch trials to observe:")
        completed_trials = self.experiment.fetch_completed_trials()
        log.debug("### %s", completed_trials)

        if not completed_trials:
            return

        log.debug("### Convert them to list of points and their results.")
        points = [
            format_trials.trial_to_tuple(trial, self.space)
            for trial in completed_trials
        ]
        results = [
            format_trials.get_trial_results(trial)
            for trial in completed_trials
        ]

        log.debug("### Observe them.")
        self.algorithm.observe(points, results)
示例#14
0
    def _update_naive_algorithm(self, incomplete_trials):
        """Refresh the naive algorithm with lies for the incomplete trials.

        The naive algorithm and its trial history are rebuilt as deep copies
        of the real ones. The trials returned by ``self._produce_lies`` are
        then added to the naive history and observed by the naive algorithm.
        """
        self.naive_algorithm = copy.deepcopy(self.algorithm)
        self.naive_trials_history = copy.deepcopy(self.trials_history)

        log.debug("### Create fake trials to observe:")
        lying_trials = self._produce_lies(incomplete_trials)
        log.debug("### %s", lying_trials)

        if not lying_trials:
            return

        log.debug("### Convert them to list of points and their results.")
        points = [
            format_trials.trial_to_tuple(trial, self.space)
            for trial in lying_trials
        ]
        results = [
            format_trials.get_trial_results(trial) for trial in lying_trials
        ]

        log.debug("### Observe them.")
        self.naive_trials_history.update(lying_trials)
        self.naive_algorithm.observe(points, results)
示例#15
0
    def test_suggest_ei_candidates(self, tpe: TPE):
        """Check that nothing is suggested without EI candidates after warm-up."""
        tpe.n_initial_points = 2
        tpe.n_ei_candidates = 0

        # Observe two trials, each with a random objective.
        for objective in numpy.random.random(2):
            trials = tpe.suggest(1)
            assert trials is not None
            assert len(trials) == 1
            point = format_trials.trial_to_tuple(trials[0], space=tpe.space)
            assert len(point) == 3
            assert not isinstance(point[0], tuple)
            trials[0] = _add_result(trials[0], objective)
            tpe.observe(trials)

        # With zero EI candidates no trial is suggested.
        assert not tpe.suggest(1)

        # With candidates available, at least one trial is suggested.
        tpe.n_ei_candidates = 24
        suggested = tpe.suggest(1)
        assert suggested is not None
        assert len(suggested) > 0
示例#16
0
def _trial_to_array(trial: Trial, space: Space) -> np.ndarray:
    """Convert ``trial`` to a tuple for ``space`` and wrap it in a numpy array."""
    point = format_trials.trial_to_tuple(trial, space=space)
    return np.array(point)
示例#17
0
 def suggest(pool_size):
     """Return a one-element list holding the last fetched trial as a tuple.

     ``pool_size`` is accepted but not used.
     """
     latest_trial = experiment.fetch_trials()[-1]
     return [trial_to_tuple(latest_trial, experiment.space)]
示例#18
0
def test_partial_dependency_grid(hspace):
    """Test the computation of the averages and stds

    The grid is computed for one parameter, for two numerical parameters and
    for a numerical parameter paired with "z", checking the grid values and
    the shapes and corner values of the averages and stds each time.
    """

    flattened_space = flatten_space(hspace)

    n_points = 5
    n_samples = 20
    # Sample trials from the flattened space and lay them out as a DataFrame
    # with one column per flattened dimension.
    samples = [
        format_trials.trial_to_tuple(trial, flattened_space)
        for trial in flattened_space.sample(n_samples)
    ]
    samples = pd.DataFrame(samples, columns=flattened_space.keys())

    # NOTE(review): `params` is not used below -- confirm it can be dropped.
    params = ["x", "y[0]", "y[2]", "z"]

    # Test for 1 param
    grid, averages, stds = partial_dependency_grid(flattened_space,
                                                   mock_model(), ["x"],
                                                   samples,
                                                   n_points=n_points)

    assert list(grid.keys()) == ["x"]
    assert list(grid["x"]) == [0, 1.5, 3, 4.5, 6]
    assert averages.shape == (n_points, )
    assert stds.shape == (n_points, )
    # NOTE(review): the expected values below presumably rely on
    # `mock_model()` predicting consecutive integers across calls -- confirm
    # against its definition.
    assert averages[0] == numpy.arange(n_samples).mean()
    assert (averages[4] == numpy.arange(n_samples * (n_points - 1),
                                        n_samples * n_points).mean())
    assert stds[0] == numpy.arange(n_samples).std()

    # Test for 2 param
    grid, averages, stds = partial_dependency_grid(flattened_space,
                                                   mock_model(), ["x", "y[0]"],
                                                   samples,
                                                   n_points=n_points)

    assert list(grid.keys()) == ["x", "y[0]"]
    assert list(grid["x"]) == [0, 1.5, 3, 4.5, 6]
    # assert list(grid["y[0]"]) == [0, 0.75, 1.5, 2.25, 3]
    # The grid of "y[0]" is compared approximately (4 decimals) against a
    # linear spacing between log(e) and log(e ** 4).
    numpy.testing.assert_almost_equal(
        grid["y[0]"],
        numpy.linspace(
            numpy.log(float(f"{numpy.e}")),
            numpy.log(float(f"{numpy.e}")**4),
            num=n_points,
        ),
        decimal=4,
    )

    assert averages.shape == (n_points, n_points)
    assert stds.shape == (n_points, n_points)
    assert averages[0, 0] == numpy.arange(n_samples).mean()
    assert (averages[4, 4] == numpy.arange(
        n_samples * n_points * n_points - n_samples,
        n_samples * n_points * n_points).mean())
    assert stds[0, 0] == numpy.arange(n_samples).std()
    assert stds[4, 4] == numpy.arange(n_samples).std()

    # Test for 2 param with one categorical, with less categories than n_points
    grid, averages, stds = partial_dependency_grid(flattened_space,
                                                   mock_model(), ["x", "z"],
                                                   samples,
                                                   n_points=n_points)

    assert list(grid.keys()) == ["x", "z"]
    assert list(grid["x"]) == [0, 1.5, 3, 4.5, 6]
    # Only three grid values are expected for "z", hence the (3, n_points)
    # shapes below.
    assert list(grid["z"]) == [0, 1, 2]
    assert averages.shape == (3, n_points)
    assert stds.shape == (3, n_points)

    assert averages[0, 0] == numpy.arange(n_samples).mean()
    assert (averages[2,
                     4] == numpy.arange(n_samples * 3 * n_points - n_samples,
                                        n_samples * 3 * n_points).mean())
    assert stds[0, 0] == numpy.arange(n_samples).std()
    assert stds[2, 4] == numpy.arange(n_samples).std()
示例#19
0
    def reserve_trial(self, score_handle=None, _depth=1):
        """Find *new* trials that exist currently in database and select one of
        them based on the highest score return from `score_handle` callable.

        `self.fix_lost_trials()` is called first. Trials with status 'new',
        'suspended' or 'interrupted' are then candidates. When `score_handle`
        is given and the parameter space is defined, only the candidates
        sharing the highest score are kept; one of the remaining candidates
        is picked at random and updated to status 'reserved' with a fresh
        heartbeat. If the update does not go through, the reservation is
        attempted again with an increased `_depth`.

        :param score_handle: A way to decide which trial out of the *new* ones
           to pick as *reserved*, defaults to a random choice. It is called
           with the tuple produced by `trial_to_tuple` for each candidate.
        :type score_handle: callable
        :param _depth: recursion depth only used for logging purposes can be ignored
        :return: selected `Trial` object, None if could not find any.
        :raises ValueError: if `score_handle` is given but is not callable.
        """
        log.debug('%s reserving trial with (score: %s)', '<' * _depth,
                  score_handle)
        if score_handle is not None and not callable(score_handle):
            raise ValueError(
                "Argument `score_handle` must be callable with a `Trial`.")

        self.fix_lost_trials()

        query = dict(experiment=self._id,
                     status={'$in': ['new', 'suspended', 'interrupted']})

        new_trials = self.fetch_trials(query)
        log.debug('%s Fetched (trials: %s)', '<' * _depth, len(new_trials))

        if not new_trials:
            log.debug('%s no new trials found', '<' * _depth)
            return None

        if score_handle is not None and self.space:
            scores = [
                score_handle(trial_to_tuple(trial, self.space))
                for trial in new_trials
            ]
            # Compute the best score once; it used to be recomputed for every
            # candidate while filtering.
            best_score = max(scores)
            new_trials = [
                trial for score, trial in zip(scores, new_trials)
                if score == best_score
            ]
        elif score_handle is not None:
            log.warning(
                "While reserving trial: `score_handle` was provided, but "
                "parameter space has not been defined yet.")

        selected_trial = random.sample(new_trials, 1)[0]
        log.debug('%s selected (trial: %s)', '<' * _depth, selected_trial)

        update = dict(status='reserved', heartbeat=datetime.datetime.utcnow())

        # The start time is only set the first time a trial gets reserved.
        if selected_trial.status == 'new':
            update["start_time"] = datetime.datetime.utcnow()

        # Query on status to ensure atomicity. If another process change the
        # status meanwhile, update will fail, because query will fail.
        # This relies on the atomicity of document updates.

        log.debug('%s trying to reserve trial', '<' * _depth)
        reserved = self._storage.update_trial(
            selected_trial, **update, where={'status': selected_trial.status})

        if not reserved:
            # Lost the race for this trial: start over with fresh candidates.
            selected_trial = self.reserve_trial(score_handle=score_handle,
                                                _depth=_depth + 1)
        else:
            log.debug('%s found suitable trial', '<' * _depth)
            selected_trial = self.fetch_trials({'_id': selected_trial.id})[0]

        log.debug('%s reserved trial (trial: %s)', '<' * _depth,
                  selected_trial)
        return selected_trial
示例#20
0
def partial_dependency(trials,
                       space,
                       params=None,
                       model="RandomForestRegressor",
                       n_grid_points=10,
                       n_samples=50,
                       **kwargs):
    """
    Calculates the partial dependency of parameters in a collection of
    :class:`orion.core.worker.trial.Trial`.

    Parameters
    ----------
    trials: DataFrame or dict
        A dataframe of trials containing, at least, the columns 'objective' and 'id'. Or a dict
        equivalent, which is converted with ``pandas.DataFrame(trials)``.

    space: Space object
        A space object from an experiment.

    params: list of str, optional
        The parameters to include in the computation. All parameters are included by default.

    model: str
        Name of the regression model to use. Can be one of
        - AdaBoostRegressor
        - BaggingRegressor
        - ExtraTreesRegressor
        - GradientBoostingRegressor
        - RandomForestRegressor (Default)

    n_grid_points: int
        Number of points in the grid to compute partial dependency. Default is 10.

    n_samples: int
        Number of samples to randomly generate the grid used to compute the partial dependency.
        Default is 50.

    **kwargs
        Arguments for the regressor model.

    Returns
    -------
    dict
        Dictionary with one entry per parameter (key ``dim1.name``) and one entry per pair of
        parameters (key ``(dim1.name, dim2.name)``). Each value is the tuple
        ``(grid, averages, stds)`` computed by ``partial_dependency_grid``, with the grid
        passed through ``reverse``. An empty dict is returned when there are no trials.

    """
    params = flatten_params(space, params)

    flattened_space = build_required_space(
        space,
        dist_requirement="linear",
        type_requirement="numerical",
        shape_requirement="flattened",
    )

    # The documented contract accepts a dict equivalent of the dataframe, but
    # a plain dict has no `empty` attribute: normalize it first.
    if isinstance(trials, dict):
        trials = pandas.DataFrame(trials)

    if trials.empty:
        return {}

    # Train the regressor on the observed trials, in the flattened space.
    observed = to_numpy(trials, space)
    observed = flatten_numpy(observed, flattened_space)
    model = train_regressor(model, observed, **kwargs)

    # Random samples of the flattened space, used to compute the grids.
    samples = [
        format_trials.trial_to_tuple(trial, flattened_space)
        for trial in flattened_space.sample(n_samples)
    ]
    samples = pandas.DataFrame(samples, columns=flattened_space.keys())

    partial_dependencies = {}
    for x_i, x_name in enumerate(params):
        # Single-parameter dependency.
        grid, averages, stds = partial_dependency_grid(flattened_space, model,
                                                       [x_name], samples,
                                                       n_grid_points)
        grid = reverse(flattened_space, grid)
        partial_dependencies[x_name] = (grid, averages, stds)

        # Pairwise dependencies with every parameter that comes after it.
        for y_name in params[x_i + 1:]:
            grid, averages, stds = partial_dependency_grid(
                flattened_space, model, [x_name, y_name], samples,
                n_grid_points)
            grid = reverse(flattened_space, grid)
            partial_dependencies[(x_name, y_name)] = (grid, averages, stds)

    return partial_dependencies
示例#21
0
def test_hierarchical_trial_to_tuple(hierarchical_space, hierarchical_trial,
                                     params_tuple):
    """Check that a hierarchical trial converts to the expected tuple."""
    converted = trial_to_tuple(hierarchical_trial, hierarchical_space)
    expected = params_tuple
    assert converted == expected
示例#22
0
def test_hierarchical_trial_to_tuple(hierarchical_space, hierarchical_trial,
                                     fixed_suggestion):
    """Check that a hierarchical trial converts to the expected tuple."""
    converted = trial_to_tuple(hierarchical_trial, hierarchical_space)
    expected = fixed_suggestion
    assert converted == expected