Exemplo n.º 1
0
    def grid_scores_(self):
        grid_scores = list()

        scores_per_config = defaultdict(list)
        config_list = list()

        for run_key in self.runhistory_.data:
            run_value = self.runhistory_.data[run_key]

            config_id = run_key.config_id
            cost = run_value.cost

            if config_id not in config_list:
                config_list.append(config_id)

            scores_per_config[config_id].append(cost)

        for config_id in config_list:
            scores = [1 - score for score in scores_per_config[config_id]]
            mean_score = np.mean(scores)
            config = self.runhistory_.ids_config[config_id]

            grid_score = _CVScoreTuple(config.get_dictionary(), mean_score,
                                       scores)
            grid_scores.append(grid_score)

        return grid_scores
Exemplo n.º 2
0
    def grid_scores_(self):
        warnings.warn(
            "The grid_scores_ attribute was deprecated in version 0.18"
            " in favor of the more elaborate cv_results_ attribute."
            " The grid_scores_ attribute will not be available from 0.20",
            DeprecationWarning)

        check_is_fitted(self, 'cv_results_')
        grid_scores = list()

        for i, (params, mean, std) in enumerate(
                zip(self.cv_results_['params'],
                    self.cv_results_['mean_test_score'],
                    self.cv_results_['std_test_score'])):
            scores = np.array(list(self.cv_results_['split%d_test_score' %
                                                    s][i]
                                   for s in range(self.n_splits_)),
                              dtype=np.float64)
            grid_scores.append(_CVScoreTuple(params, mean, scores))

        return grid_scores
Exemplo n.º 3
0
    def grid_scores_(self):
        import numpy as np
        if self.multimetric_:
            raise AttributeError("grid_scores_ attribute is not available for"
                                 " multi-metric evaluation.")
        warnings.warn(
            "The grid_scores_ attribute was deprecated in version 0.18"
            " in favor of the more elaborate cv_results_ attribute."
            " The grid_scores_ attribute will not be available from 0.20",
            DeprecationWarning)

        grid_scores = list()

        for i, (params, mean, std) in enumerate(
                zip(self.cv_results_['params'],
                    self.cv_results_['mean_test_score'],
                    self.cv_results_['std_test_score'])):
            scores = np.array(list(self.cv_results_['split%d_test_score' %
                                                    s][i]
                                   for s in range(self.n_splits)),
                              dtype=np.float64)
            grid_scores.append(_CVScoreTuple(params, mean, scores))

        return grid_scores
Exemplo n.º 4
0
    def fit(self, X, y=None, labels=None):
        #return self._fit(
        #    X, y, labels,
        #    parameter_iterable # parameter_iterable, \in Sized, it actually does len(parameter_iterable) in _fit
        #)

        # FIXME code duplication from BaseSearchCV._fit
        estimator = self.estimator
        cv = _split.check_cv(self.cv, y, classifier=is_classifier(estimator))
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        n_samples = _num_samples(X)
        X, y, labels = indexable(X, y, labels)

        if y is not None:
            if len(y) != n_samples:
                raise ValueError('Target variable (y) has a different number '
                                  'of samples (%i) than data (X: %i samples)'
                                  % (len(y), n_samples))

        n_splits = cv.get_n_splits(X, y, labels)

        if self.verbose > 0 and isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(n_splits, n_candidates,
                                     n_candidates * n_splits))

        base_estimator = clone(self.estimator)

        pre_dispatch = self.pre_dispatch
        # FIXME how to handle pre_dispatch


        # FIXME recursively getting new parameters to evaluate

#        parameter_iterable = ...  # the magic
#
#        # The evaluation (Parallel) stuff
#        out = Parallel(
#            n_jobs=self.n_jobs, verbose=self.verbose,
#            pre_dispatch=pre_dispatch
#        )(delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
#                                  train, test, self.verbose, parameters,
#                                  self.fit_params, return_parameters=True,
#                                  error_score=self.error_score)
#            for parameters in parameter_iterable
#            for train, test in cv.split(X, y, labels))
#

        # n_fits on each (train, test)
        def cross_validation(raw_parameters):
            parameters = dict(zip(
                self.param_grid.keys(), raw_parameters
            ))  # TODO more robust way of doing this
            print(parameters)

            return Parallel(
                n_jobs=self.n_jobs, verbose=self.verbose,
                pre_dispatch=pre_dispatch
            )(delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
                                      train, test, self.verbose, parameters,
                                      self.fit_params, return_parameters=True,
                                      error_score=self.error_score)
               for train, test in cv.split(X, y, labels))

        x = cartesian_product(*self.param_grid.values())

        # FIXME implement as non-recursive
        def bo_(x_obs, y_obs, n_iter):
            if n_iter > 0:
                kernel = kernels.Matern() + kernels.WhiteKernel()
                gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=16)
                gp.fit(x_obs, 1-y_obs)

                a = a_EI(gp, x_obs=x_obs, y_obs=1-y_obs)

                argmax_f_x_ = x[np.argmax(a(x))]

                # heavy evaluation
                f_argmax_f_x_ = cross_validation(argmax_f_x_)

                y_ob = np.atleast_2d(mean_mean_validation_scores(f_argmax_f_x_)).T

                return f_argmax_f_x_ + bo_(
                    x_obs=np.vstack((x_obs, argmax_f_x_)),
                    y_obs=np.vstack((y_obs, y_ob)),
                    n_iter=n_iter-1,
                )

            else:
                return []


        # FIXME (most informative) decision like Numerical Probabilistics stuff for integrations
        # sobol initilization?

        sampled_x_ind = np.random.choice(
            x.shape[0],
            size=self.n_initial_points,
            replace=False,
        )
        print(sampled_x_ind)

        x_obs = x[sampled_x_ind]
        f_x_obs = list(map(cross_validation, x_obs))

        y_obs = np.atleast_2d(list(map(mean_mean_validation_scores, f_x_obs))).T

        out = sum(f_x_obs, []) + bo_(x_obs, y_obs, n_iter=self.n_iter)

        n_fits = len(out)

        scores = list()
        grid_scores = list()
        for grid_start in range(0, n_fits, n_splits):
            n_test_samples = 0
            score = 0
            all_scores = []
            for this_score, this_n_test_samples, _ , parameters in \
                    out[grid_start:grid_start + n_splits]:
                all_scores.append(this_score)
                if self.iid:
                    this_score *= this_n_test_samples
                    n_test_samples += this_n_test_samples
                score += this_score
            if self.iid:
                score /= float(n_test_samples)
            else:
                score /= float(n_splits)
            scores.append((score, parameters))

            grid_scores.append(_search._CVScoreTuple(
                parameters,
                score,
                np.array(all_scores)))

        self.grid_scores_ = grid_scores

        best = sorted(grid_scores, key=lambda x: x.mean_validation_score,
                      reverse=True)[0]

        self.best_params_ = best.parameters
        self.best_score_ = best.mean_validation_score

        if self.refit:
            best_estimator = clone(base_estimator).set_params(
                **best.parameters)
            if y is not None:
                best_estimator.fit(X, y, **self.fit_params)
            else:
                best_estimator.fit(X, **self.fit_params)
            self.best_estimator_ = best_estimator

        return self
Exemplo n.º 5
0
    def _fit(self, Z, parameter_iterable):
        """Actual fitting,  performing the search over parameters."""
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        cv = self.cv
        cv = _check_cv(cv, Z)

        if self.verbose > 0:
            if isinstance(parameter_iterable, Sized):
                n_candidates = len(parameter_iterable)
                print("Fitting {0} folds for each of {1} candidates, totalling"
                      " {2} fits".format(len(cv), n_candidates,
                                         n_candidates * len(cv)))

        base_estimator = clone(self.estimator)

        pre_dispatch = self.pre_dispatch

        out = Parallel(
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            pre_dispatch=pre_dispatch,
            backend="threading")(
                delayed(_fit_and_score)(clone(base_estimator),
                                        Z,
                                        self.scorer_,
                                        train,
                                        test,
                                        self.verbose,
                                        parameters,
                                        self.fit_params,
                                        return_parameters=True,
                                        error_score=self.error_score)
                for parameters in parameter_iterable for train, test in cv)

        # Out is a list of triplet: score, estimator, n_test_samples
        n_fits = len(out)
        n_folds = len(cv)

        scores = list()
        grid_scores = list()
        for grid_start in range(0, n_fits, n_folds):
            n_test_samples = 0
            score = 0
            all_scores = []
            for this_score, this_n_test_samples, _, parameters in \
                    out[grid_start:grid_start + n_folds]:
                all_scores.append(this_score)
                if self.iid:
                    this_score *= this_n_test_samples
                    n_test_samples += this_n_test_samples
                score += this_score
            if self.iid:
                score /= float(n_test_samples)
            else:
                score /= float(n_folds)
            scores.append((score, parameters))
            # TODO: shall we also store the test_fold_sizes?
            grid_scores.append(
                _CVScoreTuple(parameters, score, np.array(all_scores)))
        # Store the computed scores
        self.grid_scores_ = grid_scores

        # Find the best parameters by comparing on the mean validation score:
        # note that `sorted` is deterministic in the way it breaks ties
        best = sorted(grid_scores,
                      key=lambda x: x.mean_validation_score,
                      reverse=True)[0]
        self.best_params_ = best.parameters
        self.best_score_ = best.mean_validation_score

        if self.refit:
            # fit the best estimator using the entire dataset
            # clone first to work around broken estimators
            best_estimator = clone(base_estimator).set_params(
                **best.parameters)
            best_estimator.fit(Z, **self.fit_params)
            self.best_estimator_ = best_estimator
        return self