def set_params(self, **params):
        """Set parameters for the wrapper and the wrapped estimator.

        This method is required for compatibility with GridSearchCV.
        :param params: A dictionary of parameters for the wrapper and wrapped estimator.
         If a key doesn't match the name of a wrapper parameter, it is assumed to be
         for the wrapped estimator.
         TODO: it would be better to do what sklearn's pipeline does and provide some
         namespacing in case the wrapper and wrapped class share a parameter name
        :return: self
        """

        if not params:
            return self
        valid_params = self.get_params(deep=True)
        model_params = self.model_params
        wrapper_params = {}
        for key, value in params.items():
            if key in valid_params:
                wrapper_params[key] = value
            else:
                model_params[key] = value

        wrapper_params['model_params'] = model_params
        BaseEstimator.set_params(self, **wrapper_params)
        return self
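The TODO above can be sketched concretely. Below is a minimal namespaced variant in the spirit of sklearn's Pipeline, assuming a 'model__' prefix; the prefix name is an assumption, not part of the original code:

def set_params(self, **params):
    """Sketch of the namespaced variant suggested in the TODO above."""
    if not params:
        return self
    model_params = self.model_params
    wrapper_params = {}
    for key, value in params.items():
        if key.startswith('model__'):
            # Explicit prefix: strip it and route to the wrapped estimator,
            # so shared parameter names can no longer collide.
            model_params[key[len('model__'):]] = value
        else:
            wrapper_params[key] = value
    wrapper_params['model_params'] = model_params
    BaseEstimator.set_params(self, **wrapper_params)
    return self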
Example #2
    def set_params(self, **params):
        """
        Custom setting of parameters for generative models.
        Parameters whose names start with 'augm', 'x_prep', 'y_prep',
        'y_post' or 'model' are delegated to the respective sub-estimator;
        all remaining parameters are set on this estimator itself.
        """

        elements = {'augm', 'X_prep', 'Y_prep', 'Y_post', 'model'}

        self_params = {
            k: v
            for k, v in params.items()
            if not any(k.startswith(p.lower()) for p in elements)
        }

        BaseEstimator.set_params(self, **self_params)

        # set attributes of elements
        for e in elements:
            element = getattr(self, e)

            if isinstance(element, BaseEstimator):
                # Strip the '<element>__' prefix (element name plus two
                # underscores) before forwarding to the sub-estimator.
                subprm = {
                    k[len(e) + 2:]: v
                    for k, v in params.items() if k.startswith(e.lower())
                }

                element.set_params(**subprm)

        return self
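For illustration, a hypothetical call against such an estimator; the parameter names below are assumptions, not from the original code:

est.set_params(
    x_prep__with_mean=False,  # forwarded to est.X_prep.set_params(with_mean=False)
    model__lr=1e-3,           # forwarded to est.model.set_params(lr=1e-3)
    batch_size=64,            # matches no element prefix: set on est itself
)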
Example #3
File: pipe.py  Project: dmbee/seglearn
    def set_params(self, **params):
        """
        Set the parameters of this estimator.
        Valid parameter keys can be listed with ``get_params()``.

        Returns
        -------
        self
        """
        items = self.steps
        names, _ = zip(*items)

        keys = list(params.keys())

        for name in keys:
            if '__' not in name and name in names:
                # replace an estimator (popped so it isn't processed below)
                self._replace_estimator('steps', name, params.pop(name))
            elif callable(params[name]):
                # use a callable or function to set parameters
                params[name] = params[name](params)
            elif params[name] in keys:
                # set one arg from another
                params[name] = params[params[name]]

        BaseEstimator.set_params(self, **params)
        return self
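The two special cases above (callable values and key references) let one parameter be derived from another. A hypothetical call, assuming 'seg__width' and 'seg__overlap' are valid parameters of the pipeline:

pipe.set_params(
    seg__width=10,
    # callable: evaluated against the full params dict before being set
    seg__overlap=lambda p: p['seg__width'] // 2,
)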
Example #4
    def set_params(self, **kwargs):
        """Set the parameters of this class.

        Valid parameter keys can be listed with ``get_params()``.

        Returns
        -------
        self

        """
        normal_params, special_params = {}, {}
        for key, val in kwargs.items():
            if any(key.startswith(prefix) for prefix in self.prefixes_):
                special_params[key] = val
            else:
                normal_params[key] = val
        BaseEstimator.set_params(self, **normal_params)

        for key, val in special_params.items():
            if key.endswith('_'):
                raise ValueError(
                    "Invalid parameter name: '{}'; learned attributes ending "
                    "in an underscore cannot be set.".format(key))
            else:
                setattr(self, key, val)

        if any(key.startswith('criterion') for key in special_params):
            self.initialize_criterion()
        if any(key.startswith('callbacks') for key in special_params):
            self.initialize_callbacks()
        if any(key.startswith('module') for key in special_params):
            self.initialize_module()
            self.initialize_optimizer()
        if any(key.startswith('optimizer') for key in special_params):
            self.initialize_optimizer()

        return self
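A hypothetical call against such a class, assuming prefixes_ contains 'module' and 'optimizer' (illustrative names):

net.set_params(
    verbose=1,                # plain parameter, handled by BaseEstimator.set_params
    module__num_units=20,     # 'module' prefix: module and optimizer are re-initialized
    optimizer__momentum=0.9,  # 'optimizer' prefix: optimizer is re-initialized
)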
Example #6
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds,
                            degree_range, lambda_range):
        """
        Cross-validate to find best hyperparameters with k-fold CV.
        :param X: Training data.
        :param y: Training targets.
        :param model: sklearn model.
        :param lambda_range: Range of values for the regularization hyperparam.
        :param degree_range: Range of values for the degree hyperparam.
        :param k_folds: Number of folds for splitting the training data into.
        :return: A dict containing the best model parameters,
            with some of the keys as returned by model.get_params()
        """

        # TODO: Do K-fold cross validation to find the best hyperparameters
        #  Notes:
        #  - You can implement it yourself or use the built in sklearn utilities
        #    (recommended). See the docs for the sklearn.model_selection package
        #    http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
        #  - If your model has more hyperparameters (not just lambda and degree)
        #    you should add them to the search.
        #  - Use get_params() on your model to see what hyperparameters is has
        #    and their names. The parameters dict you return should use the same
        #    names as keys.
        #  - You can use MSE or R^2 as a score.

        # ====== YOUR CODE: ======
        kf = sklearn.model_selection.KFold(k_folds)
        smallest_loss = np.inf
        best_params = {"bostonfeaturestransformer__degree": 1,
                       "linearregressor__reg_lambda": 0.2}

        for lam in lambda_range:
            for deg in degree_range:
                model.set_params(linearregressor__reg_lambda=lam,
                                 bostonfeaturestransformer__degree=deg)
                avg_mse = 0.0

                for train_i, test_i in kf.split(X):
                    model.fit(X[train_i], y[train_i])
                    y_pred = model.predict(X[test_i])
                    # mean squared error on the held-out fold
                    avg_mse += np.mean(np.square(y[test_i] - y_pred))

                avg_mse /= k_folds

                # check if the current params are the best so far
                if avg_mse <= smallest_loss:
                    smallest_loss = avg_mse
                    best_params = {"linearregressor__reg_lambda": lam,
                                   "bostonfeaturestransformer__degree": deg}
        # ========================
        return best_params
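As the notes above suggest, the built-in sklearn utilities can replace the manual loop. A minimal sketch with GridSearchCV, assuming the same pipeline parameter names:

from sklearn.model_selection import GridSearchCV

param_grid = {
    'bostonfeaturestransformer__degree': degree_range,
    'linearregressor__reg_lambda': lambda_range,
}
search = GridSearchCV(model, param_grid,
                      scoring='neg_mean_squared_error', cv=k_folds)
search.fit(X, y)
best_params = search.best_params_  # same dunder-key format as above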
Example #7
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds, degree_range,
                        lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    #
    # Notes:
    # - You can implement it yourself or use the built in sklearn utilities
    #   (recommended). See the docs for the sklearn.model_selection package
    #   http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    # - If your model has more hyperparameters (not just lambda and degree)
    #   you should add them to the search.
    # - Use get_params() on your model to see what hyperparameters is has
    #   and their names. The parameters dict you return should use the same
    #   names as keys.
    # - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    # KFold and ParameterGrid come from sklearn.model_selection.
    params = {
        'linearregressor__reg_lambda': lambda_range,
        'bostonfeaturestransformer__degree': degree_range
    }

    kf = KFold(n_splits=k_folds)
    best_params = ParameterGrid(params)[0]
    best_r_2 = 0.0

    for p_dict in ParameterGrid(params):
        cur_mse = 0.0
        cur_r_2 = 0.0
        model.set_params(**p_dict)
        for train_index, test_index in kf.split(X):
            model.fit(X[train_index], y=y[train_index])
            # evaluate_accuracy is a project helper returning (mse, r^2)
            mse, rsq = evaluate_accuracy(y[test_index],
                                         model.predict(X[test_index]))
            cur_mse += mse
            cur_r_2 += rsq

        cur_mse /= k_folds
        cur_r_2 /= k_folds

        if cur_r_2 > best_r_2:
            best_r_2 = cur_r_2
            best_params = p_dict
    # ========================

    return best_params
Example #8
def set_cpu_params(estimator: BaseEstimator, num_cpus: int) -> None:
    """Sets all CPU-related params to num_cpus (incl. nested)."""
    cpu_params = {
        param: num_cpus
        for param in estimator.get_params(deep=True) if any(
            param.endswith(cpu_param_name)
            for cpu_param_name in SKLEARN_CPU_PARAM_NAMES)
    }
    estimator.set_params(**cpu_params)
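A hypothetical usage, assuming SKLEARN_CPU_PARAM_NAMES (defined elsewhere in the project) includes the usual 'n_jobs'-style names:

from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

pipe = make_pipeline(StandardScaler(), RandomForestClassifier())
set_cpu_params(pipe, num_cpus=4)
# deep get_params() exposes nested keys such as
# 'randomforestclassifier__n_jobs', each of which is now set to 4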
Example #9
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds, degree_range,
                        lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    #
    # Notes:
    # - You can implement it yourself or use the built in sklearn utilities
    #   (recommended). See the docs for the sklearn.model_selection package
    #   http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    # - If your model has more hyperparameters (not just lambda and degree)
    #   you should add them to the search.
    # - Use get_params() on your model to see what hyperparameters is has
    #   and their names. The parameters dict you return should use the same
    #   names as keys.
    # - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    best_params = {
        'bostonfeaturestransformer__degree': 0,
        'linearregressor__reg_lambda': 0
    }
    best_score = -np.inf

    for reg_lambda in lambda_range:
        for deg in degree_range:
            hypers = {
                'bostonfeaturestransformer__degree': deg,
                'linearregressor__reg_lambda': reg_lambda
            }
            model.set_params(**hypers)

            scores = sklearn.model_selection.cross_validate(model,
                                                            X,
                                                            y,
                                                            scoring='r2',
                                                            cv=k_folds)
            mean_score = np.mean(scores['test_score'])

            if mean_score > best_score:
                best_score = mean_score
                best_params['linearregressor__reg_lambda'] = reg_lambda
                best_params['bostonfeaturestransformer__degree'] = deg
    # ========================

    return best_params
Example #10
    def set_params(self, **kwargs):
        """Update the parameters of the feature extractor."""

        # We don't want non-functional arguments polluting kwargs
        params = kwargs.copy()
        for k in ['function', 'target']:
            params.pop(k, None)

        self.kwargs.update(params)
        return BaseEstimator.set_params(self, **kwargs)
Example #12
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds, degree_range,
                        lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    #
    # Notes:
    # - You can implement it yourself or use the built in sklearn utilities
    #   (recommended). See the docs for the sklearn.model_selection package
    #   http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    # - If your model has more hyperparameters (not just lambda and degree)
    #   you should add them to the search.
    # - Use get_params() on your model to see what hyperparameters is has
    #   and their names. The parameters dict you return should use the same
    #   names as keys.
    # - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    kf = sklearn.model_selection.KFold(n_splits=k_folds)
    best_params = None
    min_mse = np.inf
    for curr_degree in degree_range:
        for curr_lambda in lambda_range:
            params = dict(linearregressor__reg_lambda=curr_lambda,
                          bostonfeaturestransformer__degree=curr_degree)
            model.set_params(**params)
            mse = 0
            counter = 0
            for train_index, test_index in kf.split(X):
                counter += 1
                model.fit(X[train_index], y[train_index])
                y_pred = model.predict(X[test_index])
                mse += np.mean((y[test_index] - y_pred)**2)

            avg_mse = mse / counter
            print("avg_mse:", avg_mse, " lambda:", curr_lambda, " degree:",
                  curr_degree)
            if avg_mse < min_mse:
                best_params = params
                min_mse = avg_mse
    # ========================

    return best_params
Example #13
    def set_params(self, **params):
        if 'use_feature_selection' in params and not params['use_feature_selection']:
            # feature selection disabled: keep all features
            BaseEstimator.set_params(self, k='all')
        return BaseEstimator.set_params(self, **params)
Example #14
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds, degree_range,
                        lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    #  Notes:
    #  - You can implement it yourself or use the built in sklearn utilities
    #    (recommended). See the docs for the sklearn.model_selection package
    #    http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    #  - If your model has more hyperparameters (not just lambda and degree)
    #    you should add them to the search.
    #  - Use get_params() on your model to see what hyperparameters is has
    #    and their names. The parameters dict you return should use the same
    #    names as keys.
    #  - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    # r2_score comes from sklearn.metrics.
    kf = sklearn.model_selection.KFold(k_folds)
    params_grid = sklearn.model_selection.ParameterGrid({
        'bostonfeaturestransformer__degree': degree_range,
        'linearregressor__reg_lambda': lambda_range
    })
    best_params = None
    best_score = -np.inf
    for param in params_grid:
        model.set_params(**param)
        avg_score = 0.0
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            avg_score += r2_score(y_test, y_pred)
        avg_score /= k_folds
        if avg_score > best_score:
            best_score = avg_score
            best_params = param
    # ========================

    return best_params
Example #15
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds, degree_range,
                        lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    #  Notes:
    #  - You can implement it yourself or use the built in sklearn utilities
    #    (recommended). See the docs for the sklearn.model_selection package
    #    http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    #  - If your model has more hyperparameters (not just lambda and degree)
    #    you should add them to the search.
    #  - Use get_params() on your model to see what hyperparameters is has
    #    and their names. The parameters dict you return should use the same
    #    names as keys.
    #  - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    kf = sklearn.model_selection.KFold(n_splits=k_folds)
    params_grid = {
        'bostonfeaturestransformer__degree': degree_range,
        'linearregressor__reg_lambda': lambda_range
    }
    best_params = None
    min_mse = np.inf

    for params in sklearn.model_selection.ParameterGrid(params_grid):
        model.set_params(**params)
        curr_mse = 0
        for train_idx, test_idx in kf.split(X):
            train_x, train_y = X[train_idx], y[train_idx]
            test_x, test_y = X[test_idx], y[test_idx]
            model.fit(train_x, train_y)
            y_pred = model.predict(test_x)
            # mse_score is a project helper computing mean squared error
            curr_mse += mse_score(test_y, y_pred)
        mean_mse = curr_mse / k_folds
        if mean_mse < min_mse:
            min_mse = mean_mse
            best_params = params
    # ========================

    return best_params
Example #16
    def set_params(self, **kwargs):
        """Set the parameters of this class.

        Valid parameter keys can be listed with ``get_params()``.

        Returns
        -------
        self

        """
        self._check_deprecated_params(**kwargs)
        normal_params, cb_params, special_params = {}, {}, {}
        for key, val in kwargs.items():
            if key.startswith('callbacks'):
                cb_params[key] = val
            elif any(key.startswith(prefix) for prefix in self.prefixes_):
                special_params[key] = val
            else:
                normal_params[key] = val

        BaseEstimator.set_params(self, **normal_params)

        for key, val in special_params.items():
            if key.endswith('_'):
                raise ValueError(
                    "Invalid parameter name: '{}'; learned attributes ending "
                    "in an underscore cannot be set.".format(key))
            else:
                setattr(self, key, val)

        if cb_params:
            # callbacks need special treatment since they are lists of tuples
            self.initialize_callbacks()
            self._set_params_callback(**cb_params)
        if any(key.startswith('criterion') for key in special_params):
            self.initialize_criterion()
        if any(key.startswith('module') for key in special_params):
            self.initialize_module()
            self.initialize_optimizer()
        if any(key.startswith('optimizer') for key in special_params):
            # Model selectors such as GridSearchCV will set the
            # parameters before .initialize() is called, therefore we
            # need to make sure that we have an initialized model here
            # as the optimizer depends on it.
            if not hasattr(self, 'module_'):
                self.initialize_module()
            self.initialize_optimizer()

        vars(self).update(kwargs)

        return self
Example #17
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds, degree_range,
                        lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    #  Notes:
    #  - You can implement it yourself or use the built in sklearn utilities
    #    (recommended). See the docs for the sklearn.model_selection package
    #    http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    #  - If your model has more hyperparameters (not just lambda and degree)
    #    you should add them to the search.
    #  - Use get_params() on your model to see what hyperparameters is has
    #    and their names. The parameters dict you return should use the same
    #    names as keys.
    #  - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    DEGREE_PARAM = "bostonfeaturestransformer__degree"
    LAMBDA_PARAM = "linearregressor__reg_lambda"

    results = {}
    for degree in degree_range:
        for reg_lambda in lambda_range:
            params = {DEGREE_PARAM: degree, LAMBDA_PARAM: reg_lambda}
            model.set_params(**params)
            scores = sklearn.model_selection.cross_val_score(
                model, X, y, scoring="neg_mean_squared_error", cv=k_folds)
            # keyed by mean score; equal scores overwrite, which is fine here
            results[np.mean(scores)] = params

    # scoring is neg-MSE, so the largest (least negative) mean score is best
    best_params = max(results.items(), key=lambda x: x[0])[1]

    # ========================

    return best_params
Example #18
File: blending.py  Project: nyuge/xgboost
    def __init__(self, blending_regressor: BaseEstimator, model_name: str,
                 params: dict):
        super().__init__(model_name, params)
        self.blend_model = BlendingRegressor(
            blending_regressor.set_params(**params))
        self.MODELS_SERIALIZING_BASEPATH = self.path.join(
            self.MODELS_SERIALIZING_BASEPATH, MACHINE_LEARNING_TECHNIQUE_NAME)
        self.SERIALIZE_FILENAME_PREFIX = SERIALIZE_FILENAME_PREFIX
Example #19
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds,
                        degree_range, lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    #  Notes:
    #  - You can implement it yourself or use the built in sklearn utilities
    #    (recommended). See the docs for the sklearn.model_selection package
    #    http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    #  - If your model has more hyperparameters (not just lambda and degree)
    #    you should add them to the search.
    #  - Use get_params() on your model to see what hyperparameters is has
    #    and their names. The parameters dict you return should use the same
    #    names as keys.
    #  - You can use MSE or R^2 as a score.

    from sklearn.model_selection import cross_validate
    best_score = None
    best_lambda = None
    best_degree = None
    for current_degree in degree_range:
        for current_lambda in lambda_range:
            model.set_params(linearregressor__reg_lambda=current_lambda,
                             bostonfeaturestransformer__degree=current_degree)
            scores = cross_validate(model, X, y, cv=k_folds, scoring='r2')
            mean_score = np.average(scores['test_score'])
            # record the first combination as well, not only later improvements
            if best_score is None or mean_score > best_score:
                best_degree = current_degree
                best_lambda = current_lambda
                best_score = mean_score

    best_params = {}
    best_params['bostonfeaturestransformer__degree'] = best_degree
    best_params['linearregressor__reg_lambda'] = best_lambda
    return best_params
Example #20
    def set_params(self, **params):
        BaseEstimator.set_params(self, **params)

        if 'use_feature_selection' in params and not params['use_feature_selection']:
            # feature selection disabled: keep all features
            BaseEstimator.set_params(self, k='all')
        return self
Example #21
    def set_params(self, **params):
        """Set the parameters of this estimator.

        The optimizer works on simple estimators as well as on nested objects
        (such as pipelines). The latter have parameters of the form
        ``<component>__<parameter>`` so that it's possible to update each
        component of a nested object.

        Returns
        -------
        self
        """

        G_agg = params.pop('G_agg', None)
        if G_agg == 'full' and self.G_agg != 'full':
            if hasattr(self, 'components_'):
                # switching to full aggregation: precompute the Gram matrix
                # of the current dictionary
                self.G_ = self.components_.dot(self.components_.T)
            self.G_agg = 'full'
        BaseEstimator.set_params(self, **params)
        return self
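For illustration, a hypothetical call on a fitted instance (so components_ exists):

# Switching aggregation: the Gram matrix G_ is precomputed once here,
# then any remaining keyword arguments go through BaseEstimator.set_params.
est.set_params(G_agg='full')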
Example #22
    def set_params(self, **params):
        """Set the parameters of this estimator.

        Valid parameter keys can be listed with ``get_params()``.

        Returns
        -------
        self
        """

        return BaseEstimator.set_params(self, **params)
Example #23
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds, degree_range,
                        lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    #  Notes:
    #  - You can implement it yourself or use the built in sklearn utilities
    #    (recommended). See the docs for the sklearn.model_selection package
    #    http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    #  - If your model has more hyperparameters (not just lambda and degree)
    #    you should add them to the search.
    #  - Use get_params() on your model to see what hyperparameters is has
    #    and their names. The parameters dict you return should use the same
    #    names as keys.
    #  - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    from copy import deepcopy as dc
    score_optimal = float('-inf')

    parameter = model.get_params()
    best_params = parameter.copy()

    for d in degree_range:
        for l in lambda_range:
            parameter['bostonfeaturestransformer__degree'] = d
            parameter['linearregressor__reg_lambda'] = l
            score = np.mean(
                sklearn.model_selection.cross_validate(
                    model.set_params(**parameter),
                    X,
                    y,
                    cv=k_folds,
                    scoring='neg_mean_squared_error')['test_score'])
            # update the score and the best params together; updating the
            # score first would make the params comparison always fail
            if score > score_optimal:
                score_optimal = score
                best_params = dc(parameter)
    # ========================

    return best_params
Example #24
def train_test_model(model: BaseEstimator,
                     param_dict,
                     data_dict,
                     pp_dict=PP_DICT,
                     savefig=None,
                     save_model=True):
    X_train, X_test, y_train, y_test = DataManager.load_tts_data(**data_dict)
    log_tf = pp_dict.get('log_tf', True)

    models = []
    if pp_dict:
        if pp_dict.get('std_scale', False):
            with_mean = pp_dict.get('with_mean', True)
            models.append(StandardScaler(with_mean=with_mean))
        n_comp = pp_dict.get('n_components', -1)
        if n_comp > 0:
            models.append(PCA(n_components=n_comp))

    model.set_params(**param_dict)
    models.append(model)

    pipeline = make_pipeline(*models)

    y_tf = (y_train if not log_tf else DataManager.log_tf(y_train)).iloc[:, 0]

    pipeline.fit(X_train, y_tf)

    m_err, r2 = compute_regression_result(pipeline,
                                          X_train,
                                          X_test,
                                          y_train,
                                          y_test,
                                          log_tf=log_tf)

    if save_model:
        store_pipeline(pipeline, param_dict, data_dict, pp_dict, m_err, r2)

    return pipeline, m_err, r2
Example #25
    def set_params(self, **params):
        """ Set the parameters of this estimator.

        The method works on simple estimators as well as on nested objects
        (such as pipelines). The latter have parameters of the form
        ``<component>__<parameter>`` so that it's possible to update each
        component of a nested object.

        Returns
        -------
        self
        """

        if self.compat:
            BaseEstimator.set_params(self, **params)

        else:
            for p in self._get_param_names():
                if p in params:
                    setattr(self, p, params.pop(p))

            self.estimator.set_params(**params)

        return self
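A hypothetical call in non-compat mode (names illustrative): wrapper-level names are consumed first, and whatever remains is forwarded to the wrapped estimator:

wrapper.set_params(verbose=1, C=0.1)
# 'verbose' is in wrapper._get_param_names() -> setattr(wrapper, 'verbose', 1)
# 'C' is unknown to the wrapper -> wrapper.estimator.set_params(C=0.1)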
Example #26
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds,
                        degree_range, lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    #
    # Notes:
    # - You can implement it yourself or use the built in sklearn utilities
    #   (recommended). See the docs for the sklearn.model_selection package
    #   http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    # - If your model has more hyperparameters (not just lambda and degree)
    #   you should add them to the search.
    # - Use get_params() on your model to see what hyperparameters is has
    #   and their names. The parameters dict you return should use the same
    #   names as keys.
    # - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    kf = sklearn.model_selection.KFold(n_splits=k_folds)

    best_params = {}

    best_mse = None
    best_degree = None
    best_lambda = None

    for degree in degree_range:
        for lambda_r in lambda_range:
            mse = 0
            cnt = 0

            model.set_params(bostonfeaturestransformer__degree=degree,
                             linearregressor__reg_lambda=lambda_r)

            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                model.fit(X_train, y_train)

                y_test_pred = model.predict(X_test)
                mse += np.sum((y_test - y_test_pred) ** 2)
                cnt += y_test.shape[0]
            mse /= cnt
            if best_mse is None or best_mse > mse:
                best_mse = mse
                best_degree = degree
                best_lambda = lambda_r
    best_params['bostonfeaturestransformer__degree'] = best_degree
    best_params['linearregressor__reg_lambda'] = best_lambda
    # ========================

    return best_params
Example #27
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds, degree_range,
                        lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    #
    # Notes:
    # - You can implement it yourself or use the built in sklearn utilities
    #   (recommended). See the docs for the sklearn.model_selection package
    #   http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    # - If your model has more hyperparameters (not just lambda and degree)
    #   you should add them to the search.
    # - Use get_params() on your model to see what hyperparameters is has
    #   and their names. The parameters dict you return should use the same
    #   names as keys.
    # - You can use MSE or R^2 as a score.
    best_params = None

    # ====== YOUR CODE: ======
    best_mse = np.inf

    # Splitting the data k-fold
    k_folder = sklearn.model_selection.KFold(k_folds)
    # Iterating over all parameters
    for degree_param in degree_range:
        for lambda_param in lambda_range:

            # Defining the current params and setting them on the model
            params = {
                'bostonfeaturestransformer__degree': degree_param,
                'linearregressor__reg_lambda': lambda_param
            }
            model.set_params(**params)

            avg_mse = 0

            # Checking params on all k folds
            for train_indices, val_indices in k_folder.split(X):
                train_X, train_y = X[train_indices], y[train_indices]
                val_X, val_y = X[val_indices], y[val_indices]

                # Training model on training set
                model.fit(train_X, train_y)

                # Evaluate MSE on the validation set
                y_pred = model.predict(val_X)
                mse = np.mean((val_y - y_pred)**2)
                avg_mse += mse

            # Calculating avg over all k folds
            avg_mse = avg_mse / k_folds

            # Updating best params
            if avg_mse < best_mse:
                best_mse = avg_mse
                best_params = params

    # ========================
    return best_params
Example #28
def cv_best_hyperparams(model: BaseEstimator, X, y, k_folds, degree_range,
                        lambda_range):
    """
    Cross-validate to find best hyperparameters with k-fold CV.
    :param X: Training data.
    :param y: Training targets.
    :param model: sklearn model.
    :param lambda_range: Range of values for the regularization hyperparam.
    :param degree_range: Range of values for the degree hyperparam.
    :param k_folds: Number of folds for splitting the training data into.
    :return: A dict containing the best model parameters,
        with some of the keys as returned by model.get_params()
    """

    # TODO: Do K-fold cross validation to find the best hyperparameters
    #
    # Notes:
    # - You can implement it yourself or use the built in sklearn utilities
    #   (recommended). See the docs for the sklearn.model_selection package
    #   http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection
    # - If your model has more hyperparameters (not just lambda and degree)
    #   you should add them to the search.
    # - Use get_params() on your model to see what hyperparameters is has
    #   and their names. The parameters dict you return should use the same
    #   names as keys.
    # - You can use MSE or R^2 as a score.

    # ====== YOUR CODE: ======
    best_params = None
    best_loss = None

    # k-fold split (reused for every parameter combination)
    kf = KFold(n_splits=k_folds)

    for degree in degree_range:
        for lambda_val in lambda_range:
            # create the hyper-parameters for the model
            params = dict(linearregressor__reg_lambda=lambda_val,
                          bostonfeaturestransformer__degree=degree)

            model.set_params(**params)

            loss = 0
            for fold_ind_train, fold_ind_test in kf.split(X):
                # getting the fold values
                train_X_fold = X[fold_ind_train]
                train_y_fold = y[fold_ind_train]

                test_X_fold = X[fold_ind_test]
                test_y_fold = y[fold_ind_test]

                model.fit(train_X_fold, train_y_fold)
                y_pred = model.predict(test_X_fold)
                loss += np.mean((test_y_fold - y_pred)**2)

            # keep the parameters with the lowest accumulated loss
            if best_loss is None or loss < best_loss:
                best_loss = loss
                best_params = params

    # ========================

    return best_params
Example #29
def _train_and_evaluate(estimator: BaseEstimator,
                        params: Dict[str, Any],
                        model_id: int,
                        grid_search_context: Dict[str, Any]) -> None:
    # Unpack the grid search context.
    output_dir = grid_search_context['output_dir']
    cross_validation = grid_search_context['cross_validation']
    validation_file = grid_search_context['validation_file']
    target_col = grid_search_context['target_col']
    training_file = grid_search_context['training_file']
    
    param_str = ", ".join(
           ["{}={}".format(param_name, param_value)
            for param_name, param_value in params.items()])
    logger.info("Training and evaluating model {}: {}"\
                .format(model_id, param_str))
    
    model_file = "{}/model_{}.pkl".format(output_dir, model_id)
    results_file = "{}/results_{}.json".format(output_dir, model_id)
        
    # If the results file already exists, skip this pass.
    if os.path.exists(results_file):
        logger.info("Model {} already exists, skipping.".format(model_id))
        return

    # Initialize the estimator with the params.
    estimator.set_params(**params)

    cv_results = {}
    # Perform cross validation if selected.
    if cross_validation is not None:
        logger.info("Cross validating model {} for {} folds.".format(
            model_id, cross_validation))

        cv_results = \
            _cross_validate(estimator,
                            model_id,
                            grid_search_context)
       
    logger.info(
        "Training model {} and evaluating the model on the training set."\
        .format(model_id))
    estimator, training_results = \
        _train_model(estimator, grid_search_context)
    
    logger.info(
        "Model {} trained in {:.3f} seconds.".format(
            model_id, training_results["training_time_total"]))
    logger.info(
        "Model {} training set prediction time: {:.3f} for {} records.".format(
            model_id, 
            training_results["training_total_prediction_time"],
            training_results["training_total_prediction_records"]))

    # If the validation set is defined, use _evaluate_model to evaluate the
    # model. Otherwise validation_results is an empty dict.
    if validation_file is not None:
        logger.info(
            "Evaluating model {} on the validation set.".format(model_id))
        validation_results = _evaluate_model(
            estimator,
            grid_search_context['X_validation'],
            grid_search_context['y_validation'],
            grid_search_context,
            "validation")
    else:
        validation_results = {}

    if len(validation_results) > 0:
        logger.info(
            "Model {} validation set evaluation time: {:.3f} for {} records."\
            .format(model_id, 
                    validation_results["validation_total_prediction_time"],
                    validation_results["validation_total_prediction_records"]))
    
    # Construct and write the results for this run.
    results = {
        "training_file": training_file,
        "target": target_col,
        "model_file": model_file,
        "model_id": model_id,
        **cv_results,
        **training_results,
        **validation_results,
        **params
    }

    # Add the validation set file if present.
    if validation_file:
        results["validation_file"] = validation_file

    # Write the results _after_ the model.
    logger.info("Writing estimator for model {} to {}."\
                .format(model_id, model_file))
    joblib.dump(estimator, model_file)
    
    logger.info("Writing results for model {} to {}."\
                .format(model_id, results_file))
    with open(results_file, 'w') as results_out:
        results_out.write(
            json.dumps(results) + "\n")
Example #30
File: base.py  Project: vykhand/skorch
    def set_params(self, **params):
        BaseEstimator.set_params(self, **params)
Example #31
    def __init__(self,
                 base_regressor: BaseEstimator,
                 quantiles: list = None,
                 quantile_range: Tuple = None,
                 step: float = None,
                 **base_params):
        """Initializes the QuantileRegressor instance.
        Initializes the quantile regressor by supplying the underlying
        sklearn estimator as well as fixed quantiles or a quantile range
        and step size.
        Args:
            base_regressor: The underlying sklearn estimator.
              Must implement a fit and predict method as well as accept loss and alpha parameters.
            quantiles, optional: List of quantiles on which the model should be trained.
              If no list is provided, the model falls back on the quantile_range
              and step parameters.
            quantile_range, optional: Tuple with a lower and an upper quantile bound
              which provide a range of quantiles on which the model should be trained.
            step, optional: Step size which is used to create the model quantile range.
            **base_params: Optional keyword arguments which will be passed on
              to the ``base_regressor``.
        Examples:
            The example below illustrates how an instance of the
            QuantileRegressor class can be initialized with an sklearn
            GradientBoostingRegressor instance.
            >>> gbr = GradientBoostingRegressor()
            >>> quantile_reg = QuantileRegressor(gbr, quantiles=[0.4, 0.5, 0.55])
        """

        assert {'loss', 'alpha'}.issubset(base_regressor.get_params().keys()), \
                'Provided base_regressor instance doesn\'t accept quantile loss function.'

        assert quantiles is not None or (quantile_range is not None and step is not None), \
                'The variable fit_quantiles or the variables quantile_range and step must be specified.'

        params = {'loss': 'quantile', 'alpha': 0.5}

        base_regressor = clone(base_regressor)
        base_regressor.set_params(**base_params)
        base_regressor.set_params(**params)

        self.base_regressor = base_regressor
        self.fit_quantiles = quantiles
        self.quantile_range = quantile_range
        self.step = step

        model_dict = {}
        self._quantiles = [0.5]
        model_dict['0.5'] = base_regressor
        self.model_dict = model_dict

        quantiles = self.__quantile_creator() if self.fit_quantiles is None and quantile_range is not None and step is not None \
                                             else self.fit_quantiles

        all_models = [self._create_model_from_quantile(q) for q in quantiles]

        for q, q_model in zip(quantiles, all_models):
            # keys are stored as strings, so compare against the string form
            if '{}'.format(q) not in self.model_dict:
                self.model_dict['{}'.format(q)] = q_model

        quantiles = self._quantiles + quantiles
        quantiles = list(set(quantiles))
        self._quantiles = sorted(quantiles)