Пример #1
0
    def update_event(self, input_called=-1):
        """Build an RFE selector from the node inputs and publish it.

        Nothing happens unless the update was triggered by input 0.

        Inputs read:
            2 -- the estimator wrapped by ``RFE``.
            1 -- optional mapping of parameters forwarded to
                 ``selector.set_params``; skipped when it is ``None``.
            3 -- ``X`` passed to ``selector.fit``.
            4 -- ``y`` passed to ``selector.fit``.

        Outputs written:
            1 -- the selector (set even when fitting failed).
            0 -- executed after output 1 has been set.
        """
        if input_called != 0:
            return

        import logging

        selector = RFE(self.input(2))

        # Identity check: ``!= None`` is evaluated element-wise by array-like
        # objects and is not a reliable "is a value present" test.
        params = self.input(1)
        if params is not None:
            selector.set_params(**params)

        try:
            X = self.input(3)
            y = self.input(4)
            selector.fit(X, y)
        except Exception:
            # Fitting stays best-effort (the selector is still forwarded),
            # but the failure is recorded instead of vanishing silently, and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            logging.getLogger(__name__).exception(
                "RFE fit failed; forwarding the selector as-is"
            )

        self.set_output_val(1, selector)
        self.exec_output(0)
Пример #2
0
    # Recursive feature elimination wrapped around an L2-penalised logistic
    # regression (dual formulation). The RFE-specific settings are applied
    # further down through set_params.
    rfe = RFE(
        LogisticRegression(
            penalty="l2",
            dual=True,
            C=1.0,
            fit_intercept=True,  # approx 2mm starting out
            intercept_scaling=10.0,
            class_weight="auto",
            verbose=1,
        )
    )

    ## logistic regression

    # Keep 50000 features. step=0.1 is a fractional step; per scikit-learn's
    # RFE documentation a value in (0, 1) is a fraction of features removed
    # per iteration.
    rfe.set_params(n_features_to_select=50000, step=0.1)
    # Feature selection is fitted on the first 27000 rows only; the remaining
    # rows are used as a hold-out set below.
    rfe.fit(model_mat_train[:27000], ACTION[:27000])

    # Hold-out check #1: train on the RFE-selected columns (rfe.support_ is
    # turned into column indices with np.where) and score the rows from
    # 27000 onwards.
    lr = LogisticRegression(penalty="l2", dual=True, C=10.0, intercept_scaling=10.0, class_weight="auto")
    lr.fit(model_mat_train[:27000, np.where(rfe.support_)[0]], ACTION[:27000])
    pred = lr.predict_proba(model_mat_train[27000:, np.where(rfe.support_)[0]])
    # NOTE(review): the score is not stored or printed here -- presumably
    # this was run interactively; confirm before relying on it.
    auc_score(ACTION[27000:], pred[:, 1])

    # Hold-out check #2: same model settings and split, but using every
    # column, as a baseline against the selected-feature model above.
    lr = LogisticRegression(penalty="l2", dual=True, C=10.0, intercept_scaling=10.0, class_weight="auto")
    lr.fit(model_mat_train[:27000], ACTION[:27000])
    pred = lr.predict_proba(model_mat_train[27000:])
    # NOTE(review): result discarded here as well.
    auc_score(ACTION[27000:], pred[:, 1])

    # Final model: refit on all training rows restricted to the selected
    # columns, then predict probabilities for the test matrix. pred[:, 1] is
    # the second column of predict_proba's output.
    lr.fit(model_mat_train[:, np.where(rfe.support_)[0]], ACTION)
    pred = lr.predict_proba(model_mat_test[:, np.where(rfe.support_)[0]])
    pd.DataFrame({"Id": test_data.index, "Action": pred[:, 1]}).to_csv(
Пример #3
0
    def fit(self, X, y=None):
        """
        Fits the RFECV with the wrapped model to the specified data and draws
        the rfecv curve with the optimal number of features found.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape (n_samples) or (n_samples, n_features), optional
            Target relative to X for classification or regression.

        Returns
        -------
        self : instance
            Returns the instance of the RFECV visualizer.

        Raises
        ------
        YellowbrickValueError
            If the resolved integer step is zero or negative.
        """
        X, y = check_X_y(X, y, "csr")
        n_features = X.shape[1]

        # This check is kind of unnecessary since RFE will do it, but it's
        # nice to get it out of the way ASAP and raise a meaningful error.
        # A step strictly between 0 and 1 is a fraction of the feature count
        # (at least one feature); anything else is taken as an integer.
        if 0.0 < self.step < 1.0:
            step = int(max(1, self.step * n_features))
        else:
            step = int(self.step)

        # Zero must be rejected too: the message promises "> 0", and a step
        # of 0 would otherwise only fail later inside RFE.
        if step <= 0:
            raise YellowbrickValueError("step must be >0")

        # Create the RFE model
        rfe = RFE(self.estimator, step=step)
        n_feature_subsets = np.arange(1, n_features + 1)

        # Create the cross validation params
        # TODO: handle random state
        # Fetch the params once rather than once per key.
        params = self.get_params()
        cv_params = {key: params[key] for key in ('groups', 'cv', 'scoring')}

        # Perform cross-validation for each feature subset
        scores = []
        for n_features_to_select in n_feature_subsets:
            rfe.set_params(n_features_to_select=n_features_to_select)
            scores.append(cross_val_score(rfe, X, y, **cv_params))

        # Convert scores to array: one row per candidate feature count
        self.cv_scores_ = np.array(scores)

        # Find the best RFE model (highest mean score across its row)
        bestidx = self.cv_scores_.mean(axis=1).argmax()
        self.n_features_ = n_feature_subsets[bestidx]

        # Fit the final RFE model for the number of features
        self.rfe_estimator_ = rfe
        self.rfe_estimator_.set_params(n_features_to_select=self.n_features_)
        self.rfe_estimator_.fit(X, y)

        # Rewrap the visualizer to use the rfe estimator
        self._wrapped = self.rfe_estimator_

        # Hoist the RFE params to the visualizer
        self.support_ = self.rfe_estimator_.support_
        self.ranking_ = self.rfe_estimator_.ranking_

        self.draw()
        return self