Example No. 1
    def score_predictions(self, metric, fitter, X, y, support):
        """Score, according to some metric, predictions provided by a model.

        The resulting score will be negated if an information criterion is
        specified.

        Parameters
        ----------
        metric : string
            The type of score to run on the prediction. Valid options include
            'log' (log-likelihood), 'BIC' (Bayesian information criterion),
            'AIC' (Akaike information criterion), and 'AICc' (corrected AIC).

        fitter : Poisson object
            The Poisson object that has been fit to the data with the
            respective hyperparameters.

        X : nd-array
            The design matrix.

        y : nd-array
            The response vector.

        support : array-like
            The indices of the non-zero features.

        Returns
        -------
        score : float
            The score.
        """
        # for Poisson, use predict_mean to calculate the "predicted" values
        y_pred = fitter.predict_mean(X[:, support])
        # calculate the log-likelihood
        ll = utils.log_likelihood_glm(model='poisson', y_true=y, y_pred=y_pred)
        if metric == 'log':
            score = ll
        # information criteria
        else:
            n_features = np.count_nonzero(support)
            if fitter.intercept_ != 0:
                n_features += 1
            n_samples = y.size
            if metric == 'BIC':
                score = utils.BIC(ll, n_features, n_samples)
            elif metric == 'AIC':
                score = utils.AIC(ll, n_features)
            elif metric == 'AICc':
                score = utils.AICc(ll, n_features, n_samples)
            else:
                raise ValueError(metric + ' is not a valid metric.')
            # negate the score since lower information criterion is
            # preferable
            score = -score

        return score
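
For reference, the utils.AIC, utils.BIC, and utils.AICc helpers used above
correspond to the standard information-criterion formulas. A minimal sketch of
what they presumably compute, assuming ll is the total (not per-sample)
log-likelihood:

    import numpy as np

    def AIC(ll, n_features):
        # Akaike information criterion: -2*ln(L) + 2k
        return -2 * ll + 2 * n_features

    def BIC(ll, n_features, n_samples):
        # Bayesian information criterion: -2*ln(L) + k*ln(n)
        return -2 * ll + n_features * np.log(n_samples)

    def AICc(ll, n_features, n_samples):
        # AIC with a small-sample correction term
        return AIC(ll, n_features) + (
            2 * n_features * (n_features + 1) / (n_samples - n_features - 1))
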
Example No. 2
    def score_predictions(metric, fitter, X, y, support):
        """Score, according to some metric, predictions provided by a model.

        The resulting score will be negated if an information criterion is
        specified.

        Parameters
        ----------
        metric : string
            The type of score to run on the prediction. Valid options include
            'acc' (accuracy), 'log' (log-likelihood), 'BIC' (Bayesian
            information criterion), 'AIC' (Akaike information criterion), and
            'AICc' (corrected AIC).

        fitter : object
            The fitted classifier; must expose .predict and .predict_proba
            methods.

        X : nd-array
            The design matrix.

        y : nd-array
            The response vector.

        support : array-like
            The indices of the non-zero features.

        Returns
        -------
        score : float
            The score.
        """
        if metric == 'acc':
            y_pred = fitter.predict(X[:, support])
            score = accuracy_score(y, y_pred)
        else:
            # probabilistic predictions for the likelihood-based metrics;
            # note that log_loss returns the mean loss per sample
            y_pred = fitter.predict_proba(X[:, support])
            ll = -log_loss(y, y_pred)
            if metric == 'log':
                score = ll
            else:
                n_features = np.count_nonzero(support)
                n_samples = y.size
                if metric == 'BIC':
                    score = utils.BIC(ll, n_features, n_samples)
                elif metric == 'AIC':
                    score = utils.AIC(ll, n_features)
                elif metric == 'AICc':
                    score = utils.AICc(ll, n_features, n_samples)
                else:
                    raise ValueError(metric + ' is not a valid metric.')
                # negate the score since lower information criterion is
                # preferable
                score = -score

        return score
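
A hedged usage sketch for this classification variant on toy data, assuming
the function is reachable at module scope along with the snippet's sklearn and
utils imports; the LogisticRegression fitter and the boolean support mask are
illustrative, not part of the original snippet:

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 5))
    y = (X[:, 0] + X[:, 2] > 0).astype(int)

    # boolean mask selecting the two informative features
    support = np.array([True, False, True, False, False])
    fitter = LogisticRegression().fit(X[:, support], y)

    print(score_predictions('acc', fitter, X, y, support))  # accuracy
    print(score_predictions('BIC', fitter, X, y, support))  # negated BIC
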
Example No. 3
    def score_predictions(metric, y_true, y_pred, supports):
        """Score, according to some metric, predictions provided by a model.

        The resulting score will be negated if an information criterion is
        specified.

        Parameters
        ----------
        metric : string
            The type of score to run on the prediction. Valid options include
            'r2' (explained variance), 'BIC' (Bayesian information criterion),
            'AIC' (Akaike information criterion), and 'AICc' (corrected AIC).

        y_true : array-like
            The true response variables.

        y_pred : array-like
            The predicted response variables.

        supports : array-like
            The value of the supports for the model that was used to generate
            *y_pred*.

        Returns
        -------
        score : float
            The score.
        """
        if metric == 'r2':
            score = r2_score(y_true, y_pred)
        else:
            n_features = np.count_nonzero(supports)
            if metric == 'BIC':
                score = utils.BIC(y_true, y_pred, n_features)
            elif metric == 'AIC':
                score = utils.AIC(y_true, y_pred, n_features)
            elif metric == 'AICc':
                score = utils.AICc(y_true, y_pred, n_features)
            else:
                raise ValueError(metric + ' is not a valid option.')
            score = -score
        return score
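
A minimal usage sketch for this (y_true, y_pred)-based variant, again assuming
module-scope access and using toy data; only the count of non-zero entries in
supports is consumed by the information criteria:

    import numpy as np

    y_true = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    y_pred = np.array([1.1, 1.9, 3.2, 3.8, 5.1])
    supports = np.array([1, 0, 1])  # two selected features

    print(score_predictions('r2', y_true, y_pred, supports))
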
Example No. 4
    def _score_predictions(self, metric, fitter, X, y, support, boot_idxs):
        """Score, according to some metric, predictions provided by a model.

        The resulting score will be negated if an information criterion is
        specified.

        Parameters
        ----------
        metric : string
            The type of score to run on the prediction. Valid options include
            'acc' (accuracy), 'log' (log-likelihood), 'BIC' (Bayesian
            information criterion), 'AIC' (Akaike information criterion),
            and 'AICc' (corrected AIC).
        fitter : object
            Must contain .predict and .predict_proba methods.
        X : array-like
            The design matrix.
        y : array-like
            Response vector.
        support : array-like
            The support (set of selected features) for the model.
        boot_idxs : 2-tuple of array-like objects
            Tuple of (train_idxs, test_idxs) generated from a bootstrap
            sample. If this is specified, then the appropriate set of
            data will be used for evaluating scores: test data for accuracy,
            and training data for information criteria.

        Returns
        -------
        score : float
            The score.
        """

        # Select the data relevant for the estimation_score
        X = X[boot_idxs[self._estimation_target]]
        y = y[boot_idxs[self._estimation_target]]

        if metric == 'acc':
            if self.shared_support:
                y_pred = fitter.predict(X[:, support])
            else:
                y_pred = fitter.predict(X)
            score = accuracy_score(y, y_pred)
        else:
            if self.shared_support:
                y_pred = fitter.predict_proba(X[:, support])
            else:
                y_pred = fitter.predict_proba(X)
            ll = -log_loss(y, y_pred, labels=self.classes_)
            if metric == 'log':
                score = ll
            else:
                n_features = np.count_nonzero(support)
                n_samples = X.shape[0]
                if metric == 'BIC':
                    score = utils.BIC(n_samples * ll, n_features, n_samples)
                elif metric == 'AIC':
                    score = utils.AIC(n_samples * ll, n_features)
                elif metric == 'AICc':
                    score = utils.AICc(n_samples * ll, n_features, n_samples)
                else:
                    raise ValueError(metric + ' is not a valid metric.')
                # negate the score since lower information criterion is
                # preferable
                score = -score

        return score
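
The n_samples * ll scaling in this example is worth noting: sklearn's log_loss
returns the mean negative log-likelihood per sample, so multiplying its
negation by n_samples recovers the total log-likelihood that the
information-criterion formulas expect. A quick check:

    import numpy as np
    from sklearn.metrics import log_loss

    y = np.array([0, 1, 1, 0])
    proba = np.array([[0.8, 0.2], [0.3, 0.7], [0.4, 0.6], [0.9, 0.1]])

    mean_ll = -log_loss(y, proba)   # mean log-likelihood per sample
    total_ll = y.size * mean_ll     # total log-likelihood
    assert np.isclose(total_ll, np.sum(np.log([0.8, 0.7, 0.6, 0.9])))
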
Example No. 5
    def _score_predictions(self, metric, fitter, X, y, support, boot_idxs):
        """Score, according to some metric, predictions provided by a model.

        The resulting score will be negated if an information criterion is
        specified.

        Parameters
        ----------
        metric : string
            The type of score to run on the prediction. Valid options include
            'r2' (explained variance), 'BIC' (Bayesian information criterion),
            'AIC' (Akaike information criterion), and 'AICc' (corrected AIC).
        fitter : object
            Must contain .predict and .predict_proba methods.
        X : array-like
            The design matrix.
        y : array-like
            Response vector.
        support : array-like
            The support (set of selected features) for the model.
        boot_idxs : 2-tuple of array-like objects
            Tuple of (train_idxs, test_idxs) generated from a bootstrap
            sample. If this is specified, then the appropriate set of
            data will be used for evaluating scores: test data for r^2,
            and training data for information criteria.

        Returns
        -------
        score : float
            The score.
        """

        # Select the data relevant for the estimation_score
        X = X[boot_idxs[self._estimation_target]]
        y = y[boot_idxs[self._estimation_target]]

        if y.ndim == 2:
            if y.shape[1] > 1:
                raise ValueError('y should either have shape ' +
                                 '(n_samples, ) or (n_samples, 1).')
            y = np.squeeze(y)
        elif y.ndim > 2:
            raise ValueError('y should either have shape ' +
                             '(n_samples, ) or (n_samples, 1).')

        y_pred = fitter.predict(X[:, support])
        if y.shape != y_pred.shape:
            raise ValueError('Targets and predictions are not the same shape.')

        if metric == 'r2':
            score = r2_score(y, y_pred)
        else:
            ll = utils.log_likelihood_glm(model='normal',
                                          y_true=y,
                                          y_pred=y_pred)
            n_features = np.count_nonzero(support)
            n_samples = X.shape[0]
            if metric == 'BIC':
                score = utils.BIC(ll, n_features, n_samples)
            elif metric == 'AIC':
                score = utils.AIC(ll, n_features)
            elif metric == 'AICc':
                score = utils.AICc(ll, n_features, n_samples)
            else:
                raise ValueError(metric + ' is not a valid option.')
            # negate the score since lower information criterion is preferable
            score = -score
        return score
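
A sketch of what utils.log_likelihood_glm(model='normal', ...) plausibly
computes: the Gaussian log-likelihood with the noise variance set to its
maximum-likelihood estimate. This is an assumption about the helper, not its
verified implementation:

    import numpy as np

    def log_likelihood_normal(y_true, y_pred):
        # residual sum of squares and the MLE of the noise variance
        n = y_true.size
        rss = np.sum((y_true - y_pred) ** 2)
        sigma2 = rss / n
        # -n/2 * log(2*pi*sigma2) - rss / (2*sigma2); at the MLE the
        # second term simplifies to -n/2
        return -0.5 * n * (np.log(2 * np.pi * sigma2) + 1.0)
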
Example No. 6
    def _score_predictions(self, metric, fitter, X, y, support, boot_idxs=None):
        """Score, according to some metric, predictions provided by a model.

        The resulting score will be negated if an information criterion is
        specified.

        Parameters
        ----------
        metric : string
            The type of score to run on the prediction. Valid options include
            'log' (log-likelihood), 'BIC' (Bayesian information criterion),
            'AIC' (Akaike information criterion), and 'AICc' (corrected AIC).
        fitter : Poisson object
            The Poisson object that has been fit to the data with the
            respective hyperparameters.
        X : ndarray, shape (n_samples, n_features)
            The design matrix.
        y : ndarray, shape (n_samples,)
            The response vector.
        support : ndarray
            The indices of the non-zero features.
        boot_idxs : 2-tuple of array-like objects
            Tuple of (train_idxs, test_idxs) generated from a bootstrap
            sample. If this is specified, then the appropriate set of
            data will be used for evaluating scores: test data for the
            log-likelihood, and training data for information criteria.

        Returns
        -------
        score : float
            The score.
        """

        # Select the data relevant for the estimation score
        if boot_idxs is not None:
            X = X[boot_idxs[self._estimation_target]]
            y = y[boot_idxs[self._estimation_target]]

        # for Poisson, use predict_mean to calculate the "predicted" values
        y_pred = fitter.predict_mean(X[:, support])
        # calculate the log-likelihood
        ll = utils.log_likelihood_glm(model='poisson', y_true=y, y_pred=y_pred)
        if metric == 'log':
            score = ll
        # information criteria
        else:
            n_features = np.count_nonzero(support)
            if fitter.intercept_ != 0:
                n_features += 1
            n_samples = X.shape[0]
            if metric == 'BIC':
                score = utils.BIC(n_samples * ll, n_features, n_samples)
            elif metric == 'AIC':
                score = utils.AIC(n_samples * ll, n_features)
            elif metric == 'AICc':
                score = utils.AICc(n_samples * ll, n_features, n_samples)
            else:
                raise ValueError(metric + ' is not a valid metric.')
            # negate the score since lower information criterion is preferable
            score = -score

        return score
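
Similarly, a sketch of the Poisson log-likelihood that
utils.log_likelihood_glm(model='poisson', ...) is assumed to compute, here
normalized per sample, which would be consistent with the n_samples * ll
scaling above; the actual helper may differ:

    import numpy as np
    from scipy.special import gammaln

    def log_likelihood_poisson(y_true, y_pred):
        # per-sample mean of  y * log(mu) - mu - log(y!)
        ll = y_true * np.log(y_pred) - y_pred - gammaln(y_true + 1)
        return np.mean(ll)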