Пример #1
0
def short_error(y_true, y_pred, normalize=True, sample_weight=None):
    """
    Error rate restricted to samples whose true label is 0 ("short" class).

    Only samples with ``y_true == 0`` are considered; such a sample counts as
    an error when its prediction is not 0 (i.e. a false positive).

    Parameters
    ----------
    y_true : 1d array-like
        Ground truth (correct) labels.

    y_pred : 1d array-like
        Predicted labels, as returned by a classifier.

    normalize : bool, optional (default=True)
        If False, return the number of misclassified samples.
        Otherwise, return the fraction of misclassified samples.

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights, aligned with ``y_true``. Only the weights of the
        samples with ``y_true == 0`` take part in the result.

    Returns
    -------
    error : float
        If normalize == True, return the fraction of misclassified
        samples (float), else it returns the number of misclassified
        samples (int).

        The best performance is 0
    """
    # Imported locally so the function does not rely on a particular
    # module-level numpy alias being present.
    import numpy as np

    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Compute the selection mask once and reuse it for every aligned input.
    short_mask = y_true == 0

    # Every selected true label is 0, so "pred != true" reduces to "pred != 0".
    score = y_pred[short_mask] != 0

    # The weights must be filtered with the same mask as the scores;
    # otherwise their length no longer matches `score`.
    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)[short_mask]

    # NOTE(review): the behaviour when no sample has y_true == 0 depends on
    # `_weighted_sum` (defined elsewhere) -- confirm it is acceptable.
    return _weighted_sum(score, sample_weight, normalize)
Пример #2
0
def my_log_loss(y_true,
                y_pred,
                eps=1e-15,
                normalize=True,
                labels=None,
                encoder=None,
                weights_targets=None):
    """
    Class-weighted log-likelihood of predicted probabilities.

    Parameters
    ----------
    y_true : array-like
        Ground truth labels. They are passed to ``encoder.transform`` and,
        when ``weights_targets`` is given, also used to index it.

    y_pred : array-like, 2d
        Predicted scores per class; clipped to ``[eps, 1 - eps]`` and then
        renormalised so that each row sums to 1.

    eps : float, optional (default=1e-15)
        Clipping bound that keeps ``log`` finite.

    normalize : bool, optional (default=True)
        Forwarded to ``_weighted_sum``.

    labels : optional
        Unused; kept for signature compatibility.

    encoder : object with a ``transform`` method
        Maps ``y_true`` to the per-class target matrix. Required.

    weights_targets : indexable, optional
        Per-class weights; ``weights_targets[y_true]`` gives the per-sample
        weight. If None, samples are unweighted.

    Returns
    -------
    The value produced by ``_weighted_sum`` for the per-sample terms
    ``sum_k target_k * log(p_k)``.

    Raises
    ------
    ValueError
        If ``encoder`` is None.
    """
    if encoder is None:
        # Fail early with a clear message instead of an AttributeError on
        # ``None.transform``.
        raise ValueError(
            "my_log_loss requires an `encoder` exposing a transform() method")

    transformed_labels = encoder.transform(y_true)

    # numpy.clip returns a new array, so the in-place division below does
    # not modify the caller's y_pred.
    y_pred = numpy.clip(y_pred, eps, 1 - eps)
    y_pred /= y_pred.sum(axis=1)[:, numpy.newaxis]

    # The default weights_targets=None used to crash on indexing; treat it
    # as "no weighting".
    if weights_targets is None:
        sample_weight = None
    else:
        sample_weight = weights_targets[y_true]

    # NOTE(review): unlike the conventional log loss this term is not
    # negated, so with non-negative targets the result is <= 0. Left
    # unchanged because callers may depend on the sign -- confirm.
    loss = (transformed_labels * numpy.log(y_pred)).sum(axis=1)
    return _weighted_sum(loss, sample_weight, normalize)
Пример #3
0
# Fixed grid of admissible values; predictions are snapped to the nearest one.
_ROUNDING_LABELS = (
    500., 1000., 1500., 2000., 2500., 3000., 3500., 4000., 4500.,
    5000., 6000., 7000., 8000., 9000., 10000., 12500., 15000.,
)


def accuracy_rounding_score(y_true,
                            y_pred,
                            normalize=True,
                            sample_weight=None):
    """
    Accuracy after snapping every prediction to the nearest allowed label.

    Each value of ``y_pred`` is replaced by the closest entry of
    ``_ROUNDING_LABELS`` (on a tie the smaller label wins), and the snapped
    predictions are then compared with ``y_true``.

    Parameters
    ----------
    y_true : array-like
        Ground truth labels.

    y_pred : array-like
        Raw numeric predictions. The caller's array is NOT modified.

    normalize : bool, optional (default=True)
        Forwarded to ``_weighted_sum``.

    sample_weight : array-like, optional
        Sample weights, forwarded to ``_weighted_sum``.

    Returns
    -------
    The value produced by ``_weighted_sum`` for the per-sample match
    indicators.
    """
    # Imported locally so the function does not rely on a particular
    # module-level numpy alias being present.
    import numpy as np

    grid = np.asarray(_ROUNDING_LABELS)
    raw_pred = np.asarray(y_pred)

    # Vectorised nearest-label lookup. argmin returns the first minimum, so
    # ties resolve to the earlier (smaller) label, as min(..., key=...) did.
    nearest = np.abs(raw_pred.reshape(-1, 1) - grid).argmin(axis=1)

    # Build a fresh array (the input is left untouched) with the input's
    # shape and dtype, mirroring what in-place assignment would have stored.
    y_pred = grid[nearest].reshape(raw_pred.shape).astype(
        raw_pred.dtype, copy=False)

    # Compute accuracy for each possible representation
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    check_consistent_length(y_true, y_pred, sample_weight)
    if y_type.startswith('multilabel'):
        differing_labels = count_nonzero(y_true - y_pred, axis=1)
        score = differing_labels == 0
    else:
        score = y_true == y_pred

    return _weighted_sum(score, sample_weight, normalize)
Пример #4
0
    def decision_function(self, X):
        """ Compute the (weighted) sum of votes.

        Each feature votes according to how far the sample lies from the
        first row of ``self.averages`` compared with the second row.

        Parameters
        ----------
        X : np.ndarray, List
            Data in the form of rows x columns = samples x features
        Returns
        -------
        The (weighted) sum of votes, divided by the number of features.
        """
        dist_to_first = abs(X - self.averages[0, :])
        dist_to_second = abs(X - self.averages[1, :])

        if self.centroid_weighting:
            # Graded votes: per-feature distance difference, scaled per row
            # without centering.
            per_feature = robust_scale(dist_to_first - dist_to_second,
                                       with_centering=False, axis=1)
        else:
            # Hard votes: +0.5 where the sample is farther from the first
            # average than from the second, -0.5 otherwise.
            per_feature = (dist_to_first > dist_to_second) - 0.5

        n_features = per_feature.shape[1]

        if self.stats_weighting is not None:
            # Features are weighted by their importances.
            return _weighted_sum(per_feature, self.feature_importances_) / n_features

        return np.sum(per_feature, 1) / n_features
Пример #5
0
def degree_of_agreement(y_true, y_pred, normalize=True, sample_weight=None):
    """
    Degree-of-agreement (DOA) statistic between true and predicted labels.

    The statistic is the share of true positions that are also predicted
    minus the share of non-true positions that are predicted:

        |pred AND true| / |true|  -  |pred AND NOT true| / |NOT true|

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth labels (assumed to be 0/1 indicators).

    y_pred : array-like or label indicator matrix
        Predicted labels (assumed to be 0/1 indicators).

    normalize : bool, optional (default=True)
        Forwarded to ``_weighted_sum``.

    sample_weight : array-like, optional
        Sample weights, forwarded to ``_weighted_sum``.

    Returns
    -------
    The value produced by ``_weighted_sum`` for the DOA score(s).
    """
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    check_consistent_length(y_true, y_pred, sample_weight)

    if y_type.startswith('multilabel'):
        # The `.multiply` calls below imply y_true / y_pred are sparse
        # matrices on this path.
        with np.errstate(divide='ignore', invalid='ignore'):
            # oddly, we may get an "invalid" rather than a "divide" error here
            pred_and_true = count_nonzero(
                y_true.multiply(y_pred), axis=1)
            len_true = count_nonzero(y_true)

            # `1 - sparse_matrix` is not supported by SciPy, so the
            # complement is obtained by counting instead (valid for 0/1
            # indicators): predicted-but-not-true = predicted - predicted-and-true.
            pred_and_nottrue = count_nonzero(y_pred, axis=1) - pred_and_true
            len_nottrue = y_true.shape[0] * y_true.shape[1] - len_true

            pred_or_true = count_nonzero(y_true + y_pred, axis=1)

            # compute the doa statistic
            score = pred_and_true / len_true - pred_and_nottrue / len_nottrue
            # Rows with neither true nor predicted labels count as perfect.
            score[pred_or_true == 0.0] = 1.0
    else:
        pred_and_true = np.count_nonzero(y_true * y_pred, axis=0)
        len_true = np.count_nonzero(y_true)

        not_ytrue = np.subtract(1, y_true)
        pred_and_nottrue = np.count_nonzero(not_ytrue * y_pred, axis=0)
        len_nottrue = np.count_nonzero(not_ytrue)

        # compute the doa statistic
        # NOTE(review): if y_true is all ones or all zeros, one denominator
        # is 0 and the result is nan/inf -- confirm this is acceptable.
        score = pred_and_true / len_true - pred_and_nottrue / len_nottrue

    return _weighted_sum(score, sample_weight, normalize)
Пример #6
0
    def decision_function(self, X):
        """ Compute the (weighted) sum of votes.

        For ``distance_type == 'manhattan'`` each feature casts a vote based
        on its absolute distance to the two prototypes; the votes are stored
        in ``self.votes_`` and combined using ``self.feature_importances_``.
        For ``distance_type == 'euclidean'`` the result is the difference
        between the importance-weighted squared distances to prototype 0 and
        to prototype 1.

        Args:
            X (np.ndarray, List): Data in the form of rows x columns = samples x features.

        Returns:
            np.ndarray: The (weighted) sum of votes for each sample in the form 1 x samples.

        Raises:
            ValueError: If ``self.distance_type`` is neither ``'manhattan'``
                nor ``'euclidean'``.
        """

        X = np.array(X)

        if self.distance_type == 'manhattan':
            dist_to_first = abs(X - self.prototypes_[0, :])
            dist_to_second = abs(X - self.prototypes_[1, :])
            if self.centroid_weighting:
                # Graded votes: signed difference of the per-feature distances.
                self.votes_ = dist_to_first - dist_to_second
            else:
                # Hard votes: +0.5 where the sample is farther from
                # prototype 0 than from prototype 1, -0.5 otherwise.
                self.votes_ = (dist_to_first > dist_to_second) - 0.5
            dec = _weighted_sum(self.votes_, self.feature_importances_) / self.votes_.shape[1]
        elif self.distance_type == 'euclidean':
            dec = np.sum((self.feature_importances_ * (X - self.prototypes_[0, :])) ** 2, axis=1) - \
                  np.sum((self.feature_importances_ * (X - self.prototypes_[1, :])) ** 2, axis=1)
        else:
            # Without this branch `dec` would be unbound and the method
            # would fail with an opaque UnboundLocalError.
            raise ValueError(
                "Unsupported distance_type: {!r}; expected 'manhattan' or "
                "'euclidean'".format(self.distance_type))

        return dec