Exemplo n.º 1
0
    def _score_one_vs_rest(self, Kyx, cy):
        """Score every non-NULL class against the NULL class.

        For each class index ``ii`` in ``range(self.nr_classes - 1)`` only
        the examples of ``cy`` labelled ``ii`` or ``self.null_class_idx`` are
        kept ("Scenario 2": each class vs NULL).  The alternative
        "Scenario 1" (each class vs all remaining non-NULL classes) would
        use ``cy != self.null_class_idx`` as the row mask instead.

        Parameters
        ----------
        Kyx : 2-D array
            Kernel matrix whose rows correspond to the entries of ``cy``.
            Columns are selected with ``self.cx_idxs[ii]`` -- presumably the
            training examples classifier ``ii`` was fitted on (TODO confirm).
        cy : sequence
            Class label of every row of ``Kyx``.

        Returns
        -------
        float
            Mean of the per-class scores returned by ``rff.get_ap``,
            multiplied by 100.
        """
        cy = np.array(cy)
        nr_scored = self.nr_classes - 1
        ap_scores = np.zeros(nr_scored)

        # Emit one blank line; ``print("")`` does that on Python 2 and 3.
        print("")
        for ii in range(nr_scored):
            # Keep only examples of the current class and of the NULL class.
            good_idxs = (cy == ii) | (cy == self.null_class_idx)
            K_good_idxs = np.ix_(good_idxs, self.cx_idxs[ii])
            # Build a +1 / -1 label list (a real list, not a lazy ``map``
            # object, so the scorer can take its length or index it).
            cy_ = [+1 if label == ii else -1 for label in cy[good_idxs]]
            # Column 1 of predict_proba is taken as the confidence value.
            probabilities = self.clf[ii].predict_proba(Kyx[K_good_idxs])
            confidence_values = probabilities[:, 1]
            ap_scores[ii] = rff.get_ap(confidence_values, cy_)

            print("Score for class %d as positive is %2.4f MAP." % (
                ii, ap_scores[ii]))

        return np.mean(ap_scores) * 100
Exemplo n.º 2
0
 def _crossvalidate_C_one_vs_rest(self, K, cc, idx_clf):
     """Choose the regularization constant C for one binary classifier.

     The examples are split at random into a validation part (a proportion
     of 0.3 of each of the two classes, rounded up) and a training part.
     ``self.clf[idx_clf]`` is then fitted on the training part for
     C = 3**-2, ..., 3**7 and scored on the validation part with
     ``rff.get_ap``; the C with the best score is returned (ties go to the
     larger C).

     Parameters
     ----------
     K : square 2-D array
         Gram matrix of the examples.
     cc : sequence
         Labels of the examples; must contain exactly two distinct values.
         The sample weighting below tests for the values +1 and -1.
     idx_clf : int
         Index into ``self.clf`` of the classifier to tune.

     Returns
     -------
     number
         The best value of C found.

     Raises
     ------
     AssertionError
         If ``K`` is not square or ``cc`` does not hold exactly two classes.

     Notes
     -----
     On return ``self.clf[idx_clf]`` is left fitted with the *last* C tried
     (3**7), not with the returned one.
     """
     # TODO Try to avoid duplication of some of this code.
     # 1. Split Gram matrix and labels into a training set and a validation
     # set.
     pp = 0.3  # Proportion of examples used for cross-validation.
     M, N = K.shape
     assert M == N, 'K is not Gram matrix.'
     classes = list(set(cc))
     assert len(classes) == 2, 'Number of classes is not two.'
     # Randomly pick a subset of the data for cross-validation, but enforce
     # to get a proportion of pp points from each of the two classes.
     idxs_0 = [ii for ii, ci in enumerate(cc) if ci == classes[0]]
     idxs_1 = [ii for ii, ci in enumerate(cc) if ci == classes[1]]
     rand_idxs_0 = np.random.permutation(idxs_0)
     rand_idxs_1 = np.random.permutation(idxs_1)
     # ceil() yields a float; slice bounds must be integers.
     P0 = int(np.ceil(pp * len(rand_idxs_0)))
     P1 = int(np.ceil(pp * len(rand_idxs_1)))
     cv_idxs = np.hstack((rand_idxs_0[:P0], rand_idxs_1[:P1]))
     tr_idxs = np.hstack((rand_idxs_0[P0:], rand_idxs_1[P1:]))
     # Slice Gram matrix K: validation rows vs training columns, and
     # training rows vs training columns.
     cv_K = K[np.ix_(cv_idxs, tr_idxs)]
     tr_K = K[np.ix_(tr_idxs, tr_idxs)]
     # Get corresponding labels.
     cc = np.array(cc)
     cv_cc = cc[cv_idxs]
     tr_cc = cc[tr_idxs]
     # Up-weight the examples labelled +1 by the number of examples labelled
     # -1.  The weights do not depend on C, so build them once.
     weight = np.ones(len(tr_cc))
     weight[tr_cc == +1] *= len(tr_cc[tr_cc == -1])
     # 2. Try different values for the regularization term C and pick the
     # one that yields the best score on the cross-validation set.
     clf = self.clf[idx_clf]
     best_score = -np.inf
     best_C = 0
     # Vary C on an exponential scale.  Plain Python ints and a float base
     # are used because NumPy refuses integer ** negative integer.
     for log3c in range(-2, 8):
         clf.C = 3.0 ** log3c
         clf.fit(tr_K, tr_cc, sample_weight=weight)
         confidence_values = clf.predict_proba(cv_K)[:, 1]
         score = rff.get_ap(confidence_values, cv_cc)
         if score >= best_score:
             best_score = score
             best_C = clf.C
     print("Best score for class %d as positive is %2.4f MAP." % (
         idx_clf, best_score))
     return best_C
 def _crossvalidate_C_one_vs_rest(self, K, cc, idx_clf):
     """Select C for classifier ``idx_clf`` on a random hold-out split.

     A proportion of 0.3 (rounded up) of the examples of each of the two
     classes is held out for validation; the remaining examples are used
     for training.  For every C in 3**-2, ..., 3**7 the classifier
     ``self.clf[idx_clf]`` is fitted on the training part and its
     validation score is computed with ``rff.get_ap``.  The C reaching the
     highest score is returned; on ties the larger C wins.

     Parameters
     ----------
     K : square 2-D array
         Gram matrix of the examples.
     cc : sequence
         Labels of the examples; exactly two distinct values are required.
         The sample weighting below tests for the values +1 and -1.
     idx_clf : int
         Position of the classifier to tune inside ``self.clf``.

     Returns
     -------
     number
         The best value of C found.

     Raises
     ------
     AssertionError
         If ``K`` is not square or ``cc`` does not hold exactly two classes.

     Notes
     -----
     The classifier is left fitted with the last C tried (3**7), not with
     the returned value.
     """
     # TODO Try to avoid duplication of some of this code.
     # 1. Split Gram matrix and labels into a training set and a validation
     # set.
     pp = 0.3  # Proportion of examples used for cross-validation.
     M, N = K.shape
     assert M == N, "K is not Gram matrix."
     classes = list(set(cc))
     assert len(classes) == 2, "Number of classes is not two."
     # Randomly pick a subset of the data for cross-validation, but enforce
     # to get a proportion of pp points from each of the two classes.
     idxs_0 = [ii for ii, ci in enumerate(cc) if ci == classes[0]]
     idxs_1 = [ii for ii, ci in enumerate(cc) if ci == classes[1]]
     rand_idxs_0 = np.random.permutation(idxs_0)
     rand_idxs_1 = np.random.permutation(idxs_1)
     # Slice bounds have to be integers, ceil() returns a float.
     P0 = int(np.ceil(pp * len(rand_idxs_0)))
     P1 = int(np.ceil(pp * len(rand_idxs_1)))
     cv_idxs = np.hstack((rand_idxs_0[:P0], rand_idxs_1[:P1]))
     tr_idxs = np.hstack((rand_idxs_0[P0:], rand_idxs_1[P1:]))
     # Slice Gram matrix K (rows x columns): validation x training and
     # training x training.
     cv_K = K[np.ix_(cv_idxs, tr_idxs)]
     tr_K = K[np.ix_(tr_idxs, tr_idxs)]
     # Get corresponding labels.
     cc = np.array(cc)
     cv_cc = cc[cv_idxs]
     tr_cc = cc[tr_idxs]
     # Examples labelled +1 get a weight equal to the number of examples
     # labelled -1.  Independent of C, hence computed once before the loop.
     weight = np.ones(len(tr_cc))
     weight[tr_cc == +1] *= len(tr_cc[tr_cc == -1])
     # 2. Try different values for the regularization term C and pick the
     # one that yields the best score on the cross-validation set.
     clf = self.clf[idx_clf]
     best_score = -np.inf
     best_C = 0
     # Vary C on an exponential scale.  A float base with Python int
     # exponents avoids NumPy's error on integer ** negative integer.
     for log3c in range(-2, 8):
         clf.C = 3.0 ** log3c
         clf.fit(tr_K, tr_cc, sample_weight=weight)
         confidence_values = clf.predict_proba(cv_K)[:, 1]
         score = rff.get_ap(confidence_values, cv_cc)
         if score >= best_score:
             best_score = score
             best_C = clf.C
     print("Best score for class %d as positive is %2.4f MAP." % (idx_clf, best_score))
     return best_C
    def _score_one_vs_rest(self, Kyx, cy):
        """Compute the mean score of all non-NULL classes vs the NULL class.

        Class indices ``0 .. self.nr_classes - 2`` are scored one at a time.
        For class ``ii`` the rows of ``Kyx`` whose label in ``cy`` is ``ii``
        or ``self.null_class_idx`` are kept (each class vs NULL), class
        ``ii`` is treated as the positive class, and the score is obtained
        from ``rff.get_ap``.  Scoring each class against all remaining
        non-NULL classes would instead use ``cy != self.null_class_idx`` as
        the row mask.

        Parameters
        ----------
        Kyx : 2-D array
            Kernel matrix with one row per entry of ``cy``.  Its columns are
            selected through ``self.cx_idxs[ii]`` -- presumably the training
            examples of classifier ``ii`` (TODO confirm).
        cy : sequence
            Class labels of the rows of ``Kyx``.

        Returns
        -------
        float
            Mean of the per-class scores, multiplied by 100.
        """
        cy = np.array(cy)
        nr_scored = self.nr_classes - 1
        ap_scores = np.zeros(nr_scored)

        # Blank separator line; this spelling works on Python 2 and 3 alike.
        print("")
        for ii in range(nr_scored):
            # Rows belonging to the current class or to the NULL class.
            good_idxs = (cy == ii) | (cy == self.null_class_idx)
            K_good_idxs = np.ix_(good_idxs, self.cx_idxs[ii])
            # +1 / -1 labels as a concrete list (``map`` would be lazy on
            # Python 3).
            cy_ = [+1 if label == ii else -1 for label in cy[good_idxs]]
            # Column 1 of predict_proba is used as the confidence value.
            confidence_values = self.clf[ii].predict_proba(Kyx[K_good_idxs])[:, 1]
            ap_scores[ii] = rff.get_ap(confidence_values, cy_)

            print("Score for class %d as positive is %2.4f MAP." % (ii, ap_scores[ii]))

        return np.mean(ap_scores) * 100
Exemplo n.º 5
0
def average_precision(y_true, y_pred):
    """Average precision with the ground truth as the first argument.

    Thin adapter around ``result_file_functions.get_ap``, which is called
    with the predictions first and the true labels second.  The swapped
    order here matches the ``(y_true, y_pred)`` convention of sklearn
    metrics.

    """
    ap = result_file_functions.get_ap(y_pred, y_true)
    return ap
Exemplo n.º 6
0
def average_precision(y_true, y_pred):
    """Return the average precision for sklearn-style arguments.

    Delegates to ``result_file_functions.get_ap``, passing ``y_pred``
    before ``y_true`` because that function takes its arguments in the
    opposite order from the ``(y_true, y_pred)`` convention used here.

    """
    score = result_file_functions.get_ap(y_pred, y_true)
    return score