def adjust_count(y_pred, cm):
    """Adjusted-count estimate of a binary class distribution.

    Corrects the raw positive proportion observed in *y_pred* using the
    true-positive and false-positive rates derived from the confusion
    matrix *cm* (rows = actual class, columns = predicted class —
    presumably from a validation split; confirm against the caller),
    then clips to [0, 1] and renormalizes.

    Raises:
        TooLittleDifferenceException: when tpr - fpr < 0.25, i.e. the
            classifier separates the classes too weakly for the
            correction to be numerically stable.
    """
    # Row-normalize so each row of the confusion matrix holds rates.
    rates = cm / cm.sum(1, dtype=numpy.float64)[:, numpy.newaxis]
    tpr = rates[1, 1]
    fpr = rates[0, 1]
    # Observed fraction of positive predictions.
    observed_pos = DE.to_bin_dist(y_pred)[1]
    if tpr - fpr < .25:
        raise TooLittleDifferenceException
    # Classic adjusted-count correction: p = (p_obs - fpr) / (tpr - fpr).
    corrected_pos = (observed_pos - fpr) / float(tpr - fpr)
    estimate = numpy.array([1 - corrected_pos, corrected_pos])
    # Correction can leave the interval [0, 1]; clip and renormalize
    # so the result is a proper distribution.
    estimate = numpy.clip(estimate, 0, 1)
    return estimate / estimate.sum()
def adjust_count(self, y_pred, cm):
    """Adjusted-count estimate of a binary class distribution.

    Corrects the raw positive proportion observed in *y_pred* using the
    true-positive and false-positive rates derived from the confusion
    matrix *cm* (rows = actual class, columns = predicted class).
    When ``self.cap`` is set, the corrected values are clipped to
    [0, 1] before renormalization.

    Returns a length-2 numpy array ``[p(neg), p(pos)]`` summing to 1
    (assuming the uncapped estimate has a positive sum).
    """
    # Row-normalize so each row of the confusion matrix holds rates.
    rates = cm / cm.sum(1, dtype=numpy.float64)[:, numpy.newaxis]
    tpr = rates[1, 1]
    fpr = rates[0, 1]
    # Observed fraction of positive predictions.
    observed_pos = DE.to_bin_dist(y_pred)[1]
    # Degenerate classifier (tpr == fpr) carries no information about
    # the prevalence; fall back to an uninformative 0.5.
    corrected_pos = (
        .5 if tpr == fpr else (observed_pos - fpr) / float(tpr - fpr)
    )
    estimate = numpy.array([1 - corrected_pos, corrected_pos])
    if self.cap:
        estimate = numpy.clip(estimate, 0, 1)
    return estimate / estimate.sum()
def predict_binary(self, X_population, params):
    """Iteratively re-weighted classify-and-count prediction.

    *params* unpacks to ``(clf, pos2neg, X, y)`` — an initial trained
    classifier, the training-set positive-to-negative ratio, and the
    training data. For ``self.itr_count`` rounds, the current
    classifier labels *X_population*; the ratio of the training prior
    to the newly observed prior becomes the false-positive cost, and a
    fresh ``self.base_clf_class`` is refit on (X, y) with that class
    weight. The final classifier's predictions are converted to a
    length-2 class distribution.
    """
    clf, pos2neg, X, y = params
    cost_fn = 1.0
    cost_fp = 1.0
    for _ in range(self.itr_count):
        labels = clf.predict(X_population)
        counts = Counter(labels)
        # We add a small prior (1) to prevent divide by zero error.
        # This is not mentioned in the original paper, but we add this
        # for fair comparison.
        observed_ratio = (counts[1] + 1) / float(counts[0] + 1)
        cost_fp = pos2neg / observed_ratio
        # High cost means the examples are important, thus we should
        # weigh them more.
        clf = self.base_clf_class(class_weight={0: cost_fn, 1: cost_fp})
        clf.fit(X, y)
    return DE.to_bin_dist(clf.predict(X_population))
def predict_binary(self, X_population, params):
    """Classify *X_population* with the trained classifier passed as
    *params* and return the resulting length-2 class distribution."""
    classifier = params
    predictions = classifier.predict(X_population)
    return DE.to_bin_dist(predictions)