# NOTE: these methods assume module-level `import numpy as np`, `import pandas as pd`,
# and the helper functions `get_auc` / `get_score` defined elsewhere in this file.

def get_tree_loc_auc(self, data, i):
    # Without a "loc" column there is nothing to rank by, so report 0.
    if "loc" not in data:
        return 0
    # self.predict will add/modify the 'prediction' column of the data.
    self.predict(data, i)
    sorted_data = data.sort_values(by=["prediction", "loc"],
                                   ascending=[False, True])
    return get_auc(sorted_data)
def eval_range_split(self, level, cur_selected, cur_performance, data,
                     cue, indexes, interval, decision):
    TP, FP, TN, FN = cur_performance
    # Rows inside the candidate interval vs. the rest.
    # (iloc with a boolean mask assumes the default integer index,
    # so index labels and positions coincide.)
    pos = data.iloc[indexes]
    neg = data.iloc[~data.index.isin(indexes)]
    if decision == 1:
        decided = pos
        undecided = neg
    else:
        pos, neg = neg, pos
        decided = neg
        undecided = pos
    # Get AUC for loc.
    if "loc" in data:
        sorted_data = pd.concat([
            df.sort_values(by=["loc"], ascending=True) for df in [pos, neg]
        ])
        loc_auc = get_auc(sorted_data)
    else:
        loc_auc = 0
    tp = pos.loc[pos[self.target] == 1]
    fp = pos.loc[pos[self.target] == 0]
    tn = neg.loc[neg[self.target] == 0]
    fn = neg.loc[neg[self.target] == 1]
    metrics = list(map(len, [tp, fp, tn, fn]))
    tp, fp, tn, fn = self.update_metrics(level, self.max_depth, decision,
                                         metrics)
    # If the decision leads to no data, punish the score.
    if sum([tp, fp, tn, fn]) == 0:
        score = float('inf')
    elif self.criteria == "LOC_AUC":
        score = loc_auc
    else:
        score = get_score(self.criteria, [TP + tp, FP + fp, TN + tn, FN + fn])
    if not cur_selected or score < cur_selected['score']:
        direction = "inside" if decision else "outside"
        cur_selected = {'rule': (cue, direction, interval, decision),
                        'undecided': undecided,
                        'metrics': [TP + tp, FP + fp, TN + tn, FN + fn],
                        'score': score}
    '''
    if self.sorted_cues:
        self.sorted_cues = [cur_selected].extend(self.sorted_cues)
    else:
        self.sorted_cues = [cur_selected]
    else:
        direction = "inside" if decision else "outside"
        new_cue = {'rule': (cue, direction, interval, decision),
                   'undecided': undecided,
                   'metrics': [TP + tp, FP + fp, TN + tn, FN + fn],
                   'score': score}
        self.sorted_cues.append(new_cue)
    '''
    return cur_selected
def eval_decision(self, data, cue, direction, threshold, decision):
    try:
        if not isinstance(threshold, np.ndarray):
            # Single cutoff: split on one side of the threshold.
            if direction == ">":
                decided, undecided = data.loc[data[cue] > threshold], \
                                     data.loc[data[cue] <= threshold]
            else:
                decided, undecided = data.loc[data[cue] < threshold], \
                                     data.loc[data[cue] >= threshold]
        else:
            # Interval threshold: rows inside [low, high) are decided.
            decided = data.loc[(data[cue] >= threshold[0]) &
                               (data[cue] < threshold[1])]
            undecided = data.loc[(data[cue] < threshold[0]) |
                                 (data[cue] >= threshold[1])]
    except Exception:
        print("Exception")
        # Sentinel return for a failed split.
        return 1, 2, 3
    if decision == 1:
        pos, neg = decided, undecided
    else:
        pos, neg = undecided, decided
    # Get AUC for loc.
    if "loc" in data:
        sorted_data = pd.concat([
            df.sort_values(by=["loc"], ascending=True) for df in [pos, neg]
        ])
        loc_auc = get_auc(sorted_data)
    else:
        loc_auc = 0
    tp = pos.loc[pos[self.target] == 1]
    fp = pos.loc[pos[self.target] == 0]
    tn = neg.loc[neg[self.target] == 0]
    fn = neg.loc[neg[self.target] == 1]
    # pre, rec, spec, fpr, npv, acc, f1 = get_performance([tp, fp, tn, fn])
    # return undecided, [tp, fp, tn, fn, pre, rec, spec, fpr, npv, acc, f1]
    return undecided, list(map(len, [tp, fp, tn, fn])), loc_auc
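
# --- Illustration only: a minimal sketch of an effort-aware AUC helper. ---
# The methods above call a module-level `get_auc(sorted_data)` that is not shown
# in this file.  The sketch below is an ASSUMPTION about its behaviour, inferred
# from how it is used here: it treats the row order of `sorted_data` as an
# inspection order and computes the area under the curve of cumulative defects
# found (the target column, assumed here to be named "bug") versus cumulative
# lines of code inspected (the "loc" column), i.e. an Alberg-style curve.
# The name `get_auc_sketch` and the "bug" default are hypothetical.
def get_auc_sketch(sorted_data, target="bug"):
    """Hypothetical effort-aware AUC: area under cumulative-recall vs
    cumulative-LOC, with rows taken in the order given."""
    total_loc = sorted_data["loc"].sum()
    total_bugs = sorted_data[target].sum()
    if total_loc == 0 or total_bugs == 0:
        return 0.0
    # Cumulative fractions along the inspection order.
    x = sorted_data["loc"].cumsum() / total_loc      # effort spent
    y = sorted_data[target].cumsum() / total_bugs    # defects found
    # Trapezoidal area under the (x, y) curve, starting from the origin.
    return float(np.trapz(np.concatenate(([0.0], y.values)),
                          np.concatenate(([0.0], x.values))))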