def one_fold(schema, train, test, s_option, nt):
    """Score a bagged base-tree ensemble and its alpha-varied counterpart on one fold.

    Each bag resamples ``train`` with ``sampling(train, s_option)``, grows a
    base tree (alpha 1.0, depth ``DEPTH``) and asks ``alpha_variation`` for
    the summed predictions of the extra trees in both directions.

    Returns a 4-tuple:
        * ``st.auc`` score of the alpha-varied ensemble,
        * ``st.auc`` score of the base-tree-only ensemble,
        * the ratio of the first score to the second,
        * total number of alpha-variation trees divided by ``nt``.

    The last column of ``test`` is used as the label vector.
    """
    n_test = len(test)
    varied_sum = np.zeros(n_test)
    base_sum = np.zeros(n_test)
    bags_done = 0
    variant_trees = 0

    # NOTE(review): the exit test is "bags_done > nt", so for a non-negative
    # integer nt this loop runs nt + 1 bags, while the last returned value
    # divides by nt rather than by the number of bags -- confirm intent.
    while True:
        bootstrap = sampling(train, s_option)

        # Base tree for this bag.
        base_tree = dt.create_decision_tree(bootstrap, schema, 1.0, DEPTH)
        base_pred = dt.apply_rules(test, schema, base_tree)
        base_sum = base_sum + base_pred

        # Extra trees obtained by moving alpha away from 1.0 in each direction.
        down_pred, n_down = alpha_variation(schema, bootstrap, test, base_tree, False)
        up_pred, n_up = alpha_variation(schema, bootstrap, test, base_tree, True)
        n_variants = n_down + n_up

        # Mean prediction over the base tree plus all of its variants.
        bag_mean = (down_pred + up_pred + base_pred) / (n_variants + 1.0)
        varied_sum = varied_sum + bag_mean
        variant_trees = variant_trees + n_variants

        bags_done += 1
        if bags_done > nt:
            break

    varied_mean = varied_sum / bags_done
    base_mean = base_sum / bags_done

    labels = test[:, -1]
    score_varied = st.auc(varied_mean, labels)
    score_base = st.auc(base_mean, labels)
    return score_varied, score_base, (score_varied / score_base), (float(variant_trees) / nt)
def leat_ai_raw(schema, train, test, s_option, nt, lift, z_beta):
    """Greedily accumulate alpha-trees fitted on all rows while coverage grows.

    ``train`` and ``test`` are stacked into one data set.  For each alpha in
    a fixed list, a tree is built on that data set and applied back to it;
    the tree's predictions are kept only if they increase the fraction of
    rows whose accumulated prediction is positive.

    Parameters
    ----------
    schema, train, test : forwarded to the ``dt`` helpers / stacked as data.
    s_option, nt : accepted for signature compatibility; not used here
        (the original overwrote ``nt`` with a local counter).
    lift : multiplier applied to ``dt.laplace_smoothing(data)`` to obtain the
        objective probability passed to the tree builder.
    z_beta : forwarded unchanged to ``dt.create_decision_tree``.

    Returns
    -------
    list of ``[tree_number, coverage]`` pairs, one per accepted tree, with
    ``tree_number`` starting at 1.

    Side effect: prints one progress line per accepted tree.
    """
    data = np.vstack((train, test))
    n_rows = len(data)
    obj_prob = lift * dt.laplace_smoothing(data)

    alpha_list = [-1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5, 1.0,
                  1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]

    pred = np.zeros(n_rows)
    # Coverage of the accumulated prediction; an all-zero vector covers nothing.
    # Tracking it here avoids recomputing it from ``pred`` on every iteration.
    covered = 0.0
    tree_no = 1
    output = []

    # NOTE(review): the source was whitespace-collapsed; this assumes that the
    # append, the counter increment and the print all belong to the
    # "coverage improved" branch -- confirm against the original layout.
    for alpha in alpha_list:
        tree = dt.create_decision_tree(data, schema, alpha, -1, True, obj_prob, z_beta)
        candidate = pred + dt.apply_rules(data, schema, tree)
        cov_new = float(np.sum(candidate > 0)) / n_rows
        if cov_new > covered:
            pred = candidate
            covered = cov_new
            output.append([tree_no, cov_new])
            tree_no = tree_no + 1
            # Printed after the increment, as in the original.  The single
            # pre-formatted string emits the same text under the Python 2
            # print statement and the Python 3 print function.
            print("%s %s" % (tree_no, cov_new))
    return output
def _centered_unit_corr(reference, reference_norm, pred):
    """Return dot(reference, pred - mean(pred)) / reference_norm / ||pred - mean(pred)||."""
    centered = pred - np.mean(pred)
    norm = np.sqrt(np.sum(centered * centered))
    return np.dot(reference, centered) / reference_norm / norm


def correlation(schema, train, test, n_rounds=10):
    """Correlate resampled-tree predictions with a reference tree's predictions.

    A reference tree is grown on ``sampling(train, "None")`` and applied to
    ``test``; its predictions are mean-centred once.  Then, ``n_rounds``
    times, a tree is grown on ``sampling(train, "Normal")`` and the
    normalised correlation between its centred test predictions and the
    reference is recorded.  The same is done for the summed predictions
    returned by ``alpha_variation`` in each direction.

    Parameters
    ----------
    schema, train, test : forwarded to ``sampling`` / ``dt`` / ``alpha_variation``.
    n_rounds : number of resampling rounds (defaults to 10, the value that
        used to be hard-coded).

    Returns
    -------
    (corr_beat, corr_c45)
        ``corr_c45`` has one value per round (base tree only).
        ``corr_beat`` has, per round, one value for each direction in which
        ``alpha_variation`` produced at least one tree; if neither direction
        produced any, the round's base-tree value is used instead.

    No guard is applied for zero-norm (constant) prediction vectors.
    """
    corr_c45 = []
    corr_beat = []

    # Reference tree on the "None"-sampled training data.
    base_data = sampling(train, "None")
    tree = dt.create_decision_tree(base_data, schema, 1.0, DEPTH)
    base_pred = dt.apply_rules(test, schema, tree)
    base_pred = base_pred - np.mean(base_pred)
    base_cov = np.sqrt(np.sum(base_pred * base_pred))

    for _ in range(n_rounds):
        newdata = sampling(train, "Normal")
        tree = dt.create_decision_tree(newdata, schema, 1.0, DEPTH)
        pred = dt.apply_rules(test, schema, tree)
        corr_base = _centered_unit_corr(base_pred, base_cov, pred)
        corr_c45.append(corr_base)

        # Alpha variation in both directions, starting from this round's tree.
        pred_down, alpha_cnt_down = alpha_variation(schema, newdata, test, tree, False)
        pred_up, alpha_cnt_up = alpha_variation(schema, newdata, test, tree, True)

        if alpha_cnt_down > 0:
            corr_beat.append(_centered_unit_corr(base_pred, base_cov, pred_down))
        if alpha_cnt_up > 0:
            corr_beat.append(_centered_unit_corr(base_pred, base_cov, pred_up))
        if alpha_cnt_up == 0 and alpha_cnt_down == 0:
            # No variant trees: fall back to the base-tree correlation.  The
            # original recomputed it with the divisions applied inside the dot
            # product, which is the same quantity up to rounding.
            corr_beat.append(corr_base)

    return corr_beat, corr_c45
def leat(schema, train, test, s_option, nt, lift, z_beta):
    """Compare coverage of bagged base trees against bagged alpha-tree ensembles.

    ``train`` and ``test`` are stacked into one data set.  Every bag resamples
    that data set with ``sampling(data, s_option)``, grows a base tree
    (alpha 1.0) plus one tree per alpha in a fixed list, and applies each
    tree to the full data set.

    Returns ``(cov_c45, cov_leat)``: the fraction of rows whose accumulated
    prediction is positive using base trees only, and using base trees plus
    all alpha trees.
    """
    data = np.vstack((train, test))
    n_rows = len(data)
    obj_prob = lift * dt.laplace_smoothing(data)

    # Alphas tried in addition to the base tree's alpha of 1.0.
    extra_alphas = [-1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5,
                    1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]

    def fit_and_apply(sample, alpha):
        # Grow a tree on the resampled rows, then predict on the full data set.
        grown = dt.create_decision_tree(sample, schema, alpha, -1, True, obj_prob, z_beta)
        return dt.apply_rules(data, schema, grown)

    base_votes = np.zeros(n_rows)
    ensemble_votes = np.zeros(n_rows)
    rounds = 0

    # NOTE(review): the exit test is "rounds > nt", so for a non-negative
    # integer nt this runs nt + 1 bags -- confirm intent.
    keep_going = True
    while keep_going:
        resampled = sampling(data, s_option)

        base_pred = fit_and_apply(resampled, 1.0)
        base_votes = base_votes + base_pred
        ensemble_votes = ensemble_votes + base_pred

        for alpha in extra_alphas:
            ensemble_votes = ensemble_votes + fit_and_apply(resampled, alpha)

        rounds += 1
        keep_going = not (rounds > nt)

    covered_base = float(np.sum(base_votes > 0)) / n_rows
    covered_ensemble = float(np.sum(ensemble_votes > 0)) / n_rows
    return covered_base, covered_ensemble
def alpha_variation(schema, train, test, base_tree, direction):
    """Grow a chain of trees with successively selected alphas and sum their predictions.

    Starting from ``base_tree`` and alpha 1.0, ``select_alpha`` is asked
    repeatedly for the next alpha, given the most recent tree and alpha.
    Each new alpha yields a tree built on ``train`` (depth ``DEPTH``) whose
    predictions on ``test`` are added to a running total.  The chain stops
    as soon as ``select_alpha`` returns 1.0.

    ``direction`` is passed through to ``select_alpha`` unchanged (callers in
    this file use False for their "down" pass and True for their "up" pass).

    Returns ``(summed_predictions, number_of_trees_grown)``.
    """
    summed = np.zeros(len(test))
    trees_grown = 0
    current_alpha = 1.0
    current_tree = base_tree

    while True:
        current_alpha = select_alpha(current_tree, train, schema, current_alpha, direction)
        if current_alpha == 1.0:
            # 1.0 is the stop signal: no further alpha to try.
            break
        current_tree = dt.create_decision_tree(train, schema, current_alpha, DEPTH)
        summed = summed + dt.apply_rules(test, schema, current_tree)
        trees_grown += 1

    return summed, trees_grown