Example #1
def one_fold(schema, train, test, s_option, nt):
    # One fold: bag decision trees, averaging plain predictions (pred_c)
    # and alpha-varied predictions (pred_a), then compare their AUCs.
    pred_a = np.zeros(len(test))
    pred_c = np.zeros(len(test))
    bag_cnt = 0
    alpha_idx = 0
    while True:
        # bagging   
        newdata = sampling(train, s_option) 
        # base tree 
        tree = dt.create_decision_tree(newdata, schema, 1.0, DEPTH)
        pred = dt.apply_rules(test, schema, tree)
        pred_c = pred_c + pred
        
        # alpha variation 
        pred_down, alpha_cnt_down = alpha_variation(schema, newdata, test, 
                                                        tree, False)
        pred_up, alpha_cnt_up = alpha_variation(schema, newdata, test, 
                                                        tree, True)
        pred_a = pred_a + ((pred_down + pred_up + pred) / 
                        (alpha_cnt_down + alpha_cnt_up + 1.0))
        alpha_idx = alpha_idx + alpha_cnt_down + alpha_cnt_up 
        bag_cnt = bag_cnt + 1
        if bag_cnt > nt:
            break     
    
    pred_a = pred_a / (bag_cnt)
    pred_c = pred_c / (bag_cnt)
    
    label = test[:,-1]
    roc_a = st.auc(pred_a, label)
    roc_c = st.auc(pred_c, label)
    
    return roc_a, roc_c, (roc_a/roc_c), (float(alpha_idx)/nt)
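As a rough illustration of the bagging-and-AUC pattern in one_fold, here is a self-contained sketch that uses scikit-learn's DecisionTreeClassifier and roc_auc_score as stand-ins for the project's dt and st modules; the data, tree depth, and tree count are made up for the example and are not part of the original code.

import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score

rng = np.random.default_rng(0)
X_train, y_train = rng.normal(size=(200, 5)), rng.integers(0, 2, 200)
X_test, y_test = rng.normal(size=(80, 5)), rng.integers(0, 2, 80)

n_trees = 10
pred_c = np.zeros(len(X_test))
for _ in range(n_trees):
    # bootstrap resample, analogous to sampling(train, s_option)
    idx = rng.integers(0, len(X_train), size=len(X_train))
    tree = DecisionTreeClassifier(max_depth=3).fit(X_train[idx], y_train[idx])
    pred_c += tree.predict_proba(X_test)[:, 1]

pred_c /= n_trees
print(roc_auc_score(y_test, pred_c))  # analogous to st.auc(pred_c, label)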
Example #2
def leat_ai_raw(schema, train, test, s_option, nt, lift, z_beta):
    # Greedily add one tree per alpha value, keeping a tree only if it raises
    # coverage: the fraction of rows with a positive accumulated prediction.

    data = np.vstack((train,test))
    base_prob = dt.laplace_smoothing(data)
    obj_prob = lift * base_prob    
    
    pred = np.zeros(len(data))
 
    alpha_list = [-1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5, 1.0, 1.5, 1.75,
                  2.0, 2.25, 2.5, 2.75, 3.0]
    output = []
    nt = 1  # note: the nt argument is ignored; nt is reused as a counter of kept trees
    for alpha in alpha_list:
        tree = dt.create_decision_tree(data, schema, alpha, -1, 
                                            True, obj_prob, z_beta)
        pred_new = dt.apply_rules(data, schema, tree)
        pred_added = pred + pred_new
          
        cov_new = float(np.sum(pred_added > 0))/len(data)
        cov_orig = float(np.sum(pred > 0))/len(data)
        
        if cov_new > cov_orig:
            pred = pred_added         
            output.append([nt,cov_new])
            nt = nt + 1
            print(nt, cov_new)
 
    return output
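The loop above is essentially greedy coverage maximization: a candidate tree is kept only if adding its predictions raises the fraction of rows scored positive. A minimal sketch of that decision rule with made-up candidate vectors (the real candidates come from dt.create_decision_tree at each alpha):

import numpy as np

rng = np.random.default_rng(0)
n_rows = 100
# made-up candidates: each row is one candidate's +1/-1 vote over the data
candidates = rng.choice([-1.0, 1.0], size=(15, n_rows), p=[0.8, 0.2])

pred = np.zeros(n_rows)
kept = []
for i, cand in enumerate(candidates, start=1):
    pred_added = pred + cand
    cov_new = np.mean(pred_added > 0)   # coverage if this candidate is kept
    cov_orig = np.mean(pred > 0)        # current coverage
    if cov_new > cov_orig:              # greedy: keep only coverage-increasing candidates
        pred = pred_added
        kept.append((i, cov_new))

print(kept)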
Example #3
def correlation(schema, train, test):
    # Correlation of each bagged tree's predictions (and their alpha variations)
    # with a base tree grown on the unsampled training data.
    pred_a = np.zeros(len(test))
    pred_c = np.zeros(len(test))

    corr_c45 = []
    corr_beat = []
    
    # bagging   
    base_data = sampling(train, "None") 
    # base tree 
    tree = dt.create_decision_tree(base_data, schema, 1.0, DEPTH)
    base_pred = dt.apply_rules(test, schema, tree)
    base_pred = base_pred - np.mean(base_pred)
    base_cov = np.sqrt(np.sum(base_pred * base_pred))

    for ii in range(10): 
        newdata = sampling(train, "Normal") 
        tree = dt.create_decision_tree(newdata, schema, 1.0, DEPTH)
        pred = dt.apply_rules(test, schema, tree)
        
        pred_c = pred - np.mean(pred)
        cov_c = np.sqrt(np.sum(pred_c*pred_c))
        corr_c45.append(np.dot(base_pred,pred_c)/base_cov/cov_c)
        # alpha variation 
        pred_down, alpha_cnt_down = alpha_variation(schema, newdata, test, 
                                                        tree, False)
        pred_up, alpha_cnt_up = alpha_variation(schema, newdata, test, 
                                                        tree, True)
        if alpha_cnt_down > 0: 
            pred_a = pred_down 
            pred_a = pred_a - np.mean(pred_a)
            cov_a = np.sqrt(np.sum(pred_a*pred_a))
            corr_beat.append(np.dot(base_pred,pred_a)/base_cov/cov_a)
        if alpha_cnt_up > 0: 
            pred_a = pred_up
            pred_a = pred_a - np.mean(pred_a)
            cov_a = np.sqrt(np.sum(pred_a*pred_a))
            corr_beat.append(np.dot(base_pred,pred_a)/base_cov/cov_a)
        if alpha_cnt_up==0 and alpha_cnt_down==0:
            corr_beat.append(np.dot(base_pred,pred_c)/base_cov/cov_c)
                 
    return corr_beat, corr_c45
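The centering, the cov_* norms, and the normalized dot products above amount to a hand-rolled Pearson correlation. A small self-contained check of that formula against numpy's corrcoef, on made-up vectors:

import numpy as np

rng = np.random.default_rng(0)
a = rng.normal(size=50)
b = a + rng.normal(scale=0.5, size=50)

a_c = a - np.mean(a)                 # center, as done with base_pred and pred
b_c = b - np.mean(b)
norm_a = np.sqrt(np.sum(a_c * a_c))  # the "cov" terms in correlation()
norm_b = np.sqrt(np.sum(b_c * b_c))
corr = np.dot(a_c, b_c) / norm_a / norm_b

print(corr, np.corrcoef(a, b)[0, 1])  # the two values agree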
Example #4
def leat(schema, train, test, s_option, nt, lift, z_beta):
    # Bagged ensemble: for each bootstrap sample, grow a base tree (alpha = 1.0)
    # plus one tree per alpha in alpha_list, then compare the coverage of the
    # plain ensemble (pred_c) with the alpha-augmented ensemble (pred_a).
    cov_c45 = 0.0
    cov_leat = 0.0

    data = np.vstack((train,test))
    base_prob = dt.laplace_smoothing(data)
    obj_prob = lift * base_prob    
    
    pred_a = np.zeros(len(data))
    pred_c = np.zeros(len(data))
 
    bag_cnt = 0 
    alpha_list = [-1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5, 1.5, 1.75,
                  2.0, 2.25, 2.5, 2.75, 3.0]
    while True:
        # bagging   
        newdata = sampling(data, s_option) 
        # base tree 
        tree = dt.create_decision_tree(newdata, schema, 1.0, -1, 
                                            True, obj_prob, z_beta)
        pred = dt.apply_rules(data, schema, tree)
        pred_c = pred_c + pred
        pred_a = pred_a + pred
        
        for alpha in alpha_list:
            tree = dt.create_decision_tree(newdata, schema, alpha, -1, 
                                            True, obj_prob, z_beta)
            pred = dt.apply_rules(data, schema, tree)
            pred_a = pred_a + pred 
        
        bag_cnt = bag_cnt + 1
        if bag_cnt > nt:
            break     
    
    cov_c45 = float(np.sum(pred_c > 0))/len(data)
    cov_leat = float(np.sum(pred_a > 0))/len(data)
    
    return cov_c45, cov_leat
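The sampling helper used above is defined elsewhere; assuming it follows the usual bagging convention of drawing rows with replacement, a minimal stand-in could look like the sketch below (the name and semantics here are an assumption, not the project's actual implementation):

import numpy as np

def bootstrap_sample(data, rng=None):
    # Hypothetical stand-in for sampling(data, s_option): draw len(data) rows
    # with replacement, the standard bagging resample.
    if rng is None:
        rng = np.random.default_rng()
    idx = rng.integers(0, len(data), size=len(data))
    return data[idx]

toy = np.arange(20).reshape(10, 2)   # toy (rows, features) array
print(bootstrap_sample(toy, np.random.default_rng(0)))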
Example #5
def alpha_variation(schema, train, test, base_tree, direction):
    # Re-grow the tree at successive alpha values proposed by select_alpha
    # (upward if direction is True, downward otherwise), accumulating the
    # predictions until select_alpha returns the sentinel value 1.0.
    alpha_cnt = 0
    pred = np.zeros(len(test))
    alpha = 1.0
    alpha_tree = base_tree
    while True:
        alpha = select_alpha(alpha_tree, train, schema, alpha, direction)
        if alpha != 1.0:
            alpha_tree = dt.create_decision_tree(train, schema, alpha, DEPTH)
            pred = pred + dt.apply_rules(test, schema, alpha_tree)
            alpha_cnt = alpha_cnt + 1
        else:
            break
    return pred, alpha_cnt
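select_alpha is not shown in this section; the loop only relies on it returning 1.0 as a stop signal. A stripped-down sketch of the same search pattern, with a hypothetical proposal function standing in for select_alpha:

def propose_alpha(alpha, direction, step=0.25, limit=3.0):
    # Hypothetical stand-in for select_alpha: step alpha up or down and
    # return the sentinel 1.0 once the limit is exceeded.
    new_alpha = alpha + step if direction else alpha - step
    return new_alpha if abs(new_alpha) <= limit else 1.0

alpha, alpha_cnt = 1.0, 0
while True:
    alpha = propose_alpha(alpha, direction=True)
    if alpha == 1.0:      # sentinel: no further variation proposed
        break
    alpha_cnt += 1        # the real loop rebuilds the tree and accumulates predictions here

print(alpha_cnt)          # number of alpha variants visited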