def nn_feature_selection_wrap(training_feature,training_label,alpha):
    """Cross-validate the NN predictor on every pair of the five features.

    For each 2-feature combination of ``feature0`` .. ``feature4`` the data is
    split with a shuffled 10-fold ``StratifiedKFold`` (stratified on
    ``training_label['label']``). Every fold is scored through
    ``nn_distance_calculate`` -> ``nn_predict`` -> ``cal_score`` and the
    per-fold scores are averaged by a ``CalList``.

    Args:
        training_feature: DataFrame containing the columns ``feature0`` ..
            ``feature4``.
        training_label: DataFrame with a ``label`` column whose rows line up
            positionally with ``training_feature``.
        alpha: Value forwarded unchanged to ``nn_predict`` for every fold and
            every feature pair.

    Returns:
        The object produced by ``CalParList.return_result()`` after one
        ``list_append`` call per feature pair.
    """
    Result_List = CalParList(3, "alpha", "feature_0", "feature_1")

    skf = StratifiedKFold(n_splits=10, shuffle=True)

    feature_avaliable = ['feature0', 'feature1', 'feature2', 'feature3', 'feature4']

    for feature_a, feature_b in itertools.combinations(feature_avaliable, 2):
        pair = [feature_a, feature_b]
        Cal_Result_List = CalList()

        for train_index, test_index in skf.split(training_feature, training_label['label']):
            # split() yields positional row numbers, so rows must be taken with
            # .iloc; .loc would look them up as index labels and only works
            # when the index happens to be 0..n-1.
            X_train = training_feature.iloc[train_index].loc[:, pair]
            X_val = training_feature.iloc[test_index].loc[:, pair]
            y_train = training_label.iloc[train_index]
            y_val = training_label.iloc[test_index]

            dis_1, dis_2 = nn_distance_calculate(X_val, X_train, y_train)
            y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
            Precall, f1_score, BER, FPR = cal_score(y_pred_temp, y_val['label'])
            Cal_Result_List.list_append(Precall, f1_score, BER, FPR)

        # NOTE(review): the averages are unpacked as (Precall, FPR, BER, f1_score)
        # while the per-fold values were appended as (Precall, f1_score, BER, FPR);
        # confirm this matches the order list_average_cal() actually returns.
        Precall, FPR, BER, f1_score = Cal_Result_List.list_average_cal()
        Result_List.list_append(Precall, f1_score, BER, FPR, alpha, feature_a, feature_b)

    result = Result_List.return_result()
    return result
Пример #2
0
def nn_validation(X_train, y_train, X_val, y_val, alpha):
    """Predict the validation set with the NN predictor and print its scores.

    The two values returned by ``nn_distance_calculate`` are handed to
    ``nn_predict`` together with ``alpha``; the prediction is scored against
    ``y_val['label']`` with ``cal_score`` and the four scores are printed on
    one line.

    Returns:
        The prediction object returned by ``nn_predict``.
    """
    first_distance, second_distance = nn_distance_calculate(
        X_val, X_train, y_train)
    predicted = nn_predict(first_distance, second_distance, alpha, X_val)

    tpr, f1, ber, fpr = cal_score(predicted, y_val['label'])
    report = "TPR:{!s}   f1 score:{!s}   FPR:{!s}   BER:{!s}".format(
        tpr, f1, fpr, ber)
    print(report)

    return predicted
Пример #3
0
def kNN_k_parameter_adjust (X_train,y_train,X_val,y_val):
    """Sweep the neighbour count of a distance-weighted kNN classifier.

    k runs over 5..21 in steps of 1 and then 24..297 in steps of 3. For each
    k a ``KNeighborsClassifier`` is fitted on ``X_train`` /
    ``y_train['label']``, the validation prediction is scored with
    ``cal_score`` and the elapsed time (in minutes) is recorded.

    Returns:
        DataFrame with the columns ``k_value``, ``f1_score``, ``TPR``,
        ``FPR``, ``BER`` and ``time`` (one row per k).
    """
    sweep_started = time.time()

    column_order = ["k_value", "f1_score", "TPR", "FPR", "BER", "time"]
    records = {"k_value": [], "TPR": [], "FPR": [], "f1_score": [], "BER": [], "time": []}

    # Same schedule as stepping by 1 while k <= 20 and by 3 afterwards, up to k < 300.
    k_schedule = list(range(5, 22)) + list(range(24, 300, 3))

    for neighbours in k_schedule:
        fit_started = time.time()

        model = KNeighborsClassifier(n_neighbors=neighbours, algorithm='auto', weights='distance')
        model.fit(X_train, y_train['label'])
        predictions = pd.DataFrame(data={"label_pred": model.predict(X_val)})

        tpr, f1, ber, fpr = cal_score(predictions, y_val['label'])

        records["k_value"].append(neighbours)
        records["TPR"].append(tpr)
        records["FPR"].append(fpr)
        records["BER"].append(ber)
        records["f1_score"].append(f1)
        records["time"].append((time.time() - fit_started) / 60)

    print("the total executing time:%5.1fminute" % ((time.time() - sweep_started) / 60))
    return pd.DataFrame(data=records, columns=column_order)
Пример #4
0
def nn_predict_with_distance_adjust_presion(X_train, y_train, X_val, y_val,
                                            alpha_lower_bound,
                                            alpha_higher_bound):
    """Sweep ``alpha`` in 0.001 steps and score the NN predictor at each step.

    ``nn_distance_calculate`` is evaluated once; its two results are reused
    for every ``nn_predict`` call. ``alpha`` starts at ``alpha_lower_bound``
    and is advanced by repeatedly adding 0.001 for as long as it is
    ``<= alpha_higher_bound``.

    Returns:
        DataFrame with the columns ``alpha``, ``f1_score``, ``TPR``, ``FPR``
        and ``BER`` (one row per alpha value tried).
    """
    first_distance, second_distance = nn_distance_calculate(
        X_val, X_train, y_train)

    column_order = ["alpha", "f1_score", "TPR", "FPR", "BER"]
    sweep = {"alpha": [], "TPR": [], "FPR": [], "f1_score": [], "BER": []}

    threshold = alpha_lower_bound
    while threshold <= alpha_higher_bound:
        predicted = nn_predict(first_distance, second_distance, threshold,
                               X_val)
        tpr, f1, ber, fpr = cal_score(predicted, y_val['label'])

        sweep["alpha"].append(threshold)
        sweep["TPR"].append(tpr)
        sweep["FPR"].append(fpr)
        sweep["BER"].append(ber)
        sweep["f1_score"].append(f1)

        # Accumulate the step (rather than multiplying) to keep the exact
        # sequence of alpha values.
        threshold = threshold + 0.001

    return pd.DataFrame(data=sweep, columns=column_order)
Пример #5
0
def best_result_choosen(dis_1, dis_2, X_val, y_val):
    """Print the alpha in [0.47, 0.57] that gives the highest f1 score.

    ``alpha`` starts at 0.47 and is advanced by repeatedly adding 0.002 while
    it is ``<= 0.57``. Each value is passed to ``nn_predict`` together with
    the precomputed ``dis_1`` / ``dis_2`` and scored with ``cal_score``
    against ``y_val['label']``. The row with the maximum ``f1_score`` is
    printed.

    Args:
        dis_1: First precomputed value forwarded to ``nn_predict``.
        dis_2: Second precomputed value forwarded to ``nn_predict``.
        X_val: Validation features forwarded to ``nn_predict``.
        y_val: Object whose ``['label']`` entry is scored against.

    Returns:
        Always ``0``; the best row is only printed, not returned.
    """
    alpha = 0.47
    alpha_list = []
    tpr_list = []
    fpr_list = []
    BER_list = []
    f1_score_list = []
    while alpha <= 0.57:
        y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
        Precall, f1_score, BER, FPR = cal_score(y_pred_temp, y_val['label'])
        alpha_list.append(alpha)
        tpr_list.append(Precall)
        fpr_list.append(FPR)
        BER_list.append(BER)
        f1_score_list.append(f1_score)
        alpha = alpha + 0.002

    result = {
        "alpha": alpha_list,
        "TPR": tpr_list,
        "FPR": fpr_list,
        "f1_score": f1_score_list,
        "BER": BER_list
    }
    columns = ["alpha", "f1_score", "TPR", "FPR", "BER"]
    result = pd.DataFrame(data=result, columns=columns)
    # idxmax() alone locates the best row; the separate list-based index that
    # used to be computed here was never read.
    print(result.loc[result['f1_score'].idxmax()])
    return 0
Пример #6
0
def SVM_base_fuction (X_train,y_train,X_val,y_val):
    """Fit an RBF-kernel SVC and score its prediction on the validation set.

    Args:
        X_train: Training features passed to ``SVC.fit``.
        y_train: Object whose ``['label']`` entry is the training target.
        X_val: Validation features passed to ``SVC.predict``.
        y_val: Object whose ``['label']`` entry is scored against.

    Returns:
        Tuple ``(Precall, f1_score, BER, FPR, y_pred)`` where the first four
        values come from ``cal_score`` and ``y_pred`` is a DataFrame with a
        single ``label_pred`` column.
    """
    # ``degree`` only applies to the polynomial kernel and is ignored by
    # "rbf", so it is no longer passed (it used to be set to len(X_train)).
    svc_clf = SVC(kernel="rbf")
    svc_clf.fit(X_train, y_train['label'])
    y_pred = pd.DataFrame(data={"label_pred": svc_clf.predict(X_val)})
    Precall, f1_score, BER, FPR = cal_score(y_pred, y_val['label'])
    return Precall, f1_score, BER, FPR, y_pred
Пример #7
0
def kNN_base_function(X_train, y_train, X_val, y_val, k_value):
    """Fit a distance-weighted kNN classifier and score it on the validation set.

    Returns:
        Tuple ``(Precall, f1_score, BER, FPR, y_pred)``: the four values
        returned by ``cal_score`` followed by the prediction as a DataFrame
        with a single ``label_pred`` column.
    """
    knn_options = {
        "n_neighbors": k_value,
        "algorithm": 'auto',
        "weights": 'distance',
    }
    classifier = KNeighborsClassifier(**knn_options)
    classifier.fit(X_train, y_train['label'])

    predicted_frame = pd.DataFrame(
        data={"label_pred": classifier.predict(X_val)})

    tpr, f1, ber, fpr = cal_score(predicted_frame, y_val['label'])
    return tpr, f1, ber, fpr, predicted_frame
Пример #8
0
def nn_predict_with_distance_adjust(training_feature,
                                    training_label):  #,#X_val,y_val):
    """Cross-validate the NN predictor over a coarse-to-fine sweep of alpha.

    ``alpha`` starts at 0.10 and is advanced while it is ``<= 0.9``: by 0.01
    when the current value lies in ``[0.4, 0.6]`` and by 0.1 otherwise. For
    each alpha the data is split with a shuffled 10-fold ``StratifiedKFold``
    (stratified on ``training_label['label']``), every fold is scored through
    ``nn_distance_calculate`` -> ``nn_predict`` -> ``cal_score``, and the
    per-fold scores are averaged.

    Args:
        training_feature: DataFrame of features.
        training_label: DataFrame with a ``label`` column whose rows line up
            positionally with ``training_feature``.

    Returns:
        DataFrame with the columns ``alpha``, ``f1_score``, ``TPR``, ``FPR``
        and ``BER`` (one row per alpha, scores averaged over the folds).
    """
    alpha_list = []
    tpr_list = []
    fpr_list = []
    BER_list = []
    f1_score_list = []
    skf = StratifiedKFold(n_splits=10, shuffle=True)
    alpha = 0.10

    while alpha <= 0.9:
        tpr_list_temp = []
        fpr_list_temp = []
        BER_list_temp = []
        f1_score_list_temp = []
        for train_index, test_index in skf.split(training_feature,
                                                 training_label['label']):
            # split() yields positional row numbers, so rows must be taken
            # with .iloc; .loc would treat them as index labels and only
            # works when the index happens to be 0..n-1.
            X_train = training_feature.iloc[train_index]
            X_val = training_feature.iloc[test_index]
            y_train = training_label.iloc[train_index]
            y_val = training_label.iloc[test_index]

            dis_1, dis_2 = nn_distance_calculate(X_val, X_train, y_train)

            y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
            Precall, f1_score, BER, FPR = cal_score(y_pred_temp,
                                                    y_val['label'])
            tpr_list_temp.append(Precall)
            fpr_list_temp.append(FPR)
            BER_list_temp.append(BER)
            f1_score_list_temp.append(f1_score)

        alpha_list.append(alpha)
        tpr_list.append(sum(tpr_list_temp) / len(tpr_list_temp))
        fpr_list.append(sum(fpr_list_temp) / len(fpr_list_temp))
        BER_list.append(sum(BER_list_temp) / len(BER_list_temp))
        f1_score_list.append(sum(f1_score_list_temp) / len(f1_score_list_temp))
        # Finer step inside [0.4, 0.6], coarse step elsewhere.
        if 0.4 <= alpha <= 0.6:
            alpha = alpha + 0.01
        else:
            alpha = alpha + 0.1
    result = {
        "alpha": alpha_list,
        "TPR": tpr_list,
        "FPR": fpr_list,
        "f1_score": f1_score_list,
        "BER": BER_list
    }
    columns = ["alpha", "f1_score", "TPR", "FPR", "BER"]
    result = pd.DataFrame(data=result, columns=columns)
    return result
Пример #9
0
def kNN_k_parameter_adjust_with_bisaes_data (X_train,y_train,X_val,y_val,data_ratio):
    """Sweep the kNN neighbour count on a class-resampled training set.

    A training sample is drawn once: ``int(8000*data_ratio)`` rows with
    ``label == 1`` and 8000 rows with ``label == 2``. k then runs over 5..21
    in steps of 1 and 24..297 in steps of 3; for each k a distance-weighted
    ``KNeighborsClassifier`` is fitted on that sample, the prediction for
    ``X_val`` is scored with ``cal_score`` and the elapsed time (in minutes)
    is recorded.

    Returns:
        DataFrame with the columns ``k_value``, ``f1_score``, ``TPR``,
        ``FPR``, ``BER`` and ``time`` (one row per k).
    """
    sweep_started = time.time()

    column_order = ["k_value", "f1_score", "TPR", "FPR", "BER", "time"]
    records = {"k_value": [], "TPR": [], "FPR": [], "f1_score": [], "BER": [], "time": []}

    # Assemble features and label into one frame so rows can be filtered by class.
    feature_names = ['feature0', 'feature1', 'feature2', 'feature3', 'feature4']
    frame_columns = {name: X_train[name] for name in feature_names}
    frame_columns['label'] = y_train['label']
    labelled = pd.DataFrame(data=frame_columns)

    class_1_rows = labelled[labelled['label'] == 1].sample(int(8000 * data_ratio))
    class_2_rows = labelled[labelled['label'] == 2].sample(8000)
    resampled = pd.concat([class_1_rows, class_2_rows])

    resampled_target = resampled['label']
    resampled_features = resampled.drop(["label"], axis=1)

    # Same schedule as stepping by 1 while k <= 20 and by 3 afterwards, up to k < 300.
    k_schedule = list(range(5, 22)) + list(range(24, 300, 3))

    for neighbours in k_schedule:
        fit_started = time.time()

        model = KNeighborsClassifier(n_neighbors=neighbours, algorithm='auto', weights='distance')
        model.fit(resampled_features, resampled_target)
        predictions = pd.DataFrame(data={"label_pred": model.predict(X_val)})

        tpr, f1, ber, fpr = cal_score(predictions, y_val['label'])

        records["k_value"].append(neighbours)
        records["TPR"].append(tpr)
        records["FPR"].append(fpr)
        records["BER"].append(ber)
        records["f1_score"].append(f1)
        records["time"].append((time.time() - fit_started) / 60)

    print("the total executing time:%5.1fminute" % ((time.time() - sweep_started) / 60))
    return pd.DataFrame(data=records, columns=column_order)
Пример #10
0
def parameter_adjust_presion (X_train,y_train,X_val,y_val,sample_amount,data_ratio,C_exp_step=1):
    """Sweep RBF-SVC hyper-parameters along the line ``gamma_exp = -C_exp - 12``.

    ``C_exp`` starts at -5 and is advanced by ``C_exp_step`` while it is
    ``< 3``. For each value a fresh class-resampled training set is drawn, an
    ``SVC(kernel="rbf", C=2**C_exp, gamma=2**gamma_exp)`` is fitted on it, and
    the prediction for ``X_val`` is scored with ``cal_score``.

    Args:
        X_train: DataFrame of training features.
        y_train: DataFrame with a ``label`` column (values 1 and 2 are sampled).
        X_val: Validation features passed to ``SVC.predict``.
        y_val: Object whose ``['label']`` entry is scored against.
        sample_amount: Total number of training rows drawn per iteration.
        data_ratio: Ratio of label-1 rows to label-2 rows in each sample.
        C_exp_step: Increment applied to ``C_exp`` after every iteration.
            NOTE(review): the original loop never advanced ``C_exp`` (and so
            never terminated); a step of 1 is an assumption - confirm the
            intended grid spacing.

    Returns:
        DataFrame with the columns ``gamma_exp``, ``C_exp``, ``f1_score``,
        ``TPR``, ``FPR``, ``BER`` and ``time`` (minutes per iteration).

    Raises:
        ValueError: If ``C_exp_step`` is not positive (the sweep would never end).
    """
    if C_exp_step <= 0:
        raise ValueError("C_exp_step must be positive")

    tpr_list = []
    fpr_list = []
    BER_list = []
    f1_score_list = []
    time_list = []
    gamma_exp_list = []
    C_exp_list = []
    label_1_amount = int(sample_amount * (data_ratio/(data_ratio+1)))
    label_2_amount = int(sample_amount-label_1_amount)

    # The combined frame and the per-class subsets do not change between
    # iterations, so build them once; only the random sampling is repeated.
    train_data = pd.concat([X_train, y_train['label']], axis=1, join='outer')
    class_1_rows = train_data[train_data['label'] == 1]
    class_2_rows = train_data[train_data['label'] == 2]

    C_exp = -5
    while C_exp < 3:
        gamma_exp = -C_exp-12
        start1 = time.time()
        Class1_sample = class_1_rows.sample(label_1_amount)
        Class2_sample = class_2_rows.sample(label_2_amount)
        train_com = pd.concat([Class1_sample, Class2_sample])
        sample_label = train_com['label']
        sample_feature = train_com.drop(["label"], axis=1)

        # ``degree`` is ignored by the "rbf" kernel, so it is not passed.
        svc_clf = SVC(kernel="rbf", gamma=2**gamma_exp, C=2**C_exp)
        svc_clf.fit(sample_feature, sample_label)
        y_pred = pd.DataFrame(data={"label_pred": svc_clf.predict(X_val)})

        Precall, f1_score, BER, FPR = cal_score(y_pred, y_val['label'])
        temp = (time.time()-start1)/60
        time_list.append(temp)
        tpr_list.append(Precall)
        f1_score_list.append(f1_score)
        BER_list.append(BER)
        fpr_list.append(FPR)
        gamma_exp_list.append(gamma_exp)
        C_exp_list.append(C_exp)

        # Advance the sweep; without this the while-loop never terminates.
        C_exp = C_exp + C_exp_step

    result = {"gamma_exp":gamma_exp_list,"C_exp":C_exp_list,"TPR":tpr_list,"FPR":fpr_list,"f1_score":f1_score_list,"BER":BER_list,"time":time_list}
    columns = ["gamma_exp","C_exp","f1_score","TPR","FPR","BER","time"]
    result = pd.DataFrame(data=result, columns=columns)
    return result
def NN_cross_validation(training_feature,training_label,alpha):
    """Run shuffled 10-fold stratified cross-validation of the NN predictor.

    Every fold is scored through ``nn_distance_calculate`` -> ``nn_predict``
    -> ``cal_score`` and the per-fold scores are collected in a ``CalList``.

    Args:
        training_feature: DataFrame of features.
        training_label: DataFrame with a ``label`` column whose rows line up
            positionally with ``training_feature``.
        alpha: Value forwarded unchanged to ``nn_predict`` for every fold.

    Returns:
        The four values returned by ``CalList.list_average_cal()``, in the
        order they are unpacked here: ``(Precall, FPR, BER, f1_score)``.
    """
    # A single splitter is enough; it used to be constructed twice.
    skf = StratifiedKFold(n_splits=10, shuffle=True)
    Cal_Result_List = CalList()

    for train_index, test_index in skf.split(training_feature, training_label['label']):
        # split() yields positional row numbers, so rows must be taken with
        # .iloc; .loc would treat them as index labels and only works when
        # the index happens to be 0..n-1.
        X_train = training_feature.iloc[train_index]
        X_val = training_feature.iloc[test_index]
        y_train = training_label.iloc[train_index]
        y_val = training_label.iloc[test_index]

        dis_1, dis_2 = nn_distance_calculate(X_val, X_train, y_train)
        y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
        Precall, f1_score, BER, FPR = cal_score(y_pred_temp, y_val['label'])
        Cal_Result_List.list_append(Precall, f1_score, BER, FPR)

    # NOTE(review): unpacked as (Precall, FPR, BER, f1_score) although values
    # were appended as (Precall, f1_score, BER, FPR); confirm against
    # list_average_cal()'s actual return order.
    Precall, FPR, BER, f1_score = Cal_Result_List.list_average_cal()

    return Precall, FPR, BER, f1_score
Пример #12
0
def nn_feature_selection_wrap(training_feature, training_label, alpha):
    """Cross-validate the NN predictor on every pair of the five features.

    For each 2-feature combination of ``feature0`` .. ``feature4`` the data is
    split with a shuffled 10-fold ``StratifiedKFold`` (stratified on
    ``training_label['label']``), every fold is scored through
    ``nn_distance_calculate`` -> ``nn_predict`` -> ``cal_score``, and the
    per-fold scores are averaged.

    Args:
        training_feature: DataFrame containing the columns ``feature0`` ..
            ``feature4``.
        training_label: DataFrame with a ``label`` column whose rows line up
            positionally with ``training_feature``.
        alpha: Value forwarded to ``nn_predict``; the same value is used for
            every feature pair so that the pairs are comparable.

    Returns:
        DataFrame with the columns ``alpha``, ``feature_0``, ``feature_1``,
        ``f1_score``, ``TPR``, ``FPR`` and ``BER`` (one row per feature pair).
    """
    feature_0_list = []
    feature_1_list = []
    tpr_list = []
    fpr_list = []
    BER_list = []
    f1_score_list = []
    alpha_list = []

    skf = StratifiedKFold(n_splits=10, shuffle=True)

    feature_avaliable = [
        'feature0', 'feature1', 'feature2', 'feature3', 'feature4'
    ]

    for feature_a, feature_b in itertools.combinations(feature_avaliable, 2):
        pair = [feature_a, feature_b]
        tpr_list_temp = []
        fpr_list_temp = []
        BER_list_temp = []
        f1_score_list_temp = []
        for train_index, test_index in skf.split(training_feature,
                                                 training_label['label']):
            # split() yields positional row numbers, so rows must be taken
            # with .iloc; .loc would treat them as index labels and only
            # works when the index happens to be 0..n-1.
            X_train = training_feature.iloc[train_index].loc[:, pair]
            X_val = training_feature.iloc[test_index].loc[:, pair]
            y_train = training_label.iloc[train_index]
            y_val = training_label.iloc[test_index]

            dis_1, dis_2 = nn_distance_calculate(X_val, X_train, y_train)
            y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
            Precall, f1_score, BER, FPR = cal_score(y_pred_temp,
                                                    y_val['label'])
            tpr_list_temp.append(Precall)
            fpr_list_temp.append(FPR)
            BER_list_temp.append(BER)
            f1_score_list_temp.append(f1_score)

        # alpha is deliberately NOT advanced between pairs: shifting the
        # threshold per pair would confound the feature comparison.
        alpha_list.append(alpha)
        feature_0_list.append(feature_a)
        feature_1_list.append(feature_b)
        tpr_list.append(sum(tpr_list_temp) / len(tpr_list_temp))
        fpr_list.append(sum(fpr_list_temp) / len(fpr_list_temp))
        BER_list.append(sum(BER_list_temp) / len(BER_list_temp))
        f1_score_list.append(sum(f1_score_list_temp) / len(f1_score_list_temp))

    result = {
        "alpha": alpha_list,
        "feature_0": feature_0_list,
        "feature_1": feature_1_list,
        "TPR": tpr_list,
        "FPR": fpr_list,
        "f1_score": f1_score_list,
        "BER": BER_list
    }
    columns = [
        "alpha", "feature_0", "feature_1", "f1_score", "TPR", "FPR", "BER"
    ]
    result = pd.DataFrame(data=result, columns=columns)
    return result
Пример #13
0
def kNN_data_ratio_adjust (X_train,y_train,X_val,y_val,k_value):
    """Measure kNN scores for decreasing amounts of label-1 training rows.

    The number of label-1 rows goes from 40000 down to 10000 in steps of 2000
    while the number of label-2 rows stays at 20000. For each setting three
    independent random samples are drawn; a distance-weighted
    ``KNeighborsClassifier`` with ``k_value`` neighbours is fitted on each,
    scored on ``X_val`` / ``y_val['label']`` with ``cal_score``, and the three
    runs are averaged.

    Args:
        X_train: DataFrame containing the columns ``feature0`` .. ``feature4``.
        y_train: DataFrame with a ``label`` column (values 1 and 2 are sampled).
        X_val: Validation features passed to ``predict``.
        y_val: Object whose ``['label']`` entry is scored against.
        k_value: ``n_neighbors`` of the classifier.

    Returns:
        DataFrame with the columns ``label_1_amount``, ``label_2_amount``,
        ``label 1: label 2 ratio``, ``f1_score``, ``TPR``, ``FPR``, ``BER``
        and ``time`` (average fit+predict+score minutes per run).
    """
    start = time.time()
    label_2_amount = 20000
    repeats = 3

    label_1_amount_list = []
    label_2_amount_list = []
    ratio_list = []
    tpr_list = []
    fpr_list = []
    BER_list = []
    f1_score_list = []
    time_list = []

    # The combined frame and the per-class subsets never change, so build
    # them once instead of once per sample; only the sampling is repeated.
    feature_names = ['feature0', 'feature1', 'feature2', 'feature3', 'feature4']
    train_data = {name: X_train[name] for name in feature_names}
    train_data['label'] = y_train['label']
    train_data = pd.DataFrame(data=train_data)
    class_1_rows = train_data[train_data['label'] == 1]
    class_2_rows = train_data[train_data['label'] == 2]

    label_1_amount = 40000
    while label_1_amount > 9000:
        tpr_list_temp = []
        fpr_list_temp = []
        BER_list_temp = []
        f1_score_list_temp = []
        time_list_temp = []
        for _ in range(repeats):
            Class1_sample = class_1_rows.sample(label_1_amount)
            Class2_sample = class_2_rows.sample(label_2_amount)
            train_com = pd.concat([Class1_sample, Class2_sample])
            sample_label = train_com['label']
            sample_feature = train_com.drop(["label"], axis=1)

            # Timing covers fit, predict and scoring, not the sampling above.
            start1 = time.time()

            neigh = KNeighborsClassifier(n_neighbors=k_value, algorithm='auto', weights='distance')
            neigh.fit(sample_feature, sample_label)
            y_pred = pd.DataFrame(data={"label_pred": neigh.predict(X_val)})

            Precall, f1_score, BER, FPR = cal_score(y_pred, y_val['label'])
            tpr_list_temp.append(Precall)
            fpr_list_temp.append(FPR)
            BER_list_temp.append(BER)
            f1_score_list_temp.append(f1_score)
            time_list_temp.append((time.time()-start1)/60)

        label_1_amount_list.append(label_1_amount)
        label_2_amount_list.append(label_2_amount)
        ratio_list.append(label_1_amount/20000)
        tpr_list.append(sum(tpr_list_temp)/len(tpr_list_temp))
        fpr_list.append(sum(fpr_list_temp)/len(fpr_list_temp))
        BER_list.append(sum(BER_list_temp)/len(BER_list_temp))
        f1_score_list.append(sum(f1_score_list_temp)/len(f1_score_list_temp))
        time_list.append(sum(time_list_temp)/len(time_list_temp))
        label_1_amount = label_1_amount-2000

    print("the total executing time:%5.1fminute"%((time.time()-start)/60))
    result = {"label_1_amount":label_1_amount_list,"label_2_amount":label_2_amount_list,"label 1: label 2 ratio":ratio_list,"TPR":tpr_list,"FPR":fpr_list,"f1_score":f1_score_list,"BER":BER_list,"time":time_list}
    columns = ["label_1_amount","label_2_amount","label 1: label 2 ratio","f1_score","TPR","FPR","BER","time"]
    result = pd.DataFrame(data=result, columns=columns)
    return result