def nn_feature_selection_wrap(training_feature, training_label, alpha):
    """Cross-validate the NN predictor on every pair of candidate features.

    For each 2-feature combination of ``feature0`` .. ``feature4`` a shuffled
    10-fold stratified split is run.  Every fold is scored with ``cal_score``
    and the fold-averaged metrics are recorded together with ``alpha`` and
    the two feature names.

    Args:
        training_feature: pandas-style table that contains the columns
            ``feature0`` .. ``feature4``.
        training_label: pandas-style table with a ``label`` column; assumed
            to be row-aligned with ``training_feature`` -- TODO confirm.
        alpha: Passed through unchanged to ``nn_predict``.

    Returns:
        The value of ``CalParList.return_result()`` after one entry per
        feature pair has been appended.
    """
    Result_List = CalParList(3, "alpha", "feature_0", "feature_1")

    skf = StratifiedKFold(n_splits=10, shuffle=True)

    feature_available = ['feature0', 'feature1', 'feature2', 'feature3', 'feature4']

    for first_feature, second_feature in itertools.combinations(feature_available, 2):
        columns = [first_feature, second_feature]
        Cal_Result_List = CalList()

        for train_index, test_index in skf.split(training_feature, training_label['label']):
            # split() yields *positional* row numbers, so rows must be taken
            # with .iloc; .loc would look the numbers up as index labels and
            # pick wrong rows (or fail) whenever the index is not 0..n-1.
            X_train = training_feature.iloc[train_index].loc[:, columns]
            X_val = training_feature.iloc[test_index].loc[:, columns]
            y_train = training_label.iloc[train_index]
            y_val = training_label.iloc[test_index]

            dis_1, dis_2 = nn_distance_calculate(X_val, X_train, y_train)
            y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
            Precall, f1_score, BER, FPR = cal_score(y_pred_temp, y_val['label'])
            Cal_Result_List.list_append(Precall, f1_score, BER, FPR)

        # NOTE(review): the averages are unpacked in a different order than
        # list_append() receives them; this mirrors the other callers in this
        # file -- confirm against CalList.list_average_cal().
        Precall, FPR, BER, f1_score = Cal_Result_List.list_average_cal()
        Result_List.list_append(Precall, f1_score, BER, FPR, alpha,
                                first_feature, second_feature)

    result = Result_List.return_result()
    return result
Пример #2
0
def parameter_adjust(X_train, y_train, sample_amount, data_ratio):
    """Grid-search the SVM ``C`` / ``gamma`` exponents on sampled data.

    The sample is split into two label groups whose sizes follow
    ``data_ratio`` (label 1 : label 2).  For every ``(gamma_exp, C_exp)``
    pair a fresh sample is drawn with ``choose_data_seperately`` and
    ``SVM_cross_validation`` is called with ``2**C_exp`` and
    ``2**gamma_exp``.  The total wall-clock time is printed in minutes.

    Args:
        X_train: Feature data forwarded to ``choose_data_seperately``.
        y_train: Label data forwarded to ``choose_data_seperately``.
        sample_amount: Total number of samples to draw per grid point.
        data_ratio: Ratio of label-1 to label-2 samples.

    Returns:
        The value of ``CalParList.return_result()`` with one entry per grid
        point (metrics, ``gamma_exp``, ``C_exp`` and the reported time).
    """
    start = time.time()
    label_1_amount = int(sample_amount * (data_ratio / (data_ratio + 1)))
    label_2_amount = int(sample_amount - label_1_amount)
    Result_List = CalParList(3, "gamma_exp", "C_exp", "time")

    # Exponents of two that are tried for each hyper-parameter.
    gamma_exponents = (-15, -13, -11, -9, -7, -5, -3, -1, 1, 3)
    c_exponents = (-5, -3, -1, 1, 3, 5, 7, 9, 11, 13, 15)

    for gamma_exp in gamma_exponents:
        for C_exp in c_exponents:
            # A new sample is drawn for every grid point.
            sample_feature, sample_label = choose_data_seperately(
                X_train, y_train, label_1_amount, label_2_amount)

            tpr, fpr, BER, f1_score, time_var = SVM_cross_validation(
                sample_feature, sample_label, 2**C_exp, 2**gamma_exp)

            Result_List.list_append(tpr, f1_score, BER, fpr, gamma_exp, C_exp,
                                    time_var)

    print("the total executing time:%5.1fminute" %
          ((time.time() - start) / 60))
    result = Result_List.return_result()
    return result
def nn_predict_with_distance_adjust_presion(training_feature, training_label,
                                            alpha_lower_bound,
                                            alpha_higher_bound, step=0.001):
    """Sweep ``alpha`` over a fine, evenly spaced grid and record the scores.

    ``NN_cross_validation`` is called once for every grid value between
    ``alpha_lower_bound`` and ``alpha_higher_bound`` (both inclusive).

    Args:
        training_feature: Forwarded to ``NN_cross_validation``.
        training_label: Forwarded to ``NN_cross_validation``.
        alpha_lower_bound: First alpha value that is evaluated.
        alpha_higher_bound: Largest alpha value that may be evaluated.
        step: Distance between consecutive alpha values (default 0.001).

    Returns:
        The value of ``CalParList.return_result()`` with one entry per alpha.

    Raises:
        ValueError: If ``step`` is not positive (the sweep could not end).
    """
    if step <= 0:
        raise ValueError("step must be positive")

    Result_List = CalParList(1, "alpha")

    span = alpha_higher_bound - alpha_lower_bound
    if span < 0:
        step_count = 0
    else:
        # Count the grid points up front instead of adding `step` repeatedly:
        # accumulated binary-float error could otherwise overshoot the upper
        # bound and silently drop the last value.  The tiny tolerance keeps
        # an upper bound that lies on the grid.
        step_count = int(span / step + 1e-9) + 1

    for i in range(step_count):
        # Derive each alpha from the index so errors do not accumulate, and
        # clamp so the upper bound is never exceeded.
        alpha = min(alpha_lower_bound + i * step, alpha_higher_bound)

        Precall, FPR, BER, f1_score = NN_cross_validation(
            training_feature, training_label, alpha)

        Result_List.list_append(Precall, f1_score, BER, FPR, alpha)

    result = Result_List.return_result()
    return result
Пример #4
0
def kNN_data_ratio_adjust(training_feature, training_label, k_value):
    """Measure kNN cross-validation scores for shrinking label-1 sample sizes.

    The label-2 sample size stays at 10000.  The label-1 sample size starts
    at 40000 and is reduced by 5000 while it is above 10000 and by 2500
    afterwards; the sweep stops once it is no longer greater than 2000.
    Every setting is sampled and evaluated five times and the averaged
    metrics and time are recorded.  The total wall-clock time is printed in
    minutes.

    Args:
        training_feature: Forwarded to ``choose_data_seperately``.
        training_label: Forwarded to ``choose_data_seperately``.
        k_value: Forwarded to ``kNN_cross_validation``.

    Returns:
        The value of ``CalParList.return_result()`` with one entry per
        label-1 sample size.
    """
    start = time.time()
    label_1_amount = 40000
    label_2_amount = 10000

    Result_List = CalParList(4, "label_1_amount", "label_2_amount", "ratio",
                             "time")
    iter_amount = 5

    while label_1_amount > 2000:
        Cal_Result_List = CalList()
        time_list_temp = []

        # Repeat the sampling to average out sampling noise.
        for _ in range(iter_amount):
            sample_feature, sample_label = choose_data_seperately(
                training_feature, training_label, label_1_amount,
                label_2_amount)

            Precall, FPR, BER, f1_score, time_var = kNN_cross_validation(
                sample_feature, sample_label, k_value)
            Cal_Result_List.list_append(Precall, f1_score, BER, FPR)
            time_list_temp.append(time_var)

        Precall, FPR, BER, f1_score = Cal_Result_List.list_average_cal()
        time_ave = sum(time_list_temp) / len(time_list_temp)
        Result_List.list_append(Precall, f1_score, BER, FPR, label_1_amount,
                                label_2_amount,
                                label_1_amount / label_2_amount, time_ave)

        # Coarse steps for large samples, finer steps near the end.
        if label_1_amount > 10000:
            label_1_amount = label_1_amount - 5000
        else:
            label_1_amount = label_1_amount - 2500

    print("the total executing time:%5.1fminute" %
          ((time.time() - start) / 60))
    result = Result_List.return_result()
    return result
def nn_predict_with_distance_adjust(training_feature, training_label):
    """Sweep ``alpha`` from 0.10 to 0.90 with a finer grid between 0.4 and 0.6.

    ``alpha`` advances by 0.05, except while it lies in ``[0.4, 0.6]`` where
    it advances by 0.01.  ``NN_cross_validation`` is called for every value
    and the scores are recorded together with ``alpha``.

    Args:
        training_feature: Forwarded to ``NN_cross_validation``.
        training_label: Forwarded to ``NN_cross_validation``.

    Returns:
        The value of ``CalParList.return_result()`` with one entry per alpha.
    """
    Result_List = CalParList(1, "alpha")

    # Step in integer hundredths.  Adding 0.05 / 0.01 to a float accumulates
    # rounding error (0.35 + 0.05 == 0.39999999999999997), which made the
    # `0.4 <= alpha` test miss and started the fine grid too late.
    alpha_hundredths = 10

    while alpha_hundredths <= 90:
        alpha = alpha_hundredths / 100.0

        Precall, FPR, BER, f1_score = NN_cross_validation(
            training_feature, training_label, alpha)

        Result_List.list_append(Precall, f1_score, BER, FPR, alpha)

        # The range check is inclusive, so 0.60 is followed by 0.61 and the
        # coarse steps continue from there (0.66, 0.71, ...).
        if 40 <= alpha_hundredths <= 60:
            alpha_hundredths += 1
        else:
            alpha_hundredths += 5

    result = Result_List.return_result()
    return result
Пример #6
0
def sample_amount_choice(X_train, y_train, sample_amount_upper_bound):
    """Record SVM cross-validation scores for decreasing sample sizes.

    Starting from ``sample_amount_upper_bound`` the sample size is halved
    while it is above 10000, reduced by 2000 while it is above 2000 and by
    100 afterwards; the sweep ends once it is no longer greater than 800.
    Each size is sampled with ``choose_data_together`` and evaluated with
    ``SVM_cross_validation(..., 1, 'auto')`` several times (3 at first, 10
    after the first non-halving step) and the averages are recorded.  The
    total wall-clock time is printed in minutes.

    Args:
        X_train: Forwarded to ``choose_data_together``.
        y_train: Forwarded to ``choose_data_together``.
        sample_amount_upper_bound: First (largest) sample size to evaluate.

    Returns:
        The value of ``CalParList.return_result()`` with one entry per
        sample size.
    """
    began_at = time.time()
    Result_List = CalParList(2, "sample_amount", "time")
    repeats = 3
    sample_amount = sample_amount_upper_bound

    while sample_amount > 800:
        run_scores = CalList()
        run_times = []

        for _ in range(repeats):
            print("current sample amount:%d" % sample_amount)

            sample_feature, sample_label = choose_data_together(
                X_train, y_train, sample_amount)
            tpr, fpr, BER, f1_score, time_var = SVM_cross_validation(
                sample_feature, sample_label, 1, 'auto')

            run_scores.list_append(tpr, f1_score, BER, fpr)
            run_times.append(time_var)

        Precall, FPR, BER, f1_score = run_scores.list_average_cal()
        time_ave = sum(run_times) / len(run_times)
        Result_List.list_append(Precall, f1_score, BER, FPR, sample_amount,
                                time_ave)

        if sample_amount > 10000:
            # Large samples: halve, repeat count stays as it is.
            sample_amount = int(sample_amount / 2)
            continue

        # Smaller samples: linear steps and more repetitions from now on.
        decrement = 100 if sample_amount <= 2000 else 2000
        sample_amount = sample_amount - decrement
        repeats = 10

    elapsed_minutes = (time.time() - began_at) / 60
    print("the total executing time:%5.1fminute" % elapsed_minutes)
    return Result_List.return_result()
Пример #7
0
def SVC_data_ratio_adjust(X_train, y_train, sample_amount):
    """Record SVM cross-validation scores for decreasing label ratios.

    ``data_ratio`` (label 1 : label 2) starts at 4.  It is halved while it
    is above 2, reduced by 0.1 while it is at least 0.8 and by 0.25
    afterwards; the sweep ends once it is no longer greater than 0.2.  For
    each ratio the two label sample sizes are derived from
    ``sample_amount``, sampling and ``SVM_cross_validation(..., 1, 'auto')``
    are repeated five times, and the averages are recorded.  The total
    wall-clock time is printed in minutes.

    Args:
        X_train: Forwarded to ``choose_data_seperately``.
        y_train: Forwarded to ``choose_data_seperately``.
        sample_amount: Total number of samples drawn per evaluation.

    Returns:
        The value of ``CalParList.return_result()`` with one entry per
        ratio.
    """
    start = time.time()
    Result_List = CalParList(4, "label_1_amount", "label_2_amount", "ratio",
                             "time")
    iter_amount = 5
    data_ratio = 4

    while data_ratio > 0.2:
        Cal_Result_List = CalList()
        time_list_temp = []
        label_1_amount = int(sample_amount * (data_ratio / (data_ratio + 1)))
        label_2_amount = int(sample_amount - label_1_amount)

        # Repeat the sampling to average out sampling noise.
        for _ in range(iter_amount):
            sample_feature, sample_label = choose_data_seperately(
                X_train, y_train, label_1_amount, label_2_amount)

            tpr, fpr, BER, f1_score, time_var = SVM_cross_validation(
                sample_feature, sample_label, 1, 'auto')
            time_list_temp.append(time_var)
            Cal_Result_List.list_append(tpr, f1_score, BER, fpr)

        Precall, FPR, BER, f1_score = Cal_Result_List.list_average_cal()
        time_ave = sum(time_list_temp) / len(time_list_temp)
        Result_List.list_append(Precall, f1_score, BER, FPR, label_1_amount,
                                label_2_amount,
                                label_1_amount / label_2_amount, time_ave)

        if data_ratio > 2:
            data_ratio = data_ratio / 2
        elif data_ratio < 0.8:
            data_ratio = data_ratio - 0.25
        else:
            data_ratio = data_ratio - 0.1
        # Repeated subtraction of 0.1 drifts in binary floating point
        # (1.9 - 0.1 == 1.7999999999999998); without rounding the value
        # reaches the 0.8 threshold slightly low and takes the wrong branch.
        data_ratio = round(data_ratio, 10)

    print("the total executing time:%5.1fminute" %
          ((time.time() - start) / 60))
    result = Result_List.return_result()
    return result
Пример #8
0
def kNN_k_parameter_adjust(training_feature, training_label, k_start=5,
                           k_stop=10):
    """Record kNN cross-validation scores for a range of ``k`` values.

    ``k`` starts at ``k_start`` and grows by 2 while it is below 27 and by 5
    afterwards; the sweep ends once ``k`` is no longer below ``k_stop``.
    With the defaults the evaluated values are 5, 7 and 9.

    Args:
        training_feature: Forwarded to ``kNN_cross_validation``.
        training_label: Forwarded to ``kNN_cross_validation``.
        k_start: First ``k`` to evaluate (default 5).
        k_stop: Exclusive upper bound for ``k`` (default 10).  Previously
            this bound was hard-coded, which made the ``+ 5`` step
            unreachable.

    Returns:
        The value of ``CalParList.return_result()`` with one entry per ``k``
        (metrics, ``k`` and the reported time).
    """
    k_value = k_start
    Result_List = CalParList(2, "k_value", "time")

    while k_value < k_stop:
        Precall, FPR, BER, f1_score, time_var = kNN_cross_validation(
            training_feature, training_label, k_value)

        Result_List.list_append(Precall, f1_score, BER, FPR, k_value, time_var)

        # Steps of 2 keep k odd for small values; larger steps once k is big.
        if k_value < 27:
            k_value = k_value + 2
        else:
            k_value = k_value + 5

    result = Result_List.return_result()
    return result