def nn_feature_selection_wrap(training_feature, training_label, alpha):
    """Cross-validate the NN predictor on every pair of the five feature columns.

    For each 2-combination of ``feature0`` .. ``feature4`` a shuffled 10-fold
    stratified split is run. The per-fold scores returned by ``cal_score`` are
    collected in a ``CalList``; its averages are then appended, together with
    ``alpha`` and the two feature names, to a ``CalParList``.

    NOTE(review): another function with this same name is defined further
    down in this file; at import time the later definition replaces this one.

    Args:
        training_feature: DataFrame holding the columns ``feature0`` ..
            ``feature4``.
        training_label: DataFrame with a ``label`` column, row-aligned with
            ``training_feature``.
        alpha: Value forwarded to ``nn_predict`` and recorded with each row.

    Returns:
        Whatever ``CalParList.return_result()`` returns.
    """
    Result_List = CalParList(3, "alpha", "feature_0", "feature_1")
    skf = StratifiedKFold(n_splits=10, shuffle=True)
    labels = training_label['label']
    feature_avaliable = ['feature0', 'feature1', 'feature2', 'feature3', 'feature4']

    for feature_a, feature_b in itertools.combinations(feature_avaliable, 2):
        pair = [feature_a, feature_b]
        Cal_Result_List = CalList()
        for train_index, test_index in skf.split(training_feature, labels):
            # split() yields positional row numbers, so select with .iloc;
            # .loc would look them up as index labels and only agrees when
            # the frames carry a default 0..n-1 index.
            X_train = training_feature.iloc[train_index].loc[:, pair]
            X_val = training_feature.iloc[test_index].loc[:, pair]
            y_train = training_label.iloc[train_index]
            y_val = training_label.iloc[test_index]

            dis_1, dis_2 = nn_distance_calculate(X_val, X_train, y_train)
            y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
            Precall, f1_score, BER, FPR = cal_score(y_pred_temp, y_val['label'])
            Cal_Result_List.list_append(Precall, f1_score, BER, FPR)

        # NOTE(review): the averages are unpacked in a different order than
        # the per-fold scores were appended; this presumes list_average_cal()
        # returns (Precall, FPR, BER, f1_score) - confirm against CalList.
        Precall, FPR, BER, f1_score = Cal_Result_List.list_average_cal()
        Result_List.list_append(Precall, f1_score, BER, FPR, alpha,
                                feature_a, feature_b)

    return Result_List.return_result()
def nn_validation(X_train, y_train, X_val, y_val, alpha):
    """Score the NN predictor on one validation set and print the metrics.

    Distances come from ``nn_distance_calculate``, predictions from
    ``nn_predict`` and the four scores from ``cal_score``.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_val: Validation features.
        y_val: Validation labels; its ``label`` column is scored against.
        alpha: Value forwarded to ``nn_predict``.

    Returns:
        The prediction object produced by ``nn_predict``.
    """
    dist_a, dist_b = nn_distance_calculate(X_val, X_train, y_train)
    predictions = nn_predict(dist_a, dist_b, alpha, X_val)
    tpr, f1, ber, fpr = cal_score(predictions, y_val['label'])

    # Assemble the one-line report piece by piece, then emit it.
    pieces = [
        "TPR:", str(tpr),
        " f1 score:", str(f1),
        " FPR:", str(fpr),
        " BER:", str(ber),
    ]
    print("".join(pieces))
    return predictions
def kNN_k_parameter_adjust(X_train, y_train, X_val, y_val):
    """Sweep the neighbour count of a distance-weighted kNN classifier.

    k runs over 5..21 in steps of one and then 24..297 in steps of three.
    For every k a fresh ``KNeighborsClassifier`` is fitted on the training
    data, the validation set is predicted and scored with ``cal_score``, and
    the wall-clock time of that iteration (in minutes) is recorded. The total
    run time is printed at the end.

    Args:
        X_train: Training features.
        y_train: Training labels; the ``label`` column is the fit target.
        X_val: Validation features.
        y_val: Validation labels; the ``label`` column is scored against.

    Returns:
        DataFrame with columns ``k_value``, ``f1_score``, ``TPR``, ``FPR``,
        ``BER`` and ``time``, one row per k.
    """
    sweep_started = time.time()
    history = {
        "k_value": [],
        "TPR": [],
        "FPR": [],
        "f1_score": [],
        "BER": [],
        "time": [],
    }

    # Fine steps up to 21, coarse steps of three afterwards, all below 300.
    schedule = list(range(5, 22)) + list(range(24, 300, 3))
    for neighbours in schedule:
        iteration_started = time.time()
        model = KNeighborsClassifier(n_neighbors=neighbours,
                                     algorithm='auto',
                                     weights='distance')
        model.fit(X_train, y_train['label'])
        predicted = pd.DataFrame(data={"label_pred": model.predict(X_val)})
        tpr, f1, ber, fpr = cal_score(predicted, y_val['label'])

        history["k_value"].append(neighbours)
        history["TPR"].append(tpr)
        history["FPR"].append(fpr)
        history["BER"].append(ber)
        history["f1_score"].append(f1)
        history["time"].append((time.time() - iteration_started) / 60)

    print("the total executing time:%5.1fminute" % ((time.time() - sweep_started) / 60))
    ordered = ["k_value", "f1_score", "TPR", "FPR", "BER", "time"]
    return pd.DataFrame(data=history, columns=ordered)
def nn_predict_with_distance_adjust_presion(X_train, y_train, X_val, y_val,
                                            alpha_lower_bound,
                                            alpha_higher_bound,
                                            alpha_step=0.001):
    """Scan alpha over a closed interval and score the NN predictor at each value.

    The distances are computed once with ``nn_distance_calculate``; for every
    alpha on the grid ``nn_predict`` is run and scored with ``cal_score``.

    The grid is generated from an integer step index rather than by adding
    the step repeatedly, so rounding error does not accumulate and the
    inclusive upper bound is reached reliably.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_val: Validation features.
        y_val: Validation labels; the ``label`` column is scored against.
        alpha_lower_bound: First alpha evaluated.
        alpha_higher_bound: Largest alpha evaluated (inclusive). If it is
            below ``alpha_lower_bound`` no alpha is evaluated.
        alpha_step: Spacing between consecutive alphas. Defaults to 0.001.

    Returns:
        DataFrame with columns ``alpha``, ``f1_score``, ``TPR``, ``FPR`` and
        ``BER``, one row per alpha.

    Raises:
        ValueError: If ``alpha_step`` is not positive.
    """
    if alpha_step <= 0:
        raise ValueError("alpha_step must be positive")

    dis_1, dis_2 = nn_distance_calculate(X_val, X_train, y_train)

    span = alpha_higher_bound - alpha_lower_bound
    if span >= 0:
        # The small tolerance keeps a bound that is an exact multiple of the
        # step from being dropped because of a representation error.
        n_points = int(span / alpha_step + 1e-9) + 1
    else:
        n_points = 0

    alpha_list = []
    tpr_list = []
    fpr_list = []
    BER_list = []
    f1_score_list = []
    for step_index in range(n_points):
        # Clamp so the last grid point never overshoots the upper bound.
        alpha = min(alpha_lower_bound + step_index * alpha_step,
                    alpha_higher_bound)
        y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
        Precall, f1_score, BER, FPR = cal_score(y_pred_temp, y_val['label'])
        alpha_list.append(alpha)
        tpr_list.append(Precall)
        fpr_list.append(FPR)
        BER_list.append(BER)
        f1_score_list.append(f1_score)

    result = {
        "alpha": alpha_list,
        "TPR": tpr_list,
        "FPR": fpr_list,
        "f1_score": f1_score_list,
        "BER": BER_list,
    }
    columns = ["alpha", "f1_score", "TPR", "FPR", "BER"]
    return pd.DataFrame(data=result, columns=columns)
def best_result_choosen(dis_1, dis_2, X_val, y_val):
    """Scan alpha from 0.47 to 0.57 and print the row with the best f1 score.

    For each alpha, ``nn_predict`` is run on the supplied distances and the
    result is scored with ``cal_score``. The alphas are generated from an
    integer index (51 points, step 0.002, both ends included) so that
    accumulated rounding error cannot drop the final value.

    Args:
        dis_1: First distance object, forwarded unchanged to ``nn_predict``.
        dis_2: Second distance object, forwarded unchanged to ``nn_predict``.
        X_val: Validation features, forwarded to ``nn_predict``.
        y_val: Validation labels; the ``label`` column is scored against.

    Returns:
        Always ``0``; the best row is reported through ``print``.
    """
    alpha_list = []
    tpr_list = []
    fpr_list = []
    BER_list = []
    f1_score_list = []

    for step_index in range(51):
        # Rounded to the grid resolution so the recorded alphas stay clean.
        alpha = round(0.47 + step_index * 0.002, 3)
        y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
        Precall, f1_score, BER, FPR = cal_score(y_pred_temp, y_val['label'])
        alpha_list.append(alpha)
        tpr_list.append(Precall)
        fpr_list.append(FPR)
        BER_list.append(BER)
        f1_score_list.append(f1_score)

    result = {
        "alpha": alpha_list,
        "TPR": tpr_list,
        "FPR": fpr_list,
        "f1_score": f1_score_list,
        "BER": BER_list,
    }
    columns = ["alpha", "f1_score", "TPR", "FPR", "BER"]
    result = pd.DataFrame(data=result, columns=columns)
    print(result.loc[result['f1_score'].idxmax()])
    return 0
def SVM_base_fuction(X_train, y_train, X_val, y_val):
    """Fit an RBF-kernel SVC with default hyper-parameters and score it.

    The classifier is fitted on ``X_train`` / ``y_train['label']``, the
    validation set is predicted, and the prediction is scored with
    ``cal_score``.

    Args:
        X_train: Training features.
        y_train: Training labels; the ``label`` column is the fit target.
        X_val: Validation features.
        y_val: Validation labels; the ``label`` column is scored against.

    Returns:
        Tuple ``(Precall, f1_score, BER, FPR, y_pred)`` where the first four
        are the values returned by ``cal_score`` and ``y_pred`` is a
        DataFrame with a single ``label_pred`` column.
    """
    # No ``degree`` argument: it only applies to the polynomial kernel and
    # is ignored for "rbf".
    svc_clf = SVC(kernel="rbf")
    svc_clf.fit(X_train, y_train['label'])
    y_pred = pd.DataFrame(data={"label_pred": svc_clf.predict(X_val)})
    Precall, f1_score, BER, FPR = cal_score(y_pred, y_val['label'])
    return Precall, f1_score, BER, FPR, y_pred
def kNN_base_function(X_train, y_train, X_val, y_val, k_value):
    """Fit one distance-weighted kNN classifier and score it on the validation set.

    Args:
        X_train: Training features.
        y_train: Training labels; the ``label`` column is the fit target.
        X_val: Validation features.
        y_val: Validation labels; the ``label`` column is scored against.
        k_value: Number of neighbours handed to ``KNeighborsClassifier``.

    Returns:
        Tuple of the four values returned by ``cal_score`` (unpacked as
        Precall, f1_score, BER, FPR) followed by the predictions as a
        DataFrame with a single ``label_pred`` column.
    """
    classifier = KNeighborsClassifier(
        n_neighbors=k_value,
        algorithm='auto',
        weights='distance',
    )
    classifier.fit(X_train, y_train['label'])

    predicted = pd.DataFrame(data={"label_pred": classifier.predict(X_val)})
    tpr, f1, ber, fpr = cal_score(predicted, y_val['label'])
    return tpr, f1, ber, fpr, predicted
def nn_predict_with_distance_adjust(training_feature, training_label):
    """Cross-validate the NN predictor over a coarse-to-fine sweep of alpha.

    Alpha starts at 0.10 and is raised while it is ``<= 0.9``; the increment
    is 0.01 while alpha lies in ``[0.4, 0.6]`` and 0.1 otherwise. For each
    alpha a shuffled 10-fold stratified split is run and the per-fold scores
    from ``cal_score`` are averaged.

    NOTE(review): alpha is advanced by repeated floating-point addition, so
    which values fall exactly on the 0.4 / 0.6 / 0.9 boundaries depends on
    rounding. The arithmetic is kept as written because the intended grid is
    not evident from the code.

    Args:
        training_feature: Feature DataFrame.
        training_label: DataFrame with a ``label`` column, row-aligned with
            ``training_feature``.

    Returns:
        DataFrame with columns ``alpha``, ``f1_score``, ``TPR``, ``FPR`` and
        ``BER``, one row per alpha, holding the fold averages.
    """
    alpha_list = []
    tpr_list = []
    fpr_list = []
    BER_list = []
    f1_score_list = []

    skf = StratifiedKFold(n_splits=10, shuffle=True)
    labels = training_label['label']

    alpha = 0.10
    while alpha <= 0.9:
        fold_tpr = []
        fold_fpr = []
        fold_ber = []
        fold_f1 = []
        for train_index, test_index in skf.split(training_feature, labels):
            # split() yields positional row numbers, so select with .iloc;
            # .loc would treat them as index labels and only agrees when the
            # frames carry a default 0..n-1 index.
            X_train = training_feature.iloc[train_index]
            X_val = training_feature.iloc[test_index]
            y_train = training_label.iloc[train_index]
            y_val = training_label.iloc[test_index]

            dis_1, dis_2 = nn_distance_calculate(X_val, X_train, y_train)
            y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
            Precall, f1_score, BER, FPR = cal_score(y_pred_temp, y_val['label'])
            fold_tpr.append(Precall)
            fold_fpr.append(FPR)
            fold_ber.append(BER)
            fold_f1.append(f1_score)

        alpha_list.append(alpha)
        tpr_list.append(sum(fold_tpr) / len(fold_tpr))
        fpr_list.append(sum(fold_fpr) / len(fold_fpr))
        BER_list.append(sum(fold_ber) / len(fold_ber))
        f1_score_list.append(sum(fold_f1) / len(fold_f1))

        # Fine resolution inside [0.4, 0.6], coarse outside it.
        if 0.4 <= alpha <= 0.6:
            alpha = alpha + 0.01
        else:
            alpha = alpha + 0.1

    result = {
        "alpha": alpha_list,
        "TPR": tpr_list,
        "FPR": fpr_list,
        "f1_score": f1_score_list,
        "BER": BER_list,
    }
    columns = ["alpha", "f1_score", "TPR", "FPR", "BER"]
    return pd.DataFrame(data=result, columns=columns)
def kNN_k_parameter_adjust_with_bisaes_data(X_train, y_train, X_val, y_val, data_ratio):
    """Sweep kNN's neighbour count on a class-resampled training set.

    A training frame is assembled from columns ``feature0`` .. ``feature4``
    and ``label``. ``int(8000 * data_ratio)`` rows with label 1 and 8000 rows
    with label 2 are sampled from it once; every k in the sweep is then
    fitted on that same sample. k runs over 5..21 in steps of one and then
    24..297 in steps of three. Each iteration is scored with ``cal_score``
    and timed (minutes); the total run time is printed at the end.

    Args:
        X_train: Training features with columns ``feature0`` .. ``feature4``.
        y_train: Training labels with a ``label`` column.
        X_val: Validation features.
        y_val: Validation labels; the ``label`` column is scored against.
        data_ratio: Multiplier applied to 8000 to get the label-1 sample size.

    Returns:
        DataFrame with columns ``k_value``, ``f1_score``, ``TPR``, ``FPR``,
        ``BER`` and ``time``, one row per k.
    """
    sweep_started = time.time()

    # Assemble features and label into a single frame for sampling.
    feature_names = ('feature0', 'feature1', 'feature2', 'feature3', 'feature4')
    frame_columns = {name: X_train[name] for name in feature_names}
    frame_columns['label'] = y_train['label']
    train_data = pd.DataFrame(data=frame_columns)

    # Draw label 1 first, then label 2, and stack them in that order.
    drawn_label_1 = train_data[train_data['label'] == 1].sample(int(8000 * data_ratio))
    drawn_label_2 = train_data[train_data['label'] == 2].sample(8000)
    train_com = pd.concat([drawn_label_1, drawn_label_2])
    sample_label = train_com[['label']]
    sample_feature = train_com.drop(columns=["label"])

    history = {
        "k_value": [],
        "TPR": [],
        "FPR": [],
        "f1_score": [],
        "BER": [],
        "time": [],
    }
    # Fine steps up to 21, coarse steps of three afterwards, all below 300.
    for neighbours in list(range(5, 22)) + list(range(24, 300, 3)):
        iteration_started = time.time()
        model = KNeighborsClassifier(n_neighbors=neighbours,
                                     algorithm='auto',
                                     weights='distance')
        model.fit(sample_feature, sample_label['label'])
        predicted = pd.DataFrame(data={"label_pred": model.predict(X_val)})
        tpr, f1, ber, fpr = cal_score(predicted, y_val['label'])

        history["k_value"].append(neighbours)
        history["TPR"].append(tpr)
        history["FPR"].append(fpr)
        history["BER"].append(ber)
        history["f1_score"].append(f1)
        history["time"].append((time.time() - iteration_started) / 60)

    print("the total executing time:%5.1fminute" % ((time.time() - sweep_started) / 60))
    ordered = ["k_value", "f1_score", "TPR", "FPR", "BER", "time"]
    return pd.DataFrame(data=history, columns=ordered)
def parameter_adjust_presion(X_train, y_train, X_val, y_val, sample_amount, data_ratio):
    """Sweep coupled (C, gamma) exponents of an RBF SVC on resampled data.

    For each ``C_exp`` in -5 .. 2 the companion exponent is
    ``gamma_exp = -C_exp - 12``. A fresh sample of the training data is
    drawn, an ``SVC(kernel="rbf", C=2**C_exp, gamma=2**gamma_exp)`` is fitted
    on it, and the validation prediction is scored with ``cal_score``.

    The sample holds ``int(sample_amount * data_ratio / (data_ratio + 1))``
    rows with label 1 and the remainder of ``sample_amount`` with label 2.

    NOTE(review): the earlier ``while (C_exp < 3)`` loop never changed
    ``C_exp`` and therefore could not terminate. A step of 1 is assumed here;
    confirm the intended grid spacing.

    Args:
        X_train: Training features.
        y_train: Training labels with a ``label`` column.
        X_val: Validation features.
        y_val: Validation labels; the ``label`` column is scored against.
        sample_amount: Total number of training rows sampled per fit.
        data_ratio: Label-1 to label-2 ratio of the sample.

    Returns:
        DataFrame with columns ``gamma_exp``, ``C_exp``, ``f1_score``,
        ``TPR``, ``FPR``, ``BER`` and ``time`` (minutes per iteration).
    """
    tpr_list = []
    fpr_list = []
    BER_list = []
    f1_score_list = []
    time_list = []
    gamma_exp_list = []
    C_exp_list = []

    label_1_amount = int(sample_amount * (data_ratio / (data_ratio + 1)))
    label_2_amount = int(sample_amount - label_1_amount)

    # The joined frame and the per-class subsets do not depend on the
    # hyper-parameters, so build them once.
    train_data = pd.concat([X_train, y_train['label']], axis=1, join='outer')
    class_1_rows = train_data[train_data['label'] == 1]
    class_2_rows = train_data[train_data['label'] == 2]

    for C_exp in range(-5, 3):
        gamma_exp = -C_exp - 12
        start1 = time.time()

        # Redraw the sample for every setting; label 1 is drawn first.
        Class1_sample = class_1_rows.sample(label_1_amount)
        Class2_sample = class_2_rows.sample(label_2_amount)
        train_com = pd.concat([Class1_sample, Class2_sample])
        sample_label = pd.DataFrame(train_com['label'])
        sample_feature = train_com.drop(["label"], axis=1)

        # No ``degree`` argument: it is ignored by the RBF kernel.
        svc_clf = SVC(kernel="rbf", gamma=2 ** gamma_exp, C=2 ** C_exp)
        svc_clf.fit(sample_feature, sample_label['label'])
        y_pred = pd.DataFrame(data={"label_pred": svc_clf.predict(X_val)})
        Precall, f1_score, BER, FPR = cal_score(y_pred, y_val['label'])

        time_list.append((time.time() - start1) / 60)
        tpr_list.append(Precall)
        f1_score_list.append(f1_score)
        BER_list.append(BER)
        fpr_list.append(FPR)
        gamma_exp_list.append(gamma_exp)
        C_exp_list.append(C_exp)

    result = {
        "gamma_exp": gamma_exp_list,
        "C_exp": C_exp_list,
        "TPR": tpr_list,
        "FPR": fpr_list,
        "f1_score": f1_score_list,
        "BER": BER_list,
        "time": time_list,
    }
    columns = ["gamma_exp", "C_exp", "f1_score", "TPR", "FPR", "BER", "time"]
    return pd.DataFrame(data=result, columns=columns)
def NN_cross_validation(training_feature, training_label, alpha):
    """Run shuffled 10-fold stratified cross-validation of the NN predictor.

    For each fold, distances are computed with ``nn_distance_calculate``,
    predictions are made with ``nn_predict`` and scored with ``cal_score``;
    the per-fold scores are collected in a ``CalList`` and averaged by it.

    Args:
        training_feature: Feature DataFrame.
        training_label: DataFrame with a ``label`` column, row-aligned with
            ``training_feature``.
        alpha: Value forwarded to ``nn_predict``.

    Returns:
        The four values returned by ``CalList.list_average_cal()``, in the
        order that method returns them (unpacked here as Precall, FPR, BER,
        f1_score).
    """
    # One splitter is enough; get_n_splits() only reports the fold count and
    # its result was never used.
    skf = StratifiedKFold(n_splits=10, shuffle=True)
    Cal_Result_List = CalList()

    for train_index, test_index in skf.split(training_feature, training_label['label']):
        # split() yields positional row numbers, so select with .iloc; .loc
        # would treat them as index labels and only agrees when the frames
        # carry a default 0..n-1 index.
        X_train = training_feature.iloc[train_index]
        X_val = training_feature.iloc[test_index]
        y_train = training_label.iloc[train_index]
        y_val = training_label.iloc[test_index]

        dis_1, dis_2 = nn_distance_calculate(X_val, X_train, y_train)
        y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
        Precall, f1_score, BER, FPR = cal_score(y_pred_temp, y_val['label'])
        Cal_Result_List.list_append(Precall, f1_score, BER, FPR)

    Precall, FPR, BER, f1_score = Cal_Result_List.list_average_cal()
    return Precall, FPR, BER, f1_score
def nn_feature_selection_wrap(training_feature, training_label, alpha):
    """Cross-validate the NN predictor on every pair of the five feature columns.

    For each 2-combination of ``feature0`` .. ``feature4`` a shuffled 10-fold
    stratified split is run on just those two columns, and the per-fold
    scores from ``cal_score`` are averaged.

    ``alpha`` is held constant for every feature pair so that the pairs are
    compared under the same setting.

    NOTE(review): an earlier function with this same name exists in this
    file; being defined later, this definition is the one that takes effect.

    Args:
        training_feature: DataFrame holding the columns ``feature0`` ..
            ``feature4``.
        training_label: DataFrame with a ``label`` column, row-aligned with
            ``training_feature``.
        alpha: Value forwarded to ``nn_predict`` and recorded with each row.

    Returns:
        DataFrame with columns ``alpha``, ``feature_0``, ``feature_1``,
        ``f1_score``, ``TPR``, ``FPR`` and ``BER``, one row per feature pair.
    """
    feature_0_list = []
    feature_1_list = []
    tpr_list = []
    fpr_list = []
    BER_list = []
    f1_score_list = []
    alpha_list = []

    skf = StratifiedKFold(n_splits=10, shuffle=True)
    labels = training_label['label']
    feature_avaliable = ['feature0', 'feature1', 'feature2', 'feature3', 'feature4']

    for feature_a, feature_b in itertools.combinations(feature_avaliable, 2):
        pair = [feature_a, feature_b]
        fold_tpr = []
        fold_fpr = []
        fold_ber = []
        fold_f1 = []
        for train_index, test_index in skf.split(training_feature, labels):
            # split() yields positional row numbers, so select with .iloc;
            # .loc would treat them as index labels and only agrees when the
            # frames carry a default 0..n-1 index.
            X_train = training_feature.iloc[train_index].loc[:, pair]
            X_val = training_feature.iloc[test_index].loc[:, pair]
            y_train = training_label.iloc[train_index]
            y_val = training_label.iloc[test_index]

            dis_1, dis_2 = nn_distance_calculate(X_val, X_train, y_train)
            y_pred_temp = nn_predict(dis_1, dis_2, alpha, X_val)
            Precall, f1_score, BER, FPR = cal_score(y_pred_temp, y_val['label'])
            fold_tpr.append(Precall)
            fold_fpr.append(FPR)
            fold_ber.append(BER)
            fold_f1.append(f1_score)

        alpha_list.append(alpha)
        feature_0_list.append(feature_a)
        feature_1_list.append(feature_b)
        tpr_list.append(sum(fold_tpr) / len(fold_tpr))
        fpr_list.append(sum(fold_fpr) / len(fold_fpr))
        BER_list.append(sum(fold_ber) / len(fold_ber))
        f1_score_list.append(sum(fold_f1) / len(fold_f1))

    result = {
        "alpha": alpha_list,
        "feature_0": feature_0_list,
        "feature_1": feature_1_list,
        "TPR": tpr_list,
        "FPR": fpr_list,
        "f1_score": f1_score_list,
        "BER": BER_list,
    }
    columns = ["alpha", "feature_0", "feature_1", "f1_score", "TPR", "FPR", "BER"]
    return pd.DataFrame(data=result, columns=columns)
def kNN_data_ratio_adjust(X_train, y_train, X_val, y_val, k_value):
    """Measure how the label-1 / label-2 sample ratio affects kNN scores.

    The label-1 sample size runs from 40000 down to 10000 in steps of 2000
    while the label-2 sample size stays at 20000. For each size the training
    data is resampled three times; a distance-weighted
    ``KNeighborsClassifier`` is fitted on each sample and scored on the
    validation set with ``cal_score``. Scores and fit/predict times (minutes)
    are averaged over the three repetitions. The total run time is printed
    at the end.

    Args:
        X_train: Training features with columns ``feature0`` .. ``feature4``.
        y_train: Training labels with a ``label`` column.
        X_val: Validation features.
        y_val: Validation labels; the ``label`` column is scored against.
        k_value: Number of neighbours handed to ``KNeighborsClassifier``.

    Returns:
        DataFrame with columns ``label_1_amount``, ``label_2_amount``,
        ``label 1: label 2 ratio``, ``f1_score``, ``TPR``, ``FPR``, ``BER``
        and ``time``, one row per label-1 sample size.
    """
    start = time.time()
    label_2_amount = 20000
    repetitions = 3

    label_1_amount_list = []
    label_2_amount_list = []
    ratio_list = []
    tpr_list = []
    fpr_list = []
    BER_list = []
    f1_score_list = []
    time_list = []

    # The combined frame and the per-class subsets are the same for every
    # iteration, so build them once instead of once per repetition.
    feature_names = ['feature0', 'feature1', 'feature2', 'feature3', 'feature4']
    train_data = {name: X_train[name] for name in feature_names}
    train_data['label'] = y_train['label']
    train_data = pd.DataFrame(data=train_data)
    class_1_rows = train_data[train_data['label'] == 1]
    class_2_rows = train_data[train_data['label'] == 2]

    for label_1_amount in range(40000, 9000, -2000):
        rep_tpr = []
        rep_fpr = []
        rep_ber = []
        rep_f1 = []
        rep_time = []
        for _ in range(repetitions):
            # Label 1 is drawn first, then label 2.
            Class1_sample = class_1_rows.sample(label_1_amount)
            Class2_sample = class_2_rows.sample(label_2_amount)
            train_com = pd.concat([Class1_sample, Class2_sample])
            sample_label = pd.DataFrame(train_com['label'])
            sample_feature = train_com.drop(["label"], axis=1)

            # Only fitting, prediction and scoring are timed, not sampling.
            start1 = time.time()
            neigh = KNeighborsClassifier(n_neighbors=k_value,
                                         algorithm='auto',
                                         weights='distance')
            neigh.fit(sample_feature, sample_label['label'])
            y_pred = pd.DataFrame(data={"label_pred": neigh.predict(X_val)})
            Precall, f1_score, BER, FPR = cal_score(y_pred, y_val['label'])
            rep_tpr.append(Precall)
            rep_fpr.append(FPR)
            rep_ber.append(BER)
            rep_f1.append(f1_score)
            rep_time.append((time.time() - start1) / 60)

        label_1_amount_list.append(label_1_amount)
        label_2_amount_list.append(label_2_amount)
        ratio_list.append(label_1_amount / label_2_amount)
        tpr_list.append(sum(rep_tpr) / len(rep_tpr))
        fpr_list.append(sum(rep_fpr) / len(rep_fpr))
        BER_list.append(sum(rep_ber) / len(rep_ber))
        f1_score_list.append(sum(rep_f1) / len(rep_f1))
        time_list.append(sum(rep_time) / len(rep_time))

    print("the total executing time:%5.1fminute" % ((time.time() - start) / 60))
    result = {
        "label_1_amount": label_1_amount_list,
        "label_2_amount": label_2_amount_list,
        "label 1: label 2 ratio": ratio_list,
        "TPR": tpr_list,
        "FPR": fpr_list,
        "f1_score": f1_score_list,
        "BER": BER_list,
        "time": time_list,
    }
    columns = ["label_1_amount", "label_2_amount", "label 1: label 2 ratio",
               "f1_score", "TPR", "FPR", "BER", "time"]
    return pd.DataFrame(data=result, columns=columns)