def process_svm_result(fileTest, filePredict):
    """Evaluate SVM predictions against gold labels and print the metrics.

    fileTest: sample file; the first whitespace-separated token of each line
        is the gold class label ('1' = no emotion, '2' = emotional, per the
        printed headers below).
    filePredict: file with one predicted label per line, aligned with fileTest.

    Prints per-class precision/recall/F-score and the overall accuracy.
    """
    # Gold labels of the test set (first column of each line).
    test_y = []
    with open(fileTest, 'r') as f_test:
        for line in f_test:
            test_y.append(line.strip().split()[0])
    # Predicted labels, one per line.
    with open(filePredict, 'r') as f_predict:
        predict = [line.strip() for line in f_predict]
    # Evaluation metrics: precision, recall and F-score per class.
    class_dict = {'1': '1', '2': "2"}
    precision_dict = performance.calc_precision(predict, test_y, class_dict)
    recall_dict = performance.calc_recall(predict, test_y, class_dict)
    fscore_dict = performance.calc_fscore(predict, test_y, class_dict)
    print("有情绪微博——————————————————")
    print("准确率:%r" % (precision_dict['2']))
    print("召回率:%r" % (recall_dict['2']))
    print("F值: %r" % (fscore_dict['2']))
    print("无情绪微博——————————————————")
    print("准确率:%r" % (precision_dict['1']))
    print("召回率:%r" % (recall_dict['1']))
    print("F值: %r" % (fscore_dict['1']))
    accuracy = metrics.accuracy_score(test_y, predict)
    print('accuracy: %.2f%%' % (100 * accuracy))
def evaluation(fileTest, filePredict):
    """Evaluate multi-class emotion predictions and print all metrics.

    fileTest: file whose third whitespace-separated token per line is the
        gold emotion label.
    filePredict: file with one predicted emotion label per line.

    Prints overall accuracy, then macro/micro-averaged and per-class
    precision, recall and F-score over the seven emotions plus 'none'.
    """
    # Gold labels of the test set (third column of each line).
    test_y = []
    with open(fileTest, 'r') as f_test:
        for line in f_test:
            test_y.append(line.strip().split()[2])
    # Predicted labels, one per line.
    predict = []
    with open(filePredict, 'r') as f_predict:
        for line in f_predict:
            predict.append(line.strip())
    # Overall accuracy first.
    accuracy = metrics.accuracy_score(test_y, predict)
    print('accuracy: %.2f%%' % (100 * accuracy))
    # Detailed metrics.
    print("weibo情绪识别任务------------")
    # Macro/micro averages are computed over the seven emotions only
    # ('none' is excluded from the averaging set).
    class_dict1 = {'happiness': 'happiness', 'like': 'like', 'anger': 'anger',
                   'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust',
                   'surprise': 'surprise'}
    macro_dict1 = performance.calc_macro_average(predict, test_y, class_dict1)
    micro_dict1 = performance.calc_micro_average(predict, test_y, class_dict1)
    # Per-class metrics include 'none' as well.
    class_dict = {'happiness': 'happiness', 'like': 'like', 'anger': 'anger',
                  'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust',
                  'surprise': 'surprise', 'none': 'none'}
    # Precision.
    precision_dict = performance.calc_precision(predict, test_y, class_dict)
    print("macro_precision——%r" % (macro_dict1['macro_p']))
    print("micro_precision——%r" % (micro_dict1['micro_p']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], precision_dict[class_dict[i]]))
    # Recall.
    recall_dict = performance.calc_recall(predict, test_y, class_dict)
    print("macro_recall——%r" % (macro_dict1['macro_r']))
    print("micro_recall——%r" % (micro_dict1['micro_r']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], recall_dict[class_dict[i]]))
    # F-measure.
    fscore_dict = performance.calc_fscore(predict, test_y, class_dict)
    print("macro_fscore——%r" % (macro_dict1['macro_f1']))
    print("micro_fscore——%r" % (micro_dict1['micro_f1']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], fscore_dict[class_dict[i]]))
    print("-------------------------")
def score(result):
    """Score binary 'Y' predictions against the manual annotations.

    result: list of predicted labels, aligned line-by-line with
        'weibo_label.txt' (whose second column is the gold label).

    Prints precision, recall and F1 for the positive class 'Y'.
    """
    # Manually annotated gold labels (second column of each line).
    label = []
    with open('weibo_label.txt', 'r') as f1:
        for line in f1:
            label.append(line.strip().split()[1])
    # Only the positive class 'Y' is evaluated.
    class_dict = {'Y': 'Y'}
    print("weibo情绪判断任务------------")
    precision_dict = performance.calc_precision(result, label, class_dict)
    print("precision:%r" % (precision_dict['Y']))
    recall_dict = performance.calc_recall(result, label, class_dict)
    print("recall:%r" % (recall_dict['Y']))
    fscore_dict = performance.calc_fscore(result, label, class_dict)
    print("f1:%r" % (fscore_dict['Y']))
    print("-------------------------")
def score(result):
    """Score binary 'Y' predictions against the manual annotations.

    result: list of predicted labels, aligned line-by-line with 'label.txt'
        (whose second column is the gold label).

    Prints precision, recall and F1 for the positive class 'Y'.

    NOTE(review): this redefines score() — an earlier definition in this file
    reads 'weibo_label.txt' instead; only the last definition is bound at
    import time. Consider renaming one of them.
    """
    # Manually annotated gold labels (second column of each line).
    label = []
    with open('label.txt', 'r') as f1:
        for line in f1:
            label.append(line.strip().split()[1])
    # Only the positive class 'Y' is evaluated.
    class_dict = {'Y': 'Y'}
    print("weibo情绪判断任务------------")
    precision_dict = performance.calc_precision(result, label, class_dict)
    print("precision:%r" % (precision_dict['Y']))
    recall_dict = performance.calc_recall(result, label, class_dict)
    print("recall:%r" % (recall_dict['Y']))
    fscore_dict = performance.calc_fscore(result, label, class_dict)
    print("f1:%r" % (fscore_dict['Y']))
    print("-------------------------")
def merge_svm_GBDT():
    """Fuse SVM and GBDT predictions and evaluate the merged result.

    GBDT regression scores below threshold_minus are mapped to class '1',
    scores above threshold_plus to class '2'; scores inside the ambiguous
    band fall back to the SVM's predicted label for that sample.

    Reads 'temporary/svm_predict', 'temporary/GBDT_predict1000' and
    'temporary/test.sample'; prints per-class precision/recall/F-score
    and overall accuracy.
    """
    merge_result = []
    # Decision band for the GBDT score; values inside (minus, plus) are
    # ambiguous. NOTE(review): presumably tuned on a dev set — confirm.
    threshold_minus = 1.496
    threshold_plus = 1.65
    # SVM predictions, one label per line.
    svm_predict = []
    with open('temporary/svm_predict', 'r') as f_svm:
        for line in f_svm:
            svm_predict.append(line.strip())
    # GBDT scores: hard-assign outside the band, defer to SVM inside it.
    with open('temporary/GBDT_predict1000', 'r') as f_GBDT:
        for index, line in enumerate(f_GBDT):
            num = float(line.strip())
            if num < threshold_minus:
                merge_result.append('1')
            elif num > threshold_plus:
                merge_result.append('2')
            else:
                merge_result.append(svm_predict[index])
    # Gold labels of the test set (first column of each line).
    test_y = []
    with open('temporary/test.sample', 'r') as f_test:
        for line in f_test:
            test_y.append(line.strip().split()[0])
    # Evaluation metrics: precision, recall and F-score per class.
    class_dict = {'1': '1', '2': "2"}
    precision_dict = performance.calc_precision(merge_result, test_y, class_dict)
    recall_dict = performance.calc_recall(merge_result, test_y, class_dict)
    fscore_dict = performance.calc_fscore(merge_result, test_y, class_dict)
    print("有情绪微博——————————————————")
    print("准确率:%r" % (precision_dict['2']))
    print("召回率:%r" % (recall_dict['2']))
    print("F值: %r" % (fscore_dict['2']))
    print("无情绪微博——————————————————")
    print("准确率:%r" % (precision_dict['1']))
    print("召回率:%r" % (recall_dict['1']))
    print("F值: %r" % (fscore_dict['1']))
    accuracy = metrics.accuracy_score(test_y, merge_result)
    print('accuracy: %.2f%%' % (100 * accuracy))
def score_emotion(fileName1, fileName2):
    """Evaluate per-emotion predictions against annotated labels.

    fileName1: annotation file; the third whitespace-separated token of each
        line is the gold emotion label.
    fileName2: prediction file with one emotion label per line.

    Prints macro-averaged and per-class precision, recall and F-score over
    the seven emotions plus 'none'.
    """
    # Gold labels (third column of each annotation line).
    label = []
    with open(fileName1, 'r') as f:
        for line in f:
            label.append(line.strip().split()[2])
    # Predicted labels, one per line.
    result = []
    with open(fileName2, 'r') as f1:
        for line in f1:
            result.append(line.strip())
    # Detailed metrics.
    print("weibo情绪识别任务------------")
    # Macro averages are computed over the seven emotions only
    # ('none' is excluded from the averaging set).
    class_dict1 = {'happiness': 'happiness', 'like': 'like', 'anger': 'anger',
                   'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust',
                   'surprise': 'surprise'}
    macro_dict1 = performance.calc_macro_average(result, label, class_dict1)
    # Per-class metrics include 'none' as well.
    class_dict = {'happiness': 'happiness', 'like': 'like', 'anger': 'anger',
                  'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust',
                  'surprise': 'surprise', 'none': 'none'}
    # Precision.
    precision_dict = performance.calc_precision(result, label, class_dict)
    print("macro_precision——%r" % (macro_dict1['macro_p']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], precision_dict[class_dict[i]]))
    # Recall.
    recall_dict = performance.calc_recall(result, label, class_dict)
    print("macro_recall——%r" % (macro_dict1['macro_r']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], recall_dict[class_dict[i]]))
    # F-measure.
    fscore_dict = performance.calc_fscore(result, label, class_dict)
    print("macro_fscore——%r" % (macro_dict1['macro_f1']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], fscore_dict[class_dict[i]]))
    print("-------------------------")
def merge_svm_GBDT():
    """Fuse SVM and GBDT predictions and evaluate the merged result.

    GBDT regression scores below threshold_minus are mapped to class '1',
    scores above threshold_plus to class '2'; scores inside the ambiguous
    band fall back to the SVM's predicted label for that sample.

    NOTE(review): this redefines merge_svm_GBDT() — an identical earlier
    definition exists in this file; only the last one is bound at import
    time. Consider deleting one copy.
    """
    merge_result = []
    # Decision band for the GBDT score; values inside (minus, plus) are
    # ambiguous. NOTE(review): presumably tuned on a dev set — confirm.
    threshold_minus = 1.496
    threshold_plus = 1.65
    # SVM predictions, one label per line.
    svm_predict = []
    with open('temporary/svm_predict', 'r') as f_svm:
        for line in f_svm:
            svm_predict.append(line.strip())
    # GBDT scores: hard-assign outside the band, defer to SVM inside it.
    with open('temporary/GBDT_predict1000', 'r') as f_GBDT:
        for index, line in enumerate(f_GBDT):
            num = float(line.strip())
            if num < threshold_minus:
                merge_result.append('1')
            elif num > threshold_plus:
                merge_result.append('2')
            else:
                merge_result.append(svm_predict[index])
    # Gold labels of the test set (first column of each line).
    test_y = []
    with open('temporary/test.sample', 'r') as f_test:
        for line in f_test:
            test_y.append(line.strip().split()[0])
    # Evaluation metrics: precision, recall and F-score per class.
    class_dict = {'1': '1', '2': "2"}
    precision_dict = performance.calc_precision(merge_result, test_y, class_dict)
    recall_dict = performance.calc_recall(merge_result, test_y, class_dict)
    fscore_dict = performance.calc_fscore(merge_result, test_y, class_dict)
    print("有情绪微博——————————————————")
    print("准确率:%r" % (precision_dict['2']))
    print("召回率:%r" % (recall_dict['2']))
    print("F值: %r" % (fscore_dict['2']))
    print("无情绪微博——————————————————")
    print("准确率:%r" % (precision_dict['1']))
    print("召回率:%r" % (recall_dict['1']))
    print("F值: %r" % (fscore_dict['1']))
    accuracy = metrics.accuracy_score(test_y, merge_result)
    print('accuracy: %.2f%%' % (100 * accuracy))
def score_emotion(fileName1, fileName2):
    """Evaluate per-emotion predictions against annotated labels.

    fileName1: annotation file; the third whitespace-separated token of each
        line is the gold emotion label.
    fileName2: prediction file with one emotion label per line.

    Prints macro-averaged and per-class precision, recall and F-score over
    the seven emotions plus 'none'.

    NOTE(review): this redefines score_emotion() — an identical earlier
    definition exists in this file; only the last one is bound at import
    time. Consider deleting one copy.
    """
    # Gold labels (third column of each annotation line).
    label = []
    with open(fileName1, 'r') as f:
        for line in f:
            label.append(line.strip().split()[2])
    # Predicted labels, one per line.
    result = []
    with open(fileName2, 'r') as f1:
        for line in f1:
            result.append(line.strip())
    # Detailed metrics.
    print("weibo情绪识别任务------------")
    # Macro averages are computed over the seven emotions only
    # ('none' is excluded from the averaging set).
    class_dict1 = {'happiness': 'happiness', 'like': 'like', 'anger': 'anger',
                   'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust',
                   'surprise': 'surprise'}
    macro_dict1 = performance.calc_macro_average(result, label, class_dict1)
    # Per-class metrics include 'none' as well.
    class_dict = {'happiness': 'happiness', 'like': 'like', 'anger': 'anger',
                  'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust',
                  'surprise': 'surprise', 'none': 'none'}
    # Precision.
    precision_dict = performance.calc_precision(result, label, class_dict)
    print("macro_precision——%r" % (macro_dict1['macro_p']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], precision_dict[class_dict[i]]))
    # Recall.
    recall_dict = performance.calc_recall(result, label, class_dict)
    print("macro_recall——%r" % (macro_dict1['macro_r']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], recall_dict[class_dict[i]]))
    # F-measure.
    fscore_dict = performance.calc_fscore(result, label, class_dict)
    print("macro_fscore——%r" % (macro_dict1['macro_f1']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], fscore_dict[class_dict[i]]))
    print("-------------------------")
'surprise': 'surprise' } macro_dict1 = performance.calc_macro_average(result, label, class_dict1) #每一类情绪 class_dict = { 'happiness': 'happiness', 'like': 'like', 'anger': 'anger', 'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust', 'surprise': 'surprise', 'none': 'none' } #precision precision_dict = performance.calc_precision(result, label, class_dict) print("macro_precision——%r" % (macro_dict1['macro_p'])) for i in class_dict: print("%r:%r" % (class_dict[i], precision_dict[class_dict[i]])) #recall recall_dict = performance.calc_recall(result, label, class_dict) print("macro_recall——%r" % (macro_dict1['macro_r'])) for i in class_dict: print("%r:%r" % (class_dict[i], recall_dict[class_dict[i]])) #f-measure fscore_dict = performance.calc_fscore(result, label, class_dict) print("macro_fscore——%r" % (macro_dict1['macro_f1'])) for i in class_dict: print("%r:%r" % (class_dict[i], fscore_dict[class_dict[i]])) print("-------------------------")
# label=readin_label('weibo_label.txt') #正确率acc accuracy=performance.calc_acc(result,label) print("正确率:%r"%(accuracy)) #宏平均 class_dict1={'happiness':'happiness','like':'like','anger':'anger', 'sadness':'sadness','fear':'fear','disgust':'disgust','surprise':'surprise'} macro_dict1=performance.calc_macro_average(result,label,class_dict1) #每一类情绪 class_dict={'happiness':'happiness','like':'like','anger':'anger', 'sadness':'sadness','fear':'fear','disgust':'disgust','surprise':'surprise', 'none':'none'} #precision precision_dict=performance.calc_precision(result,label,class_dict) print("macro_precision——%r"%(macro_dict1['macro_p'])) for i in class_dict: print("%r:%r"%(class_dict[i],precision_dict[class_dict[i]])) #recall recall_dict=performance.calc_recall(result,label,class_dict) print("macro_recall——%r"%(macro_dict1['macro_r'])) for i in class_dict: print("%r:%r"%(class_dict[i],recall_dict[class_dict[i]])) #f-measure fscore_dict=performance.calc_fscore(result,label,class_dict) print("macro_fscore——%r"%(macro_dict1['macro_f1'])) for i in class_dict: print("%r:%r"%(class_dict[i],fscore_dict[class_dict[i]])) print("-------------------------")
def gradient_boosting_hhh(fname_samp_train, fname_samp_test, len_term_set):
    """Train a GBDT classifier on sample files and print evaluation metrics.

    fname_samp_train / fname_samp_test: sample files loaded via
        pytc.load_samps (each row is a feature mapping; labels come back as
        a list).
    len_term_set: vocabulary size used by process_for_train_x_or_test_x to
        expand each sample into a fixed-length feature vector.

    Writes the predicted labels to 'predict_classifier' (one per line) and
    prints per-class precision/recall/F-score plus overall accuracy.
    """
    # for_train_x is one feature mapping per sample; train_y is a label list.
    for_train_x, train_y = pytc.load_samps(fname_samp_train, fs_num=0)
    for_test_x, test_y = pytc.load_samps(fname_samp_test, fs_num=0)
    train_x = process_for_train_x_or_test_x(for_train_x, len_term_set)
    test_x = process_for_train_x_or_test_x(for_test_x, len_term_set)

    num_train, num_feat = np.shape(train_x)
    num_test, num_feat = np.shape(test_x)
    print('******************** 数据信息 *********************')
    print('#训练集数据: %d, #测试集数据: %d, 维度: %d' % (num_train, num_test, num_feat))

    # Train the classifier and report wall-clock training time.
    start_time = time.time()
    model = gradient_boosting_classifier(train_x, train_y)
    print('训练耗时 %fs!' % (time.time() - start_time))
    predict = model.predict(test_x)

    # Persist the predicted labels, one per line.
    with open('predict_classifier', 'w') as fw_predict:
        for item in predict:
            fw_predict.write('%s\n' % item)

    # Re-read the gold labels from the raw sample file (first column),
    # replacing the test_y returned by load_samps above.
    # NOTE(review): presumably the two label sources agree — verify that
    # fname_samp_test is in fact 'temporary/test.sample'.
    test_y = []
    with open('temporary/test.sample', 'r') as f_test:
        for line in f_test:
            test_y.append(line.strip().split()[0])

    # Evaluation metrics: precision, recall and F-score per class.
    class_dict = {'1': '1', '2': "2"}
    precision_dict = performance.calc_precision(predict, test_y, class_dict)
    recall_dict = performance.calc_recall(predict, test_y, class_dict)
    fscore_dict = performance.calc_fscore(predict, test_y, class_dict)
    print("有情绪微博——————————————————")
    print("准确率:%r" % (precision_dict['2']))
    print("召回率:%r" % (recall_dict['2']))
    print("F值: %r" % (fscore_dict['2']))
    print("无情绪微博——————————————————")
    print("准确率:%r" % (precision_dict['1']))
    print("召回率:%r" % (recall_dict['1']))
    print("F值: %r" % (fscore_dict['1']))
    accuracy = metrics.accuracy_score(test_y, predict)
    print('accuracy: %.2f%%' % (100 * accuracy))