# Shared dependencies: scikit-learn metrics plus a project-local `performance`
# module providing calc_precision / calc_recall / calc_fscore,
# calc_macro_average / calc_micro_average and calc_acc.
from sklearn import metrics

import performance


def process_svm_result(fileTest, filePredict):
    # Gold labels of the test set (first column of each line).
    test_y = []
    with open(fileTest, 'r') as f_test:
        for line in f_test:
            lineSet = line.strip().split()
            test_y.append(lineSet[0])
    # Predicted labels, one per line.
    predict = []
    with open(filePredict, 'r') as f_predict:
        for line in f_predict:
            predict.append(line.strip())
    # Evaluation metrics: precision, recall and F-score per class.
    # Label '2' = emotional Weibo post, '1' = non-emotional Weibo post.
    class_dict = {'1': '1', '2': '2'}
    precision_dict = performance.calc_precision(predict, test_y, class_dict)
    recall_dict = performance.calc_recall(predict, test_y, class_dict)
    fscore_dict = performance.calc_fscore(predict, test_y, class_dict)
    print("Weibo posts with emotion --------------------")
    print("Precision: %r" % (precision_dict['2']))
    print("Recall:    %r" % (recall_dict['2']))
    print("F-score:   %r" % (fscore_dict['2']))
    print("Weibo posts without emotion -----------------")
    print("Precision: %r" % (precision_dict['1']))
    print("Recall:    %r" % (recall_dict['1']))
    print("F-score:   %r" % (fscore_dict['1']))
    accuracy = metrics.accuracy_score(test_y, predict)
    print('accuracy: %.2f%%' % (100 * accuracy))
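# The `performance` module is project-local and not shown here. Below is a
# minimal sketch of what its per-class helpers are assumed to look like (the
# real implementation may differ): each takes the predicted labels, the gold
# labels and a dict of class names, and returns a dict of per-class scores.
def _sketch_calc_precision(predict, gold, class_dict):
    """Hypothetical reference implementation of performance.calc_precision."""
    scores = {}
    for cls in class_dict.values():
        predicted_idx = [i for i, p in enumerate(predict) if p == cls]
        correct = sum(1 for i in predicted_idx if gold[i] == cls)
        scores[cls] = correct / len(predicted_idx) if predicted_idx else 0.0
    return scores


def _sketch_calc_recall(predict, gold, class_dict):
    """Hypothetical reference implementation of performance.calc_recall."""
    scores = {}
    for cls in class_dict.values():
        gold_idx = [i for i, g in enumerate(gold) if g == cls]
        correct = sum(1 for i in gold_idx if predict[i] == cls)
        scores[cls] = correct / len(gold_idx) if gold_idx else 0.0
    return scores


def _sketch_calc_fscore(predict, gold, class_dict):
    """Hypothetical reference implementation of performance.calc_fscore."""
    p = _sketch_calc_precision(predict, gold, class_dict)
    r = _sketch_calc_recall(predict, gold, class_dict)
    return {cls: (2 * p[cls] * r[cls] / (p[cls] + r[cls]) if p[cls] + r[cls] else 0.0)
            for cls in class_dict.values()}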
def evaluation(fileTest, filePredict):
    # Gold labels of the test set (third column of each line).
    test_y = []
    with open(fileTest, 'r') as f_test:
        for line in f_test:
            lineSet = line.strip().split()
            test_y.append(lineSet[2])
    # Predicted labels, one per line.
    predict = []
    with open(filePredict, 'r') as f_predict:
        for line in f_predict:
            predict.append(line.strip())
    # Overall accuracy.
    accuracy = metrics.accuracy_score(test_y, predict)
    print('accuracy: %.2f%%' % (100 * accuracy))
    # Per-class precision, recall and F-score.
    print("Weibo emotion recognition task ------------")

    # Macro and micro averages over the seven emotion classes (excluding 'none').
    class_dict1 = {'happiness': 'happiness', 'like': 'like', 'anger': 'anger',
                   'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust',
                   'surprise': 'surprise'}
    macro_dict1 = performance.calc_macro_average(predict, test_y, class_dict1)
    micro_dict1 = performance.calc_micro_average(predict, test_y, class_dict1)

    # Per-class metrics over all eight classes, including 'none'.
    class_dict = {'happiness': 'happiness', 'like': 'like', 'anger': 'anger',
                  'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust',
                  'surprise': 'surprise', 'none': 'none'}
    # Precision
    precision_dict = performance.calc_precision(predict, test_y, class_dict)
    print("macro_precision: %r" % (macro_dict1['macro_p']))
    print("micro_precision: %r" % (micro_dict1['micro_p']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], precision_dict[class_dict[i]]))
    # Recall
    recall_dict = performance.calc_recall(predict, test_y, class_dict)
    print("macro_recall: %r" % (macro_dict1['macro_r']))
    print("micro_recall: %r" % (micro_dict1['micro_r']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], recall_dict[class_dict[i]]))
    # F-measure
    fscore_dict = performance.calc_fscore(predict, test_y, class_dict)
    print("macro_fscore: %r" % (macro_dict1['macro_f1']))
    print("micro_fscore: %r" % (micro_dict1['micro_f1']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], fscore_dict[class_dict[i]]))
    print("-------------------------")
# ---- Example #3 ----
def score(result):
    # Manually annotated gold labels (second column of weibo_label.txt).
    label = []
    with open('weibo_label.txt', 'r') as f1:
        for line in f1:
            lineSet = line.strip().split()
            label.append(lineSet[1])
    # Report the metrics for the positive class 'Y'.
    class_dict = {'Y': 'Y'}
    print("Weibo emotion detection task ------------")
    precision_dict = performance.calc_precision(result, label, class_dict)
    print("precision: %r" % (precision_dict['Y']))
    recall_dict = performance.calc_recall(result, label, class_dict)
    print("recall: %r" % (recall_dict['Y']))
    fscore_dict = performance.calc_fscore(result, label, class_dict)
    print("f1: %r" % (fscore_dict['Y']))
    print("-------------------------")
def score(result):
    # Variant of score() above that reads the gold labels from 'label.txt'.
    label = []
    with open('label.txt', 'r') as f1:
        for line in f1:
            lineSet = line.strip().split()
            label.append(lineSet[1])
    # Report the metrics for the positive class 'Y'.
    class_dict = {'Y': 'Y'}
    print("Weibo emotion detection task ------------")
    precision_dict = performance.calc_precision(result, label, class_dict)
    print("precision: %r" % (precision_dict['Y']))
    recall_dict = performance.calc_recall(result, label, class_dict)
    print("recall: %r" % (recall_dict['Y']))
    fscore_dict = performance.calc_fscore(result, label, class_dict)
    print("f1: %r" % (fscore_dict['Y']))
    print("-------------------------")
def merge_svm_GBDT():
    # Merge SVM class predictions with GBDT regression scores: trust the GBDT
    # score when it falls confidently below/above the two thresholds, otherwise
    # fall back to the SVM prediction.
    merge_result = []
    threshold_minus = 1.496
    threshold_plus = 1.65
    # SVM predictions, one label per line.
    svm_predict = []
    with open('temporary/svm_predict', 'r') as f_svm:
        for line in f_svm:
            svm_predict.append(line.strip())
    # GBDT scores, one float per line.
    with open('temporary/GBDT_predict1000', 'r') as f_GBDT:
        for index, line in enumerate(f_GBDT):
            num = float(line.strip())
            if num < threshold_minus:
                merge_result.append('1')
            elif num > threshold_plus:
                merge_result.append('2')
            else:
                merge_result.append(svm_predict[index])
    # Gold labels of the test set (first column of each line).
    test_y = []
    with open('temporary/test.sample', 'r') as f_test:
        for line in f_test:
            lineSet = line.strip().split()
            test_y.append(lineSet[0])
    # Evaluation metrics: precision, recall and F-score per class.
    class_dict = {'1': '1', '2': '2'}
    precision_dict = performance.calc_precision(merge_result, test_y, class_dict)
    recall_dict = performance.calc_recall(merge_result, test_y, class_dict)
    fscore_dict = performance.calc_fscore(merge_result, test_y, class_dict)
    print("Weibo posts with emotion --------------------")
    print("Precision: %r" % (precision_dict['2']))
    print("Recall:    %r" % (recall_dict['2']))
    print("F-score:   %r" % (fscore_dict['2']))
    print("Weibo posts without emotion -----------------")
    print("Precision: %r" % (precision_dict['1']))
    print("Recall:    %r" % (recall_dict['1']))
    print("F-score:   %r" % (fscore_dict['1']))
    accuracy = metrics.accuracy_score(test_y, merge_result)
    print('accuracy: %.2f%%' % (100 * accuracy))
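# The per-class numbers printed above can be cross-checked against scikit-learn,
# which is already imported for accuracy_score. A minimal sketch (the helper
# below is not part of the original project):
from sklearn.metrics import classification_report


def _sketch_report(test_y, merge_result):
    # `labels` fixes the row order: '1' = non-emotional, '2' = emotional.
    print(classification_report(test_y, merge_result, labels=['1', '2'],
                                digits=4))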
def score_emotion(fileName1, fileName2):
    # Gold emotion labels (third column of each line in fileName1).
    label = []
    with open(fileName1, 'r') as f:
        for line in f:
            labelLine = line.strip().split()
            label.append(labelLine[2])
    # Predicted labels, one per line in fileName2.
    result = []
    with open(fileName2, 'r') as f1:
        for line in f1:
            result.append(line.strip())
    # Report the metrics.
    print("Weibo emotion recognition task ------------")

    # Macro average over the seven emotion classes (excluding 'none').
    class_dict1 = {'happiness': 'happiness', 'like': 'like', 'anger': 'anger',
                   'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust',
                   'surprise': 'surprise'}
    macro_dict1 = performance.calc_macro_average(result, label, class_dict1)

    # Per-class metrics over all eight classes, including 'none'.
    class_dict = {'happiness': 'happiness', 'like': 'like', 'anger': 'anger',
                  'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust',
                  'surprise': 'surprise', 'none': 'none'}
    # Precision
    precision_dict = performance.calc_precision(result, label, class_dict)
    print("macro_precision: %r" % (macro_dict1['macro_p']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], precision_dict[class_dict[i]]))
    # Recall
    recall_dict = performance.calc_recall(result, label, class_dict)
    print("macro_recall: %r" % (macro_dict1['macro_r']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], recall_dict[class_dict[i]]))
    # F-measure
    fscore_dict = performance.calc_fscore(result, label, class_dict)
    print("macro_fscore: %r" % (macro_dict1['macro_f1']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], fscore_dict[class_dict[i]]))
    print("-------------------------")
# ---- Example #10 ----
    # Fragment: the enclosing function definition is not shown; `result`
    # (predicted labels) and `label` (gold labels) are built earlier in the
    # original, e.g. via the commented-out call below.
    # label = readin_label('weibo_label.txt')
    # Overall accuracy.
    accuracy = performance.calc_acc(result, label)
    print("Accuracy: %r" % (accuracy))

    # Macro average over the seven emotion classes (excluding 'none').
    class_dict1 = {'happiness': 'happiness', 'like': 'like', 'anger': 'anger',
                   'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust',
                   'surprise': 'surprise'}
    macro_dict1 = performance.calc_macro_average(result, label, class_dict1)

    # Per-class metrics over all eight classes, including 'none'.
    class_dict = {'happiness': 'happiness', 'like': 'like', 'anger': 'anger',
                  'sadness': 'sadness', 'fear': 'fear', 'disgust': 'disgust',
                  'surprise': 'surprise', 'none': 'none'}
    # Precision
    precision_dict = performance.calc_precision(result, label, class_dict)
    print("macro_precision: %r" % (macro_dict1['macro_p']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], precision_dict[class_dict[i]]))
    # Recall
    recall_dict = performance.calc_recall(result, label, class_dict)
    print("macro_recall: %r" % (macro_dict1['macro_r']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], recall_dict[class_dict[i]]))
    # F-measure
    fscore_dict = performance.calc_fscore(result, label, class_dict)
    print("macro_fscore: %r" % (macro_dict1['macro_f1']))
    for i in class_dict:
        print("%r:%r" % (class_dict[i], fscore_dict[class_dict[i]]))
    print("-------------------------")
# ---- Example #11 ----
# Additional dependencies used by gradient_boosting_hhh: numpy, time and the
# project-local pytc module (sample/feature loading helpers).
import time

import numpy as np
import pytc


def gradient_boosting_hhh(fname_samp_train, fname_samp_test, len_term_set):
    # for_train_x is a list of per-sample feature dicts, train_y a list of labels.
    for_train_x, train_y = pytc.load_samps(fname_samp_train, fs_num=0)
    for_test_x, test_y = pytc.load_samps(fname_samp_test, fs_num=0)
    train_x = process_for_train_x_or_test_x(for_train_x, len_term_set)
    test_x = process_for_train_x_or_test_x(for_test_x, len_term_set)
    # For the regression variant, the data would be cast to float32 first:
    # train_x = np.float32(train_x)
    # train_y = np.float32(train_y)
    # test_x = np.float32(test_x)
    # test_y = np.float32(test_y)
    num_train, num_feat = np.shape(train_x)
    num_test, num_feat = np.shape(test_x)
    print('******************** Data info *********************')
    print('#train samples: %d, #test samples: %d, dimension: %d'
          % (num_train, num_test, num_feat))
    start_time = time.time()
    # Classification
    model = gradient_boosting_classifier(train_x, train_y)
    # Regression variant:
    # model = gradient_boosting_regressor(train_x, train_y)
    print('Training took %fs!' % (time.time() - start_time))
    predict = model.predict(test_x)
    # Write the predicted labels to a file.
    with open('predict_classifier', 'w') as fw_predict:
        for item in predict:
            fw_predict.write('%s\n' % item)
    # # Thresholding the regression scores into the two classes:
    # threshold = 1.539
    # f_w = open('temporary/GBDT_predict1000', 'w')
    # f_w_label = open('temporary/GBDT_predict_label1000', 'w')
    # for item in predict:
    #     f_w.write('%s\n' % item)
    #     if item >= threshold:
    #         item = 2
    #     else:
    #         item = 1
    #     f_w_label.write('%s\n' % item)
    # Gold labels of the test set (first column of each line).
    test_y = []
    with open('temporary/test.sample', 'r') as f_test:
        for line in f_test:
            lineSet = line.strip().split()
            test_y.append(lineSet[0])
    # # Reload the predicted labels from file instead:
    # predict = []
    # f_predict = open('temporary/GBDT_predict', 'r')
    # for line in f_predict.readlines():
    #     predict.append(line.strip())
    # Evaluation metrics: precision, recall and F-score per class.
    class_dict = {'1': '1', '2': '2'}
    precision_dict = performance.calc_precision(predict, test_y, class_dict)
    recall_dict = performance.calc_recall(predict, test_y, class_dict)
    fscore_dict = performance.calc_fscore(predict, test_y, class_dict)
    print("Weibo posts with emotion --------------------")
    print("Precision: %r" % (precision_dict['2']))
    print("Recall:    %r" % (recall_dict['2']))
    print("F-score:   %r" % (fscore_dict['2']))
    print("Weibo posts without emotion -----------------")
    print("Precision: %r" % (precision_dict['1']))
    print("Recall:    %r" % (recall_dict['1']))
    print("F-score:   %r" % (fscore_dict['1']))
    accuracy = metrics.accuracy_score(test_y, predict)
    print('accuracy: %.2f%%' % (100 * accuracy))
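# gradient_boosting_classifier, gradient_boosting_regressor and
# process_for_train_x_or_test_x are defined elsewhere in the original project
# and are not shown here. Below is a minimal sketch of plausible
# implementations, assuming the classifier/regressor are thin wrappers around
# scikit-learn's gradient boosting models and the feature processor expands
# per-sample feature dicts (term id -> value) into dense rows of width
# len_term_set; the hyperparameters and indexing convention are placeholders,
# not the project's actual settings.
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor


def _sketch_gradient_boosting_classifier(train_x, train_y, n_estimators=200):
    model = GradientBoostingClassifier(n_estimators=n_estimators)
    model.fit(train_x, train_y)
    return model


def _sketch_gradient_boosting_regressor(train_x, train_y, n_estimators=200):
    model = GradientBoostingRegressor(n_estimators=n_estimators)
    model.fit(train_x, train_y)
    return model


def _sketch_process_for_train_x_or_test_x(for_x, len_term_set):
    # Expand each sample's sparse feature dict into a dense feature vector.
    rows = []
    for feat_dict in for_x:
        row = [0.0] * len_term_set
        for term_id, value in feat_dict.items():
            row[int(term_id) - 1] = float(value)  # assumes 1-based term ids
        rows.append(row)
    return rows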