def evaluate(prediction,labels_test): labels_test = [0 if x=="neutral" else 1 if x=="positive" else -1 for x in labels_test] #logistic regression evaluation print "Average F1 : " +str(measures.avgF1(labels_test,prediction,-1,1)) #print "Baseline AverageF1 : " +str(measures.avgF1(labels_test,baseline_prediction)) print "Accuracy : " +str(measures.accuracy(labels_test,prediction)) #print "Baseline Accuracy : "+str(measures.accuracy(labels_test,baseline_prediction)) print "F1 negative : " +str(measures.F1(labels_test,prediction,-1)) print "F1 positive : " +str(measures.F1(labels_test,prediction,1)) print "Precision negative: " +str(measures.precision(labels_test,prediction,-1)) print "Precision positive: " +str(measures.precision(labels_test,prediction,1)) print "Recall negative : " +str(measures.recall(labels_test,prediction,-1)) print "Recall positive : " +str(measures.recall(labels_test,prediction,1))
def plot_recall_precision(length, features_train, labels_train, features_test, labels_test):
    """Plot precision-recall curves for three growing training-set sizes.

    Trains a logistic-regression model on the first third, two thirds, and
    three thirds of the training data, sweeps the decision threshold from
    0.0 to 1.0 in steps of 0.001, and plots recall vs precision for class
    label 0 on each model, one colored curve per training-set size.

    Parameters:
        length         -- number of training examples (used to size the subsets)
        features_train -- training feature vectors
        labels_train   -- training labels, aligned with features_train
        features_test  -- test feature vectors
        labels_test    -- test labels, aligned with features_test

    Side effects: opens a matplotlib window via plt.show().
    """
    # Threshold sweep: 0.000, 0.001, ..., 1.000 (1001 values).
    threshold = [x / 1000.0 for x in range(0, 1001)]
    # Explicit floor division: same result as Python 2's int `/`, but stable
    # across Python versions and unambiguous to readers.
    step = length // 3
    colors = ['b', 'r', 'g']
    for i in range(3):
        # Train a classifier on the first (i+1)*step examples
        # (~(i+1)/3 of the training data; the remainder of a non-divisible
        # length is never used).
        f = features_train[0:(i + 1) * step]
        l = labels_train[0:(i + 1) * step]
        model = LogisticRegression.train(f, l)

        # Compute a (recall, precision) point for every threshold value.
        recall = []
        precision = []
        for t in threshold:
            prediction = LogisticRegression.predict(features_test, model, t)
            # NOTE(review): class label 0 is queried here although the plot is
            # titled 'Negative tweets' and evaluate() maps negative -> -1 —
            # confirm that predict() encodes the negative class as 0 here.
            recall.append(measures.recall(labels_test, prediction, 0))
            precision.append(measures.precision(labels_test, prediction, 0))

        # Bug fix: linewidth takes a number; the original passed the string "2.0".
        # NOTE(review): the i=2 curve is labeled "99%"; it actually covers
        # 3*step examples, i.e. all but the division remainder.
        plt.plot(recall, precision, linewidth=2.0,
                 label=str((i + 1) * 33) + "% of train data", color=colors[i])

    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Negative tweets')
    plt.legend()
    plt.show()