def main():
    """Print the five highest-scoring labels for the first held-out resume.

    Loads the pickled model, label names and ``count_vect`` object from the
    working directory, transforms the resumes in ``data_dict['data']`` and
    ranks the labels of the first resume by the model's decision function.

    NOTE(review): ``prepare_data()`` presumably populates the module-level
    ``data_dict`` and ``tfidftransform`` presumably applies TF-IDF weighting
    to the count matrix -- confirm against their definitions.
    """
    prepare_data()

    # Get the pickled classifier model and features.
    # NOTE: unpickling can execute arbitrary code; only load artifacts that
    # come from a trusted source.
    with open('svmclassifier.pkl', 'rb') as infile:
        model = pickle.load(infile)
    with open('label_names.pkl', 'rb') as lab_names:
        labels_names = pickle.load(lab_names)
    with open('count_vect.pkl', 'rb') as count_v:
        count_vect = pickle.load(count_v)

    test_resumes = data_dict['data'][:]
    test_counts = count_vect.transform(test_resumes)
    tfidf_test = tfidftransform(test_counts)
    predicted_decision = model.decision_function(tfidf_test)

    # Only the first resume is reported.
    scores = predicted_decision[0].tolist()

    # Rank label *indices* by score instead of looking sorted scores up by
    # value: a value lookup always returns the first match, so tied scores
    # would report the same label more than once.
    ranked = sorted(range(len(scores)), key=lambda idx: scores[idx],
                    reverse=True)

    # Slicing (rather than indexing 0..4) tolerates fewer than five labels.
    top_five_predictions = [labels_names[idx] for idx in ranked[:5]]
    print("Predicted top5: " + ", ".join(top_five_predictions))
def main():
    """Print the five highest-scoring labels for the first held-out resume.

    Loads the pickled model, label names and ``count_vect`` object (the
    ``*_0418_h.pkl`` artifacts) from the working directory, transforms the
    resumes in ``data_dict['data']`` and ranks the labels of the first
    resume by the model's decision function.

    NOTE(review): ``prepare_data()`` presumably populates the module-level
    ``data_dict`` and ``tfidftransform`` presumably applies TF-IDF weighting
    to the count matrix -- confirm against their definitions.
    """
    prepare_data()

    # Get the pickled classifier model and features.
    # NOTE: unpickling can execute arbitrary code; only load artifacts that
    # come from a trusted source.
    with open('svmclassifier_new_0418_h.pkl', 'rb') as infile:
        model = pickle.load(infile)
    with open('label_names_0418_h.pkl', 'rb') as lab_names:
        labels_names = pickle.load(lab_names)
    with open('count_vect_0418_h.pkl', 'rb') as count_v:
        count_vect = pickle.load(count_v)

    test_resumes = data_dict['data'][:]
    test_counts = count_vect.transform(test_resumes)
    tfidf_test = tfidftransform(test_counts)
    predicted_decision = model.decision_function(tfidf_test)

    # Only the first resume is reported.
    scores = predicted_decision[0].tolist()

    # Rank label *indices* by score instead of looking sorted scores up by
    # value: a value lookup always returns the first match, so tied scores
    # would report the same label more than once.
    ranked = sorted(range(len(scores)), key=lambda idx: scores[idx],
                    reverse=True)

    # Slicing (rather than indexing 0..4) tolerates fewer than five labels.
    top_five_predictions = [labels_names[idx] for idx in ranked[:5]]
    print("Predicted top5: " + ", ".join(top_five_predictions))
def main():
    """Evaluate the trained classifier on the held-out dataset.

    Loads the pickled model, label names and ``count_vect`` object from the
    working directory, transforms the resumes in ``data_dict['data']`` and
    ranks the labels of every resume by the model's decision function.
    For each resume it prints the actual label, the top-ranked label and the
    five top-ranked labels, followed by two summary figures: the fraction of
    resumes whose top-ranked label equals the actual label, and the fraction
    whose actual label appears among the five top-ranked labels.

    NOTE(review): ``prepare_data()`` presumably populates the module-level
    ``data_dict`` and ``tfidftransform`` presumably applies TF-IDF weighting
    to the count matrix -- confirm against their definitions.
    """
    prepare_data()

    # Get the pickled classifier model and features.
    # NOTE: unpickling can execute arbitrary code; only load artifacts that
    # come from a trusted source.
    with open('svmclassifier_new_0418_h.pkl', 'rb') as infile:
        model = pickle.load(infile)
    with open('label_names_0418_h.pkl', 'rb') as lab_names:
        labels_names = pickle.load(lab_names)
    with open('count_vect_0418_h.pkl', 'rb') as count_v:
        count_vect = pickle.load(count_v)

    test_resumes = data_dict['data'][:]
    test_labels = data_dict['label'][:]
    test_counts = count_vect.transform(test_resumes)
    tfidf_test = tfidftransform(test_counts)
    predicted_decision = model.decision_function(tfidf_test)

    predicted = []
    top_five_per_resume = []
    for i in range(len(test_labels)):
        scores = predicted_decision[i].tolist()
        # Rank label *indices* by score instead of looking sorted scores up
        # by value: a value lookup always returns the first match, so tied
        # scores would report the same label more than once.
        ranked = sorted(range(len(scores)), key=lambda idx: scores[idx],
                        reverse=True)
        # Slicing (rather than indexing 0..4) tolerates fewer than five
        # labels.
        top_five = [labels_names[idx] for idx in ranked[:5]]
        predicted.append(labels_names[ranked[0]])
        top_five_per_resume.append(top_five)

    for actual, best, top_five in zip(test_labels, predicted,
                                      top_five_per_resume):
        print("\nActual: " + actual)
        print("Predicted: " + best)
        print("Predicted top5: " + ", ".join(top_five))

    total = len(test_labels)
    top1_hits = sum(1 for actual, best in zip(test_labels, predicted)
                    if actual == best)
    top5_hits = sum(1 for actual, top_five
                    in zip(test_labels, top_five_per_resume)
                    if actual in top_five)

    # float() forces true division: under Python 2, int / int truncates and
    # would report every accuracy as 0 or 1. An empty held-out set reports
    # 0.0 instead of raising ZeroDivisionError.
    accuracy = float(top1_hits) / total if total else 0.0
    accuracy_top_5 = float(top5_hits) / total if total else 0.0

    print("Actual Accuracy: " + str(accuracy))
    print("New Accuracy (Label present in one of the 5 predictions): " + str(
        accuracy_top_5))
def main():
    """Evaluate the trained classifier on the held-out dataset.

    Loads the pickled model, label names and ``count_vect`` object from the
    working directory, transforms the resumes in ``data_dict["data"]`` and
    ranks the labels of every resume by the model's decision function.
    For each resume it prints the actual label, the top-ranked label and the
    five top-ranked labels, followed by two summary figures: the fraction of
    resumes whose top-ranked label equals the actual label, and the fraction
    whose actual label appears among the five top-ranked labels.

    NOTE(review): ``prepare_data()`` presumably populates the module-level
    ``data_dict`` and ``tfidftransform`` presumably applies TF-IDF weighting
    to the count matrix -- confirm against their definitions.
    """
    prepare_data()

    # Get the pickled classifier model and features.
    # NOTE: unpickling can execute arbitrary code; only load artifacts that
    # come from a trusted source.
    with open("svmclassifier_new_0418_h.pkl", "rb") as infile:
        model = pickle.load(infile)
    with open("label_names_0418_h.pkl", "rb") as lab_names:
        labels_names = pickle.load(lab_names)
    with open("count_vect_0418_h.pkl", "rb") as count_v:
        count_vect = pickle.load(count_v)

    test_resumes = data_dict["data"][:]
    test_labels = data_dict["label"][:]
    test_counts = count_vect.transform(test_resumes)
    tfidf_test = tfidftransform(test_counts)
    predicted_decision = model.decision_function(tfidf_test)

    predicted = []
    top_five_per_resume = []
    for i in range(len(test_labels)):
        scores = predicted_decision[i].tolist()
        # Rank label *indices* by score instead of looking sorted scores up
        # by value: a value lookup always returns the first match, so tied
        # scores would report the same label more than once.
        ranked = sorted(range(len(scores)), key=lambda idx: scores[idx],
                        reverse=True)
        # Slicing (rather than indexing 0..4) tolerates fewer than five
        # labels.
        top_five = [labels_names[idx] for idx in ranked[:5]]
        predicted.append(labels_names[ranked[0]])
        top_five_per_resume.append(top_five)

    for actual, best, top_five in zip(test_labels, predicted,
                                      top_five_per_resume):
        print("\nActual: " + actual)
        print("Predicted: " + best)
        print("Predicted top5: " + ", ".join(top_five))

    total = len(test_labels)
    top1_hits = sum(1 for actual, best in zip(test_labels, predicted)
                    if actual == best)
    top5_hits = sum(1 for actual, top_five
                    in zip(test_labels, top_five_per_resume)
                    if actual in top_five)

    # float() forces true division: under Python 2, int / int truncates and
    # would report every accuracy as 0 or 1. An empty held-out set reports
    # 0.0 instead of raising ZeroDivisionError.
    accuracy = float(top1_hits) / total if total else 0.0
    accuracy_top_5 = float(top5_hits) / total if total else 0.0

    print("Actual Accuracy: " + str(accuracy))
    print("New Accuracy (Label present in one of the 5 predictions): " + str(
        accuracy_top_5))