def main():
    """
    Print the five best-scoring labels for the first held-out resume.

    Loads the pickled classifier, label names and count vectorizer from the
    working directory, vectorizes ``data_dict['data']`` and ranks the labels
    by the classifier's ``decision_function`` score, highest first.

    Only the first resume is scored. Fewer than five labels are printed when
    the classifier has fewer than five classes.
    """
    # Presumably populates the module-level ``data_dict`` -- confirm.
    prepare_data()

    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only load artifacts produced by a trusted training run.
    with open('svmclassifier.pkl', 'rb') as infile:
        model = pickle.load(infile)

    with open('label_names.pkl', 'rb') as lab_names:
        labels_names = pickle.load(lab_names)

    with open('count_vect.pkl', 'rb') as count_v:
        count_vect = pickle.load(count_v)

    test_resumes = data_dict['data'][:]

    test_counts = count_vect.transform(test_resumes)
    # Project helper defined elsewhere; presumably applies TF-IDF
    # weighting to the raw counts -- confirm.
    tfidf_test = tfidftransform(test_counts)
    predicted_decision = model.decision_function(tfidf_test)

    # Slice rather than index so that only the first row is processed.
    for row in predicted_decision[:1]:
        scores = row.tolist()
        # Rank class indices instead of looking sorted scores back up with
        # list.index(): index() returns the first match, so tied scores
        # would yield the same label more than once. sorted() is stable, so
        # ties keep their original class order.
        ranked = sorted(range(len(scores)), key=scores.__getitem__,
                        reverse=True)
        # Slicing (instead of range(5)) copes with fewer than five classes.
        top_five_predictions = [labels_names[k] for k in ranked[:5]]

        print("Predicted top5: " + ", ".join(top_five_predictions))
# Example #2
# 0
def main():
    """
    Print the five best-scoring labels for the first held-out resume.

    Loads the pickled classifier, label names and count vectorizer (the
    ``*_0418_h.pkl`` artifacts) from the working directory, vectorizes
    ``data_dict['data']`` and ranks the labels by the classifier's
    ``decision_function`` score, highest first.

    Only the first resume is scored. Fewer than five labels are printed when
    the classifier has fewer than five classes.
    """
    # Presumably populates the module-level ``data_dict`` -- confirm.
    prepare_data()

    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only load artifacts produced by a trusted training run.
    with open('svmclassifier_new_0418_h.pkl', 'rb') as infile:
        model = pickle.load(infile)

    with open('label_names_0418_h.pkl', 'rb') as lab_names:
        labels_names = pickle.load(lab_names)

    with open('count_vect_0418_h.pkl', 'rb') as count_v:
        count_vect = pickle.load(count_v)

    test_resumes = data_dict['data'][:]

    test_counts = count_vect.transform(test_resumes)
    # Project helper defined elsewhere; presumably applies TF-IDF
    # weighting to the raw counts -- confirm.
    tfidf_test = tfidftransform(test_counts)
    predicted_decision = model.decision_function(tfidf_test)

    # Slice rather than index so that only the first row is processed.
    for row in predicted_decision[:1]:
        scores = row.tolist()
        # Rank class indices instead of looking sorted scores back up with
        # list.index(): index() returns the first match, so tied scores
        # would yield the same label more than once. sorted() is stable, so
        # ties keep their original class order.
        ranked = sorted(range(len(scores)), key=scores.__getitem__,
                        reverse=True)
        # Slicing (instead of range(5)) copes with fewer than five classes.
        top_five_predictions = [labels_names[k] for k in ranked[:5]]

        print("Predicted top5: " + ", ".join(top_five_predictions))
def main():
    """
    Score the held-out resumes and report top-1 and top-5 accuracy.

    Loads the pickled classifier, label names and count vectorizer (the
    ``*_0418_h.pkl`` artifacts) from the working directory, vectorizes
    ``data_dict['data']`` and ranks the labels for every resume by the
    classifier's ``decision_function`` score, highest first.

    For each resume the actual label, the best-scoring label and the five
    best-scoring labels are printed. Afterwards two accuracies are printed
    as fractions in [0, 1]: how often the best label equals the actual
    label, and how often the actual label is among the top five. Both are
    reported as 0.0 when there are no test labels.
    """
    # Presumably populates the module-level ``data_dict`` -- confirm.
    prepare_data()

    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only load artifacts produced by a trusted training run.
    with open('svmclassifier_new_0418_h.pkl', 'rb') as infile:
        model = pickle.load(infile)

    with open('label_names_0418_h.pkl', 'rb') as lab_names:
        labels_names = pickle.load(lab_names)

    with open('count_vect_0418_h.pkl', 'rb') as count_v:
        count_vect = pickle.load(count_v)

    test_resumes = data_dict['data'][:]
    test_labels = data_dict['label'][:]

    test_counts = count_vect.transform(test_resumes)
    # Project helper defined elsewhere; presumably applies TF-IDF
    # weighting to the raw counts -- confirm.
    tfidf_test = tfidftransform(test_counts)
    predicted_decision = model.decision_function(tfidf_test)

    total = 0
    top_1_hits = 0
    top_5_hits = 0

    for actual_label, row in zip(test_labels, predicted_decision):
        scores = row.tolist()
        # Rank class indices instead of looking sorted scores back up with
        # list.index(): index() returns the first match, so tied scores
        # would yield the same label more than once. sorted() is stable, so
        # ties keep their original class order.
        ranked = sorted(range(len(scores)), key=scores.__getitem__,
                        reverse=True)
        best_label = labels_names[ranked[0]]
        # Slicing (instead of range(5)) copes with fewer than five classes.
        top_five_predictions = [labels_names[k] for k in ranked[:5]]

        print("\nActual: " + actual_label)
        print("Predicted: " + best_label)
        print("Predicted top5: " + ", ".join(top_five_predictions))

        total += 1
        if best_label == actual_label:
            top_1_hits += 1
        if actual_label in top_five_predictions:
            top_5_hits += 1

    # float() forces true division: under Python 2, int / int truncates and
    # every accuracy below 100% would be printed as 0.
    accuracy = float(top_1_hits) / total if total else 0.0
    accuracy_top_5 = float(top_5_hits) / total if total else 0.0

    print("Actual Accuracy: " + str(accuracy))

    print("New Accuracy (Label present in one of the 5 predictions): " + str(
        accuracy_top_5))
def main():
    """
    Score the held-out resumes and report top-1 and top-5 accuracy.

    Loads the pickled classifier, label names and count vectorizer (the
    ``*_0418_h.pkl`` artifacts) from the working directory, vectorizes
    ``data_dict["data"]`` and ranks the labels for every resume by the
    classifier's ``decision_function`` score, highest first.

    For each resume the actual label, the best-scoring label and the five
    best-scoring labels are printed. Afterwards two accuracies are printed
    as fractions in [0, 1]: how often the best label equals the actual
    label, and how often the actual label is among the top five. Both are
    reported as 0.0 when there are no test labels.
    """
    # Presumably populates the module-level ``data_dict`` -- confirm.
    prepare_data()

    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only load artifacts produced by a trusted training run.
    with open("svmclassifier_new_0418_h.pkl", "rb") as infile:
        model = pickle.load(infile)

    with open("label_names_0418_h.pkl", "rb") as lab_names:
        labels_names = pickle.load(lab_names)

    with open("count_vect_0418_h.pkl", "rb") as count_v:
        count_vect = pickle.load(count_v)

    test_resumes = data_dict["data"][:]
    test_labels = data_dict["label"][:]

    test_counts = count_vect.transform(test_resumes)
    # Project helper defined elsewhere; presumably applies TF-IDF
    # weighting to the raw counts -- confirm.
    tfidf_test = tfidftransform(test_counts)
    predicted_decision = model.decision_function(tfidf_test)

    total = 0
    top_1_hits = 0
    top_5_hits = 0

    for actual_label, row in zip(test_labels, predicted_decision):
        scores = row.tolist()
        # Rank class indices instead of looking sorted scores back up with
        # list.index(): index() returns the first match, so tied scores
        # would yield the same label more than once. sorted() is stable, so
        # ties keep their original class order.
        ranked = sorted(
            range(len(scores)), key=scores.__getitem__, reverse=True
        )
        best_label = labels_names[ranked[0]]
        # Slicing (instead of range(5)) copes with fewer than five classes.
        top_five_predictions = [labels_names[k] for k in ranked[:5]]

        print("\nActual: " + actual_label)
        print("Predicted: " + best_label)
        print("Predicted top5: " + ", ".join(top_five_predictions))

        total += 1
        if best_label == actual_label:
            top_1_hits += 1
        if actual_label in top_five_predictions:
            top_5_hits += 1

    # float() forces true division: under Python 2, int / int truncates and
    # every accuracy below 100% would be printed as 0.
    accuracy = float(top_1_hits) / total if total else 0.0
    accuracy_top_5 = float(top_5_hits) / total if total else 0.0

    print("Actual Accuracy: " + str(accuracy))

    print(
        "New Accuracy (Label present in one of the 5 predictions): "
        + str(accuracy_top_5)
    )