示例#1
0
def run_argument_sets(text_clf, argument_sets):
    """Fit ``text_clf`` on each train/test split and print evaluation output.

    For every split the classifier is refit, dumped to
    ``svm_trained.joblib`` (the same file name on every iteration), used to
    predict the lines of every document in ``doc_test``, and scored on the
    split's test data. After the loop a line-level confusion matrix and
    F-beta (beta=2) scores are printed, followed by the per-split scores
    and their mean.

    Args:
        text_clf: Estimator exposing ``fit``, ``predict`` and ``score``.
        argument_sets: Iterable of ``(X_train, X_test, y_train, y_test)``
            tuples, one per split.

    Returns:
        None. All results are printed to stdout.

    Notes:
        * ``doc_test`` is not a parameter; it is read from the enclosing
          scope (presumably a module-level global -- confirm against the
          rest of the file).
        * The line-level accumulators are reset for every split, so the
          confusion matrix and F-beta report describe the LAST split's
          model only, while ``scores`` covers every split.
        * If ``argument_sets`` yields nothing, a short message is printed
          and the function returns instead of failing on unbound locals.
    """
    scores = []
    for X_train, X_test, y_train, y_test in argument_sets:
        print("---Fitting model---")
        text_clf.fit(X_train, y_train)
        joblib.dump(text_clf, 'svm_trained.joblib')
        print("SVM with SGD")

        # Fresh accumulators per split (see Notes in the docstring).
        documents_predicted = []
        documents_target = []
        all_predicted_lines = []
        all_target_lines = []
        for doc in doc_test:
            predicted_lines = text_clf.predict(doc.data)
            all_predicted_lines.extend(predicted_lines)
            all_target_lines.extend(doc.targets)

            # NOTE(review): the document-level results are collected but
            # never reported by this function; kept because classify_doc is
            # an opaque helper whose call may matter -- confirm and drop.
            predicted_doc = utils.classify_doc(predicted_lines)
            documents_predicted.append(predicted_doc)
            documents_target.append(doc.category)

        scores.append(text_clf.score(X_test, y_test))

    if not scores:
        # No split was processed: the accumulators above were never bound
        # and there is nothing meaningful to report.
        print("No argument sets supplied; nothing to evaluate.")
        return

    print("Line by Line ")
    print("Confusion Matrix: \n{}".format(
        confusion_matrix(all_target_lines, all_predicted_lines)))
    # Printed under the label "Accuracy", but the value is the per-class
    # F-beta score (average=None, beta=2), not plain accuracy.
    accuracy = fbeta_score(all_target_lines,
                           all_predicted_lines,
                           average=None,
                           beta=2)
    print("Accuracy: {}".format(accuracy))
    print("Scores: ", scores)
    print("Scores:", np.mean(scores))
                                    shuffle=True))])

# Fit the classifier on the training split and persist the fitted model.
print("Training Model")
text_clf.fit(X_train, y_train)
print("SGD")
joblib.dump(text_clf, 'svm_trained.joblib')

# Document-level and line-level accumulators filled by the loop below.
documents_predicted, documents_target = [], []
all_predicted_lines, all_target_lines = [], []
for doc in doc_test:
    # Predict a label for each line of the document and keep the flat
    # line-level predictions/targets for the line-by-line report.
    predicted_lines = text_clf.predict(doc.data)
    all_predicted_lines.extend(predicted_lines)
    all_target_lines.extend(doc.targets)

    # Collapse the line predictions into one document-level prediction.
    predicted_doc = utils.classify_doc(predicted_lines)
    documents_predicted.append(predicted_doc)
    documents_target.append(doc.category)

print("Line by Line ")
line_confusion = confusion_matrix(all_target_lines, all_predicted_lines)
print("Confusion Matrix: \n{}".format(line_confusion))

# Labelled "Accuracy" in the output, but the value is the per-class
# F-beta score (beta=2, no averaging).
accuracy = fbeta_score(
    all_target_lines, all_predicted_lines, average=None, beta=2)
print("Accuracy: {}".format(accuracy))

doc_accuracy = fbeta_score(documents_target,
                           documents_predicted,