def run_argument_sets(text_clf, argument_sets):
    """Fit and evaluate ``text_clf`` on each train/test split in ``argument_sets``.

    For every split the classifier is re-fit, serialized to
    ``'svm_trained.joblib'`` (each iteration overwrites the previous dump),
    evaluated line-by-line against the module-level ``doc_test`` documents,
    and scored on the split's held-out test set. The per-split scores and
    their mean are printed at the end.

    Args:
        text_clf: sklearn-style estimator/pipeline exposing
            ``fit``/``predict``/``score``.
        argument_sets: iterable of ``(X_train, X_test, y_train, y_test)``
            tuples.

    NOTE(review): depends on module-level ``doc_test`` and ``utils``; each
    element of ``doc_test`` appears to expose ``.data``, ``.targets`` and
    ``.category`` — confirm against the loader that builds it.
    """
    scores = []
    for X_train, X_test, y_train, y_test in argument_sets:
        print("---Fitting model---")
        text_clf.fit(X_train, y_train)
        joblib.dump(text_clf, 'svm_trained.joblib')
        print("SVM with SGD")

        # NOTE(review): the document-level lists are accumulated but never
        # reported here — kept for interface/behavior parity (a doc-level
        # F-beta is computed elsewhere in this file).
        documents_predicted = []
        documents_target = []
        all_predicted_lines = []
        all_target_lines = []
        for doc in doc_test:
            predicted_lines = text_clf.predict(doc.data)
            # Accumulate line-level predictions/targets across all test docs.
            all_predicted_lines.extend(predicted_lines)
            all_target_lines.extend(doc.targets)
            # Collapse line predictions into one document-level label.
            predicted_doc = utils.classify_doc(predicted_lines)
            documents_predicted.append(predicted_doc)
            documents_target.append(doc.category)

        scores.append(text_clf.score(X_test, y_test))
        print("Line by Line ")
        print("Confusion Matrix: \n{}".format(
            confusion_matrix(all_target_lines, all_predicted_lines)))
        # BUG FIX: this value is a per-class F2 score (beta=2), not accuracy —
        # the old "Accuracy:" label was misleading.
        f2_per_class = fbeta_score(all_target_lines, all_predicted_lines,
                                   average=None, beta=2)
        print("F2 score (per class): {}".format(f2_per_class))
    print("Scores: ", scores)
    # BUG FIX: distinguish the mean from the raw score list — both prints
    # previously used the same "Scores:" label.
    print("Mean score:", np.mean(scores))
shuffle=True))]) print("Training Model") text_clf.fit(X_train, y_train) print("SGD") joblib.dump(text_clf, 'svm_trained.joblib') documents_predicted = [] documents_target = [] all_predicted_lines = [] all_target_lines = [] for doc in doc_test: predicted_lines = text_clf.predict(doc.data) all_predicted_lines += list(predicted_lines) all_target_lines += list(doc.targets) predicted_doc = utils.classify_doc(predicted_lines) documents_predicted.append(predicted_doc) documents_target.append(doc.category) print("Line by Line ") print("Confusion Matrix: \n{}".format( confusion_matrix(all_target_lines, all_predicted_lines))) accuracy = fbeta_score(all_target_lines, all_predicted_lines, average=None, beta=2) print("Accuracy: {}".format(accuracy)) doc_accuracy = fbeta_score(documents_target, documents_predicted,