示例#1
0
文件: main.py 项目: Zissi/nlpexample
def classify_with_tf_idf(paths):
    """Cross-validate the TF-IDF + SVC pipeline on the data loaded from *paths*.

    The sentences and labels returned by ``load_test_data(paths)`` are split
    into three stratified folds. For every fold the training sentences are
    turned into count features and then TF-IDF features, ``predict_with_svc``
    predicts labels for the fold's test sentences, and the fold is scored with
    ``evaluate_classification``. The per-fold precision and recall values are
    finally handed to ``evaluate_complete_classification``.

    Args:
        paths: Passed through unchanged to ``load_test_data``.

    Returns:
        None. A ``'TF-IDF'`` header is printed before each fold is evaluated.
    """
    sentences, labels, class_names = load_test_data(paths)
    # Convert to arrays so the fold index arrays can be used for fancy indexing.
    sentences = np.array(sentences)
    labels = np.array(labels)
    average_precisions = []
    average_recalls = []
    # NOTE(review): sklearn.cross_validation is the legacy API (superseded by
    # sklearn.model_selection); kept as-is to match the file's sklearn version.
    for train_index, test_index in sklearn.cross_validation.StratifiedKFold(labels, n_folds=3):
        sentences_train, sentences_test = sentences[train_index], sentences[test_index]
        labels_train, labels_test = labels[train_index], labels[test_index]
        # The vocabulary returned by the extractor is not needed here.
        features_train, _, count_vectorizer = extract_features_and_vocabulary(sentences_train)
        tfidf_features_train = transform_to_tfidf(features_train)
        predicted = predict_with_svc(tfidf_features_train, labels_train, sentences_test, count_vectorizer)

        print('TF-IDF')
        # Pass the fold's test sentences (not the full data set) so that they
        # have the same length and ordering as `predicted` and `labels_test`.
        average_precision, average_recall = evaluate_classification(
            predicted, labels_test, sentences_test, class_names)
        average_precisions.append(average_precision)
        average_recalls.append(average_recall)
    evaluate_complete_classification(average_precisions, average_recalls)
示例#2
0
def predict_with_svc(tfidf_features_train, labels_train, sentences_test, count_vectorizer):
    """Fit a ``LinearSVC`` on the training features and predict the test labels.

    Args:
        tfidf_features_train: Feature matrix the classifier is fitted on.
        labels_train: Labels matching ``tfidf_features_train``.
        sentences_test: Sentences to classify; they are converted to features
            with ``extract_features_for_testing`` and ``count_vectorizer``.
        count_vectorizer: Vectorizer forwarded to
            ``extract_features_for_testing``.

    Returns:
        The result of ``LinearSVC.predict`` on the TF-IDF test features.
    """
    classifier = LinearSVC()
    classifier.fit(tfidf_features_train, labels_train)

    # NOTE(review): the test features go through transform_to_tfidf on their
    # own; if that helper fits IDF weights per call, they will differ from the
    # weights used for training -- confirm against its definition.
    test_counts = extract_features_for_testing(sentences_test, count_vectorizer)
    test_tfidf = transform_to_tfidf(test_counts)

    predictions = classifier.predict(test_tfidf)
    return predictions