svm.grid_search_cv(n_jobs=4, verbose=5)
    test_error = svm.get_test_error()
    print "Test error in held out set: " + str(test_error)
    print "=" * 20

    # SVMClassifier variant built with ngram_range=(1,2) (presumably unigrams
    # plus bigrams -- confirm against SVMClassifier). Runs grid_search_cv with
    # n_jobs=4, then prints the error returned by get_test_error() and a
    # separator line.
    svm = SVMClassifier(train_reviews, train_labels, ngram_range=(1,2),
                       test_texts=test_reviews, test_labels=test_labels,
                       compute_features=True)
    svm.grid_search_cv(n_jobs=4, verbose=5)
    test_error = svm.get_test_error()
    print "Test error in held out set: " + str(test_error)
    print "=" * 20

    # Simple bag of words with a logistic classifier
    lr = LogisticClassifier(train_reviews, train_labels,
                            test_texts=test_reviews, test_labels=test_labels,
                            compute_features=True)
    lr.grid_search_cv(verbose=5, n_jobs=4)
    test_error = lr.get_test_error()
    print "Test error in held out set: " + str(test_error)
    print "=" * 20

    # LogisticClassifier variant built with ngram_range=(1,2) (presumably
    # unigrams plus bigrams -- confirm against LogisticClassifier). Runs
    # grid_search_cv with n_jobs=4, then prints the error returned by
    # get_test_error() and a separator line.
    lr = LogisticClassifier(train_reviews, train_labels, ngram_range=(1,2),
                            test_texts=test_reviews, test_labels=test_labels,
                            compute_features=True)
    lr.grid_search_cv(verbose=5, n_jobs=4)
    test_error = lr.get_test_error()
    print "Test error in held out set: " + str(test_error)
    print "=" * 20

    # SGD up to 3-grams
Exemplo n.º 2
0
    print "Test error: " + str(test_error)

    # SGDTextClassifier run. compute_features is not passed to the constructor
    # here, and set_bag_of_ngrams() is called explicitly before the grid search
    # (presumably that is what builds the features -- confirm).
    print "SGD Classifier"
    sgd = SGDTextClassifier(train_reviews,
                            train_labels,
                            test_texts=test_reviews,
                            test_labels=test_labels)
    # Training/test error reporting is currently disabled for this run:
    #train_error = sgd.get_training_error()
    #test_error = sgd.get_test_error()
    #print "Training error: " + str(train_error)
    #print "Test error: " + str(test_error)
    sgd.set_bag_of_ngrams()
    sgd.grid_search_cv(verbose=0, n_jobs=4)

    # LogisticClassifier configured step by step: attach training and test
    # data, call set_bag_of_ngrams(), train, then print both errors.
    # NOTE(review): the variable is still named `sgd` although it now holds a
    # LogisticClassifier -- looks like a copy/paste leftover; consider renaming.
    print "Logistic classifier"
    sgd = LogisticClassifier()
    sgd.set_training_data(train_reviews, train_labels)
    sgd.set_test_data(test_reviews, test_labels)
    sgd.set_bag_of_ngrams()

    sgd.train()
    train_error = sgd.get_training_error()
    test_error = sgd.get_test_error()
    print "Training error: " + str(train_error)
    print "Test error: " + str(test_error)

    # SVMClassifier configured step by step (data attached, then
    # set_bag_of_ngrams()).
    # NOTE(review): this instance (again bound to `sgd`) is never trained and
    # its errors are never read in the visible code; the statement that follows
    # builds a separate `svm` object instead. Confirm whether a train()/report
    # step is missing here.
    print "SVM classifier"
    sgd = SVMClassifier()
    sgd.set_training_data(train_reviews, train_labels)
    sgd.set_test_data(test_reviews, test_labels)
    sgd.set_bag_of_ngrams()
    # SVMClassifier variant built with ngram_range=(1, 2) (presumably unigrams
    # plus bigrams -- confirm against SVMClassifier). Runs grid_search_cv with
    # n_jobs=4, then prints the error returned by get_test_error() and a
    # separator line.
    svm = SVMClassifier(train_reviews,
                        train_labels,
                        ngram_range=(1, 2),
                        test_texts=test_reviews,
                        test_labels=test_labels,
                        compute_features=True)
    svm.grid_search_cv(n_jobs=4, verbose=5)
    test_error = svm.get_test_error()
    print "Test error in held out set: " + str(test_error)
    print "=" * 20

    # Simple bag of words with a logistic classifier
    lr = LogisticClassifier(train_reviews,
                            train_labels,
                            test_texts=test_reviews,
                            test_labels=test_labels,
                            compute_features=True)
    lr.grid_search_cv(verbose=5, n_jobs=4)
    test_error = lr.get_test_error()
    print "Test error in held out set: " + str(test_error)
    print "=" * 20

    # LogisticClassifier variant built with ngram_range=(1, 2); runs
    # grid_search_cv with n_jobs=4 and fetches the test error.
    # NOTE(review): test_error is assigned here but not printed by any of the
    # visible statements that follow -- confirm whether the print was dropped.
    lr = LogisticClassifier(train_reviews,
                            train_labels,
                            ngram_range=(1, 2),
                            test_texts=test_reviews,
                            test_labels=test_labels,
                            compute_features=True)
    lr.grid_search_cv(verbose=5, n_jobs=4)
    test_error = lr.get_test_error()
    # NOTE(review): `nb` has no visible assignment before this point (the only
    # one in view comes later) -- presumably it is created in a part of the
    # snippet that is not shown; confirm before running.
    nb.set_bag_of_ngrams() # The bag of words can also be computed manually
    nb.grid_search_cv(n_jobs=4)

    # Now repeat the grid searches with bigrams too
    sgd = SGDTextClassifier(train_reviews, train_labels, ngram_range=(1,2),
                            test_texts=test_reviews, test_labels=test_labels,
                            compute_features=True)
    sgd.grid_search_cv(n_jobs=4, verbose=1)

    # NaiveBayesClassifier built with ngram_range=(1,2) and
    # compute_features=True, followed by its grid search (n_jobs=4). No error
    # is read back or printed for this run.
    nb = NaiveBayesClassifier(train_reviews, train_labels, ngram_range=(1,2),
                            test_texts=test_reviews, test_labels=test_labels,
                            compute_features=True)
    nb.grid_search_cv(n_jobs=4, verbose=1)

    # LogisticClassifier built with ngram_range=(1,2) and
    # compute_features=True, followed by its grid search (n_jobs=4). No error
    # is read back or printed for this run.
    lr = LogisticClassifier(train_reviews, train_labels, ngram_range=(1,2),
                            test_texts=test_reviews, test_labels=test_labels,
                            compute_features=True)
    lr.grid_search_cv(verbose=5, n_jobs=4)

    # print "Naive Bayes"
    # nb = NaiveBayesClassifier()
    # nb.set_training_data(train_reviews, train_labels)
    # nb.set_test_data(test_reviews, test_labels)
    # nb.set_bag_of_ngrams()
    #
    # nb.train()
    # train_error = nb.get_training_error()
    # test_error = nb.get_test_error()
    # print "Training error: " + str(train_error)
    # print "Test error: " + str(test_error)