def test(self, test_data):
		"""
		Evaluate the trained classifier on held-out data.

		test_data is expected to be an n x (m+1) numpy matrix, where n is
		the number of examples and m the number of features; the first
		element of each row is the label.

		The work is delegated to the test() function of the module that
		matches self.classifier_type, called with self.params and
		test_data. That function is expected to print the accuracy,
		precision, and recall. Nothing is returned, and an unrecognised
		classifier type results in no action at all.
		"""
		kind = self.classifier_type

		# Import lazily so that only the selected backend module is loaded.
		if kind == 'decision_tree':
			import decision_tree as backend
		elif kind == 'naive_bayes':
			import naive_bayes as backend
		elif kind == 'neural_net':
			# The module name is plural even though the type string is not.
			import neural_nets as backend
		else:
			return

		backend.test(self.params, test_data)
Пример #2
0
    def test(self, test_data):
        """
        Run the configured classifier against a test set.

        The data should be an n x (m+1) numpy matrix: n examples with m
        features each, where the first element of every row is the label.

        Depending on self.classifier_type, the matching module
        (decision_tree, naive_bayes or neural_nets) is imported and its
        test() function is invoked with self.params and test_data; that
        function is expected to print accuracy, precision, and recall.
        Any other classifier type is ignored. This method returns None.
        """
        selected = self.classifier_type

        # Resolve the implementation first, then make a single call below.
        if selected == 'decision_tree':
            import decision_tree as implementation
        elif selected == 'naive_bayes':
            import naive_bayes as implementation
        elif selected == 'neural_net':
            import neural_nets as implementation
        else:
            # Unknown type: nothing to evaluate.
            return

        implementation.test(self.params, test_data)
def main():
    """Train and score Naive Bayes and logistic regression from the command line.

    ``argv[1]`` and ``argv[2]`` are passed to ``read_data`` to obtain the
    training and test sets. Naive Bayes is trained and tested twice, with
    the boolean flag set to False and then True; the results are reported
    as stop words "unfiltered" and "filtered" respectively.

    Every remaining argument is converted to a float lambda constant. For
    each one a blank line is printed, then logistic regression is trained
    and tested for both flag values. Accuracies are printed with six
    decimal places.
    """
    train_set = read_data(argv[1])
    test_set = read_data(argv[2])
    six_decimals = "{0:.6f}".format

    naive_bayes_runs = (
        (False, "accurate with stop words unfiltered"),
        (True, "accurate with stop words filtered"),
    )
    for filter_flag, description in naive_bayes_runs:
        classifiers = naive_bayes.train(train_set, filter_flag)
        accuracy = naive_bayes.test(test_set, filter_flag, classifiers)
        print("Naive Bayes is", six_decimals(accuracy), description)

    regression_runs = (
        (False,
         "accurate with stop words unfiltered and lambda constant equal to"),
        (True,
         "accurate with stop words filtered and lambda constant equal to"),
    )
    for raw_lambda in argv[3:]:
        print()
        lambda_constant = float(raw_lambda)

        for filter_flag, description in regression_runs:
            # 25 is forwarded unchanged as the third train() argument
            # (presumably an iteration count -- confirm against
            # logistic_regression.train).
            weights = logistic_regression.train(train_set, filter_flag, 25,
                                                lambda_constant)
            accuracy = logistic_regression.test(test_set, filter_flag,
                                                weights)
            print("Logistic Regression is", six_decimals(accuracy),
                  description, lambda_constant)
Пример #4
0
def evaluate_on_each_doc(clf_name, clf, features_doc, labels_doc, phrase_idx_doc, phrase_list, true_keys_doc, N=10):
    precisions = []
    recalls = []
    # go through each document
    docid = 0
    for features, labels, phrase_indices, true_keys in zip(features_doc, labels_doc, phrase_idx_doc, true_keys_doc):
        ###
        print "*docid", docid
        ###
        docid += 1
        if clf_name == 'NB':
            pred_idx = NB.test(clf, N, features)
            pred_keys = []
            # collect all phrases that have pred label 1
            for idx in pred_idx:
                pred_keys.append(phrase_list[phrase_indices[idx]])
            ###
            print "--pred_keys:"
            print pred_keys
            print "--true keys:"
            print true_keys
            ###
            precisions.append(get_precision(true_keys, pred_keys))
            recalls.append(get_recall(true_keys, pred_keys))

        if clf_name == 'svm':
            pred_labels = clf.predict(features)
            confidence_scores = clf.decision_function(features)
            pred_keys = []
            ###
            print '--pred keys:', str(sum(pred_labels))
            ###
            # collect all phrases that has pred label 1
            predictions = zip(pred_labels, phrase_indices, confidence_scores)
            predictions.sort(key=lambda x: x[2], reverse=True)
            for label, idx in zip(pred_labels, phrase_indices):
                if label == 1:
                    pred_keys.append(phrase_list[idx])
            precisions.append(get_precision(true_keys, pred_keys))
            recalls.append(get_recall(true_keys, pred_keys))

    precision_avg = sum(precisions) / len(precisions)
    recall_avg = sum(recalls) / len(recalls)
    return precision_avg, recall_avg
Пример #5
0
def evaluate_one_doc(clf_name, clf, phrases, features, true_keys, N=10):
    pred_idx = []
    if clf_name == 'NB':
        pred_idx = NB.test(clf, N, features)
    if clf_name == 'svm':
        pred_idx= svm.test(clf, N, features)

    pred_keys = []
    print "# pred_keys", len(pred_keys)
    # get top N pred keys
    for idx in pred_idx:
        pred_keys.append(phrases[idx])
    ###
    print "--pred_keys:"
    print pred_keys
    print "--true keys:"
    print true_keys
    ###
    precision = get_precision(true_keys, pred_keys)
    recall = get_recall(true_keys, pred_keys)
    return precision, recall
Пример #6
0
    # sort on importance

    # Print the feature ranking
    #print("Feature ranking:")

    # Plot the feature importances of the forest
    # plt.figure()
    # plt.title("Feature importances")
    # plt.bar( range(nb_grams[features].shape[1]), importances[indices],
    #    color="r", yerr=std[indices], align="center")
    # plt.xticks(range(nb_grams[features].shape[1]), indices)
    # plt.xlim([-1, nb_grams[features].shape[1]])
    # plt.show()

    nb_correct, nb_incorrect, nb_tp, nb_fp, nb_tn, nb_fn, nb_predictions = naive_bayes.test(
        X_test_bal, y_test_bal, nb_classifier, nb_grams)

    # if emotion == 'joy':
    #     for i in range(len(nb_predictions)):
    #         if nb_predictions[i]:
    #             print i
    #     pdb.set_trace()

    for tweet in X_test:
        emotionFound = False

    # get overall counts
    all_correct = 0
    all_wrong = 0
    svm_correct = 0
    lr_correct = 0
"""
Example usage:
$ python predict_custom_input.py "new jerry seinfeld show releases"
new jerry seinfeld show releases
['news']
['e']

"""

import naive_bayes
import sys


if __name__ == '__main__':
	mysql_obj_identifier = naive_bayes.mysql_connection('news_identifier')
	mysql_obj_classifier = naive_bayes.mysql_connection('news_classifier')
	input_text = sys.argv[1]
	input_text = naive_bayes.normalize_text(input_text)
	print input_text
	identifier_output = naive_bayes.test([input_text], mysql_obj_identifier)
	print identifier_output
	if identifier_output[0] == 'news':
		print naive_bayes.test([input_text], mysql_obj_classifier)
Пример #8
0
import naive_bayes as nb
from models.company import *
import pickle
import datetime
import sys
import os


print datetime.datetime.now()
text = "Anyone can create an account and start explaining rap. Highlight any line to explain it yourself, suggest changes to existing explanations, and put up your favorite new songs."

text_2="Coupons.com is a provider of digital coupons, including online printable, coupon codes, save to loyalty card and mobile promotions. The company's products include Coupons.com as well as Grocery iQ and Coupons.com mobile applications."
vcs= db.session.query(VC).filter("id >= 0 AND id <=50").all()
print 'done query'
vc_names = []
vc_urls = []
percent = []
for vc in vcs:
	vc_name = vc.name
	vc_url = vc.url
	vc_model = pickle.loads(vc.nb_model)
	result = nb.test(text=text_2, model=vc_model)
	if result >= .55:
		percent.append(result)
		vc_names.append(vc_name)
		vc_urls.append(vc_url)

print percent
print vc_names
print datetime.datetime.now()
# print results
Пример #9
0
# 1. TRAIN
# 1.1. Load training data
print 'Load training data ...'
training_data = scipy.io.loadmat('spamTrain.mat')
#print training_data
X = training_data['X']
y = training_data['y']
print 'X.shape =', X.shape
print 'y.shape =', y.shape

# 1.2. Train Naive Bayes classifier
print 'Train Naive Bayes classifier ...'
phi, phi0, phi1 = naive_bayes.train(X, y)
print 'phi =', phi
print 'phi0[0:10] =', phi0[0:10]
print 'phi1[0:10] =', phi1[0:10]

# 2. TEST
# 2.1. Load test data
print 'Load test data ...'
test_data = scipy.io.loadmat('spamTest.mat')
X_test = test_data['Xtest']
y_test = test_data['ytest']
print 'X_test.shape =', X_test.shape
print 'y_test.shape =', y_test.shape

# 2.2. Test Naive Bayes classifier
print 'Test Naive Bayes classifier ...'
acc = naive_bayes.test(phi, phi0, phi1, X_test, y_test)
print 'Accuracy =', acc