class NBTest(unittest.TestCase):
    """Compare the project NaiveBayes variants against scikit-learn baselines.

    Relies on module-level fixtures (X_count, X_tfidf, their *_test
    counterparts, train_targets, test_targets) prepared elsewhere in the file.
    """

    def setUp(self):
        # Fresh classifiers for every test so fitted state never leaks
        # between test methods.
        self.mnb = NaiveBayes(multinomial=True)
        self.skmnb = MultinomialNB()
        self.bnb = NaiveBayes(bernoulli=True)
        self.skbnb = BernoulliNB()
        self.cnb = NaiveBayes(multinomial=True, cnb=True)
        self.wcnb = NaiveBayes(multinomial=True, wcnb=True)

    def test_count_vectorized(self):
        """Multinomial NB must match sklearn exactly on count features."""
        self.mnb.fit(X_count, train_targets)
        self.skmnb.fit(X_count, train_targets)
        self.assertEqual(
            self.mnb.score(X_count_test, test_targets),
            self.skmnb.score(X_count_test, test_targets),
        )

    def test_tfidf_vectorized(self):
        """Multinomial NB must match sklearn exactly on tf-idf features."""
        self.mnb.fit(X_tfidf, train_targets)
        self.skmnb.fit(X_tfidf, train_targets)
        self.assertEqual(
            self.mnb.score(X_tfidf_test, test_targets),
            self.skmnb.score(X_tfidf_test, test_targets),
        )

    def test_cnb(self):
        """Complement NB should not trail plain multinomial NB by more than 0.1."""
        self.cnb.fit(X_count, train_targets)
        self.mnb.fit(X_count, train_targets)
        cnb_score = self.cnb.score(X_count_test, test_targets)
        mnb_score = self.mnb.score(X_count_test, test_targets)
        # print statement modernized to Python 3 (was Python 2 syntax).
        print("CNB: {}, MNB: {}".format(cnb_score, mnb_score))
        # assertGreater instead of a bare assert: survives `python -O`
        # and reports both operands on failure.
        self.assertGreater(cnb_score - mnb_score, -0.1)

    def test_wcnb(self):
        """Weight-normalized CNB should not trail multinomial NB by more than 0.5."""
        self.wcnb.fit(X_count, train_targets)
        self.mnb.fit(X_count, train_targets)
        wcnb_score = self.wcnb.score(X_count_test, test_targets)
        mnb_score = self.mnb.score(X_count_test, test_targets)
        print("WCNB: {}, MNB: {}".format(wcnb_score, mnb_score))
        self.assertGreater(wcnb_score - mnb_score, -0.5)
"""Prediction on the data set"""
import pandas as pd

from classifiers.naive_bayes import NaiveBayes
from classifiers.util import train_test_split, confusion_matrix, accuracy_score

# Load the processed data set and separate features from the target column.
df = pd.read_csv("./dataset/processed.csv")
y = df["Outcome"].values
X = df.drop("Outcome", axis=1).values

# Hold out 20 samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=20)

# Train the classifier.
# NOTE(review): the constructor also receives the training data here and
# fit() is called afterwards — confirm against NaiveBayes's signature
# whether both are required.
nb = NaiveBayes(X_train, y_train)
nb.fit(X_train, y_train)

# Predict on the held-out samples and report the metrics.
y_predictions = nb.predict(X_test)

print(f"The accuracy score :: {accuracy_score(y_predictions, y_test) * 100} %")

print("Confusion Matrix ::")
print(f"{confusion_matrix(y_test, y_predictions)}")