Пример #1
0
 def testNaiveBayesSK2(self):
     """Check NaiveBayes on TF-IDF features of four 20 Newsgroups categories.

     The model is fitted on the 'train' subset and evaluated on the 'test'
     subset; the weighted F1 score must exceed 0.8.
     """
     categories = ['alt.atheism', 'talk.religion.misc', 'comp.graphics', 'sci.space']
     train_set = fetch_20newsgroups(subset='train', categories=categories)
     test_set = fetch_20newsgroups(subset='test', categories=categories)
     vectorizer = TfidfVectorizer()
     # Both feature matrices are SciPy CSR matrices. The vocabulary is fitted
     # on the training documents only and reused for the test documents.
     train_vectors = vectorizer.fit_transform(train_set.data)
     test_vectors = vectorizer.transform(test_set.data)
     nb = NaiveBayes(sqlCtx)
     nb.fit(train_vectors, train_set.target)
     predicted = nb.predict(test_vectors)
     score = metrics.f1_score(test_set.target, predicted, average='weighted')
     # assertGreater replaces the deprecated failUnless alias (removed in
     # Python 3.12) and reports both operands on failure.
     self.assertGreater(score, 0.8)
Пример #2
0
 def testNaiveBayesSK2(self):
     """Verify NaiveBayes text classification quality on 20 Newsgroups.

     Documents from four categories are turned into TF-IDF vectors, the
     model is trained on the 'train' subset, and the weighted F1 score of
     its predictions on the 'test' subset must be greater than 0.8.
     """
     categories = [
         'alt.atheism', 'talk.religion.misc', 'comp.graphics', 'sci.space'
     ]
     newsgroups_train = fetch_20newsgroups(subset='train',
                                           categories=categories)
     newsgroups_test = fetch_20newsgroups(subset='test',
                                          categories=categories)
     vectorizer = TfidfVectorizer()
     # Both vectors and vectors_test are SciPy CSR matrices; only the
     # training documents are used to fit the vectorizer.
     vectors = vectorizer.fit_transform(newsgroups_train.data)
     vectors_test = vectorizer.transform(newsgroups_test.data)
     nb = NaiveBayes(sqlCtx)
     nb.fit(vectors, newsgroups_train.target)
     pred = nb.predict(vectors_test)
     score = metrics.f1_score(newsgroups_test.target,
                              pred,
                              average='weighted')
     # failUnless is a deprecated alias that no longer exists in
     # Python 3.12+; assertGreater also gives a clearer failure message.
     self.assertGreater(score, 0.8)
Пример #3
0
 def testNaiveBayesSK1(self):
     """Check NaiveBayes accuracy on the digits dataset.

     The first 90% of the samples are used for training and the remaining
     10% for scoring; the score must exceed 0.85.
     """
     digits = datasets.load_digits()
     X_digits = digits.data
     y_digits = digits.target
     # Slice bounds must be integers: a float bound such as .9 * n_samples
     # raises TypeError when slicing NumPy arrays.
     split = int(.9 * len(X_digits))
     X_train, X_test = X_digits[:split], X_digits[split:]
     y_train, y_test = y_digits[:split], y_digits[split:]
     nb = NaiveBayes(sqlCtx)
     score = nb.fit(X_train, y_train).score(X_test, y_test)
     # assertGreater replaces the deprecated failUnless alias (removed in
     # Python 3.12).
     self.assertGreater(score, 0.85)
 def test_naive_bayes(self):
     """Check NaiveBayes accuracy on a 90/10 split of the digits dataset.

     The model is fitted on the leading 90% of the samples and scored on the
     trailing 10%; the score must be greater than 0.8.
     """
     digits = datasets.load_digits()
     X_digits = digits.data
     y_digits = digits.target
     # Compute the integer split point once and reuse it for both arrays.
     split = int(.9 * len(X_digits))
     X_train, X_test = X_digits[:split], X_digits[split:]
     y_train, y_test = y_digits[:split], y_digits[split:]
     nb = NaiveBayes(sqlCtx)
     score = nb.fit(X_train, y_train).score(X_test, y_test)
     # failUnless is a deprecated alias removed in Python 3.12.
     self.assertGreater(score, 0.8)
Пример #5
0
 def testNaiveBayesSK1(self):
     """Train NaiveBayes on 90% of the digits data and score the rest.

     The test passes when the score on the held-out 10% is above 0.85.
     """
     digits = datasets.load_digits()
     X_digits = digits.data
     y_digits = digits.target
     n_samples = len(X_digits)
     # Convert the split point to int: NumPy rejects float slice bounds
     # with a TypeError.
     n_train = int(.9 * n_samples)
     X_train = X_digits[:n_train]
     y_train = y_digits[:n_train]
     X_test = X_digits[n_train:]
     y_test = y_digits[n_train:]
     nb = NaiveBayes(sqlCtx)
     score = nb.fit(X_train, y_train).score(X_test, y_test)
     # Use assertGreater instead of the failUnless alias, which was removed
     # in Python 3.12.
     self.assertGreater(score, 0.85)
 def test_naive_bayes(self):
     """Train NaiveBayes on 90% of the digits data and score the rest.

     The test passes when the score on the held-out 10% is above 0.8.
     """
     digits = datasets.load_digits()
     X_digits = digits.data
     y_digits = digits.target
     n_samples = len(X_digits)
     # Integer index separating the training prefix from the test suffix.
     n_train = int(.9 * n_samples)
     X_train = X_digits[:n_train]
     y_train = y_digits[:n_train]
     X_test = X_digits[n_train:]
     y_test = y_digits[n_train:]
     nb = NaiveBayes(sqlCtx)
     score = nb.fit(X_train, y_train).score(X_test, y_test)
     # Use assertGreater instead of the failUnless alias, which was removed
     # in Python 3.12.
     self.assertGreater(score, 0.8)
Пример #7
0
 def test_naive_bayes1(self):
     """Compare NaiveBayes with scikit-learn's MultinomialNB on text data.

     Both models are fitted on TF-IDF vectors of four 20 Newsgroups
     categories. The check measures agreement between the two sets of test
     predictions (not accuracy against the true labels) and requires it to
     exceed 0.95.
     """
     from sklearn.naive_bayes import MultinomialNB

     categories = ['alt.atheism', 'talk.religion.misc', 'comp.graphics', 'sci.space']
     train_set = fetch_20newsgroups(subset='train', categories=categories)
     test_set = fetch_20newsgroups(subset='test', categories=categories)
     vectorizer = TfidfVectorizer()
     # Both feature matrices are SciPy CSR matrices.
     train_vectors = vectorizer.fit_transform(train_set.data)
     test_vectors = vectorizer.transform(test_set.data)
     nb = NaiveBayes(sparkSession)
     mllearn_predicted = nb.fit(train_vectors, train_set.target).predict(test_vectors)
     clf = MultinomialNB()
     sklearn_predicted = clf.fit(train_vectors, train_set.target).predict(test_vectors)
     agreement = accuracy_score(sklearn_predicted, mllearn_predicted)
     # assertGreater replaces the deprecated failUnless alias (removed in
     # Python 3.12).
     self.assertGreater(agreement, 0.95)
Пример #8
0
 def test_naive_bayes1(self):
     """Require NaiveBayes to agree with MultinomialNB on 20 Newsgroups.

     The same TF-IDF training vectors are fed to the NaiveBayes estimator
     under test and to scikit-learn's MultinomialNB; their predictions on
     the test vectors must match on more than 95% of the documents.
     """
     from sklearn.naive_bayes import MultinomialNB

     categories = ['alt.atheism', 'talk.religion.misc', 'comp.graphics', 'sci.space']
     newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
     newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)
     vectorizer = TfidfVectorizer()
     # Both vectors and vectors_test are SciPy CSR matrices.
     vectors = vectorizer.fit_transform(newsgroups_train.data)
     vectors_test = vectorizer.transform(newsgroups_test.data)
     labels = newsgroups_train.target
     mllearn_predicted = NaiveBayes(sparkSession).fit(vectors, labels).predict(vectors_test)
     sklearn_predicted = MultinomialNB().fit(vectors, labels).predict(vectors_test)
     # failUnless no longer exists in Python 3.12+; assertGreater also
     # prints the actual agreement value when the check fails.
     self.assertGreater(accuracy_score(sklearn_predicted, mllearn_predicted), 0.95)
Пример #9
0
 def test_naive_bayes(self):
     """Compare NaiveBayes with scikit-learn's MultinomialNB on digits data.

     Both models are fitted on the first 90% of the samples and predict the
     remaining 10%. The check measures agreement between the two prediction
     arrays (not accuracy against the true labels) and requires it to exceed
     0.95.
     """
     from sklearn.naive_bayes import MultinomialNB

     digits = datasets.load_digits()
     X_digits = digits.data
     y_digits = digits.target
     split = int(.9 * len(X_digits))
     X_train, X_test = X_digits[:split], X_digits[split:]
     # Only training labels are needed; the two models are compared with
     # each other rather than with the held-out labels.
     y_train = y_digits[:split]
     nb = NaiveBayes(sparkSession)
     mllearn_predicted = nb.fit(X_train, y_train).predict(X_test)
     clf = MultinomialNB()
     sklearn_predicted = clf.fit(X_train, y_train).predict(X_test)
     agreement = accuracy_score(sklearn_predicted, mllearn_predicted)
     # assertGreater replaces the deprecated failUnless alias (removed in
     # Python 3.12).
     self.assertGreater(agreement, 0.95)
Пример #10
0
 def test_naive_bayes(self):
     """Require NaiveBayes to agree with MultinomialNB on the digits dataset.

     The estimator under test and scikit-learn's MultinomialNB are trained
     on the same leading 90% of the samples; their predictions for the
     trailing 10% must match on more than 95% of the samples.
     """
     from sklearn.naive_bayes import MultinomialNB

     digits = datasets.load_digits()
     X_digits = digits.data
     y_digits = digits.target
     n_train = int(.9 * len(X_digits))
     X_train = X_digits[:n_train]
     y_train = y_digits[:n_train]
     X_test = X_digits[n_train:]
     mllearn_predicted = NaiveBayes(sparkSession).fit(X_train, y_train).predict(X_test)
     sklearn_predicted = MultinomialNB().fit(X_train, y_train).predict(X_test)
     # failUnless no longer exists in Python 3.12+; assertGreater also
     # prints the actual agreement value when the check fails.
     self.assertGreater(accuracy_score(sklearn_predicted, mllearn_predicted), 0.95)