Exemplo n.º 1
0
import pickle
from sklearn.naive_bayes import BernoulliNB
from sklearn.feature_extraction.text import CountVectorizer
from filter import Filter
import re
from sklearn.metrics import accuracy_score
from sklearn.externals import joblib

# Fit a Bernoulli Naive Bayes classifier on the corpus in lib/SMSSpamCollection
# and persist both the vectorizer and the fitted model under trainedmodels/.

# The pickled object is used below as the CountVectorizer vocabulary.
# NOTE: pickle can execute arbitrary code while loading; only load a file
# from a trusted source here.
with open('trainedmodels/mi_bow.pkl', 'rb') as file:
    bow = pickle.load(file)  # Load features

myfilter = Filter()
reObject = re.compile('(.*)\t(.*)')
# The context manager closes the corpus file instead of leaking the handle.
# NOTE(review): assumes getFiltered fully consumes the file before
# returning -- confirm against Filter.getFiltered.
with open('lib/SMSSpamCollection') as corpus:
    df = myfilter.getFiltered(reObject, corpus)

# Restrict the vectorizer to the loaded vocabulary and save it so that
# later scripts can transform new text with the same features.
for_burnolli = CountVectorizer(vocabulary=bow)
joblib.dump(for_burnolli, 'trainedmodels/bow_mi.pkl')
vector = for_burnolli.transform(df['data'])

detector = BernoulliNB().fit(vector, df['label'])
joblib.dump(detector, 'trainedmodels/bernoulli.pkl')

# The accuracy printed here is measured on the same data used for fitting.
predictions = detector.predict(vector)
print("Accuracy :", accuracy_score(df['label'], predictions))
Exemplo n.º 2
0
from sklearn.externals import joblib
import re
from filter import Filter
from sklearn.metrics import accuracy_score

# Score the saved Bernoulli model against the records in lib/sms_for_test.txt.
bow = joblib.load('trainedmodels/bow_mi.pkl')
bernoulli = joblib.load('trainedmodels/bernoulli.pkl')

# Each record is matched as "<anything>,<spam|ham>" with the class at the end.
reObject = re.compile(r'(.*),(spam|ham)$')

myfil = Filter()
# The context manager closes the test file instead of leaking the handle.
# NOTE(review): assumes getFiltered fully consumes the file before
# returning -- confirm against Filter.getFiltered.
with open("lib/sms_for_test.txt") as file:
    msg = myfil.getFiltered(reObject, file)

# NOTE(review): the 'label' column is vectorized and the 'data' column is
# used as ground truth. Presumably Filter names its columns by capture-group
# position and this regex puts the text first -- verify against
# Filter.getFiltered before "correcting" the column names.
vectors = bow.transform(msg['label'])
predictions = bernoulli.predict(vectors)
print("Accuracy :", accuracy_score(predictions, msg['data']))
from sklearn.externals import joblib
import re
from filter import Filter
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_20newsgroups
# Score the saved newsgroup model on two categories taken from
# lib/20ng-test-no-short.txt.
bow = joblib.load('trainedmodels/bow_20.pkl')
# Loaded but not applied below: the classifier receives the output of `bow`
# directly, without a tfidf.transform step.
tfidf = joblib.load('trainedmodels/tfidf_20.pkl')
detector = joblib.load('trainedmodels/newsgroup.pkl')

# Records are split on the first-to-last greedy match around a tab.
reObject = re.compile(r'(.*)\t(.*)')

# Named msg_filter so the builtin filter() is not shadowed.
msg_filter = Filter()
# The context manager closes the test file instead of leaking the handle.
# NOTE(review): assumes getFiltered fully consumes the file before
# returning -- confirm against Filter.getFiltered.
with open("lib/20ng-test-no-short.txt") as file:
    msg = msg_filter.getFiltered(reObject, file)

# Keep only the rows whose label is one of the two categories of interest.
allowed = ['alt.atheism', 'talk.religion.misc']
msg = msg.loc[msg['label'].isin(allowed)]

msgs_bag_of_word = bow.transform(msg['data'])
predictions = detector.predict(msgs_bag_of_word)
print("Accuarcy", accuracy_score(msg['label'], predictions))