def classify(self, dataSet):
    """
    Apply ``self.extrairFrase`` to ``dataSet`` through NLTK.

    NOTE(review): this takes ``self``, so it is presumably a method of a
    class whose header is outside this chunk -- confirm the indentation
    when merging.

    :param dataSet: collection handed unchanged to
        ``nltk.classify.apply_features`` as the items to featurize.
    :return: whatever ``nltk.classify.apply_features`` returns for
        ``self.extrairFrase`` and ``dataSet``.
    """
    feature_func = self.extrairFrase
    return nltk.classify.apply_features(feature_func, dataSet)


from database.DataBase import DataBase

# Ad-hoc smoke run of the preprocessing steps.
# NOTE(review): these statements sit at module level, so they execute
# whenever this module is run or imported.
db = DataBase()
dataSet = db.get_all_data_set(['vaticannews', 'semprequestione'])
# data=db.get_all_news_from('vaticannews')
p = Process(dataSet)
# The stemming result is bound to ``sp`` but not read again in the
# visible code.
sp = p.stemmerAplay()
# print(p.extrairFrase())
# Only the *types* of the intermediate results are printed.
print(type(p.extrairFrase(dataSet)))
print(type(p.freqWords(p.buscaPalavras()).values()))
# Disabled experiments, kept for reference:
# d=p.freqWords(p.buscaPalavras()).
# classificador=nltk.NaiveBayesClassifier.train(d)
#
#
# print(classificador.show_most_informative_features(10))
# for s in st.most_common(50):
#     print(s)
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

from database.DataBase import DataBase
from facebookapi.publushFacebook import PublishFacebook
from preprocess.Process import Process
from util.Character import removerAcentosECaracteresEspeciais

# Load every stored record for the listed sources.
db = DataBase()
data = db.get_all_data_set(
    ['vaticannews', 'semprequestione', 'acidigital', 'cancaonova'])

# Each record is unpacked as a 3-tuple (title, news, clazz): the third
# field becomes the target list ``y`` and the second the sample list ``X``.
y = [label for (_headline, _body, label) in data]
X = [body for (_headline, body, _label) in data]


def train(classifier, X, y):
    """
    Fit ``classifier`` on a train split and print its held-out score.

    The data is split with ``train_test_split`` using ``test_size=0.25``
    and ``random_state=33``; the classifier is fitted on the training
    part and ``classifier.score`` is evaluated on the test part.

    :param classifier: object exposing ``fit(X, y)`` and ``score(X, y)``.
    :param X: samples, passed to ``train_test_split``.
    :param y: targets aligned with ``X``, passed to ``train_test_split``.
    :return: the same ``classifier`` object, after fitting.
    """
    split = train_test_split(X, y, test_size=0.25, random_state=33)
    X_train, X_test, y_train, y_test = split

    classifier.fit(X_train, y_train)

    accuracy = classifier.score(X_test, y_test)
    print("Accuracy: %s" % accuracy)
    return classifier