#!/usr/bin/env python
# -*- coding: utf-8 -*-
import read_dataset as rd

# Load the dataset and append every training text to a plain-text dump.
train, test = rd.read()
dataset = rd.get_text(train)

with open('news.txt', 'a') as file:
    for text in dataset:
        file.write(text + '\n')
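# A minimal, hypothetical sketch of the read_dataset helper used above.
# Only the function names and the (train, test) / list-of-texts return shapes
# are taken from how it is called here; the CSV file name and the
# 'text'/'label' column names are assumptions for illustration.
import csv
import random

def read(percent_train=0.8, path='dataset.csv'):
    # Read all rows and split them into train/test by percent_train.
    with open(path, encoding='utf-8') as f:
        rows = list(csv.DictReader(f))
    random.shuffle(rows)
    cut = int(len(rows) * percent_train)
    return rows[:cut], rows[cut:]

def get_text(rows):
    # Return just the raw text of each news item.
    return [row['text'] for row in rows]

def get_target(rows):
    # Return the 'fake'/'real' label of each news item.
    return [row['label'] for row in rows]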
print('Writing log file\n')
file.write('Recall Macro: ' + str(recall_mean) + ' (+/-) ' + str(recall_std * 2) + '\n')
file.write('Precision Macro: ' + str(precision_mean) + ' (+/-) ' + str(precision_std * 2) + '\n')
file.write('F1 Macro: ' + str(f1_mean) + ' (+/-) ' + str(f1_std * 2) + '\n')
file.write('Accuracy: ' + str(accuracy_mean) + ' (+/-) ' + str(accuracy_std * 2) + '\n')
file.write('\n\n#############################################\n\n')
file.close()

## LOADING DATASET ######################
train, test = rd.read()
categories = ['fake', 'real']
train_text = rd.get_text(train)
train_target = rd.get_target(train)
# test_text = rd.get_text(test)
# test_target = rd.get_target(test)
#################################################

combinations = get_combinations()
# combinations = use_custom()

# Evaluate every preprocessing combination on the training texts.
for combination in combinations:
    analisar_features(train_text,
                      stem=combination['stem'],
                      remove_stop_words=combination['remove_stop_words'],
                      remove_punct=combination['remove_punct'],
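# A hedged sketch of get_combinations(), assuming it simply enumerates every
# on/off combination of the preprocessing flags passed to analisar_features
# above; any flags beyond the three visible in this excerpt are unknown.
from itertools import product

def get_combinations(flags=('stem', 'remove_stop_words', 'remove_punct')):
    # Build one dict per True/False assignment of the flags,
    # e.g. {'stem': True, 'remove_stop_words': False, 'remove_punct': True}.
    return [dict(zip(flags, values))
            for values in product([True, False], repeat=len(flags))]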
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn import metrics
import numpy as np
from sklearn.datasets import fetch_20newsgroups
import read_dataset as rd

## LOADING DATASET ######################
train, test = rd.read(percent_train=.5)
categories = ['fake', 'real']
train_text = rd.get_text(train)
train_target = rd.get_target(train)
test_text = rd.get_text(test)
test_target = rd.get_target(test)
#################################################

## TRAINING NAIVE BAYES ##
print('Training Naive Bayes model...')
text_clf = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
])
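# A minimal sketch of the usual next steps for this pipeline (fit, predict,
# report); the source file is cut off above, so this follows the standard
# scikit-learn pattern rather than the author's exact evaluation code.
text_clf.fit(train_text, train_target)
predicted = text_clf.predict(test_text)

print('Accuracy:', np.mean(predicted == np.array(test_target)))
print(metrics.classification_report(test_target, predicted, target_names=categories))
print(metrics.confusion_matrix(test_target, predicted))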