def main():
    """Train NaiveBayes on a random 70/30 split of 'carData' and print accuracy.

    Each row returned by ``readFile`` is treated as a sequence whose last item
    is the class label and whose preceding items are the features.  The rows
    are shuffled on every run, the first 70% are used for training and the
    remainder for testing.  The fraction of correctly predicted test rows is
    printed.
    """
    preparedDataset = readFile('carData')
    # Re-randomise the row order on every execution of main().
    random.shuffle(preparedDataset)

    # dataSet_x[i] holds the features of row i, dataSet_y[i] its class label.
    dataSet_x = [row[:-1] for row in preparedDataset]
    dataSet_y = [row[-1] for row in preparedDataset]

    nTuples = len(dataSet_x)
    nToTrain = int(math.floor(nTuples * 0.7))

    dataSet_x_train = dataSet_x[:nToTrain]
    dataSet_y_train = dataSet_y[:nToTrain]
    dataSet_x_test = dataSet_x[nToTrain:]
    dataSet_y_test = dataSet_y[nToTrain:]

    # naive.train(features, classes)
    naive = NaiveBayes()
    naive.train(dataSet_x_train, dataSet_y_train)

    # naive.predict(features only); each result is used as a mapping from
    # class label to score, so the predicted class is the key with the
    # highest value.
    results = naive.predict(dataSet_x_test)

    accuracy = 0.0
    for scores, y in zip(results, dataSet_y_test):
        yPredicted = max(scores, key=scores.get)
        if y == yPredicted:
            accuracy += 1.0

    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print(accuracy / len(dataSet_y_test))
def trainTest(self, folds=6, classifier=3, model1="Logistic Regression", model2="Logisitc Regression"):
    """Build a NaiveBayes object for two models, run k-fold, then create the model.

    :param folds: value forwarded to ``kfold`` as its first argument
    :param classifier: value forwarded to ``kfold`` as its second argument
    :param model1: name of the first model, passed to the constructor and to
        ``initializeModels``
    :param model2: name of the second model, passed the same way
    """
    # NOTE(review): the default for model2 is spelled "Logisitc" -- left
    # unchanged because callers or NaiveBayes may match on this exact string;
    # confirm and fix at the source of truth.
    classifierObject = NaiveBayes(model1, model2)
    # classifierObject.gatherData()  (disabled in the original)
    # Formatted so the output is "model1 model2" on both Python 2 and 3.
    print("%s %s" % (model1, model2))
    classifierObject.initializeModels(model1, model2)
    classifierObject.kfold(folds, classifier)
    classifierObject.createModel()
def run_naive_bayes(df, num_features):
    """Cross-validate Naive Bayes on a data frame and report per-fold accuracy.

    The data frame is split by ``split_into_random_stratified_groups``; each
    resulting subset is used once as the test set while the remaining subsets
    are concatenated into the training set.

    :param df: The data set to run the algorithm on. The first
        ``num_features`` columns are used as features and the column at
        position ``num_features`` as the label.
    :param num_features: The number of features in this dataset
    :return: The mean of the per-experiment accuracies returned by
        ``NaiveBayes.validate``.
    """
    print(f"Size of total dataset = {len(df)}")

    # Accept however many folds the splitter yields instead of unpacking
    # exactly five names.
    datasets = list(split_into_random_stratified_groups(df))

    nb_scores = []
    for i, df_test in enumerate(datasets):
        print("-------------")
        print(f"Experiment #{i + 1}")
        print("-------------")

        # Use one subset as the test set
        print(f"Test set size = {len(df_test)}")

        # Create a training set from the remaining subsets
        training_sets = datasets[:i] + datasets[i + 1:]
        df_train = pd.concat(training_sets)
        print(f"Training set size = {len(df_train)}")

        # Create Naive Bayes: train features, train labels, test features,
        # test labels
        nb = NaiveBayes(df_train.iloc[:, 0:num_features],
                        df_train.iloc[:, num_features],
                        df_test.iloc[:, 0:num_features],
                        df_test.iloc[:, num_features])

        # Train, then measure accuracy on the held-out subset
        nb.learn()
        nb_accuracy = nb.validate()
        print('Naive Bayes Percent accurate: ' + repr(nb_accuracy) + '%')
        nb_scores.append(nb_accuracy)

    return statistics.mean(nb_scores)
def main():
    """Load the data, preprocess it, then cross-validate a NaiveBayes model.

    Rows with missing values are handled by the preprocessor's "remove"
    method.  The model is told to drop the "race" and "native_country"
    columns, runs ``Kfold_cross_validation`` with 10 as its second argument
    and finally plots its confusion matrix.
    """
    raw_data = loadData()

    print("\n############### PROPROCESS DATA ##################")
    cleanDataset = Precessor(missing_value_method="remove").preprocess(raw_data)

    print("\n###### DATA INFORMATION AFTER PREPROCESSING ######")
    # NOTE(review): if cleanDataset is a pandas DataFrame, info() prints by
    # itself and returns None, so this also prints "None" -- confirm.
    print(cleanDataset.info())

    model = NaiveBayes()
    model.setRomovedCol(["race", "native_country"])
    model.Kfold_cross_validation(cleanDataset, 10)
    model.plotConfusionMatrix()
# Shuffle the rows reproducibly (fixed seed) before splitting.
np.random.seed(13)
np.random.shuffle(idx)
X = X[idx]
y = y[idx]

# split the data
Xtrain = X[:nTrain, :]
ytrain = y[:nTrain]
Xtest = X[nTrain:, :]
ytest = y[nTrain:]

# train the online naive Bayes, two instances at a time.
# The step of 2 makes consecutive windows disjoint; with a step of 1 the
# windows [i, i+2) overlap and every row except the first and last would be
# fed to fit() twice.
modelONB = OnlineNaiveBayes(useLaplaceSmoothing=True)
for i in range(0, nTrain, 2):
    modelONB.fit(Xtrain[i:i+2, :], ytrain[i:i+2])

# train the batch naive Bayes on the whole training set at once
modelNB = NaiveBayes(useLaplaceSmoothing=True)
modelNB.fit(Xtrain, ytrain)

# output predictions on the remaining data
ypred_ONB = modelONB.predict(Xtest)
ypred_NB = modelNB.predict(Xtest)

# compute the accuracy of each model on the held-out test rows
accuracyONB = accuracy_score(ytest, ypred_ONB)
accuracyNB = accuracy_score(ytest, ypred_NB)

# Parenthesised single-argument prints give identical output on Python 2
# and Python 3.
print("Online Naive Bayes Accuracy = " + str(accuracyONB))
print("Batch Naive Bayes Accuracy = " + str(accuracyNB))
docId, wordId, count = map(int, row) X_train.append([docId, wordId, count]) X_train = np.array(X_train) trainDataFile.close() testDataFile = open('Data/test.data', 'r') testReader = csv.reader(testDataFile, delimiter=' ') for row in testReader: docId, wordId, count = map(int, row) X_test.append([docId, wordId, count]) X_test = np.array(X_test) testDataFile.close() alpha = 1.0 / V print 'Initializing Naive Bayes classifier...' NBClassifier = NaiveBayes(V, numY) print 'Training the model...' NBClassifier.train(X_train, y_train, alpha) print 'Making predictions on test data...' y_predicted = NBClassifier.predict(X_test) # Solution for question 3.2 print 'Accuracy of Naive Bayes classifier: ' + str( getAccuracy(y_test, y_predicted)) print #print 'Confusion Matrix:' #print #printConfusionMatrix(y_test, y_predicted, range(1,21)) #print # Solution for question 3.4
def test(self, summarizer="LexRank"):
    """Create a fresh NaiveBayes object and run its ``test`` with *summarizer*.

    :param summarizer: value forwarded unchanged to ``NaiveBayes.test``
    """
    NaiveBayes().test(summarizer)
# Collect the labelled rows: each entry of trainlabels is (label, row index).
trainIndex = []
for (label, i) in trainlabels:
    trainX.append(data[i])
    trainY.append(label)
    trainIndex.append(i)

# Every row that has no label becomes a test row.  Membership is checked
# against a set so the split stays linear in the number of rows.
labelledRows = set(trainIndex)
testIndex = [i for i in range(len(data)) if i not in labelledRows]
testX = [data[i] for i in testIndex]

################# USING NAIVE BAYES TO PREDICT SCORE #################
clf = NaiveBayes()
clf.train(trainX, trainY)
pred = clf.prediction(testX)

if '-save' in sys.argv:
    # Echo each prediction and also write it to resultFile; the with-block
    # closes the file even if a write fails.
    with open(resultFile, 'w') as writeFile:
        for (idx, p) in zip(testIndex, pred):
            print(p, idx)
            writeFile.write('{} {}\n'.format(p, idx))
else:
    for (idx, p) in zip(testIndex, pred):
        print(p, idx)
randItemIterator3 = 4

# Banner for problem 2, questions 1a/2, followed by the run on set1.
print()
for banner_line in (
    "############################################################################",
    "-------------------------Problem 2 question 1a/2----------------------------",
    "---Testing the prediction on the test data along with scatter plot and ROC--",
    "############################################################################",
):
    print(banner_line)
NaiveBayes(set1, randItemIterator1, 1, 1, m1, m2, stdDev1, stdDev2)

# Question 1b (varying the number of samples over [10, 20, 50, 100, 300, 500])
# is disabled in this script:
# print()
# print("############################################################################")
# print("-------------------------Problem 2 question 1b------------------------------")
# print("---------Changing number of samples [10, 20, 50, 100, 300, 500]-------------")
# print("############################################################################")
# NaiveBayes(set2, randItemIterator2, 0, 0, m1, m2, stdDev1, stdDev2)
# print()

# Start of the banner for questions 1c/2.
for banner_line in (
    "############################################################################",
    "-------------------------Problem 2 question 1c/2----------------------------",
):
    print(banner_line)
def predict(model, dataset, vari, deli, day):
    """Refit *model* on a bootstrap sample for one day and predict that day.

    A sample of ``n=10**4`` is requested from ``dataset.bootstrap`` for the
    given (vari, deli, day), the model is fitted on it, and the model's
    prediction for ``dataset.get_x(vari, deli, day)`` is returned.
    """
    features, targets = dataset.bootstrap(vari, deli, day, n=10**4)
    model.fit(features, targets)
    return model.predict(dataset.get_x(vari, deli, day))


if __name__ == '__main__':
    vari, deli, m = 'cu', '1712', 7
    db = DB()
    # All dates for the contract in ascending order, skipping the first 30.
    days = db.execute(
        "select date from contract_daily where vari=%s and deli=%s order by date asc",
        (vari, deli))[30:]
    data = DataSet(m, 'direction')
    model = NaiveBayes(m)
    y_real = []
    y_pred = []
    for day in days:
        model.fit(*data.bootstrap(vari, deli, day, 300))
        scores = model.predict(data.get_x(vari, deli, day))
        # Pick the index (0, 1 or 2) of the largest of the first three
        # scores; on ties the lower index wins.
        first, second, third = scores[0], scores[1], scores[2]
        if first >= second and first >= third:
            r = 0
        elif second >= first and second >= third:
            r = 1
        else:
            r = 2
        y_pred.append(r)
        y_real.append(data.get_y(vari, deli, day))
y = y[idx]

# split the data
Xtrain = X[:nTrain, :]
ytrain = y[:nTrain]
Xtest = X[nTrain:, :]
ytest = y[nTrain:]

# train the decision tree
modelDT = DecisionTreeClassifier()
modelDT.fit(Xtrain, ytrain)

# train the naive Bayes
modelNB = NaiveBayes(useLaplaceSmoothing=True)
modelNB.fit(Xtrain, ytrain)

# output predictions on the remaining data
ypred_DT = modelDT.predict(Xtest)
ypred_NB = modelNB.predict(Xtest)

# compute the accuracy of each model on the held-out test rows
accuracyDT = accuracy_score(ytest, ypred_DT)
accuracyNB = accuracy_score(ytest, ypred_NB)

# Parenthesised single-argument prints give identical output on Python 2
# and Python 3.
print("Decision Tree Accuracy = " + str(accuracyDT))
print("Naive Bayes Accuracy = " + str(accuracyNB))
def __init__(self):
    """Build the spam-filter main window and run the Tk event loop.

    The main window shows a title label and a menu bar.  The "Naïve Bayes"
    and "Perceptron" menu entries each open a classifier window (kept in
    ``self.window2`` / ``self.window3``) with an "Iniciar" button that runs
    the model's ``algoritmo()`` and buttons that display its metrics,
    confusion matrices and word frequencies.

    This call blocks in ``mainloop()`` until the main window is closed.
    """
    # Main window
    self.window = Tk()
    # Classifier windows are created on demand by the menu callbacks.  They
    # start as None so the close handlers can tell whether one is open
    # instead of failing on a missing or already-destroyed window.
    self.window2 = None
    self.window3 = None
    self.window.title('Filtro de Spam')
    self.window.resizable(False, False)
    self.window.geometry('600x450+584+230')

    def make_label(parent, text, background, font_size, relx, rely, height, width):
        """Place a white-on-colour ridge label inside *parent*."""
        label = Label(parent)
        label.place(relx=relx, rely=rely, height=height, width=width)
        label.configure(background=background, foreground="white", relief='ridge',
                        text=text, font=("Rockwell", font_size))
        return label

    def make_button(parent, text, command, relx, rely, height, width,
                    background="#028f99", foreground="white", font_size=10):
        """Place a button with the window's common styling inside *parent*."""
        button = Button(parent, command=command)
        button.place(relx=relx, rely=rely, height=height, width=width)
        button.configure(text=text, pady="0", width=267, background=background,
                         foreground=foreground, font=("Rockwell", font_size))
        return button

    def make_panel(window, relx, rely, relheight, relwidth, caption, caption_background=None):
        """Create a grooved frame filled by a captioned LabelFrame; return the frame."""
        frame = Frame(window)
        frame.place(relx=relx, rely=rely, relheight=relheight, relwidth=relwidth)
        frame.configure(relief='groove', borderwidth="2", width=305)
        caption_frame = LabelFrame(frame, text=caption)
        caption_frame.pack(fill="both", expand="yes")
        if caption_background is not None:
            caption_frame.configure(background=caption_background)
        return frame

    def make_word_pane(window, relx, relheight, relwidth, heading, columns, rows):
        """Fill one pane of a word-frequency window.

        *columns* is a list of (header text, relx, width) and *rows* an
        iterable of already formatted text lines.
        """
        pane = Frame(window)
        pane.place(relx=relx, rely=0.0, relheight=relheight, relwidth=relwidth)
        pane.configure(relief='raised', borderwidth="2", width=700, background="#91cded")
        make_label(pane, heading, "#028f99", 10, 0.325, 0.0, 40, 125)
        for column_text, column_relx, column_width in columns:
            make_label(pane, column_text, "#0082ba", 9, column_relx, 0.138, 31, column_width)
        listing = Text(pane)
        for row in rows:
            listing.insert(END, row)
        listing.place(relx=0.035, rely=0.225, relheight=0.700, relwidth=0.915)
        # Read-only once filled.
        listing.configure(width=174, state='disabled')

    def show_naive_bayes_words(model):
        """Open the absolute/relative word-frequency window for Naive Bayes."""
        palavras = model.matrizP
        palavrasRel = model.matrizP_Relativa
        window9 = Toplevel()
        window9.resizable(False, False)
        window9.title('Frequências de Palavras')
        window9.geometry("700x450+517+216")
        make_word_pane(
            window9, 0.0, 1.011, 0.508, """Absolutas""",
            [("""Palavra:""", 0.035, 80), ("""Spam""", 0.625, 50), ("""Ham""", 0.805, 50)],
            ("{:22s}".format(str(i)) + "{:3s}".format(str(j[0])) + " "
             + "{:3s}".format(str(j[1])) + "\n"
             for i, j in palavras.items()))
        make_word_pane(
            window9, 0.5, 1.011, 0.508, """Relativas""",
            [("""Palavra:""", 0.035, 80), ("""Spam""", 0.565, 50), ("""Ham""", 0.765, 50)],
            ("{:18s}".format(str(i)) + "{:0.4f}".format(j[0]) + " "
             + "{:0.4f}".format(j[1]) + "\n"
             for i, j in palavrasRel.items()))
        window9.mainloop()

    def show_perceptron_words(model):
        """Open the word-count window for the Perceptron."""
        palavras = model.freq_Pal
        window10 = Toplevel()
        window10.resizable(False, False)
        window10.title('Frequências de Palavras')
        window10.geometry("350x450+517+216")
        make_word_pane(
            window10, 0.0, 1, 1, """Frequência""",
            [("""Palavra:""", 0.035, 80), ("""Quantidade""", 0.665, 100)],
            ("{:26s}".format(str(chave)) + "{:3s}\n".format(str(valor))
             for chave, valor in palavras.items()))
        window10.mainloop()

    def open_classifier_window(attr_name, geometry, name, model_factory,
                               confusion_attr, relative_attr, show_words):
        """Create a classifier window and store it in ``self.<attr_name>``.

        *name* is used for the window title, the header label and the
        "not started" error message.  *confusion_attr* / *relative_attr*
        are the model attribute names holding the confusion matrices.
        """
        # NOTE(review): this is a second Tk() root, as in the original;
        # a Toplevel of self.window is the usual choice -- confirm before
        # changing.
        window = Tk()
        setattr(self, attr_name, window)
        started = BooleanVar(window)
        model = model_factory()
        not_started_message = "Inicie o Classificador {}.".format(name)

        window.title(name)
        window.resizable(False, False)
        window.geometry(geometry)
        window.configure(background="#91cded")
        make_label(window, 'Classificador {}'.format(name), "#0082ba", 14,
                   0.25, 0.067, 35, 305)
        frame1 = make_panel(window, 0.125, 0.225, 0.250, 0.750, "Opções", "#f7f7f7")
        frame2 = make_panel(window, 0.125, 0.500, 0.475, 0.350, "Métricas")
        frame3 = make_panel(window, 0.525, 0.500, 0.475, 0.350, "Palavras e Matrizes")

        def require_started():
            """Return True if the model has run; otherwise show the error box."""
            if started.get():
                return True
            messagebox.showerror(title="Erro", message=not_started_message)
            return False

        def iniciar():
            # Mark the classifier as started only after algoritmo() has
            # finished, so a failed run does not unlock the result buttons.
            model.algoritmo()
            started.set(True)

        def close():
            window.destroy()
            # Forget the window unless a newer one has replaced it.
            if getattr(self, attr_name) is window:
                setattr(self, attr_name, None)

        def show_percentage(title, attr):
            if require_started():
                messagebox.showinfo(title=title,
                                    message='''{:0.3f} %'''.format(getattr(model, attr)))

        def show_matrix(title, attr):
            if require_started():
                messagebox.showinfo(title=title, message=str(getattr(model, attr)))

        def show_word_list():
            if require_started():
                show_words(model)

        # Closing through the window manager must also clear the reference.
        window.protocol("WM_DELETE_WINDOW", close)

        make_button(frame1, '''Iniciar''', iniciar, 0.075, 0.400, 35, 150,
                    background="#9ef702", foreground="black", font_size=11)
        make_button(frame1, '''Fechar Classificador''', close, 0.475, 0.400, 35, 200,
                    background="#f74702", foreground="white", font_size=11)

        # Metric buttons: the button text doubles as the message-box title.
        for title, attr, rely in (("Exatidão", "exatidao", 0.150),
                                  ("Sensibilidade", "sensibilidade", 0.375),
                                  ("Precisão", "precisao", 0.600),
                                  ("fScore", "fScore", 0.825)):
            # Defaults bind the current loop values to each callback.
            make_button(frame2, title,
                        lambda t=title, a=attr: show_percentage(t, a),
                        0.135, rely, 30, 150)

        make_button(frame3, '''Frequência Palavras''', show_word_list,
                    0.075, 0.225, 30, 175)
        make_button(frame3, '''Matriz de Confusão''',
                    lambda: show_matrix("Matriz de Confusão", confusion_attr),
                    0.075, 0.475, 30, 175)
        make_button(frame3, '''Matriz Conf. Relativa''',
                    lambda: show_matrix("Matriz Conf. Relativa", relative_attr),
                    0.075, 0.725, 30, 175)

    make_label(self.window, '''Classificadores Filtro Spam''', "#0082ba", 14,
               0.125, 0.325, 75, 450)

    def menuNaiveBayes():
        # Naive Bayes spam-filter window
        open_classifier_window("window2", '600x450+584+230', 'Naïve Bayes', NaiveBayes,
                               "confMatriz", "confMatriz_Relativa",
                               show_naive_bayes_words)

    def menuPerceptron():
        # Perceptron spam-filter window
        open_classifier_window("window3", '600x475+584+230', 'Perceptron', Perceptron,
                               "matriz_Confusao", "matriz_Relativa",
                               show_perceptron_words)

    def creditos():
        # Show the project credits.
        # NOTE(review): the whitespace inside this message is kept exactly as
        # found in the source; the {:20s} padding suggests it was meant to be
        # laid out on several lines -- confirm.
        messagebox.showinfo(
            'Créditos',
            (" Elaborado por:"
             " - {:20s} - Nº 30003769"
             " - {:20s} - Nº 30003043"
             " - {:20s} - Nº 30005711"
             " - {:20s} - Nº 30003039"
             " Inteligência Artificial"
             " @UAL - Universidade Autónoma de Lisboa ").format(
                 'Bruno Silva', 'David Monteiro', 'Nuno Barrocas', 'Zacarias Chiena'))

    def close_classifier_windows():
        """Destroy whichever classifier windows are currently open."""
        for attr_name in ("window2", "window3"):
            sub_window = getattr(self, attr_name)
            if sub_window is not None:
                sub_window.destroy()
                setattr(self, attr_name, None)

    def quit_program():
        # Leave the program
        close_classifier_windows()
        self.window.quit()
        self.window.destroy()

    def on_closing():
        # Ask for confirmation, then close every window of the program
        if messagebox.askokcancel("Fechar", "De certeza que quer sair?"):
            close_classifier_windows()
            self.window.destroy()

    self.window.protocol("WM_DELETE_WINDOW", on_closing)

    menubar = Menu(self.window)
    gamemenu = Menu(menubar, tearoff=0)
    gamemenu.add_command(label="Sobre", command=creditos)
    gamemenu.add_command(label="Sair", command=quit_program)
    menubar.add_cascade(label="Opções", menu=gamemenu)
    menubar.add_cascade(label="Naïve Bayes", command=menuNaiveBayes)
    menubar.add_cascade(label="Perceptron", command=menuPerceptron)
    self.window.config(menu=menubar)

    self.window.mainloop()
def menuNaiveBayes():
    """Open the Naive Bayes spam-filter window.

    Creates the window as ``self.window2`` (``self`` comes from the
    enclosing scope), a fresh ``NaiveBayes`` model, and buttons to run the
    model and to display its metrics, confusion matrices and word
    frequencies.  Result buttons show an error box until the model has run.
    """
    self.window2 = Tk()
    variavel_controlo = BooleanVar(self.window2)
    nb = NaiveBayes()
    self.window2.title('Naïve Bayes')
    self.window2.resizable(False, False)
    self.window2.geometry('600x450+584+230')
    self.window2.configure(background="#91cded")

    def rotulo(parent, text, background, font_size, relx, rely, height, width):
        """Place a white-on-colour ridge label inside *parent*."""
        label = Label(parent)
        label.place(relx=relx, rely=rely, height=height, width=width)
        label.configure(background=background, foreground="white", relief='ridge',
                        text=text, font=("Rockwell", font_size))
        return label

    def painel(relx, rely, relheight, relwidth, caption):
        """Create a grooved frame with a captioned LabelFrame; return both."""
        frame = Frame(self.window2)
        frame.place(relx=relx, rely=rely, relheight=relheight, relwidth=relwidth)
        frame.configure(relief='groove', borderwidth="2", width=305)
        caption_frame = LabelFrame(frame, text=caption)
        caption_frame.pack(fill="both", expand="yes")
        return frame, caption_frame

    def botao(parent, text, command, relx, rely, height, width,
              background="#028f99", foreground="white", font_size=10):
        """Place a button with the window's common styling inside *parent*."""
        button = Button(parent, command=command)
        button.place(relx=relx, rely=rely, height=height, width=width)
        button.configure(text=text, pady="0", width=267, background=background,
                         foreground=foreground, font=("Rockwell", font_size))
        return button

    rotulo(self.window2, '''Classificador Naïve Bayes''', "#0082ba", 14,
           0.25, 0.067, 35, 305)
    frame1, labelframe = painel(0.125, 0.225, 0.250, 0.750, "Opções")
    labelframe.configure(background="#f7f7f7")
    frame2, _ = painel(0.125, 0.500, 0.475, 0.350, "Métricas")
    frame3, _ = painel(0.525, 0.500, 0.475, 0.350, "Palavras e Matrizes")

    def iniciado():
        """Return True if the model has run; otherwise show the error box."""
        if variavel_controlo.get():
            return True
        messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")
        return False

    def painel_palavras(window, relx, heading, columns, rows):
        """Fill one half of the word-frequency window."""
        pane = Frame(window)
        pane.place(relx=relx, rely=0.0, relheight=1.011, relwidth=0.508)
        pane.configure(relief='raised', borderwidth="2", width=700, background="#91cded")
        rotulo(pane, heading, "#028f99", 10, 0.325, 0.0, 40, 125)
        for column_text, column_relx, column_width in columns:
            rotulo(pane, column_text, "#0082ba", 9, column_relx, 0.138, 31, column_width)
        listing = Text(pane)
        for row in rows:
            listing.insert(END, row)
        listing.place(relx=0.035, rely=0.225, relheight=0.700, relwidth=0.915)
        # Read-only once filled.
        listing.configure(width=174, state='disabled')

    def menuLista():
        # Window with the absolute and relative word frequencies
        if not iniciado():
            return
        palavras = nb.matrizP
        palavrasRel = nb.matrizP_Relativa
        window9 = Toplevel()
        window9.resizable(False, False)
        window9.title('Frequências de Palavras')
        window9.geometry("700x450+517+216")
        painel_palavras(
            window9, 0.0, """Absolutas""",
            [("""Palavra:""", 0.035, 80), ("""Spam""", 0.625, 50), ("""Ham""", 0.805, 50)],
            ("{:22s}".format(str(i)) + "{:3s}".format(str(j[0])) + " "
             + "{:3s}".format(str(j[1])) + "\n"
             for i, j in palavras.items()))
        painel_palavras(
            window9, 0.5, """Relativas""",
            [("""Palavra:""", 0.035, 80), ("""Spam""", 0.565, 50), ("""Ham""", 0.765, 50)],
            ("{:18s}".format(str(i)) + "{:0.4f}".format(j[0]) + " "
             + "{:0.4f}".format(j[1]) + "\n"
             for i, j in palavrasRel.items()))
        window9.mainloop()

    def iniciar():
        # Run the algorithm first and only then unlock the result buttons,
        # so a failed run does not leave them pointing at unset results.
        nb.algoritmo()
        variavel_controlo.set(True)

    def mostrar_percentagem(title, attr):
        if iniciado():
            messagebox.showinfo(title=title,
                                message='''{:0.3f} %'''.format(getattr(nb, attr)))

    def mostrar_matriz(title, attr):
        if iniciado():
            messagebox.showinfo(title=title, message=str(getattr(nb, attr)))

    botao(frame1, '''Iniciar''', iniciar, 0.075, 0.400, 35, 150,
          background="#9ef702", foreground="black", font_size=11)
    botao(frame1, '''Fechar Classificador''', self.window2.destroy, 0.475, 0.400, 35, 200,
          background="#f74702", foreground="white", font_size=11)

    # Metric buttons: the button text doubles as the message-box title.
    for title, attr, rely in (("Exatidão", "exatidao", 0.150),
                              ("Sensibilidade", "sensibilidade", 0.375),
                              ("Precisão", "precisao", 0.600),
                              ("fScore", "fScore", 0.825)):
        # Defaults bind the current loop values to each callback.
        botao(frame2, title, lambda t=title, a=attr: mostrar_percentagem(t, a),
              0.135, rely, 30, 150)

    botao(frame3, '''Frequência Palavras''', menuLista, 0.075, 0.225, 30, 175)
    botao(frame3, '''Matriz de Confusão''',
          lambda: mostrar_matriz("Matriz de Confusão", "confMatriz"),
          0.075, 0.475, 30, 175)
    botao(frame3, '''Matriz Conf. Relativa''',
          lambda: mostrar_matriz("Matriz Conf. Relativa", "confMatriz_Relativa"),
          0.075, 0.725, 30, 175)
from naiveBayes import NaiveBayes
from glob import glob

# Train on the 20-newsgroups training directory.
r = NaiveBayes('../data/nb/20news-bydate-train/')
r.train()

folders = ['talk.politics.mideast', 'sci.crypt', 'comp.os.ms-windows.misc',
           'comp.sys.ibm.pc.hardware', 'rec.sport.baseball']

for folder in folders:
    # `path` rather than `file`, which would shadow the Python 2 builtin.
    for path in glob('../data/nb/20news-bydate-test/' + folder + "/*"):
        # Single-argument print gives the same output on Python 2 and 3.
        print(path)
        r.testFile(path)
        # Stop after the first file of this folder ...
        break
    # ... and after the first folder, so only one test file is classified.
    # NOTE(review): the two breaks look like debugging leftovers, and the
    # nesting of the second one is reconstructed -- confirm before removing.
    break