def main():
    """Train a Naive Bayes classifier on 'carData' and print its hold-out accuracy.

    The rows returned by ``readFile('carData')`` are shuffled in place, split
    70/30 into a training and a test partition, and the fraction of test rows
    whose predicted class equals the true class is printed.

    Each row is expected to hold the features followed by the class label in
    the last position. ``naive.predict`` is expected to return, per test row,
    a mapping from class to score (the predicted class is the key with the
    highest value).
    """
    preparedDataset = readFile('carData')

    # Shuffle on every run so the train/test partition differs between runs.
    random.shuffle(preparedDataset)

    # dataSet_x[k] holds the features of row k, dataSet_y[k] its class label.
    dataSet_x = [row[:-1] for row in preparedDataset]
    dataSet_y = [row[-1] for row in preparedDataset]

    # First 70% of the shuffled rows train the model, the rest evaluate it.
    nToTrain = int(math.floor(len(dataSet_x) * 0.7))

    dataSet_x_train = dataSet_x[:nToTrain]
    dataSet_y_train = dataSet_y[:nToTrain]

    dataSet_x_test = dataSet_x[nToTrain:]
    dataSet_y_test = dataSet_y[nToTrain:]

    # The test partition is empty only when there are no rows at all; bail
    # out instead of dividing by zero further down.
    if not dataSet_y_test:
        print("No rows available to evaluate; accuracy is undefined.")
        return

    naive = NaiveBayes()

    # naive.train(features, classes)
    naive.train(dataSet_x_train, dataSet_y_train)

    # naive.predict(rows_to_classify) -> one {class: score} mapping per row
    results = naive.predict(dataSet_x_test)

    # Score the model: count the rows whose best-scoring class is the true one.
    hits = sum(
        1
        for scores, y in zip(results, dataSet_y_test)
        if y == max(scores, key=scores.get)
    )

    # Single parenthesised argument: prints identically on Python 2 and 3.
    print(float(hits) / len(dataSet_y_test))
示例#2
0
 def trainTest(self,
               folds=6,
               classifier=3,
               model1="Logistic Regression",
               model2="Logisitc Regression"):
     """Build a NaiveBayes wrapper for two models, cross-validate it and create the model.

     :param folds: forwarded to ``kfold`` as its first argument
     :param classifier: forwarded to ``kfold`` as its second argument
     :param model1: name of the first model, passed to the constructor and
         to ``initializeModels``
     :param model2: name of the second model, passed the same way
     """
     # NOTE(review): the default for model2 is spelled "Logisitc Regression".
     # It is kept as-is because defaults are part of the public interface;
     # confirm whether NaiveBayes matches on the exact model name.
     classifierObject = NaiveBayes(model1, model2)
     # classifierObject.gatherData()
     # Same output as the old `print model1, model2` statement, but valid on
     # both Python 2 and Python 3.
     print("%s %s" % (model1, model2))
     classifierObject.initializeModels(model1, model2)
     classifierObject.kfold(folds, classifier)
     classifierObject.createModel()
示例#3
0
def run_naive_bayes(df, num_features):
    """
    Run five Naive Bayes experiments on the data frame and report their mean score.

    The frame is split into five stratified random groups. Each group is used
    once as the test set while the other four are concatenated into the
    training set. Progress and per-experiment accuracy are printed.

    :param df: The data set to run the algorithm on
    :param num_features: The number of feature columns; the column at this
        position is used as the label column
    :return: ``statistics.mean`` of the five values returned by ``validate()``
    """
    print(f"Size of total dataset = {len(df)}")

    # Unpacking into exactly five names keeps the five-group requirement explicit.
    fold_a, fold_b, fold_c, fold_d, fold_e = split_into_random_stratified_groups(
        df)
    datasets = [fold_a, fold_b, fold_c, fold_d, fold_e]

    feature_cols = slice(0, num_features)
    separator = "-------------"
    nb_scores = []

    for position, df_test in enumerate(datasets):
        print(separator)
        print(f"Experiment #{position + 1}")
        print(separator)

        # The current group is held out for testing ...
        print(f"Test set size = {len(df_test)}")

        # ... and every other group goes into the training set.
        remaining = [group for k, group in enumerate(datasets) if k != position]
        df_train = pd.concat(remaining)
        print(f"Training set size = {len(df_train)}")

        nb = NaiveBayes(
            df_train.iloc[:, feature_cols],
            df_train.iloc[:, num_features],
            df_test.iloc[:, feature_cols],
            df_test.iloc[:, num_features],
        )

        # Fit, then measure accuracy on the held-out group.
        nb.learn()
        nb_accuracy = nb.validate()
        print(f"Naive Bayes Percent accurate: {nb_accuracy!r}%")
        nb_scores.append(nb_accuracy)

    return statistics.mean(nb_scores)
示例#4
0
def main():
    """Load the data, preprocess it, cross-validate Naive Bayes and plot the confusion matrix."""
    rawData = loadData()

    print("\n############### PROPROCESS DATA ##################")
    # Rows with missing values are handled with the "remove" strategy.
    cleanDataset = Precessor(missing_value_method="remove").preprocess(rawData)

    print("\n###### DATA INFORMATION AFTER PREPROCESSING ######")
    print(cleanDataset.info())

    # Two columns are excluded before the 10-fold cross-validation.
    columnsToDrop = ["race", "native_country"]
    foldCount = 10

    naiveBayesModel = NaiveBayes()
    naiveBayesModel.setRomovedCol(columnsToDrop)
    naiveBayesModel.Kfold_cross_validation(cleanDataset, foldCount)

    naiveBayesModel.plotConfusionMatrix()
# Shuffle the samples with a fixed seed so the split is reproducible.
np.random.seed(13)
np.random.shuffle(idx)
X = X[idx]
y = y[idx]

# split the data
Xtrain = X[:nTrain, :]
ytrain = y[:nTrain]
Xtest = X[nTrain:, :]
ytest = y[nTrain:]

# train the online naive Bayes, two instances at a time.
# The stride of 2 keeps the mini-batches disjoint; with a stride of 1 the
# windows i:i+2 overlap and every interior instance is fitted twice.
modelONB = OnlineNaiveBayes(useLaplaceSmoothing=True)
for i in range(0, nTrain, 2):
    modelONB.fit(Xtrain[i:i+2, :], ytrain[i:i+2])

# train the batch naive Bayes on the whole training set at once
modelNB = NaiveBayes(useLaplaceSmoothing=True)
modelNB.fit(Xtrain, ytrain)

# output predictions on the remaining data
ypred_ONB = modelONB.predict(Xtest)
ypred_NB = modelNB.predict(Xtest)

# compute the accuracy of each model on the held-out test data
accuracyONB = accuracy_score(ytest, ypred_ONB)
accuracyNB = accuracy_score(ytest, ypred_NB)

# Single parenthesised argument: prints identically on Python 2 and 3.
print("Online Naive Bayes Accuracy = "+str(accuracyONB))
print("Batch Naive Bayes Accuracy = "+str(accuracyNB))
示例#6
0
        docId, wordId, count = map(int, row)
        X_train.append([docId, wordId, count])
    X_train = np.array(X_train)
    trainDataFile.close()

    testDataFile = open('Data/test.data', 'r')
    testReader = csv.reader(testDataFile, delimiter=' ')
    for row in testReader:
        docId, wordId, count = map(int, row)
        X_test.append([docId, wordId, count])
    X_test = np.array(X_test)
    testDataFile.close()

    alpha = 1.0 / V
    print 'Initializing Naive Bayes classifier...'
    NBClassifier = NaiveBayes(V, numY)
    print 'Training the model...'
    NBClassifier.train(X_train, y_train, alpha)
    print 'Making predictions on test data...'
    y_predicted = NBClassifier.predict(X_test)

    # Solution for question 3.2
    print 'Accuracy of Naive Bayes classifier: ' + str(
        getAccuracy(y_test, y_predicted))
    print
    #print 'Confusion Matrix:'
    #print
    #printConfusionMatrix(y_test, y_predicted, range(1,21))
    #print

    # Solution for question 3.4
示例#7
0
 def test(self, summarizer="LexRank"):
     """Run the NaiveBayes classifier's own ``test`` routine for the given summarizer.

     :param summarizer: summarizer name forwarded unchanged to ``NaiveBayes.test``
     """
     # A fresh classifier is created for every call; nothing is returned.
     NaiveBayes().test(summarizer)
示例#8
0
trainIndex = []

# Each entry of trainlabels is a (label, row index) pair: the referenced row
# becomes a training sample and its index is remembered.
for (label, i) in trainlabels:
    trainX.append(data[i])
    trainY.append(label)
    trainIndex.append(i)

# Every row without a label is a test row. Membership is checked against a
# set so the scan over all rows stays linear instead of quadratic.
_labelledRows = set(trainIndex)
testIndex = [i for i in range(len(data)) if i not in _labelledRows]
testX = [data[i] for i in testIndex]

################# USING NAIVE BAYES TO PREDICT SCORE #################

clf = NaiveBayes()
clf.train(trainX, trainY)
pred = clf.prediction(testX)

if '-save' in sys.argv:
    # The context manager closes the file even if printing or writing fails.
    with open(resultFile, 'w') as writeFile:
        for (idx, p) in zip(testIndex, pred):
            print(p, idx)
            # One "<prediction> <row index>" line per test row.
            writeFile.write('{} {}\n'.format(p, idx))

else:
    for (idx, p) in zip(testIndex, pred):
        print(p, idx)
示例#9
0
    randItemIterator3 = 4

    print()
    print(
        "############################################################################"
    )
    print(
        "-------------------------Problem 2 question 1a/2----------------------------"
    )
    print(
        "---Testing the prediction on the test data along with scatter plot and ROC--"
    )
    print(
        "############################################################################"
    )
    NaiveBayes(set1, randItemIterator1, 1, 1, m1, m2, stdDev1, stdDev2)

    #    print()
    #    print("############################################################################")
    #    print("-------------------------Problem 2 question 1b------------------------------")
    #    print("---------Changing number of samples [10, 20, 50, 100, 300, 500]-------------")
    #    print("############################################################################")
    #    NaiveBayes(set2, randItemIterator2, 0, 0, m1, m2, stdDev1, stdDev2)
    #
    print()
    print(
        "############################################################################"
    )
    print(
        "-------------------------Problem 2 question 1c/2----------------------------"
    )
示例#10
0
def predict(model, dataset, vari, deli, day):
    """Fit ``model`` on a bootstrap sample for one contract/day and predict that day.

    :param model: object exposing ``fit(X, Y)`` and ``predict(x)``
    :param dataset: object exposing ``bootstrap(vari, deli, day, n=...)``,
        which returns an ``(X, Y)`` pair, and ``get_x(vari, deli, day)``
    :param vari: forwarded unchanged to the dataset calls
    :param deli: forwarded unchanged to the dataset calls
    :param day: forwarded unchanged to the dataset calls
    :return: whatever ``model.predict`` returns for ``dataset.get_x(...)``
    """
    sample_size = 10000
    features, targets = dataset.bootstrap(vari, deli, day, n=sample_size)
    model.fit(features, targets)
    return model.predict(dataset.get_x(vari, deli, day))

# Script entry point: for one contract, walk through its dates in ascending
# order, refit the model for each date and record predicted vs. actual class.
if __name__=='__main__':
    # vari/deli select the contract in the query and the dataset calls below;
    # m is passed to both DataSet and NaiveBayes.
    vari,deli,m = 'cu','1712',7

    db = DB()
    # Parameterised query; the first 30 returned rows are skipped.
    # NOTE(review): presumably to leave enough history before the first
    # prediction -- confirm.
    days = db.execute("select date from contract_daily where vari=%s and deli=%s order by date asc",
                      (vari, deli))[30:]

    data = DataSet(m, 'direction')

    model = NaiveBayes(m)

    y_real = []
    y_pred = []
    for day in days:
        # Refit on a bootstrap sample drawn for this day (300 is the fourth
        # positional argument of bootstrap), then score the day itself.
        model.fit(*data.bootstrap(vari, deli, day, 300))
        r = model.predict(data.get_x(vari, deli, day))
        # Collapse the three values r[0..2] into the index of the largest
        # one; ties resolve to the lowest index.
        if r[0]>=r[1] and r[0]>=r[2]:
            r = 0
        elif r[1]>=r[0] and r[1]>=r[2]:
            r = 1
        else:
            r = 2
        y_pred.append(r)
        y_real.append(data.get_y(vari, deli, day))
    
示例#11
0
# Reorder the labels with the same permutation used for the samples.
y = y[idx]

# split the data
Xtrain = X[:nTrain, :]
ytrain = y[:nTrain]
Xtest = X[nTrain:, :]
ytest = y[nTrain:]

# train the decision tree
modelDT = DecisionTreeClassifier()
modelDT.fit(Xtrain, ytrain)
# train the naive Bayes
modelNB = NaiveBayes(useLaplaceSmoothing=True)
modelNB.fit(Xtrain, ytrain)

# output predictions on the remaining data
ypred_DT = modelDT.predict(Xtest)
ypred_NB = modelNB.predict(Xtest)

# compute the accuracy of each model on the held-out test data
accuracyDT = accuracy_score(ytest, ypred_DT)
accuracyNB = accuracy_score(ytest, ypred_NB)

# Single parenthesised argument: prints identically on Python 2 and 3.
print("Decision Tree Accuracy = " + str(accuracyDT))
print("Naive Bayes Accuracy = " + str(accuracyNB))
示例#12
0
 def __init__(self):                        # Main window
     """Build the main 'Filtro de Spam' window and run its event loop.

     The root window shows a title label and a menu bar with an options
     menu (credits, exit) plus one entry per classifier. Each classifier
     entry opens its own window (``self.window2`` for Naive Bayes,
     ``self.window3`` for the Perceptron) with buttons that run the
     classifier and show its metrics, word frequencies and confusion
     matrices. The call blocks in ``mainloop()`` until the application
     is closed.
     """

     self.window = Tk()
     # NOTE(review): these two objects are not referenced again at this
     # scope; each classifier window below creates its own. They are kept
     # because the NaiveBayes constructor's side effects are not visible here.
     variavel_controlo = BooleanVar(self.window)

     nb=NaiveBayes()

     self.window.title('Filtro de Spam')
     self.window.resizable(False, False)
     self.window.geometry('600x450+584+230')

     labelwel = Label(self.window)
     labelwel.place(relx=0.125, rely=0.325, height=75, width=450)
     labelwel.configure(background="#0082ba", foreground="white", relief='ridge', text='''Classificadores Filtro Spam''', font=("Rockwell", 14))

     def menuNaiveBayes():                  # Naive Bayes spam-filter window
         # NOTE(review): this creates a second Tk root; Toplevel is the
         # conventional way to open an additional window -- left unchanged.
         self.window2 = Tk()
         # True once the classifier has been started from this window.
         variavel_controlo = BooleanVar(self.window2)

         nb=NaiveBayes()

         self.window2.title('Naïve Bayes')
         self.window2.resizable(False, False)
         self.window2.geometry('600x450+584+230')
         self.window2.configure(background="#91cded")

         labelwel = Label(self.window2)
         labelwel.place(relx=0.25, rely=0.067, height=35, width=305)
         labelwel.configure(background="#0082ba", foreground="white", relief='ridge', text='''Classificador Naïve Bayes''', font=("Rockwell", 14))

         # frame1: start/close buttons
         frame1 = Frame(self.window2)
         frame1.place(relx=0.125, rely=0.225, relheight=0.250, relwidth=0.750)
         frame1.configure(relief='groove', borderwidth="2", width=305)
         labelframe = LabelFrame(frame1, text="Opções")
         labelframe.pack(fill="both", expand="yes")
         labelframe.configure(background="#f7f7f7")

         # frame2: metric buttons
         frame2 = Frame(self.window2)
         frame2.place(relx=0.125, rely=0.500, relheight=0.475, relwidth=0.350)
         frame2.configure(relief='groove', borderwidth="2", width=305)
         labelframe2 = LabelFrame(frame2, text="Métricas")
         labelframe2.pack(fill="both", expand="yes")

         # frame3: word-frequency and confusion-matrix buttons
         frame3 = Frame(self.window2)
         frame3.place(relx=0.525, rely=0.500, relheight=0.475, relwidth=0.350)
         frame3.configure(relief='groove', borderwidth="2", width=305)
         labelframe3 = LabelFrame(frame3, text="Palavras e Matrizes")
         labelframe3.pack(fill="both", expand="yes")

         def menuLista():                   # Window listing absolute and relative word frequencies for Naive Bayes
             if variavel_controlo.get() == False:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")
             else:

                 palavras = nb.matrizP
                 palavrasRel = nb.matrizP_Relativa

                 window9 = Toplevel()
                 window9.resizable(False, False)
                 window9.title('Frequências de Palavras')
                 window9.geometry("700x450+517+216")

                 # Left half: absolute counts
                 frame1 = Frame(window9)
                 frame1.place(relx=0.0, rely=0.0, relheight=1.011, relwidth=0.508)
                 frame1.configure(relief='raised', borderwidth="2", width=700, background="#91cded")

                 titleA = Label(frame1)
                 titleA.place(relx=0.325, rely=0.0, height=40, width=125)
                 titleA.configure(relief="ridge", text="""Absolutas""", fg='white', background="#028f99", font=("Rockwell", 10))

                 labeltitle = Label(frame1)
                 labeltitle.place(relx=0.035, rely=0.138, height=31, width=80)
                 labeltitle.configure(relief="ridge", text="""Palavra:""", fg='white', background="#0082ba", font=("Rockwell", 9))

                 labeltitle2 = Label(frame1)
                 labeltitle2.place(relx=0.625, rely=0.138, height=31, width=50)
                 labeltitle2.configure(relief="ridge", text="""Spam""", fg='white', background="#0082ba", font=("Rockwell", 9))

                 labeltitle3 = Label(frame1)
                 labeltitle3.place(relx=0.805, rely=0.138, height=31, width=50)
                 labeltitle3.configure(relief="ridge", text="""Ham""", fg='white', background="#0082ba", font=("Rockwell", 9))

                 palAbs = Text(frame1)

                 # One text line per word: the word followed by its two values
                 # (presumably spam then ham, matching the column headers).
                 for i, j in palavras.items():
                     lista = "{:22s}".format(str(i)) + "{:3s}".format(str(j[0])) + "   " + "{:3s}".format(str(j[1])) + "\n"
                     palAbs.insert(END, lista)

                 palAbs.place(relx=0.035, rely=0.225, relheight=0.700, relwidth=0.915)
                 # Disabled after filling so the listing is read-only.
                 palAbs.configure(width=174, state='disabled')

                 # Right half: relative frequencies
                 frame2 = Frame(window9)
                 frame2.place(relx=0.5, rely=0.0, relheight=1.011, relwidth=0.508)
                 frame2.configure(relief='raised', borderwidth="2", width=700, background="#91cded")

                 title = Label(frame2)
                 title.place(relx=0.325, rely=0.0, height=40, width=125)
                 title.configure(relief="ridge", text="""Relativas""", fg='white', background="#028f99", font=("Rockwell", 10))

                 labeltitle2 = Label(frame2)
                 labeltitle2.place(relx=0.035, rely=0.138, height=31, width=80)
                 labeltitle2.configure(relief="ridge", text="""Palavra:""", fg='white', background="#0082ba", font=("Rockwell", 9))

                 labeltitle2_2 = Label(frame2)
                 labeltitle2_2.place(relx=0.565, rely=0.138, height=31, width=50)
                 labeltitle2_2.configure(relief="ridge", text="""Spam""", fg='white', background="#0082ba", font=("Rockwell", 9))

                 labeltitle3_2 = Label(frame2)
                 labeltitle3_2.place(relx=0.765, rely=0.138, height=31, width=50)
                 labeltitle3_2.configure(relief="ridge", text="""Ham""", fg='white', background="#0082ba", font=("Rockwell", 9))

                 palRel = Text(frame2)

                 for i, j in palavrasRel.items():
                     lista = "{:18s}".format(str(i)) + "{:0.4f}".format(j[0]) + " " + "{:0.4f}".format(j[1]) + "\n"
                     palRel.insert(END, lista)

                 palRel.place(relx=0.035, rely=0.225, relheight=0.700, relwidth=0.915)
                 palRel.configure(width=174, state='disabled')

                 window9.mainloop()

         def iniciar():                  # Run the Naive Bayes algorithm and mark it as started
             variavel_controlo.set(True)
             nb.algoritmo()

         def exatidao():                 # Show the Naive Bayes accuracy (nb.exatidao)
             if variavel_controlo.get() == True:
                 messagebox.showinfo(title="Exatidão", message='''{:0.3f} %'''.format(nb.exatidao))
             else:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")

         def sensibilidade():            # Show the Naive Bayes sensitivity (nb.sensibilidade)
             if variavel_controlo.get() == True:
                 messagebox.showinfo(title="Sensibilidade", message='''{:0.3f} %''' .format(nb.sensibilidade))
             else:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")

         def precisao():                 # Show the Naive Bayes precision (nb.precisao)
             if variavel_controlo.get() == True:
                 messagebox.showinfo(title="Precisão", message='''{:0.3f} %''' .format(nb.precisao))
             else:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")

         def fScore():                   # Show the Naive Bayes F-score (nb.fScore)
             if variavel_controlo.get() == True:
                 messagebox.showinfo(title="fScore", message='''{:0.3f} %''' .format(nb.fScore))
             else:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")

         def MatrizC():                  # Show the Naive Bayes confusion matrix
             if variavel_controlo.get() == True:
                 messagebox.showinfo(title="Matriz de Confusão", message=str(nb.confMatriz))
             else:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")

         def MatrizRel():                # Show the Naive Bayes relative confusion matrix
             if variavel_controlo.get() == True:
                 messagebox.showinfo(title="Matriz Conf. Relativa", message=str(nb.confMatriz_Relativa))
             else:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")

         buttonlista = Button(frame1, command=lambda: iniciar())
         buttonlista.place(relx=0.075, rely=0.400, height=35, width=150)
         buttonlista.configure(text='''Iniciar''', pady="0", width=267, background="#9ef702", foreground="black", font=("Rockwell", 11))

         buttonclose = Button(frame1, command=self.window2.destroy)
         buttonclose.place(relx=0.475, rely=0.400, height=35, width=200)
         buttonclose.configure(text='''Fechar Classificador''', pady="0", width=267, background="#f74702", foreground="white", font=("Rockwell", 11))

         buttonExat = Button(frame2, command=lambda: exatidao())
         buttonExat.place(relx=0.135, rely=0.150, height=30, width=150)
         buttonExat.configure(text='''Exatidão''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

         buttonSensibilidade = Button(frame2, command=lambda: sensibilidade())
         buttonSensibilidade.place(relx=0.135, rely=0.375, height=30, width=150)
         buttonSensibilidade.configure(text='''Sensibilidade''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

         buttonPrecisao = Button(frame2, command=lambda: precisao())
         buttonPrecisao.place(relx=0.135, rely=0.600, height=30, width=150)
         buttonPrecisao.configure(text='''Precisão''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

         buttonfScore = Button(frame2, command=lambda: fScore())
         buttonfScore.place(relx=0.135, rely=0.825, height=30, width=150)
         buttonfScore.configure(text='''fScore''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

         buttonLista = Button(frame3, command=lambda: menuLista())
         buttonLista.place(relx=0.075, rely=0.225, height=30, width=175)
         buttonLista.configure(text='''Frequência Palavras''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

         buttonMatrizC = Button(frame3, command=lambda: MatrizC())
         buttonMatrizC.place(relx=0.075, rely=0.475, height=30, width=175)
         buttonMatrizC.configure(text='''Matriz de Confusão''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

         buttonMatrizRel = Button(frame3, command=lambda: MatrizRel())
         buttonMatrizRel.place(relx=0.075, rely=0.725, height=30, width=175)
         buttonMatrizRel.configure(text='''Matriz Conf. Relativa''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

     def menuPerceptron():               # Perceptron spam-filter window
         self.window3 = Tk()
         # True once the classifier has been started from this window.
         variavel_controlo = BooleanVar(self.window3)

         pt = Perceptron()

         self.window3.title('Perceptron')
         self.window3.resizable(False, False)
         self.window3.geometry('600x475+584+230')
         self.window3.configure(background="#91cded")

         labelwel = Label(self.window3)
         labelwel.place(relx=0.25, rely=0.067, height=35, width=305)
         labelwel.configure(background="#0082ba", foreground="white", relief='ridge', text='''Classificador Perceptron''', font=("Rockwell", 14))

         # frame1: start/close buttons
         frame1 = Frame(self.window3)
         frame1.place(relx=0.125, rely=0.225, relheight=0.250, relwidth=0.750)
         frame1.configure(relief='groove', borderwidth="2", width=305)
         labelframe = LabelFrame(frame1, text="Opções")
         labelframe.pack(fill="both", expand="yes")
         labelframe.configure(background="#f7f7f7")

         # frame2: metric buttons
         frame2 = Frame(self.window3)
         frame2.place(relx=0.125, rely=0.500, relheight=0.475, relwidth=0.350)
         frame2.configure(relief='groove', borderwidth="2", width=305)
         labelframe2 = LabelFrame(frame2, text="Métricas")
         labelframe2.pack(fill="both", expand="yes")

         # frame3: word-frequency and confusion-matrix buttons
         frame3 = Frame(self.window3)
         frame3.place(relx=0.525, rely=0.500, relheight=0.475, relwidth=0.350)
         frame3.configure(relief='groove', borderwidth="2", width=305)
         labelframe3 = LabelFrame(frame3, text="Palavras e Matrizes")
         labelframe3.pack(fill="both", expand="yes")

         def menuListaPerc():            # Window listing the word counts of the Perceptron
             if variavel_controlo.get() == False:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Perceptron.")
             else:

                 palavras = pt.freq_Pal

                 window10 = Toplevel()
                 window10.resizable(False, False)
                 window10.title('Frequências de Palavras')
                 window10.geometry("350x450+517+216")

                 frame1 = Frame(window10)
                 frame1.place(relx=0.0, rely=0.0, relheight=1, relwidth=1)
                 frame1.configure(relief='raised', borderwidth="2", width=700, background="#91cded")

                 titleA = Label(frame1)
                 titleA.place(relx=0.325, rely=0.0, height=40, width=125)
                 titleA.configure(relief="ridge", text="""Frequência""", fg='white', background="#028f99", font=("Rockwell", 10))

                 labeltitle = Label(frame1)
                 labeltitle.place(relx=0.035, rely=0.138, height=31, width=80)
                 labeltitle.configure(relief="ridge", text="""Palavra:""", fg='white', background="#0082ba", font=("Rockwell", 9))

                 labeltitle2 = Label(frame1)
                 labeltitle2.place(relx=0.665, rely=0.138, height=31, width=100)
                 labeltitle2.configure(relief="ridge", text="""Quantidade""", fg='white', background="#0082ba", font=("Rockwell", 9))

                 palAbs = Text(frame1)

                 # One text line per word: the word followed by its count.
                 for chave, valor in palavras.items():
                     lista = "{:26s}".format(str(chave)) + "{:3s}\n".format(str(valor))
                     palAbs.insert(END, lista)

                 palAbs.place(relx=0.035, rely=0.225, relheight=0.700, relwidth=0.915)
                 # Disabled after filling so the listing is read-only.
                 palAbs.configure(width=174, state='disabled')

                 window10.mainloop()

         def iniciar():              # Run the Perceptron algorithm and mark it as started
             variavel_controlo.set(True)
             pt.algoritmo()

         def exatidao():             # Show the Perceptron accuracy (pt.exatidao)
             if variavel_controlo.get() == True:
                 messagebox.showinfo(title="Exatidão", message='''{:0.3f} %'''.format(pt.exatidao))
             else:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Perceptron.")

         def sensibilidade():        # Show the Perceptron sensitivity (pt.sensibilidade)
             if variavel_controlo.get() == True:
                 messagebox.showinfo(title="Sensibilidade", message='''{:0.3f} %''' .format(pt.sensibilidade))
             else:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Perceptron.")

         def precisao():             # Show the Perceptron precision (pt.precisao)
             if variavel_controlo.get() == True:
                 messagebox.showinfo(title="Precisão", message='''{:0.3f} %''' .format(pt.precisao))
             else:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Perceptron.")

         def fScore():               # Show the Perceptron F-score (pt.fScore)
             if variavel_controlo.get() == True:
                 messagebox.showinfo(title="fScore", message='''{:0.3f} %''' .format(pt.fScore))
             else:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Perceptron.")

         def MatrizC():              # Show the Perceptron confusion matrix
             if variavel_controlo.get() == True:
                 messagebox.showinfo(title="Matriz de Confusão", message=str(pt.matriz_Confusao))
             else:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Perceptron.")

         def MatrizRel():            # Show the Perceptron relative confusion matrix
             if variavel_controlo.get() == True:
                 messagebox.showinfo(title="Matriz Conf. Relativa", message=str(pt.matriz_Relativa))
             else:
                 messagebox.showerror(title="Erro", message="Inicie o Classificador Perceptron.")

         buttonlista = Button(frame1, command=lambda: iniciar())
         buttonlista.place(relx=0.075, rely=0.400, height=35, width=150)
         buttonlista.configure(text='''Iniciar''', pady="0", width=267, background="#9ef702", foreground="black", font=("Rockwell", 11))

         buttonclose = Button(frame1, command=self.window3.destroy)
         buttonclose.place(relx=0.475, rely=0.400, height=35, width=200)
         buttonclose.configure(text='''Fechar Classificador''', pady="0", width=267, background="#f74702", foreground="white", font=("Rockwell", 11))

         buttonExat = Button(frame2, command=lambda: exatidao())
         buttonExat.place(relx=0.135, rely=0.150, height=30, width=150)
         buttonExat.configure(text='''Exatidão''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

         buttonSensibilidade = Button(frame2, command=lambda: sensibilidade())
         buttonSensibilidade.place(relx=0.135, rely=0.375, height=30, width=150)
         buttonSensibilidade.configure(text='''Sensibilidade''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

         buttonPrecisao = Button(frame2, command=lambda: precisao())
         buttonPrecisao.place(relx=0.135, rely=0.600, height=30, width=150)
         buttonPrecisao.configure(text='''Precisão''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

         buttonfScore = Button(frame2, command=lambda: fScore())
         buttonfScore.place(relx=0.135, rely=0.825, height=30, width=150)
         buttonfScore.configure(text='''fScore''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

         buttonLista = Button(frame3, command=lambda: menuListaPerc())
         buttonLista.place(relx=0.075, rely=0.225, height=30, width=175)
         buttonLista.configure(text='''Frequência Palavras''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

         buttonMatrizC = Button(frame3, command=lambda: MatrizC())
         buttonMatrizC.place(relx=0.075, rely=0.475, height=30, width=175)
         buttonMatrizC.configure(text='''Matriz de Confusão''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

         buttonMatrizRel = Button(frame3, command=lambda: MatrizRel())
         buttonMatrizRel.place(relx=0.075, rely=0.725, height=30, width=175)
         buttonMatrizRel.configure(text='''Matriz Conf. Relativa''', pady="0", width=267, background="#028f99", foreground="white", font=("Rockwell", 10))

     def creditos():                     # Show the project credits in a message box
                     messagebox.showinfo('Créditos',"""  
                     Elaborado por:                         
                                                             
                      - {:20s}       -   Nº 30003769       
                      - {:20s}   -   Nº 30003043       
                      - {:20s}    -   Nº 30005711       
                      - {:20s}    -   Nº 30003039       
                                                             
                             Inteligência Artificial        
                      @UAL - Universidade Autónoma de Lisboa     
                     """.format('Bruno Silva', 'David Monteiro', 'Nuno Barrocas', 'Zacarias Chiena'))

     def quit_program():             # Leave the event loop and destroy the main window
         self.window.quit()
         self.window.destroy()

     def on_closing():               # Confirm, then close the main window and any classifier windows
         if messagebox.askokcancel("Fechar", "De certeza que quer sair?"):
             # Imported here so the handler does not depend on how the
             # module itself imports tkinter.
             from tkinter import TclError

             self.window.destroy()
             # window2/window3 only exist after their menu entry was used,
             # and the user may already have closed them; neither case
             # should abort the shutdown.
             for nome in ('window2', 'window3'):
                 janela = getattr(self, nome, None)
                 if janela is None:
                     continue
                 try:
                     janela.destroy()
                 except TclError:
                     # Already destroyed: nothing left to close.
                     pass
     self.window.protocol("WM_DELETE_WINDOW", on_closing)

     menubar = Menu(self.window)
     gamemenu = Menu(menubar, tearoff=0)
     gamemenu.add_command(label="Sobre", command=lambda: creditos())
     gamemenu.add_command(label="Sair", command=lambda: quit_program())

     menubar.add_cascade(label="Opções", menu=gamemenu)
     menubar.add_cascade(label="Naïve Bayes", command=lambda: menuNaiveBayes())
     menubar.add_cascade(label="Perceptron", command=lambda: menuPerceptron())

     self.window.config(menu=menubar)
     self.window.mainloop()
示例#13
0
 def menuNaiveBayes():                  # Naive Bayes spam-filter window
     """Build the 'Naïve Bayes' classifier window.

     Creates a fixed-size 600x450 window stored on ``self.window2``, a
     ``NaiveBayes`` instance shared by the nested handlers, a title label and
     three framed areas ("Opções", "Métricas", "Palavras e Matrizes") that
     the buttons created further down are placed into.

     ``self`` is not a parameter of this function; it is presumably taken
     from an enclosing scope that is not visible here.
     """
     # NOTE(review): this creates a new Tk root rather than a Toplevel; if the
     # program already has a Tk root (self.window looks like one), confirm
     # that a second root is really intended.
     self.window2 = Tk()
     # "Classifier has been started" flag: set by iniciar() and checked by the
     # result handlers before they read anything from nb.
     variavel_controlo = BooleanVar(self.window2)
     
     # Classifier instance shared by every nested handler of this window.
     nb=NaiveBayes()    
     
     self.window2.title('Naïve Bayes')
     self.window2.resizable(False, False)
     self.window2.geometry('600x450+584+230')
     self.window2.configure(background="#91cded")
     
     # Title banner at the top of the window.
     labelwel = Label(self.window2)
     labelwel.place(relx=0.25, rely=0.067, height=35, width=305)
     labelwel.configure(background="#0082ba", foreground="white", relief='ridge', text='''Classificador Naïve Bayes''', font=("Rockwell", 14))
     
     # Upper frame ("Opções"): parent of the start and close buttons.
     frame1 = Frame(self.window2)
     frame1.place(relx=0.125, rely=0.225, relheight=0.250, relwidth=0.750)
     frame1.configure(relief='groove', borderwidth="2", width=305)
     labelframe = LabelFrame(frame1, text="Opções")
     labelframe.pack(fill="both", expand="yes")
     labelframe.configure(background="#f7f7f7")
         
     # Lower-left frame ("Métricas"): parent of the metric buttons.
     frame2 = Frame(self.window2)
     frame2.place(relx=0.125, rely=0.500, relheight=0.475, relwidth=0.350)
     frame2.configure(relief='groove', borderwidth="2", width=305)
     labelframe2 = LabelFrame(frame2, text="Métricas")
     labelframe2.pack(fill="both", expand="yes")
         
     # Lower-right frame ("Palavras e Matrizes"): parent of the word-frequency
     # and confusion-matrix buttons.
     frame3 = Frame(self.window2)
     frame3.place(relx=0.525, rely=0.500, relheight=0.475, relwidth=0.350)
     frame3.configure(relief='groove', borderwidth="2", width=305)
     labelframe3 = LabelFrame(frame3, text="Palavras e Matrizes")
     labelframe3.pack(fill="both", expand="yes")
     
     def menuLista():                   # Window with the absolute and relative word frequencies of Naive Bayes
         """Open a window listing the classifier's per-word frequencies.

         If the classifier has not been started (``variavel_controlo`` is
         false) an error dialog is shown and nothing else happens. Otherwise a
         fixed-size ``Toplevel`` with two side-by-side panels is built:

         * left, "Absolutas": one row per item of ``nb.matrizP``;
         * right, "Relativas": one row per item of ``nb.matrizP_Relativa``,
           with both values printed to four decimals.

         In each row the key is printed first, followed by element ``[0]`` and
         element ``[1]`` of the value, laid out under the "Spam" and "Ham"
         headings respectively.

         No nested ``mainloop()`` is started for the new window: this handler
         runs as a button callback, so an event loop is already active, and
         starting another one here would stack an extra loop for every
         window opened.
         """
         if not variavel_controlo.get():
             messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")
             return

         # Read both tables before creating any widget, so a missing attribute
         # fails before an empty window appears.
         palavras = nb.matrizP
         palavrasRel = nb.matrizP_Relativa

         janela = Toplevel()
         janela.resizable(False, False)
         janela.title('Frequências de Palavras')
         janela.geometry("700x450+517+216")

         def _rotulo(parent, texto, relx, rely, height, width, background, tamanho):
             # Create, place and style one ridge-bordered heading label.
             rotulo = Label(parent)
             rotulo.place(relx=relx, rely=rely, height=height, width=width)
             rotulo.configure(relief="ridge", text=texto, fg='white', background=background, font=("Rockwell", tamanho))
             return rotulo

         def _painel(relx, titulo, relx_spam, relx_ham, linhas):
             # Build one half of the window: frame, headings and a read-only
             # text box filled with the already formatted rows in `linhas`.
             painel = Frame(janela)
             painel.place(relx=relx, rely=0.0, relheight=1.011, relwidth=0.508)
             painel.configure(relief='raised', borderwidth="2", width=700, background="#91cded")

             _rotulo(painel, titulo, 0.325, 0.0, 40, 125, "#028f99", 10)
             _rotulo(painel, "Palavra:", 0.035, 0.138, 31, 80, "#0082ba", 9)
             _rotulo(painel, "Spam", relx_spam, 0.138, 31, 50, "#0082ba", 9)
             _rotulo(painel, "Ham", relx_ham, 0.138, 31, 50, "#0082ba", 9)

             caixa = Text(painel)
             for linha in linhas:
                 caixa.insert(END, linha)
             caixa.place(relx=0.035, rely=0.225, relheight=0.700, relwidth=0.915)
             # Disable only after filling: a disabled Text ignores insert().
             caixa.configure(width=174, state='disabled')

         _painel(
             0.0, "Absolutas", 0.625, 0.805,
             (
                 "{:22s}{:3s}   {:3s}\n".format(str(palavra), str(valores[0]), str(valores[1]))
                 for palavra, valores in palavras.items()
             ),
         )
         _painel(
             0.5, "Relativas", 0.565, 0.765,
             (
                 "{:18s}{:0.4f} {:0.4f}\n".format(str(palavra), valores[0], valores[1])
                 for palavra, valores in palavrasRel.items()
             ),
         )
         
     def iniciar():                  # Run the Naive Bayes algorithm and unlock the result dialogs
         """Run ``nb.algoritmo()`` and then mark the classifier as started.

         The flag is set only after the algorithm call returns. If the call
         raises, ``variavel_controlo`` keeps its previous value, so the result
         handlers keep refusing to read results that were never produced.
         """
         nb.algoritmo()
         variavel_controlo.set(True)
 
     def exatidao():                 # Show the Naive Bayes accuracy value
         """Show ``nb.exatidao`` (three decimals, followed by ' %') in an info dialog.

         Shows an error dialog instead while the classifier has not been started.
         """
         if not variavel_controlo.get():
             messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")
             return
         texto = "{:0.3f} %".format(nb.exatidao)
         messagebox.showinfo(title="Exatidão", message=texto)
     
     def sensibilidade():            # Show the Naive Bayes sensitivity value
         """Show ``nb.sensibilidade`` (three decimals, followed by ' %') in an info dialog.

         Shows an error dialog instead while the classifier has not been started.
         """
         if not variavel_controlo.get():
             messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")
             return
         texto = "{:0.3f} %".format(nb.sensibilidade)
         messagebox.showinfo(title="Sensibilidade", message=texto)
 
     def precisao():                 # Show the Naive Bayes precision value
         """Show ``nb.precisao`` (three decimals, followed by ' %') in an info dialog.

         Shows an error dialog instead while the classifier has not been started.
         """
         if not variavel_controlo.get():
             messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")
             return
         texto = "{:0.3f} %".format(nb.precisao)
         messagebox.showinfo(title="Precisão", message=texto)
 
     def fScore():                   # Show the Naive Bayes fScore value
         """Show ``nb.fScore`` (three decimals, followed by ' %') in an info dialog.

         Shows an error dialog instead while the classifier has not been started.
         """
         if not variavel_controlo.get():
             messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")
             return
         texto = "{:0.3f} %".format(nb.fScore)
         messagebox.showinfo(title="fScore", message=texto)
 
     def MatrizC():                  # Show the Naive Bayes confusion matrix
         """Show ``str(nb.confMatriz)`` in an info dialog.

         Shows an error dialog instead while the classifier has not been started.
         """
         if not variavel_controlo.get():
             messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")
             return
         texto = str(nb.confMatriz)
         messagebox.showinfo(title="Matriz de Confusão", message=texto)
 
     def MatrizRel():                # Show the Naive Bayes relative confusion matrix
         """Show ``str(nb.confMatriz_Relativa)`` in an info dialog.

         Shows an error dialog instead while the classifier has not been started.
         """
         if not variavel_controlo.get():
             messagebox.showerror(title="Erro", message="Inicie o Classificador Naïve Bayes.")
             return
         texto = str(nb.confMatriz_Relativa)
         messagebox.showinfo(title="Matriz Conf. Relativa", message=texto)
     
     def _novo_botao(parent, acao, texto, relx, rely, height, width, background, foreground, tamanho):
         # One recipe for every button of this window: create it bound to its
         # handler, place it, then apply caption, colours and font.
         botao = Button(parent, command=acao)
         botao.place(relx=relx, rely=rely, height=height, width=width)
         botao.configure(text=texto, pady="0", width=267, background=background, foreground=foreground, font=("Rockwell", tamanho))
         return botao

     # frame1: start the classifier / close this window.
     _novo_botao(frame1, iniciar, "Iniciar", 0.075, 0.400, 35, 150, "#9ef702", "black", 11)
     _novo_botao(frame1, self.window2.destroy, "Fechar Classificador", 0.475, 0.400, 35, 200, "#f74702", "white", 11)

     # frame2: one button per metric dialog.
     _novo_botao(frame2, exatidao, "Exatidão", 0.135, 0.150, 30, 150, "#028f99", "white", 10)
     _novo_botao(frame2, sensibilidade, "Sensibilidade", 0.135, 0.375, 30, 150, "#028f99", "white", 10)
     _novo_botao(frame2, precisao, "Precisão", 0.135, 0.600, 30, 150, "#028f99", "white", 10)
     _novo_botao(frame2, fScore, "fScore", 0.135, 0.825, 30, 150, "#028f99", "white", 10)

     # frame3: word-frequency window and the two confusion-matrix dialogs.
     _novo_botao(frame3, menuLista, "Frequência Palavras", 0.075, 0.225, 30, 175, "#028f99", "white", 10)
     _novo_botao(frame3, MatrizC, "Matriz de Confusão", 0.075, 0.475, 30, 175, "#028f99", "white", 10)
     _novo_botao(frame3, MatrizRel, "Matriz Conf. Relativa", 0.075, 0.725, 30, 175, "#028f99", "white", 10)
示例#14
0
from naiveBayes import NaiveBayes
from glob import glob

# Train the classifier on the 20-newsgroups training split.
r = NaiveBayes('../data/nb/20news-bydate-train/')

r.train()

folders = ['talk.politics.mideast', 'sci.crypt', 'comp.os.ms-windows.misc',
           'comp.sys.ibm.pc.hardware', 'rec.sport.baseball']

# Smoke test: both loops break after their first iteration, so at most one
# file (the first match in the first folder) is printed and classified.
for folder in folders:
    for path in glob('../data/nb/20news-bydate-test/' + folder + "/*"):
        # Single-argument print(...) prints the same text on Python 2 and 3,
        # whereas the print statement is a SyntaxError on Python 3.
        print(path)
        r.testFile(path)
        break
    break