def test(test_data, test_labels, type_of_data, algorithm):
    # Load trained weights/features and set image dimensions for the
    # requested dataset (1 = digits, otherwise faces).
    if type_of_data == 1:
        weights_perceptron = np.load('digits_perceptron_weights.npy')
        features_train = np.load('digits_knn_features.npy')
        height = 28
        width = 28
        classes = 10
    else:
        weights_perceptron = np.load('faces_perceptron_weights.npy')
        features_train = np.load('faces_knn_features.npy')
        height = 70
        width = 60
        classes = 2

    # Read and parse the raw test images and their labels.
    samples, sample_lines = readfile(test_data, type_of_data)
    samples = getsamples(samples, sample_lines, height, width)
    labels, label_lines = readfile(test_labels, type_of_data)
    labels = getlabels(labels)

    # Extract features and dispatch to the selected classifier.
    if type_of_data == 1:
        feature_matrix = get_features_for_digits(samples)
        if algorithm == 'perceptron':
            test_multiclass_perceptron(feature_matrix, labels, classes,
                                       weights_perceptron)
        if algorithm == 'knn':
            test_knn(feature_matrix, labels, features_train, 1)
        if algorithm == 'naivebayes':
            bayes(feature_matrix, labels, features_train, 1)
    else:
        feature_matrix = get_features_for_faces(samples)
        if algorithm == 'perceptron':
            test_binary_perceptron(feature_matrix, labels, classes,
                                   weights_perceptron)
        if algorithm == 'knn':
            test_knn(feature_matrix, labels, features_train, 2)
        if algorithm == 'naivebayes':
            bayes(feature_matrix, labels, features_train, 2)
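# Example invocation (a sketch only; the data and label paths are placeholders,
# not files from this repository): passing 1 selects the 28x28 digit pipeline
# handled by the function above, and 'perceptron' picks the multiclass perceptron.
# test('data/digits/testimages', 'data/digits/testlabels', 1, 'perceptron')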
def bayes_naive(self, predictData, trainData):
    # Leave-one-out style evaluation: each fold of trainData is held out in
    # turn, the naive Bayes model is fit on the remaining folds, and
    # accuracy/precision/recall/F-score are recorded per fold.
    h = hp()
    nb = bayes()
    accuracy = []
    precision = []
    recall = []
    f_score = []
    for i in range(len(trainData)):
        predictData = trainData[i]
        tmp = [lt for j, lt in enumerate(trainData) if j != i]
        td = h.convertToList(tmp)
        classPriorProbabilities = nb.findClassPriorProbability(td)
        classes = nb.segregateClasses(td)
        occurences, means, stdDev = nb.findDescriptorPosteriorProbabilites(
            classes, td)
        nb.classify(predictData, classPriorProbabilities, occurences, means,
                    stdDev)
        truePositives, trueNegatives, falsePositives, falseNegatives = \
            h.findParams(predictData)
        accuracy.append(
            h.findAccuracy(truePositives, trueNegatives, falsePositives,
                           falseNegatives))
        tmpPrecision = h.findPrecision(truePositives, trueNegatives,
                                       falsePositives, falseNegatives)
        tmpRecall = h.findRecall(truePositives, trueNegatives,
                                 falsePositives, falseNegatives)
        precision.append(tmpPrecision)
        recall.append(tmpRecall)
        f_score.append(h.findFMeasure(tmpPrecision, tmpRecall))
    return accuracy, precision, recall, f_score
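# Hedged sketch (not part of the original module): the helpers used above
# (findClassPriorProbability, findDescriptorPosteriorProbabilites, classify)
# appear to implement Gaussian naive Bayes, since they work with per-class
# means and standard deviations. A minimal standalone version of that
# decision rule could look like the following; the name gaussian_nb_predict
# and its argument layout are illustrative assumptions, not the repo's API.
import math


def gaussian_nb_predict(x, priors, means, std_devs):
    """Return the class maximizing prior * product of Gaussian likelihoods."""
    best_class, best_score = None, float('-inf')
    for c in priors:
        # Work in log space to avoid underflow when multiplying many densities.
        score = math.log(priors[c])
        for j, xj in enumerate(x):
            mu, sigma = means[c][j], std_devs[c][j]
            score += (-0.5 * math.log(2 * math.pi * sigma ** 2)
                      - (xj - mu) ** 2 / (2 * sigma ** 2))
        if score > best_score:
            best_class, best_score = c, score
    return best_class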
def bayes_naive(self, predictData, trainData):
    # Fit naive Bayes on the full training set and label predictData in place
    # (no cross-validation here, unlike the evaluation variant above).
    h = hp()
    nb = bayes()
    matrix = defaultdict(list)
    pd = [pt for pt in predictData]
    tmp = [lt for lt in trainData]
    td = h.convertToList(tmp)
    classPriorProbabilities = nb.findClassPriorProbability(td)
    classes = nb.segregateClasses(td)
    occurences, means, stdDev = nb.findDescriptorPosteriorProbabilites(
        classes, td)
    nb.classify(predictData, classPriorProbabilities, occurences, means,
                stdDev)
    return predictData
def getFitness(population):
    # Score every chromosome: accuracy of naive Bayes on the selected feature
    # subset, plus a small bonus for using fewer features.
    fitness = []
    print population
    for i in range(len(population)):
        current_no_of_features = 0
        for j in range(no_of_features):
            if population[i][j] == 1:
                current_no_of_features += 1
        new_data_frame = dropColumns(population[i], data_frame)
        accuracy = bayes(new_data_frame)
        #print "Accuracy", accuracy
        # Weight accuracy heavily, with a small reward for chromosomes that
        # keep fewer features.
        fitness.append(0.99 * accuracy +
                       0.01 * (no_of_features - current_no_of_features))
    return fitness
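# Worked example of the fitness above (numbers are illustrative only): with
# no_of_features = 8, a chromosome that keeps 3 features and reaches 0.74
# accuracy scores 0.99 * 0.74 + 0.01 * (8 - 3) = 0.7326 + 0.05 = 0.7826,
# so a sparser feature subset acts only as a small tie-breaker on top of accuracy.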
def bayes_naive_demo(self, predictData, trainData):
    # Train on the full training set, print the (unnormalised) posterior score
    # P(X|Hi) * P(Hi) for every class, and report the argmax class.
    h = hp()
    nb = bayes()
    classPriorProbabilities = nb.findClassPriorProbability(trainData)
    classes = nb.segregateClasses(trainData)
    occurences, means, stdDev = nb.findDescriptorPosteriorProbabilites(
        classes, trainData)
    probabilities = nb.classify_demo(predictData, classPriorProbabilities,
                                     occurences, means, stdDev)
    maxProb = float('-inf')
    classKey = -1
    for key in probabilities:
        print("P(X|H{})*P(H{}) = {}".format(key, key, probabilities[key]))
        if probabilities[key] > maxProb:
            maxProb = probabilities[key]
            classKey = key
    print("This test data record belongs to: Class {}".format(classKey))
import pandas as pd
import random
import math
import numpy as np

from naive_bayes import bayes

file_name = raw_input("Enter data file location: ")
#col_names = ['pregnant', 'glucose', 'bp', 'skin', 'insulin', 'bmi', 'pedigree', 'age', 'label']  # features of the pima data set
data_frame = pd.read_csv(file_name, header=None)
no_of_features = len(data_frame.columns) - 1
print no_of_features, " columns"

# Baseline: accuracy of naive Bayes with all features present.
original_accuracy = bayes(data_frame)
print "Original accuracy", original_accuracy

uniformProbability = 0.5
mutationProbability = 0.1


#Create a random individual
def createIndividual():
    # Sample a random chromosome (1 = keep the feature), rejecting the
    # all-zero chromosome so at least one feature is always selected.
    all_zero = True
    while True:
        genes = np.random.choice([0, 1], size=(no_of_features, ),
                                 p=[4. / 5, 1. / 5])
        for i in range(len(genes)):
            if genes[i] == 1:
                all_zero = False
                break
        if all_zero == False:
            break