Example #1
import numpy as np

def test(test_data, test_labels, type_of_data, algorithm):
    # Load the pretrained weights/features for the requested data set.
    if type_of_data == 1:  # digit images
        weights_perceptron = np.load('digits_perceptron_weights.npy')
        features_train = np.load('digits_knn_features.npy')
        height = 28
        width = 28
        classes = 10
    else:  # face images
        weights_perceptron = np.load('faces_perceptron_weights.npy')
        features_train = np.load('faces_knn_features.npy')
        height = 70
        width = 60
        classes = 2
    # Parse the raw test samples and their labels.
    samples, sample_lines = readfile(test_data, type_of_data)
    samples = getsamples(samples, sample_lines, height, width)
    labels, label_lines = readfile(test_labels, type_of_data)
    labels = getlabels(labels)
    # Extract features and dispatch to the chosen classifier.
    if type_of_data == 1:
        feature_matrix = get_features_for_digits(samples)
        if algorithm == 'perceptron':
            test_multiclass_perceptron(feature_matrix, labels, classes, weights_perceptron)
        elif algorithm == 'knn':
            test_knn(feature_matrix, labels, features_train, 1)
        elif algorithm == 'naivebayes':
            bayes(feature_matrix, labels, features_train, 1)
    else:
        feature_matrix = get_features_for_faces(samples)
        if algorithm == 'perceptron':
            test_binary_perceptron(feature_matrix, labels, classes, weights_perceptron)
        elif algorithm == 'knn':
            test_knn(feature_matrix, labels, features_train, 2)
        elif algorithm == 'naivebayes':
            bayes(feature_matrix, labels, features_train, 2)
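A hypothetical invocation of test (both file paths below are placeholders, not the original project's data layout; readfile defines the real on-disk format):

# Evaluate the kNN classifier on the digit test set.
test('data/digits_test_images', 'data/digits_test_labels',
     type_of_data=1, algorithm='knn')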
Example #2
def bayes_naive(self, predictData, trainData):
    # Leave-one-out cross-validation: every record in trainData is held
    # out once as the test record (this rebinding shadows the predictData
    # argument, which is otherwise unused).
    h = hp()
    nb = bayes()
    accuracy = []
    precision = []
    recall = []
    f_score = []
    for i in range(len(trainData)):
        predictData = trainData[i]
        tmp = [lt for j, lt in enumerate(trainData) if j != i]
        td = h.convertToList(tmp)
        # Fit the naive Bayes model on the remaining records.
        classPriorProbabilities = nb.findClassPriorProbability(td)
        classes = nb.segregateClasses(td)
        occurences, means, stdDev = nb.findDescriptorPosteriorProbabilites(
            classes, td)
        nb.classify(predictData, classPriorProbabilities, occurences,
                    means, stdDev)
        # Score the held-out record and collect the fold's metrics.
        truePositives, trueNegatives, falsePositives, falseNegatives = h.findParams(
            predictData)
        accuracy.append(
            h.findAccuracy(truePositives, trueNegatives, falsePositives,
                           falseNegatives))
        tmpPrecision = h.findPrecision(truePositives, trueNegatives,
                                       falsePositives, falseNegatives)
        tmpRecall = h.findRecall(truePositives, trueNegatives,
                                 falsePositives, falseNegatives)
        precision.append(tmpPrecision)
        recall.append(tmpRecall)
        f_score.append(h.findFMeasure(tmpPrecision, tmpRecall))
    return accuracy, precision, recall, f_score
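The fold construction above is the standard leave-one-out pattern; a minimal self-contained sketch of just that splitting step (the names are illustrative, not from the project):

def leave_one_out_splits(records):
    # Yield (held_out, training_rest) pairs, one per record.
    for i, held_out in enumerate(records):
        rest = [r for j, r in enumerate(records) if j != i]
        yield held_out, rest

for test_rec, train_recs in leave_one_out_splits(['a', 'b', 'c']):
    print(test_rec, train_recs)  # a ['b', 'c'], then b ['a', 'c'], then c ['a', 'b']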
Example #3
def bayes_naive(self, predictData, trainData):
    # Variant of Example #2: train once on all of trainData and classify
    # predictData in place instead of cross-validating.
    h = hp()
    nb = bayes()
    td = h.convertToList(list(trainData))
    classPriorProbabilities = nb.findClassPriorProbability(td)
    classes = nb.segregateClasses(td)
    occurences, means, stdDev = nb.findDescriptorPosteriorProbabilites(classes, td)
    nb.classify(predictData, classPriorProbabilities, occurences, means, stdDev)
    return predictData
Example #4
def getFitness(population):
    # Fitness of each individual: classifier accuracy plus a small
    # reward for using fewer features.
    fitness = []
    print(population)
    for i in range(len(population)):
        # Count how many features this individual keeps (gene == 1).
        current_no_of_features = 0
        for j in range(no_of_features):
            if population[i][j] == 1:
                current_no_of_features += 1
        # Drop the unselected columns and measure naive Bayes accuracy.
        new_data_frame = dropColumns(population[i], data_frame)
        accuracy = bayes(new_data_frame)
        # Weight accuracy heavily, but give states with fewer features
        # a small bonus so sparser individuals are preferred.
        fitness.append(0.99 * accuracy + 0.01 *
                       (no_of_features - current_no_of_features))
    return fitness
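As a sanity check of the weighting (numbers invented for illustration): with 8 features in total, an individual that keeps 3 of them and reaches 0.75 accuracy scores 0.99 * 0.75 + 0.01 * (8 - 3) = 0.7425 + 0.05 = 0.7925. One dropped feature adds 0.01 while one percentage point of accuracy adds 0.0099, so removing a feature is worth roughly the same as a one-point accuracy gain.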
Example #5
def bayes_naive_demo(self, predictData, trainData):
    h = hp()
    nb = bayes()
    # Train on the full training set, then print the posterior score for
    # every class and report the highest-scoring one.
    classPriorProbabilities = nb.findClassPriorProbability(trainData)
    classes = nb.segregateClasses(trainData)
    occurences, means, stdDev = nb.findDescriptorPosteriorProbabilites(
        classes, trainData)
    probabilities = nb.classify_demo(predictData, classPriorProbabilities,
                                     occurences, means, stdDev)
    maxProb = float('-inf')
    classKey = -1
    for key in probabilities:
        print("P(X|H{})*P(H{}) = {}".format(key, key, probabilities[key]))
        if probabilities[key] > maxProb:
            maxProb = probabilities[key]
            classKey = key
    print("This test data record belongs to: Class {}".format(classKey))
Example #6
import pandas as pd
import random
import math
import numpy as np
from naive_bayes import bayes

file_name = input("Enter data file location: ")
#col_names = ['pregnant', 'glucose', 'bp', 'skin', 'insulin', 'bmi', 'pedigree', 'age', 'label'] #features of the pima data set
data_frame = pd.read_csv(file_name, header=None)
no_of_features = len(data_frame.columns) - 1  # last column is the class label
print(no_of_features, "columns")
original_accuracy = bayes(data_frame)  # get accuracy with all features present
print("Original accuracy", original_accuracy)
uniformProbability = 0.5  # presumably the gene-swap probability for uniform crossover
mutationProbability = 0.1  # presumably the chance of flipping a gene during mutation


#Create a random individual: one 0/1 gene per feature, biased towards 0,
#redrawn until at least one feature is selected
def createIndividual():
    while True:
        genes = np.random.choice([0, 1],
                                 size=(no_of_features, ),
                                 p=[4. / 5, 1. / 5])
        if genes.any():  # at least one gene is 1
            break
    return genes
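uniformProbability and mutationProbability are defined above but never used in the code shown; a plausible sketch of the uniform-crossover and mutation steps they would drive, reusing the module's np import and globals (the function names and the per-gene interpretation are assumptions, not taken from the original project):

def crossover(parent_a, parent_b):
    # Uniform crossover: take each gene from parent_a with probability
    # uniformProbability, otherwise from parent_b.
    mask = np.random.random(no_of_features) < uniformProbability
    return np.where(mask, parent_a, parent_b)

def mutate(individual):
    # Flip each gene independently with probability mutationProbability.
    flips = np.random.random(no_of_features) < mutationProbability
    return np.where(flips, 1 - individual, individual)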