def dtree_Validate(dataName, grpName, folds, trans=None):
    """
    params:
        dataName := file with the data set
        grpName := file with the different groupings
        folds := number of folds
        trans := transformation function to be applied to the data set;
            None (the default) leaves the data untransformed
    objective: performs cross validation using a decision tree as classifier
    returns: the value produced by ev.rocData for the accumulated folds
        (it is indexed with "Acc" below to print the accuracy)
    """
    valid = vd.Validate(grpName, folds)
    data, labels = bd(dataName)
    results = []  # one (list_predicted, list_groundTruth) tuple per fold
    for i in range(valid.getFoldCount()):
        # get the train and test indices of the data set
        testIndex, trainIndex = valid.getTest(i), valid.getTrain(i)
        # build the test set and test labels
        testSet, testLabels = data[testIndex, :], labels[testIndex]
        # build the train set and training labels
        trainSet, trainLabels = data[trainIndex, :], labels[trainIndex]

        # The transform is computed from the training fold only and then
        # applied to both folds.
        if trans is not None:
            if trans is fld:
                # fld also receives the labels; its projection is reshaped
                # into a single float64 column
                proj = trans(trainSet, trainLabels)
                trainSet = np.matmul(trainSet, proj)
                trainSet = trainSet.reshape(-1, 1).astype(np.float64)
                testSet = np.matmul(testSet, proj)
                testSet = testSet.reshape(-1, 1).astype(np.float64)
            else:
                proj = trans(trainSet).transpose()
                trainSet = np.matmul(trainSet, proj)
                testSet = np.matmul(testSet, proj)

        # standardize the training and test set
        trainSet, testSet = standard(trainSet, testSet)
        # classify test set and add it to the results list
        predicted = dtree.dtree(trainSet, testSet, trainLabels)
        results.append((predicted, testLabels))
        # A mangled comment used to leave a bare `r` statement here, which
        # raised NameError on the first fold. The disabled alternative was:
        # results.append((randForest(trainSet, testSet, trainLabels), testLabels))

    # each evaluation step consumes the output of the previous one
    results = ev.buildConfusionMatrices(results)
    results = ev.normalizeConfMat(results)
    results = ev.getAvgProbMatrix(results)
    results = ev.rocData(results)
    print("dtree Accuracy: %f" % results["Acc"])
    return results
Exemplo n.º 2
0
 def __init__(self) -> None:
     """Constructor: call `dtree.dtree()` with no arguments and keep the result on `self.tree`."""
     self.tree = dtree.dtree()
Exemplo n.º 3
0
# Code from Chapter 13 of Machine Learning: An Algorithmic Perspective (2nd Edition)
# by Stephen Marsland (http://stephenmonika.net)

# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.

# Stephen Marsland, 2008, 2014

import dtree
import randomforest
tree = dtree.dtree()
forest = randomforest.randomforest()
data, classes, features = tree.read_data('car.data')

# Alternate the rows between the two sets: even indices train, odd indices test.
train, test = data[::2], data[1::2]
trainc, testc = classes[::2], classes[1::2]

# Fit on the training half, then classify the held-out half.
# NOTE(review): the positional 50, 100, 2 are presumably tree count, samples
# per tree and features per split -- confirm against randomforest.rf.
f = forest.rf(train, trainc, features, 50, 100, 2, maxlevel=3)
# Alternative configuration:
#f = forest.rf(train,trainc,features,100,200,2)
out = forest.rfclass(f, test)

import numpy as np

# Three independent zero-filled arrays, one slot per entry of `out`.
a, b, d = (np.zeros(len(out)) for _ in range(3))

for i in range(len(out)):
    if testc[i] == 'good' or testc[i]== 'v-good':
Exemplo n.º 4
0
 def __init__(self) -> None:
     """Constructor: call `dtree.dtree()` with no arguments and keep the result on `self.tree`."""
     self.tree = dtree.dtree()
Exemplo n.º 5
0
 def __init__(self) -> None:
     """Constructor: call `dtree.dtree()` with no arguments and keep the result on `self.tree`."""
     self.tree = dtree.dtree()
Exemplo n.º 6
0
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 22 12:52:45 2017

@author: jabong
"""

import dtree

# Build a tree from the party data set and print it.
tree = dtree.dtree()
party, classes, features = tree.read_data('party.data')
t = tree.make_tree(party, classes, features)
tree.printTree(t, ' ')

# Single-argument print(...) behaves the same as the Python 2 print statement
# and is also valid Python 3.
print(tree.classifyAll(t, party))

# Classify each row individually; the return value is not used here.
for row in party:
    tree.classify(t, row)

print("True Classes")
print(classes)
Exemplo n.º 7
0
# Feature matrix (the columns named in list_col) and the 'class' column as raw arrays.
all_inputs = df[list_col].values
all_classes = df['class'].values

# Split into 70% training and 30% test data.
(train_inputs, test_inputs, train_classes,
 test_classes) = model_selection.train_test_split(all_inputs,
                                                  all_classes,
                                                  train_size=0.7,
                                                  test_size=0.3)
print(train_inputs)
print(train_classes)
print(test_inputs)
print(test_classes)

# Each classifier call below is bracketed by timer() readings so its run time
# can be derived from the start_*/end_* pair.

# DECISION TREES
start_tree = timer()
score_dtree, error_dtree = dtree.dtree(train_inputs, test_inputs,
                                       train_classes, test_classes)
end_tree = timer()

# NAIVE BAYES
start_bayes = timer()
score_nbayes, error_nbayes = bayes.bayes(train_inputs, test_inputs,
                                         train_classes, test_classes)
end_bayes = timer()

# k-NN k = 3
start_knn3 = timer()
score_knn3, error_knn3 = k_NN.k_NN(3, train_inputs, test_inputs, train_classes,
                                   test_classes)
end_knn3 = timer()

# neural network
Exemplo n.º 8
0
# Target: the last column of df, with a trailing axis appended.
y = df.iloc[:, -1].values[..., np.newaxis]

# prepare training and test sets
x, y = data.shuffle(x, y)
# NOTE(review): ratio=0.8 is presumably the training fraction -- confirm in data.split.
(x_train, y_train), (x_test, y_test) = data.split(x, y, ratio=0.8)

print()
print("x_train:", x_train.shape, "y_train:", y_train.shape)
print("x_test:", x_test.shape, "y_test:", y_test.shape)
print()

# create (and train) the tree

n_groups = 3

dt = dtree.dtree((x_train, y_train), n_groups=n_groups, max_depth=4)

print(
    f"Train on {len(x_train)} samples, validate on {len(x_test)} samples, n_groups: {n_groups}"
)

# evaluate on test set
y_test_pred = dt.predict(x_test)

# calculate metrics
# Each metrics.<name>() call returns a callable that is then applied to
# (y_test, y_test_pred); tfpn is called directly and unpacked into four values.
tp, tn, fp, fn = metrics.tfpn(y_test, y_test_pred)
test_acc = metrics.accuracy()(y_test, y_test_pred)
test_prec = metrics.precision()(y_test, y_test_pred)
test_sens = metrics.sensitivity()(y_test, y_test_pred)
test_spec = metrics.specificity()(y_test, y_test_pred)
test_f1 = metrics.f1()(y_test, y_test_pred)
Exemplo n.º 9
0
# Every call below except dt.dtree receives the same eleven arguments.
nb.Naive_Bayes(xtrain_count, xvalid_count, train_y, valid_y, my_tags,
               xtrain_tfidf, xvalid_tfidf, xtrain_tfidf_ngram,
               xvalid_tfidf_ngram, xtrain_tfidf_ngram_chars,
               xvalid_tfidf_ngram_chars)
#Bernoulli Naive Bayes classifier implementation
bnb.Bernoulli_Naive_Bayes(xtrain_count, xvalid_count, train_y, valid_y,
                          my_tags, xtrain_tfidf, xvalid_tfidf,
                          xtrain_tfidf_ngram, xvalid_tfidf_ngram,
                          xtrain_tfidf_ngram_chars, xvalid_tfidf_ngram_chars)
#Gaussian Naive Bayes classifier implementation
gnb.Gaussian_Naive_Bayes(xtrain_count, xvalid_count, train_y, valid_y, my_tags,
                         xtrain_tfidf, xvalid_tfidf, xtrain_tfidf_ngram,
                         xvalid_tfidf_ngram, xtrain_tfidf_ngram_chars,
                         xvalid_tfidf_ngram_chars)
#Decision tree classifier implementation
# NOTE(review): unlike the sibling calls, this one passes `train` where the
# others pass `xtrain_count, xvalid_count` (one argument fewer overall).
# Confirm this matches the signature of dt.dtree and is not a slip.
dt.dtree(train, train_y, valid_y, my_tags, xtrain_tfidf, xvalid_tfidf,
         xtrain_tfidf_ngram, xvalid_tfidf_ngram, xtrain_tfidf_ngram_chars,
         xvalid_tfidf_ngram_chars)
#KNN classifier implementation
k.knn(xtrain_count, xvalid_count, train_y, valid_y, my_tags, xtrain_tfidf,
      xvalid_tfidf, xtrain_tfidf_ngram, xvalid_tfidf_ngram,
      xtrain_tfidf_ngram_chars, xvalid_tfidf_ngram_chars)
#Linear SVM classifier implementation
s.Svm(xtrain_count, xvalid_count, train_y, valid_y, my_tags, xtrain_tfidf,
      xvalid_tfidf, xtrain_tfidf_ngram, xvalid_tfidf_ngram,
      xtrain_tfidf_ngram_chars, xvalid_tfidf_ngram_chars)
#SGD SVM classifier implementation
sg.SGD_Svm(xtrain_count, xvalid_count, train_y, valid_y, my_tags, xtrain_tfidf,
           xvalid_tfidf, xtrain_tfidf_ngram, xvalid_tfidf_ngram,
           xtrain_tfidf_ngram_chars, xvalid_tfidf_ngram_chars)
Exemplo n.º 10
0
#!/usr/bin/env python
# coding: utf-8

# In[1]:


from numpy import *
import dtree



# In[2]:


# create a decision tree object
dt = dtree.dtree()


# In[3]:


# Ask the user which data file to load.
# NOTE(review): raw_input is the Python 2 builtin; Python 3 only provides input().
fileName = raw_input('Enter file name: ')


# In[4]:


# NOTE(review): `fh` is not used in the visible code (read_data below is given
# the file name, not this handle) and it is never closed here. Confirm whether
# a later cell needs it; otherwise this open() can be dropped.
fh = open(fileName)
# load your data in for building the tree

data, classData, featureNames = dt.read_data(fileName)
Exemplo n.º 11
0
import dtree
import numpy as np
import pandas as pd
import jenkspy

from xlsxwriter import Workbook

# Load the 'Raw cleaned' sheet of the workbook, skipping its first two rows.
crash_data = pd.read_excel(
    'C:/Users/fproulx/Working/MiDOT_Sidepaths/20170706_Kent and Oakland data.xlsx',
    sheetname='Raw cleaned',
    skiprows=2)

# Replace spaces with underscores in every column name.
crash_data.columns = [name.replace(' ', '_') for name in crash_data.columns]

# Re-import dtree before wrapping the data in a tree object.
reload(dtree)
tree = dtree.dtree(crash_data)

output_folder = 'C:/Users/fproulx/Desktop/MiDOT_Sidepaths/trees/'

# Bare expression: only displays the column index in an interactive session.
tree.data.columns

tree_formulae = {
    'BikeDirection': [
        'Intersection_or_Non_Intersection_or_Driveway', 'Bike_Facility_Type',
        'Direction_of_Travel'
    ],
    'BikeAction': [
        'Intersection_or_Non_Intersection_or_Driveway', 'Bike_Facility_Type',
        'Bike_Action'
    ],
    'VehicleAction': [