def dtree_Validate(dataName, grpName, folds, trans=None):
    """
    params: dataName := file with the data set
            grpName  := file with the different groupings
            folds    := number of folds
            trans    := transformation function to be applied to the data set
    objective: performs cross validation using a decision tree as classifier
    returns: a list of tuples organized as (test_predicted, test_groundTruth)
    """
    valid = vd.Validate(grpName, folds)
    data, labels = bd(dataName)
    results = []  # stores tuples: (list_predicted, list_groundTruth)
    for i in range(valid.getFoldCount()):
        # get the train and test indices of the data set
        testIndex, trainIndex = valid.getTest(i), valid.getTrain(i)
        # build the test set and test labels
        testSet, testLabels = data[testIndex, :], labels[testIndex]
        # build the train set and training labels
        trainSet, trainLabels = data[trainIndex, :], labels[trainIndex]
        # if the data is to be transformed
        if trans is not None:
            if trans is fld:
                tmp = trans(trainSet, trainLabels)
                trainSet = np.matmul(trainSet, tmp)
                trainSet = trainSet.reshape(-1, 1).astype(np.float64)
                testSet = np.matmul(testSet, tmp)
                testSet = testSet.reshape(-1, 1).astype(np.float64)
            else:
                tmp = trans(trainSet).transpose()
                trainSet = np.matmul(trainSet, tmp)
                testSet = np.matmul(testSet, tmp)
        # standardize the training and test set
        trainSet, testSet = standard(trainSet, testSet)
        # classify the test set and add the result to the results list
        results.append((dtree.dtree(trainSet, testSet, trainLabels), testLabels))
        #results.append((randForest(trainSet, testSet, trainLabels), testLabels))
    results = ev.buildConfusionMatrices(results)
    results = ev.normalizeConfMat(results)
    results = ev.getAvgProbMatrix(results)
    results = ev.rocData(results)
    print("dtree Accuracy: %f" % results["Acc"])
    return results
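
# Minimal usage sketch (an assumption, not from the source): the file names
# below are hypothetical placeholders, and fld is the transformation function
# referenced inside dtree_Validate.
if __name__ == '__main__':
    dtree_Validate('data.csv', 'groups.txt', folds=10)             # raw features
    dtree_Validate('data.csv', 'groups.txt', folds=10, trans=fld)  # FLD projection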
def __init__(self):
    """ Constructor """
    self.tree = dtree.dtree()
# Code from Chapter 13 of Machine Learning: An Algorithmic Perspective (2nd Edition)
# by Stephen Marsland (http://stephenmonika.net)

# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.

# Stephen Marsland, 2008, 2014

import dtree
import randomforest

tree = dtree.dtree()
forest = randomforest.randomforest()

data, classes, features = tree.read_data('car.data')

train = data[::2][:]
test = data[1::2][:]
trainc = classes[::2]
testc = classes[1::2]

f = forest.rf(train, trainc, features, 50, 100, 2, maxlevel=3)
#f = forest.rf(train, trainc, features, 100, 200, 2)
out = forest.rfclass(f, test)

import numpy as np
a = np.zeros(len(out))
b = np.zeros(len(out))
d = np.zeros(len(out))

for i in range(len(out)):
    if testc[i] == 'good' or testc[i] == 'v-good':
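        # Likely completion (an assumption; the original loop body is missing):
        # flag 'good'/'v-good' cars in the ground truth (a), flag them in the
        # predictions (b), and record exact label matches (d).
        a[i] = 1
    if out[i] == 'good' or out[i] == 'v-good':
        b[i] = 1
    if testc[i] == out[i]:
        d[i] = 1

print("Number correctly predicted: %d" % np.sum(d))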
def __init__(self):
    self.tree = dtree.dtree()
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 22 12:52:45 2017

@author: jabong
"""

import dtree

tree = dtree.dtree()
party, classes, features = tree.read_data('party.data')
t = tree.make_tree(party, classes, features)
tree.printTree(t, ' ')

print tree.classifyAll(t, party)

for i in range(len(party)):
    tree.classify(t, party[i])

print "True Classes"
print classes
all_inputs = df[list_col].values
all_classes = df['class'].values

(train_inputs, test_inputs, train_classes, test_classes) = \
    model_selection.train_test_split(all_inputs, all_classes,
                                     train_size=0.7, test_size=0.3)

print(train_inputs)
print(train_classes)
print(test_inputs)
print(test_classes)

# DECISION TREES
start_tree = timer()
score_dtree, error_dtree = dtree.dtree(train_inputs, test_inputs,
                                       train_classes, test_classes)
end_tree = timer()

# NAIVE BAYES
start_bayes = timer()
score_nbayes, error_nbayes = bayes.bayes(train_inputs, test_inputs,
                                         train_classes, test_classes)
end_bayes = timer()

# k-NN
k = 3
start_knn3 = timer()
score_knn3, error_knn3 = k_NN.k_NN(3, train_inputs, test_inputs,
                                   train_classes, test_classes)
end_knn3 = timer()

# neural network
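
# Summary sketch (an assumption, not part of the original script): compare the
# classifiers timed above, assuming each score_* is an accuracy-like value.
print("dtree: %.3fs, score %.3f" % (end_tree - start_tree, score_dtree))
print("bayes: %.3fs, score %.3f" % (end_bayes - start_bayes, score_nbayes))
print("3-NN:  %.3fs, score %.3f" % (end_knn3 - start_knn3, score_knn3))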
y = df.iloc[:, -1].values[..., np.newaxis]

# prepare training and test sets
x, y = data.shuffle(x, y)
(x_train, y_train), (x_test, y_test) = data.split(x, y, ratio=0.8)
print()
print("x_train:", x_train.shape, "y_train:", y_train.shape)
print("x_test:", x_test.shape, "y_test:", y_test.shape)
print()

# create (and train) the tree
n_groups = 3
dt = dtree.dtree((x_train, y_train), n_groups=n_groups, max_depth=4)
print(
    f"Train on {len(x_train)} samples, validate on {len(x_test)} samples, n_groups: {n_groups}"
)

# evaluate on test set
y_test_pred = dt.predict(x_test)

# calculate metrics
tp, tn, fp, fn = metrics.tfpn(y_test, y_test_pred)
test_acc = metrics.accuracy()(y_test, y_test_pred)
test_prec = metrics.precision()(y_test, y_test_pred)
test_sens = metrics.sensitivity()(y_test, y_test_pred)
test_spec = metrics.specificity()(y_test, y_test_pred)
test_f1 = metrics.f1()(y_test, y_test_pred)
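
# Reporting sketch (an assumption, not from the source): print the test
# metrics computed above.
print(f"test acc: {test_acc:.3f}  prec: {test_prec:.3f}  sens: {test_sens:.3f}  "
      f"spec: {test_spec:.3f}  f1: {test_f1:.3f}")
print(f"confusion counts  tp: {tp}  tn: {tn}  fp: {fp}  fn: {fn}")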
nb.Naive_Bayes(xtrain_count, xvalid_count, train_y, valid_y, my_tags,
               xtrain_tfidf, xvalid_tfidf,
               xtrain_tfidf_ngram, xvalid_tfidf_ngram,
               xtrain_tfidf_ngram_chars, xvalid_tfidf_ngram_chars)

#Bernoulli Naive Bayes classifier implementation
bnb.Bernoulli_Naive_Bayes(xtrain_count, xvalid_count, train_y, valid_y, my_tags,
                          xtrain_tfidf, xvalid_tfidf,
                          xtrain_tfidf_ngram, xvalid_tfidf_ngram,
                          xtrain_tfidf_ngram_chars, xvalid_tfidf_ngram_chars)

#Gaussian Naive Bayes classifier implementation
gnb.Gaussian_Naive_Bayes(xtrain_count, xvalid_count, train_y, valid_y, my_tags,
                         xtrain_tfidf, xvalid_tfidf,
                         xtrain_tfidf_ngram, xvalid_tfidf_ngram,
                         xtrain_tfidf_ngram_chars, xvalid_tfidf_ngram_chars)

#Decision tree classifier implementation
dt.dtree(train, train_y, valid_y, my_tags,
         xtrain_tfidf, xvalid_tfidf,
         xtrain_tfidf_ngram, xvalid_tfidf_ngram,
         xtrain_tfidf_ngram_chars, xvalid_tfidf_ngram_chars)

#KNN classifier implementation
k.knn(xtrain_count, xvalid_count, train_y, valid_y, my_tags,
      xtrain_tfidf, xvalid_tfidf,
      xtrain_tfidf_ngram, xvalid_tfidf_ngram,
      xtrain_tfidf_ngram_chars, xvalid_tfidf_ngram_chars)

#Linear SVM classifier implementation
s.Svm(xtrain_count, xvalid_count, train_y, valid_y, my_tags,
      xtrain_tfidf, xvalid_tfidf,
      xtrain_tfidf_ngram, xvalid_tfidf_ngram,
      xtrain_tfidf_ngram_chars, xvalid_tfidf_ngram_chars)

#SGD SVM classifier implementation
sg.SGD_Svm(xtrain_count, xvalid_count, train_y, valid_y, my_tags,
           xtrain_tfidf, xvalid_tfidf,
           xtrain_tfidf_ngram, xvalid_tfidf_ngram,
           xtrain_tfidf_ngram_chars, xvalid_tfidf_ngram_chars)
#!/usr/bin/env python
# coding: utf-8

# In[1]:

from numpy import *
import dtree

# In[2]:

# create a decision tree object
dt = dtree.dtree()

# In[3]:

fileName = raw_input('Enter file name: ')

# In[4]:

fh = open(fileName)

# load your data in for building the tree
data, classData, featureNames = dt.read_data(fileName)
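
# In[5]:

# A hedged continuation (an assumption, not from the source), using the
# Marsland-style dtree API seen elsewhere in this collection
# (make_tree / printTree / classifyAll):
t = dt.make_tree(data, classData, featureNames)
dt.printTree(t, ' ')
print dt.classifyAll(t, data)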
import dtree
import numpy as np
import pandas as pd
import jenkspy
from xlsxwriter import Workbook

crash_data = pd.read_excel(
    'C:/Users/fproulx/Working/MiDOT_Sidepaths/20170706_Kent and Oakland data.xlsx',
    sheetname='Raw cleaned', skiprows=2)
crash_data.columns = map(lambda x: x.replace(' ', '_'), crash_data.columns)

reload(dtree)
tree = dtree.dtree(crash_data)

output_folder = 'C:/Users/fproulx/Desktop/MiDOT_Sidepaths/trees/'

tree.data.columns

tree_formulae = {
    'BikeDirection': [
        'Intersection_or_Non_Intersection_or_Driveway',
        'Bike_Facility_Type',
        'Direction_of_Travel'
    ],
    'BikeAction': [
        'Intersection_or_Non_Intersection_or_Driveway',
        'Bike_Facility_Type',
        'Bike_Action'
    ],
    'VehicleAction': [