def crossvalidate(data_root=None):
    """Run ``cv10foldresult`` on six datasets and print one line per result.

    The Iris, Cars, Mushroom, Voting, Heart Disease (Cleveland) and Wine
    datasets are loaded from sub-directories of ``data_root``; every dataset
    is loaded before the first ``cv10foldresult`` call is made. Results are
    collected in a dict keyed by a short dataset label and printed as
    ``label : result`` in the fixed order listed below (the original
    iterated the dict directly, which has no defined order on Python 2).

    Args:
        data_root: Directory containing the ``iris/``, ``cars/``,
            ``mushroom/``, ``voting/``, ``heart_disease/`` and ``wine/``
            folders. ``None`` (the default) selects the location that was
            hard-coded in the original script.

    Returns:
        None. The results are only printed.
    """
    import os  # function-scope import so the block does not depend on file-level imports

    if data_root is None:
        data_root = ('/host/Users/vandana/Documents/Grad_Studies/Spring2012/'
                     'Machine Learning/Projects')

    def locate(*parts):
        # Build the path of one data file below data_root.
        return os.path.join(data_root, *parts)

    # Input normalisation (``normalizeinput()``) for Iris, Heart and Wine was
    # commented out in the original and stays disabled here.
    datasets = [
        ('Iris', loadiris(locate('iris', 'iris.data'))),
        ('Cars', loadcars(locate('cars', 'car.data'))),
        ('Mushrm', loadmushroom(locate('mushroom', 'agaricus-lepiota.data'))),
        ('Voting', loadvoting(locate('voting', 'house-votes-84.data'))),
        # Second argument True is passed through unchanged; its meaning is
        # defined by loadheartdisease -- TODO confirm.
        ('Heart', loadheartdisease(
            locate('heart_disease', 'processed.cleveland.data'), True)),
        ('Wine', loadwine(locate('wine', 'wine.data'))),
    ]

    cvacc = {}
    for label, data in datasets:
        cvacc[label] = cv10foldresult(data)

    # Single-argument print(...) is valid on both Python 2 and Python 3 and
    # produces the same text as the old ``print label, ":", value``.
    for label, _ in datasets:
        print("%s : %s" % (label, cvacc[label]))
def crossvalidate(data_root=None, output_path="nn_cv"):
    """Run ``cv10foldresult`` for the enabled datasets, print and pickle results.

    All six datasets (Iris, Cars, Mushroom, Voting, Heart Disease, Wine) are
    loaded, and Iris, Heart Disease and Wine are additionally passed through
    ``normalizeinput()``, exactly as the original did. Only the Cars and
    Mushroom runs are currently enabled. Each enabled run calls
    ``cv10foldresult`` with the settings listed in ``runs``; the results are
    printed as ``label : result`` and the whole result dict is pickled to
    ``output_path``.

    Args:
        data_root: Directory containing the per-dataset folders. ``None``
            (the default) selects the location hard-coded in the original.
        output_path: File the result dict is pickled to. Defaults to
            ``"nn_cv"`` in the current working directory, as before.

    Returns:
        None.
    """
    import os  # function-scope import so the block does not depend on file-level imports

    if data_root is None:
        data_root = ('/host/Users/vandana/Documents/Grad_Studies/Spring2012/'
                     'Machine Learning/Projects')

    def locate(*parts):
        # Build the path of one data file below data_root.
        return os.path.join(data_root, *parts)

    # NOTE(review): Iris, Voting, Heart and Wine are loaded although their
    # runs are disabled below; kept so a run can be re-enabled by
    # uncommenting a single line.
    # A disabled alternative for Heart used 'processed.switzerland.data' with
    # loadheartdisease(path, True, True).
    inputs = {
        'Iris': loadiris(locate('iris', 'iris.data')).normalizeinput(),
        'Cars': loadcars(locate('cars', 'car.data')),
        'Mushrm': loadmushroom(locate('mushroom', 'agaricus-lepiota.data')),
        'Voting': loadvoting(locate('voting', 'house-votes-84.data')),
        'Heart': loadheartdisease(
            locate('heart_disease', 'processed.cleveland.data'),
            True).normalizeinput(),
        'Wine': loadwine(locate('wine', 'wine.data')).normalizeinput(),
    }

    # Settings that were disabled in the original inside a string literal
    # (second positional argument is a list there -- presumably hidden-layer
    # sizes; confirm against cv10foldresult):
    #   Iris   [3]      learn_rate=0.05   epoch=700   stop_criterion=50
    #   Cars   [5]      learn_rate=0.01   epoch=1000  stop_criterion=100
    #   Mushrm [5]      learn_rate=0.05   epoch=50    stop_criterion=4
    #   Voting [5]      learn_rate=0.01   epoch=500   stop_criterion=100
    #   Heart  [10, 5]  learn_rate=0.01   epoch=1000  stop_criterion=100
    #   Wine   [5]      learn_rate=0.005  epoch=500   stop_criterion=50

    # (label, second positional argument, learn_rate, epoch, stop_criterion)
    runs = [
        # ('Iris', None, 0.05, 700, 50),
        ('Cars', None, 0.01, 1000, 50),
        ('Mushrm', None, 0.08, 50, 2),
        # ('Voting', None, 0.05, 500, 100),
        # ('Heart', None, 0.01, 1000, 100),
        # ('Wine', None, 0.005, 500, 50),
    ]

    cvacc = {}
    for label, layout, learn_rate, epoch, stop_criterion in runs:
        cvacc[label] = cv10foldresult(inputs[label], layout,
                                      learn_rate=learn_rate,
                                      epoch=epoch,
                                      stop_criterion=stop_criterion)

    # Single-argument print(...) works on Python 2 and 3 and emits the same
    # text as the old ``print label, ":", value``; order follows ``runs``.
    for run in runs:
        print("%s : %s" % (run[0], cvacc[run[0]]))

    # Binary mode is what pickle expects, and the with-block closes the file
    # even if pickle.dump raises.
    with open(output_path, 'wb') as handle:
        pickle.dump(cvacc, handle)
def main(argv=None):
    """Fit a ``NearestNeighbor`` model on six datasets and predict their test split.

    For Iris, Cars, Mushroom, Voting, Heart Disease and Wine in turn: the
    dataset is loaded (Iris, Heart Disease and Wine are also passed through
    ``normalizeinput()``), split with ``sampledata``, a ``NearestNeighbor``
    is built from the ``'train'`` part and ``predict`` is called on the
    ``'test'`` part.

    Alternatives left disabled in the original and not run here:
    ``NearestNeighbor(k, train)``, ``NearestNeighbor(k, train, False, True, cv)``,
    ``predictdistweighted`` and ``bestkexperiment(name, data)``.

    ``argv`` is accepted but never read. Nothing is returned.
    """
    def evaluate(dataset, k, train_pct, test_pct, cv_pct):
        # A fresh split dict is built per call, as in the original. The 'cv'
        # part is produced by the split but not consumed here.
        parts = dataset.sampledata(
            {'train': train_pct, 'test': test_pct, 'cv': cv_pct})
        # First argument is presumably the neighbour count; the trailing True
        # is passed unchanged -- meaning defined by NearestNeighbor, confirm.
        model = NearestNeighbor(k, parts['train'], True)
        model.predict(parts['test'])

    # Iris (normalised)
    iris = loadiris('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/iris/iris.data')
    evaluate(iris.normalizeinput(), 3, 70, 15, 15)

    # Cars
    cars = loadcars('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/cars/car.data')
    evaluate(cars, 5, 70, 15, 15)

    # Mushroom -- the only dataset with a 65/15/20 split
    mushroom = loadmushroom('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/mushroom/agaricus-lepiota.data')
    evaluate(mushroom, 1, 65, 15, 20)

    # Voting
    voting = loadvoting('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/voting/house-votes-84.data')
    evaluate(voting, 5, 70, 15, 15)

    # Heart Disease (normalised)
    heart = loadheartdisease('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.cleveland.data', True)
    evaluate(heart.normalizeinput(), 5, 70, 15, 15)

    # Wine (normalised)
    wine = loadwine('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/wine/wine.data')
    evaluate(wine.normalizeinput(), 5, 70, 15, 15)
def main(argv=None):
    """Grow, print and test an unpruned ``dtree`` on five datasets.

    For Iris, Cars, Mushroom, Voting and Heart Disease in turn: the dataset
    is loaded, split 70/10/20 with ``sampledata``, a ``dtree`` is built from
    the ``'train'`` part, ``generatetree`` is run from its root, the tree is
    printed and ``testtree`` is called on the ``'test'`` part. The banner
    ``before pruning....`` is printed for every dataset except Cars.

    The pruning calls (``prune(...)`` against the ``'cv'`` part) and the toy
    ``loadtennis()`` dataset were disabled in the original and are not run.

    Args:
        argv: Falls back to ``sys.argv`` when ``None``; not read afterwards.

    Returns:
        Always ``0``.
    """
    if argv is None:
        argv = sys.argv

    def grow_and_test(dataset, banner=True):
        # The 'cv' part is produced by the split but unused while pruning
        # stays disabled.
        parts = dataset.sampledata({'train': 70, 'test': 10, 'cv': 20})
        tree = dtree(parts['train'])
        tree.generatetree(tree.root)
        if banner:
            print("before pruning....")
        tree.printtree()
        # The False/True flags are passed unchanged -- meaning defined by
        # testtree, confirm.
        tree.testtree(parts['test'], False, True)

    # Iris
    grow_and_test(loadiris('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/iris/iris.data'))

    # Cars -- the original prints no banner for this dataset
    grow_and_test(loadcars('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/cars/car.data'), banner=False)

    # Mushroom
    grow_and_test(loadmushroom('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/mushroom/agaricus-lepiota.data'))

    # Voting
    grow_and_test(loadvoting('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/voting/house-votes-84.data'))

    # Heart Disease
    grow_and_test(loadheartdisease('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.cleveland.data'))

    return 0
def main(argv=None):
    """Build an unpruned ``dtree`` for each of five datasets and test it.

    The datasets (Iris, Cars, Mushroom, Voting, Heart Disease) are handled
    one after another. Each is loaded, split 70/10/20 via ``sampledata``,
    used to build a ``dtree`` from the ``'train'`` part, expanded with
    ``generatetree``, printed with ``printtree`` and checked with
    ``testtree`` on the ``'test'`` part. ``before pruning....`` is printed
    ahead of the tree for all datasets but Cars.

    Pruning against the ``'cv'`` part and the toy ``loadtennis()`` dataset
    are disabled in the original and remain disabled.

    Args:
        argv: Replaced by ``sys.argv`` when ``None``; otherwise unused.

    Returns:
        ``0`` in every case.
    """
    if argv is None:
        argv = sys.argv

    # (deferred loader, print the banner?) -- lambdas keep each load call at
    # the point where the original performed it.
    experiments = (
        (lambda: loadiris('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/iris/iris.data'), True),
        (lambda: loadcars('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/cars/car.data'), False),
        (lambda: loadmushroom('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/mushroom/agaricus-lepiota.data'), True),
        (lambda: loadvoting('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/voting/house-votes-84.data'), True),
        (lambda: loadheartdisease('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.cleveland.data'), True),
    )

    for fetch, with_banner in experiments:
        split = fetch().sampledata({'train': 70, 'test': 10, 'cv': 20})
        current = dtree(split['train'])
        current.generatetree(current.root)
        if with_banner:
            print("before pruning....")
        current.printtree()
        # Flags forwarded as-is; their meaning is defined by testtree.
        current.testtree(split['test'], False, True)

    return 0
def main(argv=None):
    """Run one ``NearestNeighbor`` train/predict cycle per dataset.

    Six datasets are processed in order: Iris, Cars, Mushroom, Voting,
    Heart Disease and Wine. Each is loaded (with ``normalizeinput()``
    applied to Iris, Heart Disease and Wine), split by ``sampledata``, and a
    ``NearestNeighbor`` built from the ``'train'`` part is asked to
    ``predict`` the ``'test'`` part.

    The original also contained disabled variants, none of which run here:
    ``NearestNeighbor(k, train)``, ``NearestNeighbor(k, train, False, True, cv)``,
    ``predictdistweighted`` and ``bestkexperiment``.

    ``argv`` is never read and the function returns nothing.
    """
    # (deferred loader, first NearestNeighbor argument, (train, test, cv) percentages)
    # Lambdas keep every load/normalise call at the point where the original
    # performed it.
    experiments = (
        (lambda: loadiris('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/iris/iris.data').normalizeinput(),
         3, (70, 15, 15)),
        (lambda: loadcars('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/cars/car.data'),
         5, (70, 15, 15)),
        (lambda: loadmushroom('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/mushroom/agaricus-lepiota.data'),
         1, (65, 15, 20)),
        (lambda: loadvoting('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/voting/house-votes-84.data'),
         5, (70, 15, 15)),
        (lambda: loadheartdisease('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.cleveland.data', True).normalizeinput(),
         5, (70, 15, 15)),
        (lambda: loadwine('/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/wine/wine.data').normalizeinput(),
         5, (70, 15, 15)),
    )

    for fetch, k, (train_pct, test_pct, cv_pct) in experiments:
        # The 'cv' part is requested in the split but not used afterwards.
        split = fetch().sampledata(
            {'train': train_pct, 'test': test_pct, 'cv': cv_pct})
        # Trailing True is forwarded unchanged; its meaning is defined by
        # NearestNeighbor -- confirm there.
        NearestNeighbor(k, split['train'], True).predict(split['test'])