# Example #1
# 0
def crossvalidate():
    
    ''' Iris dataset '''
    path = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/iris/iris.data'
    mldata = loadiris(path)
    #normaldata = mldata.normalizeinput()
    ''' Cars dataset '''
    path1 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/cars/car.data'
    mldata1 = loadcars(path1)
    ''' Mushroom dataset '''
    path2 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/mushroom/agaricus-lepiota.data'
    mldata2 = loadmushroom(path2)
    ''' Voting dataset '''
    path3 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/voting/house-votes-84.data'
    mldata3 = loadvoting(path3)
    ''' Heart Disease dataset '''
    path4 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.cleveland.data'
    mldata4 = loadheartdisease(path4, True)
    #normaldata4 = mldata4.normalizeinput()
    ''' Wine dataset '''
    path5 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/wine/wine.data'
    mldata5 = loadwine(path5)
    #normaldata5 = mldata5.normalizeinput()
        
    cvacc = {}
    cvacc['Iris'] = cv10foldresult(mldata)
    cvacc['Cars'] = cv10foldresult(mldata1)
    cvacc['Mushrm'] = cv10foldresult(mldata2)
    cvacc['Voting'] = cv10foldresult(mldata3)
    cvacc['Heart'] = cv10foldresult(mldata4)
    cvacc['Wine'] = cv10foldresult(mldata5)
    
    for i in cvacc:
        print i, ":", cvacc[i]
# Example #2
# 0
def crossvalidate():
    ''' Iris dataset '''
    path = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/iris/iris.data'
    mldata = loadiris(path)
    #normaldata = mldata.normalizeinput()
    ''' Cars dataset '''
    path1 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/cars/car.data'
    mldata1 = loadcars(path1)
    ''' Mushroom dataset '''
    path2 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/mushroom/agaricus-lepiota.data'
    mldata2 = loadmushroom(path2)
    ''' Voting dataset '''
    path3 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/voting/house-votes-84.data'
    mldata3 = loadvoting(path3)
    ''' Heart Disease dataset '''
    path4 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.cleveland.data'
    mldata4 = loadheartdisease(path4, True)
    #normaldata4 = mldata4.normalizeinput()
    ''' Wine dataset '''
    path5 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/wine/wine.data'
    mldata5 = loadwine(path5)
    #normaldata5 = mldata5.normalizeinput()

    cvacc = {}
    cvacc['Iris'] = cv10foldresult(mldata)
    cvacc['Cars'] = cv10foldresult(mldata1)
    cvacc['Mushrm'] = cv10foldresult(mldata2)
    cvacc['Voting'] = cv10foldresult(mldata3)
    cvacc['Heart'] = cv10foldresult(mldata4)
    cvacc['Wine'] = cv10foldresult(mldata5)

    for i in cvacc:
        print i, ":", cvacc[i]
# Example #3
# 0
def crossvalidate():
    ''' Iris dataset '''
    path = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/iris/iris.data'
    mldata = loadiris(path)
    normaldata = mldata.normalizeinput()
    ''' Cars dataset '''
    path1 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/cars/car.data'
    mldata1 = loadcars(path1)
    ''' Mushroom dataset '''
    path2 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/mushroom/agaricus-lepiota.data'
    mldata2 = loadmushroom(path2)
    ''' Voting dataset '''
    path3 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/voting/house-votes-84.data'
    mldata3 = loadvoting(path3)
    ''' Heart Disease dataset '''
    path4 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.cleveland.data'
    #path4 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.switzerland.data'
    #mldata4 = loadheartdisease(path4, True, True)
    mldata4 = loadheartdisease(path4, True)
    normaldata4 = mldata4.normalizeinput()
    ''' Wine dataset '''
    path5 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/wine/wine.data'
    mldata5 = loadwine(path5)
    normaldata5 = mldata5.normalizeinput()

    cvacc = {}
    """
    cvacc['Iris'] = cv10foldresult(normaldata, [3], learn_rate=0.05, epoch=700, stop_criterion=50)
    cvacc['Cars'] = cv10foldresult(mldata1, [5], learn_rate=0.01, epoch=1000, stop_criterion=100)
    cvacc['Mushrm'] = cv10foldresult(mldata2, [5], learn_rate=0.05, epoch=50, stop_criterion=4)
    cvacc['Voting'] = cv10foldresult(mldata3, [5], learn_rate=0.01, epoch=500, stop_criterion=100)
    cvacc['Heart'] = cv10foldresult(normaldata4, [10, 5], learn_rate=0.01, epoch=1000, stop_criterion=100)
    cvacc['Wine'] = cv10foldresult(normaldata5, [5], learn_rate=0.005, epoch=500, stop_criterion=50)
    """

    #cvacc['Iris'] = cv10foldresult(normaldata, None, learn_rate=0.05, epoch=700, stop_criterion=50)
    cvacc['Cars'] = cv10foldresult(mldata1,
                                   None,
                                   learn_rate=0.01,
                                   epoch=1000,
                                   stop_criterion=50)
    cvacc['Mushrm'] = cv10foldresult(mldata2,
                                     None,
                                     learn_rate=0.08,
                                     epoch=50,
                                     stop_criterion=2)
    #cvacc['Voting'] = cv10foldresult(mldata3, None, learn_rate=0.05, epoch=500, stop_criterion=100)
    #cvacc['Heart'] = cv10foldresult(normaldata4, None, learn_rate=0.01, epoch=1000, stop_criterion=100)
    #cvacc['Wine'] = cv10foldresult(normaldata5, None, learn_rate=0.005, epoch=500, stop_criterion=50)

    for i in cvacc:
        print i, ":", cvacc[i]
    f = open("nn_cv", 'w')
    pickle.dump(cvacc, f)
    f.close()
def crossvalidate():
    
    ''' Iris dataset '''
    path = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/iris/iris.data'
    mldata = loadiris(path)
    normaldata = mldata.normalizeinput()
    ''' Cars dataset '''
    path1 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/cars/car.data'
    mldata1 = loadcars(path1)
    ''' Mushroom dataset '''
    path2 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/mushroom/agaricus-lepiota.data'
    mldata2 = loadmushroom(path2)
    ''' Voting dataset '''
    path3 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/voting/house-votes-84.data'
    mldata3 = loadvoting(path3)
    ''' Heart Disease dataset '''
    path4 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.cleveland.data'
    #path4 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.switzerland.data'
    #mldata4 = loadheartdisease(path4, True, True)
    mldata4 = loadheartdisease(path4, True)
    normaldata4 = mldata4.normalizeinput()
    ''' Wine dataset '''
    path5 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/wine/wine.data'
    mldata5 = loadwine(path5)
    normaldata5 = mldata5.normalizeinput()
        
    cvacc = {}
    """
    cvacc['Iris'] = cv10foldresult(normaldata, [3], learn_rate=0.05, epoch=700, stop_criterion=50)
    cvacc['Cars'] = cv10foldresult(mldata1, [5], learn_rate=0.01, epoch=1000, stop_criterion=100)
    cvacc['Mushrm'] = cv10foldresult(mldata2, [5], learn_rate=0.05, epoch=50, stop_criterion=4)
    cvacc['Voting'] = cv10foldresult(mldata3, [5], learn_rate=0.01, epoch=500, stop_criterion=100)
    cvacc['Heart'] = cv10foldresult(normaldata4, [10, 5], learn_rate=0.01, epoch=1000, stop_criterion=100)
    cvacc['Wine'] = cv10foldresult(normaldata5, [5], learn_rate=0.005, epoch=500, stop_criterion=50)
    """
    
    #cvacc['Iris'] = cv10foldresult(normaldata, None, learn_rate=0.05, epoch=700, stop_criterion=50)
    cvacc['Cars'] = cv10foldresult(mldata1, None, learn_rate=0.01, epoch=1000, stop_criterion=50)
    cvacc['Mushrm'] = cv10foldresult(mldata2, None, learn_rate=0.08, epoch=50, stop_criterion=2)
    #cvacc['Voting'] = cv10foldresult(mldata3, None, learn_rate=0.05, epoch=500, stop_criterion=100)
    #cvacc['Heart'] = cv10foldresult(normaldata4, None, learn_rate=0.01, epoch=1000, stop_criterion=100)
    #cvacc['Wine'] = cv10foldresult(normaldata5, None, learn_rate=0.005, epoch=500, stop_criterion=50)
    
    for i in cvacc:
        print i, ":", cvacc[i]
    f = open("nn_cv", 'w')
    pickle.dump(cvacc, f)
    f.close()
def main(argv=None):
    """Fit a ``NearestNeighbor`` model on six datasets and predict each test split.

    For every dataset the same steps run: load from a hard-coded path,
    optionally normalise with ``normalizeinput()`` (Iris, Heart Disease and
    Wine only), split with ``sampledata`` and build a ``NearestNeighbor``
    from the ``'train'`` part, then call ``predict`` on the ``'test'`` part.

    ``argv`` is accepted but never read.  The function returns ``None``.
    """
    # ---- Iris (normalised) ----
    iris_path = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/iris/iris.data'
    iris_raw = loadiris(iris_path)
    iris_norm = iris_raw.normalizeinput()
    iris_split = iris_norm.sampledata({'train':70, 'test':15, 'cv':15})
    # First argument is presumably k; the trailing True flag is opaque here.
    iris_knn = NearestNeighbor(3, iris_split['train'], True)
    iris_knn.predict(iris_split['test'])

    # ---- Cars ----
    cars_path = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/cars/car.data'
    cars_raw = loadcars(cars_path)
    cars_split = cars_raw.sampledata({'train':70, 'test':15, 'cv':15})
    cars_knn = NearestNeighbor(5, cars_split['train'], True)
    cars_knn.predict(cars_split['test'])

    # ---- Mushroom (the only dataset with a 65/15/20 split spec) ----
    mushroom_path = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/mushroom/agaricus-lepiota.data'
    mushroom_raw = loadmushroom(mushroom_path)
    mushroom_split = mushroom_raw.sampledata({'train':65, 'test':15, 'cv':20})
    mushroom_knn = NearestNeighbor(1, mushroom_split['train'], True)
    mushroom_knn.predict(mushroom_split['test'])

    # ---- Voting ----
    voting_path = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/voting/house-votes-84.data'
    voting_raw = loadvoting(voting_path)
    voting_split = voting_raw.sampledata({'train':70, 'test':15, 'cv':15})
    voting_knn = NearestNeighbor(5, voting_split['train'], True)
    voting_knn.predict(voting_split['test'])

    # ---- Heart disease (normalised; loader gets an extra True flag) ----
    heart_path = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.cleveland.data'
    heart_raw = loadheartdisease(heart_path, True)
    heart_norm = heart_raw.normalizeinput()
    heart_split = heart_norm.sampledata({'train':70, 'test':15, 'cv':15})
    heart_knn = NearestNeighbor(5, heart_split['train'], True)
    heart_knn.predict(heart_split['test'])

    # ---- Wine (normalised) ----
    wine_path = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/wine/wine.data'
    wine_raw = loadwine(wine_path)
    wine_norm = wine_raw.normalizeinput()
    wine_split = wine_norm.sampledata({'train':70, 'test':15, 'cv':15})
    wine_knn = NearestNeighbor(5, wine_split['train'], True)
    wine_knn.predict(wine_split['test'])
def main(argv=None):
    if argv is None:
        argv = sys.argv

    #toy dataset
    #mldata = loadtennis()
    #sampledmldata = mldata.sampledata({'train':80, 'test':20, 'cv':0})
    ''' Iris dataset '''
    path = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/iris/iris.data'
    mldata = loadiris(path)
    sampledmldata = mldata.sampledata({'train': 70, 'test': 10, 'cv': 20})
    dt = dtree(sampledmldata['train'])
    dt.generatetree(dt.root)
    print "before pruning...."
    dt.printtree()
    dt.testtree(sampledmldata['test'], False, True)
    #print "after pruning...."
    #besttree = dt.prune(2, sampledmldata['cv'], True)
    #besttree.testtree(sampledmldata['test'], False, True)
    #dt.prune(1, sampledmldata['cv'], True)
    #dt.testtree(sampledmldata['test'], False, True)
    ''' Cars dataset '''
    path1 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/cars/car.data'
    mldata1 = loadcars(path1)
    sampledmldata1 = mldata1.sampledata({'train': 70, 'test': 10, 'cv': 20})
    dt1 = dtree(sampledmldata1['train'])
    dt1.generatetree(dt1.root)
    dt1.printtree()
    dt1.testtree(sampledmldata1['test'], False, True)
    #print "after pruning...."
    #besttree1 = dt1.prune(2, sampledmldata1['cv'], True)
    #besttree1.testtree(sampledmldata1['test'], False, True)
    #dt1.prune(1, sampledmldata1['cv'], True)
    #dt1.testtree(sampledmldata1['test'], False, True)
    ''' Mushroom dataset '''
    path2 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/mushroom/agaricus-lepiota.data'
    mldata2 = loadmushroom(path2)
    sampledmldata2 = mldata2.sampledata({'train': 70, 'test': 10, 'cv': 20})
    dt2 = dtree(sampledmldata2['train'])
    dt2.generatetree(dt2.root)
    print "before pruning...."
    dt2.printtree()
    dt2.testtree(sampledmldata2['test'], False, True)
    #print "after pruning...."
    #besttree2 = dt2.prune(2, sampledmldata2['cv'], True)
    #besttree2.testtree(sampledmldata2['test'], False, True)
    #dt2.prune(1, sampledmldata2['cv'], True)
    #dt2.testtree(sampledmldata2['test'], False, True)
    ''' Voting dataset '''
    path3 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/voting/house-votes-84.data'
    mldata3 = loadvoting(path3)
    sampledmldata3 = mldata3.sampledata({'train': 70, 'test': 10, 'cv': 20})
    dt3 = dtree(sampledmldata3['train'])
    dt3.generatetree(dt3.root)
    print "before pruning...."
    dt3.printtree()
    dt3.testtree(sampledmldata3['test'], False, True)
    #print "after pruning...."
    #besttree3 = dt3.prune(2, sampledmldata3['cv'], True)
    #besttree3.testtree(sampledmldata3['test'], False, True)
    #dt3.prune(1, sampledmldata3['cv'], True)
    #dt3.testtree(sampledmldata3['test'], False, True)
    ''' Heart Disease dataset '''
    path4 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.cleveland.data'
    mldata4 = loadheartdisease(path4)
    sampledmldata4 = mldata4.sampledata({'train': 70, 'test': 10, 'cv': 20})
    dt4 = dtree(sampledmldata4['train'])
    dt4.generatetree(dt4.root)
    print "before pruning...."
    dt4.printtree()
    dt4.testtree(sampledmldata4['test'], False, True)
    #print "after pruning...."
    #besttree4 = dt4.prune(2, sampledmldata4['cv'], True)
    #besttree4.testtree(sampledmldata4['test'], False, True)
    #dt4.prune(1, sampledmldata4['cv'], True)
    #dt4.testtree(sampledmldata4['test'], False, True)

    return 0
# Example #7
# 0
def main(argv=None):
    if argv is None:
        argv = sys.argv
    
    #toy dataset
    #mldata = loadtennis()
    #sampledmldata = mldata.sampledata({'train':80, 'test':20, 'cv':0})
    
    ''' Iris dataset '''
    path = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/iris/iris.data'
    mldata = loadiris(path)
    sampledmldata = mldata.sampledata({'train':70, 'test':10, 'cv':20})
    dt = dtree(sampledmldata['train'])
    dt.generatetree(dt.root)
    print "before pruning...."
    dt.printtree()
    dt.testtree(sampledmldata['test'], False, True)
    #print "after pruning...."
    #besttree = dt.prune(2, sampledmldata['cv'], True)
    #besttree.testtree(sampledmldata['test'], False, True)
    #dt.prune(1, sampledmldata['cv'], True)
    #dt.testtree(sampledmldata['test'], False, True)
    
    ''' Cars dataset '''
    path1 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/cars/car.data'
    mldata1 = loadcars(path1)
    sampledmldata1 = mldata1.sampledata({'train':70, 'test':10, 'cv':20})
    dt1 = dtree(sampledmldata1['train'])
    dt1.generatetree(dt1.root)
    dt1.printtree()
    dt1.testtree(sampledmldata1['test'], False, True)
    #print "after pruning...."
    #besttree1 = dt1.prune(2, sampledmldata1['cv'], True)
    #besttree1.testtree(sampledmldata1['test'], False, True)
    #dt1.prune(1, sampledmldata1['cv'], True)
    #dt1.testtree(sampledmldata1['test'], False, True)
    
    ''' Mushroom dataset '''
    path2 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/mushroom/agaricus-lepiota.data'
    mldata2 = loadmushroom(path2)
    sampledmldata2 = mldata2.sampledata({'train':70, 'test':10, 'cv':20})
    dt2 = dtree(sampledmldata2['train'])
    dt2.generatetree(dt2.root)
    print "before pruning...."
    dt2.printtree()
    dt2.testtree(sampledmldata2['test'], False, True)
    #print "after pruning...."
    #besttree2 = dt2.prune(2, sampledmldata2['cv'], True)
    #besttree2.testtree(sampledmldata2['test'], False, True)
    #dt2.prune(1, sampledmldata2['cv'], True)
    #dt2.testtree(sampledmldata2['test'], False, True)
    
    ''' Voting dataset '''
    path3 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/voting/house-votes-84.data'
    mldata3 = loadvoting(path3)
    sampledmldata3 = mldata3.sampledata({'train':70, 'test':10, 'cv':20})
    dt3 = dtree(sampledmldata3['train'])
    dt3.generatetree(dt3.root)
    print "before pruning...."
    dt3.printtree()
    dt3.testtree(sampledmldata3['test'], False, True)
    #print "after pruning...."
    #besttree3 = dt3.prune(2, sampledmldata3['cv'], True)
    #besttree3.testtree(sampledmldata3['test'], False, True)
    #dt3.prune(1, sampledmldata3['cv'], True)
    #dt3.testtree(sampledmldata3['test'], False, True)
    
    ''' Heart Disease dataset '''
    path4 = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.cleveland.data'
    mldata4 = loadheartdisease(path4)
    sampledmldata4 = mldata4.sampledata({'train':70, 'test':10, 'cv':20})
    dt4 = dtree(sampledmldata4['train'])
    dt4.generatetree(dt4.root)
    print "before pruning...."
    dt4.printtree()
    dt4.testtree(sampledmldata4['test'], False, True)
    #print "after pruning...."
    #besttree4 = dt4.prune(2, sampledmldata4['cv'], True)
    #besttree4.testtree(sampledmldata4['test'], False, True)
    #dt4.prune(1, sampledmldata4['cv'], True)
    #dt4.testtree(sampledmldata4['test'], False, True)
    
    return 0
# Example #8
# 0
def main(argv=None):
    """Run a ``NearestNeighbor`` prediction pass over six datasets.

    Each dataset is loaded from a hard-coded path and split with
    ``sampledata``; a ``NearestNeighbor`` is built from the ``'train'``
    subset and ``predict`` is invoked on the ``'test'`` subset.  Iris, Heart
    Disease and Wine are passed through ``normalizeinput()`` before being
    split; Cars, Mushroom and Voting are split as loaded.

    ``argv`` is never read.  Nothing is returned.
    """
    # Iris: normalised, first constructor argument 3
    src_iris = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/iris/iris.data'
    iris = loadiris(src_iris)
    iris_scaled = iris.normalizeinput()
    iris_parts = iris_scaled.sampledata({'train': 70, 'test': 15, 'cv': 15})
    model_iris = NearestNeighbor(3, iris_parts['train'], True)
    model_iris.predict(iris_parts['test'])

    # Cars: first constructor argument 5
    src_cars = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/cars/car.data'
    cars = loadcars(src_cars)
    cars_parts = cars.sampledata({'train': 70, 'test': 15, 'cv': 15})
    model_cars = NearestNeighbor(5, cars_parts['train'], True)
    model_cars.predict(cars_parts['test'])

    # Mushroom: 65/15/20 split spec, first constructor argument 1
    src_mushroom = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/mushroom/agaricus-lepiota.data'
    mushroom = loadmushroom(src_mushroom)
    mushroom_parts = mushroom.sampledata({'train': 65, 'test': 15, 'cv': 20})
    model_mushroom = NearestNeighbor(1, mushroom_parts['train'], True)
    model_mushroom.predict(mushroom_parts['test'])

    # Voting: first constructor argument 5
    src_voting = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/voting/house-votes-84.data'
    voting = loadvoting(src_voting)
    voting_parts = voting.sampledata({'train': 70, 'test': 15, 'cv': 15})
    model_voting = NearestNeighbor(5, voting_parts['train'], True)
    model_voting.predict(voting_parts['test'])

    # Heart disease: loader gets an extra True flag; data is normalised
    src_heart = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/heart_disease/processed.cleveland.data'
    heart = loadheartdisease(src_heart, True)
    heart_scaled = heart.normalizeinput()
    heart_parts = heart_scaled.sampledata({'train': 70, 'test': 15, 'cv': 15})
    model_heart = NearestNeighbor(5, heart_parts['train'], True)
    model_heart.predict(heart_parts['test'])

    # Wine: normalised, first constructor argument 5
    src_wine = '/host/Users/vandana/Documents/Grad_Studies/Spring2012/Machine Learning/Projects/wine/wine.data'
    wine = loadwine(src_wine)
    wine_scaled = wine.normalizeinput()
    wine_parts = wine_scaled.sampledata({'train': 70, 'test': 15, 'cv': 15})
    model_wine = NearestNeighbor(5, wine_parts['train'], True)
    model_wine.predict(wine_parts['test'])