Пример #1
0
def cross_vad(examples, num_folds=10):
    """Cross-validate a decision tree and print train/test error rates.

    ``examples`` is handed to ``ut.dataCrossSplit(examples, num_folds, False)``.
    For every fold ``i`` the result ``data[i]`` is indexed as a pair:
    element 0 is used to train a fresh ``DTree(SelectAtt)`` and is reported
    as "trainingData", element 1 is reported as "testData".  An example
    counts as an error when ``dt.predict(x)`` differs from ``x[0]``, i.e. the
    first field of each example is treated as its label.

    The per-fold rates are printed, followed by the mean and the standard
    deviation of the rates over all folds.  Nothing is returned.

    Raises ZeroDivisionError if one of the two subsets of a fold is empty.
    """
    data = ut.dataCrossSplit(examples, num_folds, False)
    errorRates = []
    for i in range(num_folds):
        egs = data[i]
        dt = DTree(SelectAtt)
        # NOTE(review): the meaning of the second argument (1) is defined by
        # DTree.training, which is not visible here -- confirm before changing.
        dt.training(egs[0], 1)
        # error[0] is the error rate on the training subset, error[1] the
        # error rate on the test subset.
        error = []
        for j in range(2):
            subset = egs[j]
            wrong = sum(1 for x in subset if dt.predict(x) != x[0])
            error.append(float(wrong) / len(subset))
        # Single-argument print(...) yields the same text as the former
        # comma-separated print statement and is also valid on Python 3.
        print("Fold  %s  trainingData errorRate:  %s  testData errorRate: %s"
              % (i, error[0], error[1]))
        errorRates.append(error)
    arr = np.array(errorRates)
    train, test = arr[:, 0], arr[:, 1]
    print("Train Mean ErrorRate: %s  Test Mean ErrorRate: %s"
          % (np.mean(train), np.mean(test)))
    # The second label used to read "Test Mean ErrorRate" although the value
    # printed is the standard deviation of the test error rates.
    print("Train StdVar ErrorRate: %s  Test StdVar ErrorRate: %s"
          % (np.std(train), np.std(test)))

if __name__ == '__main__':
    # The path of the data file is the first command-line argument; it is
    # imported and then run through ut.preprocess.
    egs = ut.preprocess(ut.importRawData(sys.argv[1]))

    # Print the cross-validated error rates (default number of folds).
    cross_vad(egs)

    # Train one more tree on the complete preprocessed data and display it.
    dt = DTree(SelectAtt)
    dt.training(egs)
    print("========= the decision tree ============")
    dt.printTree()
Пример #2
0
        for k in range(1, len(crossData[i][1]) + 1):
            for x in crossData[i][1][k]:
                totNum = totNum + 1
                if cl.classify(x) != k:
                    totError = totError + 1
    return (totError + 0.) / totNum
            

# Check the command line: exactly two arguments are expected, the data file
# name and the number of cross-validation folds.
if len(sys.argv) != 3:
    print("input error")
    # Exit with a non-zero status so callers can detect the usage error.
    sys.exit(1)
fileName = sys.argv[1]
# subDim is forwarded to cross_validation: 3 for "Iris.csv", 9 for any other
# file name.
subDim = 3 if fileName == "Iris.csv" else 9
num_cross = int(sys.argv[2])

# Import the data.  Since the data in the file are in order, they have to be
# randomized before being split for cross validation.  The randomized data
# used to be overwritten by a second, unshuffled ut.importRawData call; that
# call is removed so the split really works on the randomized rows.
# NOTE(review): assumes ut.makeDataRandom returns the shuffled data, as the
# original assignment of its result implies -- confirm in utilities.
rawData = ut.makeDataRandom(ut.importRawData(fileName))
trainData = ut.dataCrossSplit(rawData, num_cross)

# Single-argument print(...) keeps the output text of the former print
# statements and is also valid on Python 3.
print("Data:  %s" % fileName)
print("Error rate for cross_validation: %s"
      % (cross_validation(trainData, subDim),))
Пример #3
0
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 20 22:55:53 2013

@author: jiecaoc
"""

import utilities as ut
import classes as cls

# Quick check of the decision tree on the Ionosphere data: train on one part
# of a two-way split and print the error rate on the other part.
egs = ut.importRawData('Ionosphere.csv')

# Only the first fold of the split is used: raw[0][0] trains the tree and
# raw[0][1] is the held-out set the tree is evaluated on.
raw = ut.dataCrossSplit(egs, 2, False)
dt = cls.DTree(cls.SelectAtt)
dt.training(raw[0][0])

# An example is misclassified when the prediction differs from its first
# field, which holds the label.
heldOut = raw[0][1]
c = 0
for eg in heldOut:
    if eg[0] != dt.predict(eg):
        c = c + 1

# Divide by the number of evaluated examples.  The count used to be divided
# by len(egs), the size of the whole data set, which understated the rate.
print((c + .0) / len(heldOut))
Пример #4
0
    w1 = ut.ls2Vec([0.5] * phi.shape[1])
    delta = 1
    while delta > 0.5:
        w0 = w1
        pi = Pi(w0, phi)
        R = calcR(pi)
        tmp = reduce(np.dot, [phi.transpose(), R, phi])
        z = np.dot(phi, w0) - np.dot(np.linalg.inv(R), (pi - y))
        w1 = reduce(np.dot, [np.linalg.inv(tmp), phi.transpose(), R, z])
        delta = np.linalg.norm(np.dot(phi.transpose(), (y - pi)))
        print delta
    return w1
       
    
# Load the Pima data and fit the weight vector on rows 1 .. k-1.
# NOTE(review): row 0 is skipped by the slice below -- presumably a header
# row; confirm against ut.importRawData.
data = ut.importRawData('Pima.csv')
k = 100
rows = data[1:k]

# Targets: the first field of every row shifted down by one.
y = ut.ls2Vec([row[0] - 1 for row in rows])
# Design matrix: all remaining fields of every row.
phi = np.array([row[1:] for row in rows])

w = calculateW(phi, y)
print(w)

# For each row used in the fit, print the shifted first field next to the
# value of sigma applied to w^T x.
for x in rows:
    print("%s %s" % (x[0] - 1,
                     sigma(np.dot(w.transpose(), ut.ls2Vec(x[1:])))))