def cross_vad(examples, num_folds = 10): data = ut.dataCrossSplit(examples, num_folds, False) errorRates = [] for i in range(num_folds): egs = data[i] dt = DTree(SelectAtt) dt.training(egs[0], 1) # calculate error rate error = [0.] * 2 for j in range(2): for x in egs[j]: if dt.predict(x) != x[0]: error[j] = error[j] + 1 error[j] = error[j] / len(egs[j]) print "Fold ", i, " trainingData errorRate: ", error[0], " testData errorRate:", error[1] errorRates.append(error) arr = np.array(errorRates) print "Train Mean ErrorRate:", np.mean(arr[:,0]), " Test Mean ErrorRate:", np.mean(arr[:,1]) print "Train StdVar ErrorRate:", np.sqrt(np.var(arr[:,0])), " Test Mean ErrorRate:", np.sqrt(np.var(arr[:, 1])) if __name__ == '__main__': filename = sys.argv[1] egs = ut.importRawData(filename) egs = ut.preprocess(egs) cross_vad(egs) dt = DTree(SelectAtt) dt.training(egs) print "========= the decision tree ============" dt.printTree()
# NOTE(review): this chunk opens in the middle of a function whose header and
# enclosing loop(s) are above the visible source (it is presumably the
# cross_validation() called at the bottom - confirm).  The statements are
# unchanged; their nesting relative to the unseen header cannot be confirmed
# from here.
#
# For k = 1 .. len(crossData[i][1]), walk the examples stored under
# crossData[i][1][k]: totNum counts every example visited, totError counts
# those for which cl.classify(x) differs from k.
for k in range(1, len(crossData[i][1]) + 1):
    for x in crossData[i][1][k]:
        totNum = totNum + 1
        if cl.classify(x) != k:
            totError = totError + 1
# Adding 0. makes the numerator a float, so the division is not truncated
# when both counters are ints (Python 2 semantics).
return (totError + 0.) / totNum

# check input: exactly two arguments are required (data file, number of folds)
if len(sys.argv) != 3:
    print "input error"
    exit()
fileName = sys.argv[1]
# subDim is passed straight to cross_validation(); 3 is used for "Iris.csv"
# and 9 for any other file name.
if fileName == "Iris.csv":
    subDim = 3
else:
    subDim = 9
num_cross = int(sys.argv[2])
# import data
# since the data in the file are in order
# need to make it random to run cross validation
rawData = ut.makeDataRandom(ut.importRawData(fileName))
# NOTE(review): the next line rebinds rawData to a fresh import, so the value
# returned by ut.makeDataRandom() above is discarded.  Unless makeDataRandom
# has a side effect, or ut.dataCrossSplit shuffles on its own, the split below
# runs on data in file order - confirm which of the two assignments is intended.
rawData = ut.importRawData(fileName)
trainData = ut.dataCrossSplit(rawData, num_cross)
#print tmp[1]
print "Data: ", fileName
print "Error rate for cross_validation:", cross_validation(trainData, subDim)
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 20 22:55:53 2013

@author: jiecaoc
"""
import utilities as ut
import classes as cls

# Hold-out check of the decision tree on 'Ionosphere.csv': split the data
# into 2 folds, train on the first part of fold 0 and measure the error rate
# on its second part.
egs = ut.importRawData('Ionosphere.csv')
raw = ut.dataCrossSplit(egs, 2, False)
train_set, test_set = raw[0][0], raw[0][1]

dt = cls.DTree(cls.SelectAtt)
dt.training(train_set)

# An example is misclassified when its element 0 differs from the prediction.
c = 0
for eg in test_set:
    if eg[0] != dt.predict(eg):
        c = c + 1

# Divide by the number of examples actually evaluated.  Dividing by len(egs)
# (the whole data set) understated the error rate, because only the held-out
# part is ever classified.  "+ .0" forces float division under Python 2.
print (c + .0) / len(test_set)
# NOTE(review): this chunk opens in the middle of a function whose header is
# above the visible source (presumably the calculateW(phi, y) called below -
# confirm).  The statements are unchanged; their nesting relative to the
# unseen header cannot be confirmed from here.
#
# Start from a weight vector with one entry of 0.5 per column of phi.
w1 = ut.ls2Vec([0.5] * phi.shape[1])
delta = 1
# Repeat the update until the norm computed at the end of an iteration is
# no larger than 0.5.
while delta > 0.5:
    w0 = w1
    pi = Pi(w0, phi)
    R = calcR(pi)
    # tmp = phi^T R phi
    tmp = reduce(np.dot, [phi.transpose(), R, phi])
    # z = phi w0 - R^-1 (pi - y)
    z = np.dot(phi, w0) - np.dot(np.linalg.inv(R), (pi - y))
    # w1 = (phi^T R phi)^-1 phi^T R z
    # NOTE(review): this has the shape of the iteratively-reweighted
    # least-squares step for logistic regression, assuming Pi() returns the
    # predicted probabilities and calcR() the matching weighting matrix -
    # neither helper is visible here, confirm.
    w1 = reduce(np.dot, [np.linalg.inv(tmp), phi.transpose(), R, z])
    # delta = || phi^T (y - pi) ||, using the pi computed from w0 (the
    # weights before this iteration's update).
    delta = np.linalg.norm(np.dot(phi.transpose(), (y - pi)))
    print delta
return w1

# import data
data = ut.importRawData('Pima.csv')
# Only rows 1 .. k-1 of the file are used below; row 0 is skipped.
k = 100
# Targets: first field of each row minus 1.
# NOTE(review): presumably this maps labels {1, 2} to {0, 1} - confirm.
y = ut.ls2Vec(map(lambda x: x[0] - 1 , data[1:k]))
# Design matrix: the remaining fields of each row.
phi = np.array(map(lambda x: x[1:], data[1:k]))
#print phi
w = calculateW(phi, y)
#print calcR(Pi(w, phi))
#pi = Pi(w, phi)
#print np.linalg.det(np.dot(phi.transpose(), phi))
# w = reduce(np.dot, [np.linalg.inv(np.dot(phi.transpose(), phi)) , phi.transpose(), y])
print w
# For every row used in the fit, print the target next to sigma(w^T x).
for x in data[1:k]:
    print x[0] - 1, sigma( np.dot(w.transpose(), ut.ls2Vec(x[1:])))
#print np.dot(phi.transpose(), (y - pi))