def train(self, X, Y):
    """Fit the pairwise classifier that separates label ``self.i`` from ``self.j``.

    Only the samples whose label equals ``self.i`` or ``self.j`` are used.
    Their labels are remapped to -1 (for ``self.i``) and +1 (for ``self.j``)
    before being handed to the SMO trainer.

    Args:
        X: Sample array, indexable by a boolean mask along its first axis.
        Y: Label array aligned with ``X``.

    Side effects:
        Sets ``self.Y`` (remapped labels), overwrites ``self.G`` with the
        rows of the stored Gram matrix that belong to the selected samples,
        and sets ``self.a`` / ``self.b`` to the values returned by the
        trainer.
    """
    # Select the samples that belong to either of the two classes.
    ijdx = np.logical_or(Y == self.i, Y == self.j)
    x = X[ijdx]
    # Assuming NumPy arrays: boolean-mask indexing yields a new array, so the
    # relabelling below does not touch the caller's Y.
    y = Y[ijdx]

    # Project y onto {-1, +1}. Both masks are computed BEFORE any write:
    # writing -1 first and testing for self.j afterwards would re-match the
    # freshly written entries whenever self.j == -1 and flip every label to +1.
    is_i = y == self.i
    is_j = y == self.j
    y[is_i] = -1.0
    y[is_j] = 1.0
    self.Y = y

    # Keep the selected rows of the stored Gram matrix on the instance, then
    # take the matching columns to obtain the square sub-matrix for training.
    # NOTE(review): self.G is replaced by its row subset, so a second call on
    # the same instance would index an already-reduced matrix -- confirm that
    # train() is only ever called once per instance.
    self.G = self.G[ijdx]
    G = self.G.T[ijdx].T

    # Train SMO. The bare name `train` is looked up at module level (a method
    # body does not see class attributes as bare names), so this is not a
    # recursive call.
    self.a, self.b = train(x, y, G, C=1e-5, tol=1e-2, eps=1e-3)
def proc(c_code):
    """Train one SVM for a single code column and evaluate it.

    Args:
        c_code: A ``(c, code)`` pair. ``c`` is passed through unchanged to
            the result; ``code`` is indexed by ``int(label)`` to translate
            each original label into its coded value.

    Returns:
        A tuple ``(c, h_train, h_test)`` where ``h_train`` and ``h_test``
        are the outputs of ``hypothesis`` on ``train_data`` and
        ``test_data`` respectively.
    """
    # Unpack in the body instead of in the signature: tuple parameters are a
    # syntax error on Python 3, while this form accepts the same single
    # positional argument on both Python 2 and 3.
    c, code = c_code
    # Same output as the former `print c,code` statement on either version.
    print("%s %s" % (c, code))

    # Create the code-mapped label vector (same shape/dtype as train_labels).
    coded_labels = train_labels.copy()
    for i, label in enumerate(train_labels):
        coded_labels[i] = code[int(label)]

    # SVM for this code
    w, b = train(train_data, coded_labels, C=0.00001, tol=0.00001, eps=1e-2)

    # Hypothesis on both splits
    h_train = hypothesis(train_data, w, b)
    h_test = hypothesis(test_data, w, b)
    return c, h_train, h_test
data_file = "../data/spambase/spambase.data"

# Parse the comma-separated data file into a list of float rows. The `with`
# block guarantees the file handle is closed even if a row fails to parse.
dmat = []
with open(data_file, "r") as f:
    for line in f:
        line = line.strip()
        if not line:
            # A blank (e.g. trailing) line would otherwise crash float('').
            continue
        x = [float(e) for e in line.split(',')]
        dmat.append(x)
data = np.array(dmat)

# k-folds xvalidation
k = 10
kfolder = KFolder(data, k, standard=True, shuffle=True)
# Only k-1 iterations: each pass uses fold i and fold i+1.
for i in range(k - 1):
    print("Fold: %s" % (i + 1))

    # Data and labels used for training in this pass.
    # NOTE(review): this takes kfolder.testing(i+1), i.e. a single fold,
    # as the training set -- confirm this is intended.
    X, Y = kfolder.testing(i + 1)

    # Get the testing data
    Xi, Yi = kfolder.testing(i)
    Yi[Yi == 0] = -1.0  # relabel 0 -> -1

    # Train
    Y[Y == 0] = -1.0  # relabel 0 -> -1
    G, Gi = gram(X), tgram(X, Xi)
    a, b = train(X, Y.ravel(), G, C=1e-4, tol=1e-4, eps=1e-3)

    # Test. The one-element tuple keeps %-formatting correct even if test()
    # returns a tuple; the output matches the former print statements.
    print("Training accuracy: %s" % (test(Y, Y, G, a, b),))
    print("Testing accuracy: %s" % (test(Y, Yi, Gi, a, b),))