def greedyRLS(XPath, yPath, metaPath, fcount=5, scount=50, resultPath=None):
    """Pick a regularization parameter for greedy RLS by cross-validation, then refit.

    The data read from ``XPath``/``yPath`` is split into a training part and a
    hidden part.  For every ``logrp`` in ``range(15, 26)`` a ``fcount``-fold
    cross-validation is run on the training part with ``regparam = 2**logrp``;
    the per-fold ``cb.perfs`` sequences are averaged and the ``logrp`` with the
    highest averaged final entry wins.  A final learner is then trained on the
    whole training part and monitored on the hidden part.

    Parameters:
        XPath, yPath: passed to ``readAuto`` to load features and labels.
        metaPath: optional metadata location; skipped when ``None``.
        fcount: number of cross-validation folds.
        scount: ``subsetsize`` handed to every GreedyRLS learner.
        resultPath: currently unused (result saving is disabled).

    Returns:
        ``(model, perfs, selected, best_logrp, best_scount)`` where ``perfs``
        is ``cb.perfs`` of the final run on the hidden set.
    """
    X, Y = readAuto(XPath, yPath)
    meta = {}
    if metaPath is not None:
        print("Loading metadata from %s" % (metaPath,))
        meta = result.getMeta(metaPath)
    X_train, X_hidden, Y_train, Y_hidden = hidden.split(X, Y, meta=meta)

    logrps = range(15, 26)
    print("Training RLS")
    best_perf = -1
    best_logrp = None
    best_scount = None
    for logrp in logrps:
        kf = KFold(len(Y_train), n_folds=fcount, indices=True, shuffle=True,
                   random_state=77)
        # Collected across ALL folds of this logrp; resetting it per fold would
        # make the mean below reflect only the last fold.
        fold_perfs = []
        # The fold counter restarts for each logrp so the "i/fcount" banner
        # stays within range.
        for fold_no, (train, test) in enumerate(kf, 1):
            print("------------ Processing fold %s/%s ------------"
                  % (fold_no, fcount))
            cb = CallbackFunction(X_train[test], Y_train[test])
            rls = GreedyRLS.createLearner(
                train_features=X_train[train],
                train_labels=Y_train[train],
                subsetsize=scount,
                regparam=2.**logrp,
                bias=1,
                callback_obj=cb)
            rls.train()
            fold_perfs.append(cb.perfs)
            print("---------------------------------------------------")
        perfs = np.mean(fold_perfs, axis=0)
        # Model selection uses the last entry of the averaged curve, while the
        # reported feature count is the position of its maximum.
        # NOTE(review): confirm that mixing the two criteria is intended.
        perf = perfs[-1]
        sc = np.argmax(perfs) + 1
        print("%f AUC, %d logrp, %d selected" % (perf, logrp, sc))
        if perf > best_perf:
            best_perf = perf
            best_logrp = logrp
            best_scount = sc

    # Final fit on the full training part, monitored on the hidden part.
    # The subset size is still ``scount``; ``best_scount`` is only reported.
    cb = CallbackFunction(X_hidden, Y_hidden)
    rls = GreedyRLS.createLearner(
        train_features=X_train,
        train_labels=Y_train,
        subsetsize=scount,
        regparam=2.**best_logrp,
        bias=1,
        callback_obj=cb)
    rls.train()
    perfs = cb.perfs
    selected = rls.selected
    model = rls.getModel()
    return model, perfs, selected, best_logrp, best_scount
def test_compare(self):
    """Compare GreedyRLS against plain RLS trained on the selected columns.

    For every combination of the two training feature sets and the two label
    sets, the weights GreedyRLS assigns to its selected features must match an
    RLS model trained on just those columns, both without and with a bias.
    """
    feature_sets = (self.Xtrain1, self.Xtrain2)
    label_sets = (self.Ytrain1, self.Ytrain2)
    for X in feature_sets:
        every_column = set(range(X.shape[1]))
        for Y in label_sets:
            # Without bias: unselected weights and the bias term must be zero.
            greedy = GreedyRLS(X, Y, subsetsize=10, regparam=12, bias=0.)
            chosen = greedy.selected
            unchosen = list(every_column.difference(chosen))
            reference = RLS(X[:, chosen], Y, regparam=12., bias=0.)
            assert_allclose(greedy.predictor.W[chosen], reference.predictor.W)
            assert_array_equal(greedy.predictor.W[unchosen], 0)
            assert_array_equal(greedy.predictor.b, 0)

            # With bias: weights and bias terms of both models must agree.
            greedy = GreedyRLS(X, Y, subsetsize=10, regparam=12, bias=2.)
            chosen = greedy.selected
            reference = RLS(X[:, chosen], Y, regparam=12., bias=2.)
            assert_allclose(greedy.predictor.W[chosen], reference.predictor.W)
            assert_allclose(greedy.predictor.b, reference.predictor.b)
def train_rls():
    """Fit GreedyRLS on the housing data with a test-set callback and report.

    Requests 13 features, passes a ``Callback`` built from the test split,
    then prints the squared error on the test split and the selected indices.
    """
    train_x, train_y, test_x, test_y = load_housing()
    monitor = Callback(test_x, test_y)
    model = GreedyRLS(train_x, train_y, 13, callbackfun=monitor)
    # Evaluate the finished learner on the test split.
    test_predictions = model.predict(test_x)
    test_error = sqerror(test_y, test_predictions)
    print("test error %f" % test_error)
    print("Selected features " + str(model.selected))
def train_rls():
    """Fit GreedyRLS on the housing data, selecting 5 features, and report.

    Prints the squared error on the test split and the selected indices.
    """
    train_x, train_y, test_x, test_y = load_housing()
    # Third positional argument is the number of features to select.
    model = GreedyRLS(train_x, train_y, 5)
    test_error = sqerror(test_y, model.predict(test_x))
    print("test error %f" % test_error)
    print("Selected features " + str(model.selected))
def train_rls():
    """Fit GreedyRLS on the housing data with a test-set callback and report.

    Requests 13 features, passes a ``Callback`` built from the test split,
    then prints the squared error on the test split and the selected indices.
    """
    train_x, train_y, test_x, test_y = load_housing()
    monitor = Callback(test_x, test_y)
    model = GreedyRLS(train_x, train_y, 13, callbackfun=monitor)
    # Evaluate the finished learner on the test split.
    test_predictions = model.predict(test_x)
    test_error = sqerror(test_y, test_predictions)
    print("test error %f" % test_error)
    print("Selected features " + str(model.selected))
def train_rls():
    """Fit GreedyRLS (5 features) on the housing data and print its coefficients.

    Shows that predictions can be made either through the learner or through
    the predictor object taken from it, then prints the weight vector length,
    the weights and the bias term.
    """
    train_x, train_y, test_x, _ = load_housing()
    model = GreedyRLS(train_x, train_y, 5)
    # Predictions made directly through the learner...
    learner_predictions = model.predict(test_x)
    # ...and the same call through the predictor detached from the learner.
    linear_model = model.predictor
    predictor_predictions = linear_model.predict(test_x)
    # Coefficients of the predictor.
    weights, intercept = linear_model.W, linear_model.b
    print("number of coefficients %d" % len(weights))
    print("w-coefficients " + str(weights))
    print("bias term %f" % intercept)
def speedtest():
    """Run GreedyRLS on a random 3000 x 3000 problem, selecting 5 features.

    Nothing is timed or asserted here; the function builds random features and
    two-column labels, trains the learner and prints ``grls.selected``,
    ``grls.A[grls.selected]`` and ``grls.b``.
    """
    tsize, fsize = 3000, 3000
    desiredfcount = 5
    Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
    bias = 2.
    rp = 1.
    ylen = 2
    Y = mat(random.rand(tsize, ylen), dtype=float64)

    class TestCallback(object):
        """Minimal callback object that reports each invocation."""

        def callback(self, learner):
            print('round')

        def finished(self, learner):
            pass

    rpool = {}
    # NOTE(review): other callers of the GreedyRLS constructor in this code
    # base pass the callback as ``callbackfun=``; confirm that the 'callback'
    # key is actually consumed.
    rpool['callback'] = TestCallback()
    rpool['X'] = Xtrain.T
    rpool['Y'] = Y
    # Passed as an int, like every other direct GreedyRLS(...) call; a string
    # cannot be compared against the feature count.
    rpool['subsetsize'] = desiredfcount
    rpool['regparam'] = rp
    rpool['bias'] = bias
    grls = GreedyRLS(**rpool)
    print(grls.selected)
    print(grls.A[grls.selected])
    print(grls.b)
def train_rls():
    """Fit GreedyRLS (5 features) on the housing data and print its coefficients.

    Shows that predictions can be made either through the learner or through
    the predictor object taken from it, then prints the weight vector length,
    the weights and the bias term.
    """
    train_x, train_y, test_x, _ = load_housing()
    model = GreedyRLS(train_x, train_y, 5)
    # Predictions made directly through the learner...
    learner_predictions = model.predict(test_x)
    # ...and the same call through the predictor detached from the learner.
    linear_model = model.predictor
    predictor_predictions = linear_model.predict(test_x)
    # Coefficients of the predictor.
    weights, intercept = linear_model.W, linear_model.b
    print("number of coefficients %d" % len(weights))
    print("w-coefficients " + str(weights))
    print("bias term %f" % intercept)
def speedtest():
    """Run GreedyRLS on a random 3000 x 3000 problem, selecting 5 features.

    Nothing is timed or asserted here; the function builds random features and
    two-column labels, trains through ``GreedyRLS.createLearner`` / ``train``
    and prints ``grls.selected``, ``grls.A[grls.selected]`` and ``grls.b``.
    """
    tsize, fsize = 3000, 3000
    desiredfcount = 5
    Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
    bias = 2.
    rp = 1.
    ylen = 2
    Y = mat(random.rand(tsize, ylen), dtype=float64)

    class TestCallback(CF):
        """Callback that reports each invocation."""

        def callback(self, learner):
            print('round')

    rpool = {}
    rpool['callback'] = TestCallback()
    rpool['train_features'] = Xtrain.T
    rpool['train_labels'] = Y
    rpool['subsetsize'] = str(desiredfcount)
    rpool['regparam'] = rp
    rpool['bias'] = bias
    grls = GreedyRLS.createLearner(**rpool)
    grls.train()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(grls.selected)
    print(grls.A[grls.selected])
    print(grls.b)
def core_greedyrls(X, y, regparam, scount):
    """Run GreedyRLS with ``bias=0.`` and return the indices it selected.

    ``scount`` is the number of features requested and ``regparam`` the
    regularization parameter; a fresh ``Callback()`` is attached to the run.
    """
    model = GreedyRLS(X, y, scount, regparam=regparam,
                      callbackfun=Callback(), bias=0.)
    return model.selected
def train_rls():
    """Run GreedyRLS feature selection on MNIST loaded from ``./data``.

    Labels are mapped with ``ova`` before training.  The learner is asked for
    50 features and is given a ``Callback`` built from the test split; the
    selected indices are printed at the end.
    """
    loader = MNIST("./data")
    raw_train_x, raw_train_y = loader.load_training()
    raw_test_x, raw_test_y = loader.load_testing()
    train_x = np.array(raw_train_x)
    test_x = np.array(raw_test_x)
    # One-vs-all mapping of the labels.
    train_y = ova(raw_train_y)
    test_y = ova(raw_test_y)
    # Train greedy RLS, selecting 50 features.
    monitor = Callback(test_x, test_y)
    model = GreedyRLS(train_x, train_y, 50, callbackfun=monitor)
    print("Selected features " + str(model.selected))
def speedtest():
    """Run GreedyRLS on a random 3000 x 3000 problem, selecting 5 features.

    Nothing is timed or asserted here; the function builds random features and
    two-column labels, trains through ``GreedyRLS.createLearner`` / ``train``
    and prints ``grls.selected``, ``grls.A[grls.selected]`` and ``grls.b``.
    """
    tsize, fsize = 3000, 3000
    desiredfcount = 5
    Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
    bias = 2.
    rp = 1.
    ylen = 2
    Y = mat(random.rand(tsize, ylen), dtype=float64)

    class TestCallback(CF):
        """Callback that reports each invocation."""

        def callback(self, learner):
            print('round')

    rpool = {}
    rpool['callback'] = TestCallback()
    rpool['train_features'] = Xtrain.T
    rpool['train_labels'] = Y
    rpool['subsetsize'] = str(desiredfcount)
    rpool['regparam'] = rp
    rpool['bias'] = bias
    grls = GreedyRLS.createLearner(**rpool)
    grls.train()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(grls.selected)
    print(grls.A[grls.selected])
    print(grls.b)
def testRLS(self):
    """Check GreedyRLS against a brute-force greedy selection.

    The reference repeatedly adds the candidate feature whose explicit
    hold-one-out squared error is smallest (a constant row scaled by
    ``sqrt(bias)`` is always part of the active set), then solves for the
    coefficients ``A`` on the final set.  GreedyRLS must select the same
    features and reproduce the coefficients and the bias.
    """
    print("\n\n\n\nTesting the correctness of the GreedyRLS module.\n\n")
    n_examples, n_features = 10, 30
    n_to_select = 5
    Xtrain = mat(random.rand(n_features, n_examples), dtype=float64)
    bias = 2.
    # The constant row is appended as row index ``n_features``.
    constant_row = sqrt(bias) * mat(ones((1, Xtrain.shape[1]), dtype=float64))
    Xtrain_biased = vstack([Xtrain, constant_row])
    n_outputs = 2
    Y = mat(random.rand(n_examples, n_outputs), dtype=float64)
    rp = 1.
    holdout_reg = rp * mat(eye(n_examples - 1))

    def holdout_error(kernel):
        # Sum over examples of the mean squared error obtained when that
        # example is left out of the regularized solve.
        total = 0.
        for held in range(n_examples):
            rest = list(range(0, held)) + list(range(held + 1, n_examples))
            cut = kernel[ix_(rest, rest)]
            cross = kernel[ix_([held], rest)]
            pred = cross * la.inv(cut + holdout_reg) * Y[rest]
            diff = pred - Y[held]
            total += mean(multiply(diff, diff))
        return total

    selected = []
    for _ in range(n_to_select):
        active_rows = selected + [n_features]
        best_err = 9999999999.
        for cand in range(n_features):
            if cand in active_rows:
                continue
            rows = Xtrain_biased[active_rows + [cand]]
            err = holdout_error(rows.T * rows)
            if err < best_err:
                best_idx, best_err = cand, err
            print('Tester ', cand, err)
        selected.append(best_idx)
        print('Tester ', selected)

    # Reference coefficients on the final active set.
    final_rows = Xtrain_biased[selected + [n_features]]
    G = la.inv(final_rows.T * final_rows + rp * mat(eye(n_examples)))
    A = final_rows * G * Y
    print('Tester ', A)

    class TestCallback(object):
        def callback(self, learner):
            print('GreedyRLS', learner.looperf.T)

        def finished(self, learner):
            pass

    # NOTE(review): other callers of the GreedyRLS constructor in this code
    # base pass the callback as ``callbackfun=``; confirm that the 'callback'
    # key is actually consumed.
    rpool = {
        'callback': TestCallback(),
        'X': Xtrain.T,
        'Y': Y,
        'subsetsize': n_to_select,
        'regparam': rp,
        'bias': bias,
    }
    grls = GreedyRLS(**rpool)
    assert_array_equal(selected, grls.selected)
    assert_allclose(A[:-1], grls.A[selected])
    assert_allclose(np.sqrt(bias) * A[-1], grls.b)
def testRLS(self):
    """Print a brute-force greedy selection next to the GreedyRLS result.

    The reference repeatedly adds the candidate feature whose explicit
    hold-one-out squared error is smallest (a constant row scaled by
    ``sqrt(bias)`` is always part of the active set), then solves for the
    coefficients ``A`` on the final set.  GreedyRLS is then trained through
    ``createLearner`` / ``train`` on the same data.

    NOTE(review): this method only prints both results; nothing is asserted,
    so agreement has to be checked by reading the output.
    """
    print("\n\n\n\nTesting the correctness of the GreedyRLS module.\n\n")
    tsize, fsize = 10, 30
    desiredfcount = 5
    Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
    bias = 2.
    # The constant row is appended as row index ``fsize``.
    bias_slice = sqrt(bias) * mat(ones((1, Xtrain.shape[1]), dtype=float64))
    Xtrain_biased = vstack([Xtrain, bias_slice])
    ylen = 2
    Y = mat(random.rand(tsize, ylen), dtype=float64)
    selected = []
    rp = 1.
    currentfcount = 0
    while currentfcount < desiredfcount:
        selected_plus_bias = selected + [fsize]
        bestlooperf = 9999999999.
        for ci in range(fsize):
            if ci in selected_plus_bias:
                continue
            candidate_rows = Xtrain_biased[selected_plus_bias + [ci]]
            updK = candidate_rows.T * candidate_rows
            looperf = 0.
            for hi in range(tsize):
                # list(...) is required: range objects cannot be added on
                # Python 3.
                hoinds = list(range(0, hi)) + list(range(hi + 1, tsize))
                updcutK = updK[ix_(hoinds, hoinds)]
                updcrossK = updK[ix_([hi], hoinds)]
                loopred = updcrossK * la.inv(
                    updcutK + rp * mat(eye(tsize - 1))) * Y[hoinds]
                looperf += mean(
                    multiply((loopred - Y[hi]), (loopred - Y[hi])))
            if looperf < bestlooperf:
                bestcind = ci
                bestlooperf = looperf
            # %-formatted single-argument print(...) yields the same text on
            # Python 2 and Python 3.
            print('Tester  %s %s' % (ci, looperf))
        selected.append(bestcind)
        print('Tester  %s' % (selected,))
        currentfcount += 1

    # Reference coefficients on the final active set.
    selected_plus_bias = selected + [fsize]
    K = Xtrain_biased[selected_plus_bias].T * Xtrain_biased[selected_plus_bias]
    G = la.inv(K + rp * mat(eye(tsize)))
    A = Xtrain_biased[selected_plus_bias] * G * Y
    print('Tester  %s' % (A,))

    class TestCallback(CF):
        def callback(self, learner):
            print('GreedyRLS %s' % (learner.looperf.T,))

    rpool = {}
    rpool['callback'] = TestCallback()
    rpool['train_features'] = Xtrain.T
    rpool['train_labels'] = Y
    rpool['subsetsize'] = str(desiredfcount)
    rpool['regparam'] = rp
    rpool['bias'] = bias
    grls = GreedyRLS.createLearner(**rpool)
    grls.train()
    print(grls.selected)
    print(grls.A[grls.selected])
    print(grls.b)
def testRLS(self):
    """Print a brute-force greedy selection next to the GreedyRLS result.

    The reference repeatedly adds the candidate feature whose explicit
    hold-one-out squared error is smallest (a constant row scaled by
    ``sqrt(bias)`` is always part of the active set), then solves for the
    coefficients ``A`` on the final set.  GreedyRLS is then trained through
    ``createLearner`` / ``train`` on the same data.

    NOTE(review): this method only prints both results; nothing is asserted,
    so agreement has to be checked by reading the output.
    """
    print("\n\n\n\nTesting the correctness of the GreedyRLS module.\n\n")
    tsize, fsize = 10, 30
    desiredfcount = 5
    Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
    bias = 2.
    # The constant row is appended as row index ``fsize``.
    bias_slice = sqrt(bias) * mat(ones((1, Xtrain.shape[1]), dtype=float64))
    Xtrain_biased = vstack([Xtrain, bias_slice])
    ylen = 2
    Y = mat(random.rand(tsize, ylen), dtype=float64)
    selected = []
    rp = 1.
    currentfcount = 0
    while currentfcount < desiredfcount:
        selected_plus_bias = selected + [fsize]
        bestlooperf = 9999999999.
        for ci in range(fsize):
            if ci in selected_plus_bias:
                continue
            candidate_rows = Xtrain_biased[selected_plus_bias + [ci]]
            updK = candidate_rows.T * candidate_rows
            looperf = 0.
            for hi in range(tsize):
                # list(...) is required: range objects cannot be added on
                # Python 3.
                hoinds = list(range(0, hi)) + list(range(hi + 1, tsize))
                updcutK = updK[ix_(hoinds, hoinds)]
                updcrossK = updK[ix_([hi], hoinds)]
                loopred = updcrossK * la.inv(
                    updcutK + rp * mat(eye(tsize - 1))) * Y[hoinds]
                looperf += mean(
                    multiply((loopred - Y[hi]), (loopred - Y[hi])))
            if looperf < bestlooperf:
                bestcind = ci
                bestlooperf = looperf
            # %-formatted single-argument print(...) yields the same text on
            # Python 2 and Python 3.
            print('Tester  %s %s' % (ci, looperf))
        selected.append(bestcind)
        print('Tester  %s' % (selected,))
        currentfcount += 1

    # Reference coefficients on the final active set.
    selected_plus_bias = selected + [fsize]
    K = Xtrain_biased[selected_plus_bias].T * Xtrain_biased[selected_plus_bias]
    G = la.inv(K + rp * mat(eye(tsize)))
    A = Xtrain_biased[selected_plus_bias] * G * Y
    print('Tester  %s' % (A,))

    class TestCallback(CF):
        def callback(self, learner):
            print('GreedyRLS %s' % (learner.looperf.T,))

    rpool = {}
    rpool['callback'] = TestCallback()
    rpool['train_features'] = Xtrain.T
    rpool['train_labels'] = Y
    rpool['subsetsize'] = str(desiredfcount)
    rpool['regparam'] = rp
    rpool['bias'] = bias
    grls = GreedyRLS.createLearner(**rpool)
    grls.train()
    print(grls.selected)
    print(grls.A[grls.selected])
    print(grls.b)