def test_kernel(self):
    """Verify that QueryRankRLS learning with kernels matches closed-form solutions.

    For every combination of the training inputs/labels stored on the fixture
    (Xtrain1/Xtrain2 x Ytrain1/Ytrain2), checks four configurations against
    dual solutions computed directly with numpy.linalg.solve:
    basic Gaussian kernel, fast re-regularization via solve(), a precomputed
    kernel matrix, and a reduced-set (basis vector) approximation.
    """
    #tests that learning with kernels works
    for X in [self.Xtrain1, self.Xtrain2]:
        for Y in [self.Ytrain1, self.Ytrain2]:
            m = X.shape[0]
            # generate_qids returns query ids plus the query-structured
            # Laplacian-like matrix L used in the RankRLS objective
            qids, L = generate_qids(m)
            #Basic case: dual coefficients must solve (L K + lambda I) A = L Y
            dual_rls = QueryRankRLS(X, Y, qids, kernel= "GaussianKernel", regparam=5.0, gamma=0.01)
            kernel = GaussianKernel(X, gamma = 0.01)
            K = kernel.getKM(X)
            m = K.shape[0]
            A = dual_rls.predictor.A
            A2 = np.linalg.solve(np.dot(L, K) +5.0*np.eye(m), np.dot(L, Y) )
            assert_allclose(A, A2)
            #Fast regularization: re-solving with a new regparam must match
            #a from-scratch solution with lambda = 1000
            dual_rls.solve(1000)
            A = dual_rls.predictor.A
            A2 = np.linalg.solve(np.dot(L, K) + 1000 * np.eye(m), np.dot(L, Y))
            assert_allclose(A, A2)
            #Precomputed kernel: passing K directly; note the predictor
            #exposes the coefficients as W in this configuration
            dual_rls = QueryRankRLS(K, Y, qids, kernel="PrecomputedKernel", regparam = 1000)
            assert_allclose(dual_rls.predictor.W, A2)
            #Reduced set approximation: Kr is (all x basis), Krr (basis x basis);
            #solution solves (Kr^T L Kr + lambda Krr) A = Kr^T L Y
            kernel = PolynomialKernel(X[self.bvectors], gamma=0.5, coef0 = 1.2, degree = 2)
            Kr = kernel.getKM(X)
            Krr = kernel.getKM(X[self.bvectors])
            dual_rls = QueryRankRLS(X, Y, qids, kernel="PolynomialKernel", basis_vectors = X[self.bvectors], regparam = 200, gamma=0.5, coef0=1.2, degree = 2)
            A = dual_rls.predictor.A
            A2 = np.linalg.solve(np.dot(Kr.T, np.dot(L, Kr))+ 200 * Krr, np.dot(Kr.T, np.dot(L, Y)))
            assert_allclose(A, A2)
            #Same reduced-set model built from precomputed kernel matrices
            dual_rls = QueryRankRLS(Kr, Y, qids, kernel="PrecomputedKernel", basis_vectors = Krr, regparam=200)
            A = dual_rls.predictor.W
            assert_allclose(A, A2)
def train_rls():
    """Kernel RLS on the Housing data using a random 100-vector reduced set.

    A Gaussian kernel is evaluated only against the sampled basis vectors,
    the model is trained with a precomputed kernel, and leave-one-out and
    test-set errors are printed together with a mean-predictor baseline.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    #select randomly 100 basis vectors
    sample_ids = random.sample(range(X_train.shape[0]), 100)
    basis_vectors = X_train[sample_ids]
    gk = GaussianKernel(basis_vectors, gamma=0.00003)
    train_km = gk.getKM(X_train)
    basis_km = gk.getKM(basis_vectors)
    test_km = gk.getKM(X_test)
    model = RLS(K_train := train_km, Y_train, basis_vectors=basis_km,
                kernel="PrecomputedKernel", regparam=0.0003)
    #Leave-one-out cross-validation predictions, this is fast due to
    #computational short-cut
    loo_preds = model.leave_one_out()
    #Test set predictions
    test_preds = model.predict(test_km)
    print("leave-one-out error %f" % sqerror(Y_train, loo_preds))
    print("test error %f" % sqerror(Y_test, test_preds))
    #Sanity check, can we do better than predicting mean of training labels?
    baseline = np.ones(Y_test.shape) * np.mean(Y_train)
    print("mean predictor %f" % sqerror(Y_test, baseline))
def train_rls():
    """Train kernel RLS on the Housing data with a precomputed Gaussian kernel.

    Prints the fast leave-one-out error, the test-set error, and the error
    of a mean-of-training-labels baseline predictor.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    gk = GaussianKernel(X_train, gamma=0.00003)
    train_km = gk.getKM(X_train)
    test_km = gk.getKM(X_test)
    model = RLS(train_km, Y_train, kernel="PrecomputedKernel", regparam=0.0003)
    #Leave-one-out cross-validation predictions, this is fast due to
    #computational short-cut
    loo_preds = model.leave_one_out()
    #Test set predictions
    test_preds = model.predict(test_km)
    print("leave-one-out error %f" % sqerror(Y_train, loo_preds))
    print("test error %f" % sqerror(Y_test, test_preds))
    #Sanity check, can we do better than predicting mean of training labels?
    baseline = np.ones(Y_test.shape) * np.mean(Y_train)
    print("mean predictor %f" % sqerror(Y_test, baseline))
def testPairwisePreferences(self):
    """Verify PPRankRLS trained on pairwise preferences against a direct solution.

    Builds 1000 random preference pairs ordered by random scores Y, trains
    PPRankRLS with a Gaussian kernel, and compares its test predictions with
    a closed-form dual solution built from a sparse pairwise incidence matrix.
    """
    m, n = 100, 300
    Xtrain = np.mat(np.random.rand(m, n))
    Xtest = np.mat(np.random.rand(5, n))
    for regparam in [0.00000001, 1, 100000000]:
        Y = np.mat(np.random.rand(m, 1))
        # each pair (a, b) means "a is preferred over b" according to Y
        pairs = []
        for i in range(1000):
            a = random.randint(0, m - 1)
            b = random.randint(0, m - 1)
            if Y[a] > Y[b]:
                pairs.append((a, b))
            else:
                pairs.append((b, a))
        pairs = np.array(pairs)
        rpool = {}
        rpool['X'] = Xtrain
        rpool["pairs_start_inds"] = pairs[:, 0]
        rpool["pairs_end_inds"] = pairs[:, 1]
        rpool['regparam'] = regparam
        rpool["bias"] = 1.0
        rpool["kernel"] = "GaussianKernel"
        ker = GaussianKernel(Xtrain, 1.0)
        trainkm = ker.getKM(Xtrain)
        rls = PPRankRLS(**rpool)
        model = rls.predictor
        P1 = model.predict(Xtest)
        Im = np.mat(np.identity(m))
        # incidence matrix: +1 at the preferred index, -1 at the other,
        # one row per preference pair
        vals = np.concatenate([
            np.ones((pairs.shape[0]), dtype=np.float64), -np.ones(
                (pairs.shape[0]), dtype=np.float64)
        ])
        row = np.concatenate(
            [np.arange(pairs.shape[0]), np.arange(pairs.shape[0])])
        col = np.concatenate([pairs[:, 0], pairs[:, 1]])
        coo = coo_matrix((vals, (row, col)),
                         shape=(pairs.shape[0], Xtrain.shape[0]))
        # L = D^T D, the pairwise "Laplacian" of the preference graph
        L = (coo.T * coo).todense()
        # direct dual solution: K_test (L K + lambda I)^{-1} D^T 1
        P2 = np.dot(
            ker.getKM(Xtest),
            np.mat((L * trainkm + regparam * Im).I * coo.T *
                   np.mat(np.ones((pairs.shape[0], 1)))))
        for i in range(P1.shape[0]):
            self.assertAlmostEqual(P1[i], P2[i, 0], places=3)
def main():
    """Run Kronecker RLS on the Davis data (setting 4 split) and print predictions.

    Precomputes Gaussian kernel matrices for both input domains, trains
    KronRLS on the training blocks, then predicts the full test grid and a
    hand-picked trio of (row, column) index pairs.
    """
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.setting4_split()
    row_kernel = GaussianKernel(X1_train, gamma=0.01)
    col_kernel = GaussianKernel(X2_train, gamma=10**-9)
    K1_train = row_kernel.getKM(X1_train)
    K2_train = col_kernel.getKM(X2_train)
    K1_test = row_kernel.getKM(X1_test)
    K2_test = col_kernel.getKM(X2_test)
    model = KronRLS(K1=K1_train, K2=K2_train, Y=Y_train, regparam=2**-5)
    predictor = model.predictor
    P = predictor.predict(K1_test, K2_test)
    print("Number of predictions: %d" %P.shape)
    print("three first predictions: " +str(P[:3]))
    #predict only selected (row, column) pairs of the test grid
    x1_ind = [0,1,2]
    x2_ind = [0,0,0]
    P2 = predictor.predict(K1_test, K2_test, x1_ind, x2_ind)
    print("three first predictions again: " +str(P2))
    print("Number of coefficients %d" %predictor.A.shape)
def test_sparse(self):
    """Verify that RLS learning works with scipy sparse data matrices.

    Exercises four configurations on CSC-converted inputs and compares each
    against a dense closed-form solution: linear kernel without bias, linear
    kernel with bias, a reduced-set approximation, and a Gaussian kernel.
    """
    #mix of linear and kernel learning testing that learning works also with
    #sparse data matrices
    for X in [self.Xtrain1, self.Xtrain2]:
        for Y in [self.Ytrain1, self.Ytrain2]:
            Xsp = csc_matrix(X)
            #linear kernel without bias: W solves (X^T X + lambda I) W = X^T Y
            primal_rls = RLS(Xsp, Y, regparam=2.0, bias=0.)
            W = primal_rls.predictor.W
            d = X.shape[1]
            W2 = np.linalg.solve(
                np.dot(X.T, X) + 2.0 * np.eye(d), np.dot(X.T, Y))
            assert_allclose(W, W2)
            #linear kernel with bias: bias=2 is modeled by appending a
            #constant sqrt(2) feature column
            primal_rls = RLS(Xsp, Y, regparam=1.0, bias=2.)
            O = np.sqrt(2.) * np.ones((X.shape[0], 1))
            X_new = np.hstack((X, O))
            W = primal_rls.predictor.W
            W2 = np.linalg.solve(
                np.dot(X_new.T, X_new) + np.eye(d + 1), np.dot(X_new.T, Y))
            b = primal_rls.predictor.b
            # last augmented weight corresponds to the (scaled) bias term
            b2 = W2[-1]
            W2 = W2[:-1]
            assert_allclose(W, W2)
            assert_allclose(b, np.sqrt(2) * b2)
            #reduced set approximation: solve in the span of the basis vectors
            primal_rls = RLS(Xsp, Y, basis_vectors=Xsp[self.bvectors],
                             regparam=5.0, bias=2.)
            W = primal_rls.predictor.W
            b = primal_rls.predictor.b
            K = np.dot(X_new, X_new.T)
            Kr = K[:, self.bvectors]
            Krr = K[np.ix_(self.bvectors, self.bvectors)]
            A = np.linalg.solve(
                np.dot(Kr.T, Kr) + 5.0 * Krr, np.dot(Kr.T, Y))
            W2 = np.dot(X_new[self.bvectors].T, A)
            b2 = W2[-1]
            W2 = W2[:-1]
            assert_allclose(W, W2)
            assert_allclose(b, np.sqrt(2) * b2)
            #Kernels: dual coefficients solve (K + lambda I) A = Y
            dual_rls = RLS(Xsp, Y, kernel="GaussianKernel", regparam=5.0,
                           gamma=0.01)
            kernel = GaussianKernel(X, gamma=0.01)
            K = kernel.getKM(X)
            m = K.shape[0]
            A = dual_rls.predictor.A
            A2 = np.linalg.solve(K + 5.0 * np.eye(m), Y)
            assert_allclose(A, A2)
def train_rls():
    """Fit RLS to the Housing data via a precomputed Gaussian kernel matrix.

    Reports leave-one-out error, test error, and the error of simply
    predicting the training-label mean.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    kern = GaussianKernel(X_train, gamma=0.00003)
    km_train, km_test = kern.getKM(X_train), kern.getKM(X_test)
    rls = RLS(km_train, Y_train, kernel="PrecomputedKernel", regparam=0.0003)
    #Leave-one-out cross-validation predictions, this is fast due to
    #computational short-cut
    p_loo = rls.leave_one_out()
    #Test set predictions
    p_test = rls.predict(km_test)
    for label, err in (("leave-one-out error %f", sqerror(Y_train, p_loo)),
                       ("test error %f", sqerror(Y_test, p_test))):
        print(label % err)
    #Sanity check, can we do better than predicting mean of training labels?
    mean_pred = np.ones(Y_test.shape) * np.mean(Y_train)
    print("mean predictor %f" % sqerror(Y_test, mean_pred))
def train_rls():
    """Reduced-set kernel RLS on the Housing data with 100 random basis vectors.

    Kernel evaluations are restricted to the sampled basis vectors; errors
    for leave-one-out, test set, and a mean-label baseline are printed.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    #select randomly 100 basis vectors
    chosen = random.sample(range(X_train.shape[0]), 100)
    bvecs = X_train[chosen]
    kern = GaussianKernel(bvecs, gamma=0.00003)
    km_train = kern.getKM(X_train)
    km_basis = kern.getKM(bvecs)
    km_test = kern.getKM(X_test)
    rls = RLS(km_train, Y_train, basis_vectors=km_basis,
              kernel="PrecomputedKernel", regparam=0.0003)
    #Leave-one-out cross-validation predictions, this is fast due to
    #computational short-cut
    p_loo = rls.leave_one_out()
    #Test set predictions
    p_test = rls.predict(km_test)
    print("leave-one-out error %f" % sqerror(Y_train, p_loo))
    print("test error %f" % sqerror(Y_test, p_test))
    #Sanity check, can we do better than predicting mean of training labels?
    print("mean predictor %f" % sqerror(Y_test, np.mean(Y_train) * np.ones(Y_test.shape)))
def testPairwisePreferences(self):
    """Check PPRankRLS pairwise-preference learning against a direct dual solution.

    Samples 1000 preference pairs ordered by random scores Y, trains
    PPRankRLS with a Gaussian kernel for several regparams, and asserts the
    predictions match a closed-form solution built from the sparse pairwise
    incidence matrix.
    """
    m, n = 100, 300
    Xtrain = np.mat(np.random.rand(m, n))
    Xtest = np.mat(np.random.rand(5, n))
    for regparam in [0.00000001, 1, 100000000]:
        Y = np.mat(np.random.rand(m, 1))
        # pair (a, b) encodes "a preferred over b" per the random scores Y
        pairs = []
        for i in range(1000):
            a = random.randint(0, m - 1)
            b = random.randint(0, m - 1)
            if Y[a] > Y[b]:
                pairs.append((a, b))
            else:
                pairs.append((b, a))
        pairs = np.array(pairs)
        rpool = {}
        rpool['X'] = Xtrain
        rpool["pairs_start_inds"] = pairs[:,0]
        rpool["pairs_end_inds"] = pairs[:,1]
        rpool['regparam'] = regparam
        rpool["bias"] = 1.0
        rpool["kernel"] = "GaussianKernel"
        ker = GaussianKernel(Xtrain, 1.0)
        trainkm = ker.getKM(Xtrain)
        rls = PPRankRLS(**rpool)
        model = rls.predictor
        P1 = model.predict(Xtest)
        Im = np.mat(np.identity(m))
        # sparse incidence matrix D: +1 for the preferred sample, -1 for the
        # other, one row per pair
        vals = np.concatenate([np.ones((pairs.shape[0]), dtype = np.float64),
                               -np.ones((pairs.shape[0]), dtype = np.float64)])
        row = np.concatenate([np.arange(pairs.shape[0]),
                              np.arange(pairs.shape[0])])
        col = np.concatenate([pairs[:, 0], pairs[:, 1]])
        coo = coo_matrix((vals, (row, col)),
                         shape = (pairs.shape[0], Xtrain.shape[0]))
        # L = D^T D
        L = (coo.T * coo).todense()
        # reference prediction: K_test (L K + lambda I)^{-1} D^T 1
        P2 = np.dot(ker.getKM(Xtest),
                    np.mat((L * trainkm + regparam * Im).I * coo.T *
                           np.mat(np.ones((pairs.shape[0], 1)))))
        for i in range(P1.shape[0]):
            self.assertAlmostEqual(P1[i], P2[i,0], places = 3)
def test_sparse(self):
    """Check that RLS accepts scipy sparse inputs and matches dense solutions.

    Runs linear (with and without bias), reduced-set, and Gaussian-kernel
    configurations on CSC matrices, comparing predictor coefficients to
    closed-form numpy.linalg.solve results.
    """
    #mix of linear and kernel learning testing that learning works also with
    #sparse data matrices
    for X in [self.Xtrain1, self.Xtrain2]:
        for Y in [self.Ytrain1, self.Ytrain2]:
            Xsp = csc_matrix(X)
            #linear kernel without bias: (X^T X + lambda I) W = X^T Y
            primal_rls = RLS(Xsp, Y, regparam=2.0, bias=0.)
            W = primal_rls.predictor.W
            d = X.shape[1]
            W2 = np.linalg.solve(np.dot(X.T, X) + 2.0 * np.eye(d),
                                 np.dot(X.T, Y))
            assert_allclose(W, W2)
            #linear kernel with bias: bias=2 corresponds to augmenting X
            #with a constant sqrt(2) column
            primal_rls = RLS(Xsp, Y, regparam=1.0, bias=2.)
            O = np.sqrt(2.) * np.ones((X.shape[0],1))
            X_new = np.hstack((X, O))
            W = primal_rls.predictor.W
            W2 = np.linalg.solve(np.dot(X_new.T, X_new) + np.eye(d+1),
                                 np.dot(X_new.T, Y))
            b = primal_rls.predictor.b
            # last augmented weight is the (scaled) bias
            b2 = W2[-1]
            W2 = W2[:-1]
            assert_allclose(W, W2)
            assert_allclose(b, np.sqrt(2) * b2)
            #reduced set approximation: coefficients restricted to the span
            #of the chosen basis vectors
            primal_rls = RLS(Xsp, Y, basis_vectors = Xsp[self.bvectors],
                             regparam=5.0, bias=2.)
            W = primal_rls.predictor.W
            b = primal_rls.predictor.b
            K = np.dot(X_new, X_new.T)
            Kr = K[:, self.bvectors]
            Krr = K[np.ix_(self.bvectors, self.bvectors)]
            A = np.linalg.solve(np.dot(Kr.T, Kr)+ 5.0 * Krr,
                                np.dot(Kr.T, Y))
            W2 = np.dot(X_new[self.bvectors].T, A)
            b2 = W2[-1]
            W2 = W2[:-1]
            assert_allclose(W, W2)
            assert_allclose(b, np.sqrt(2) * b2)
            #Kernels: dual coefficients solve (K + lambda I) A = Y
            dual_rls = RLS(Xsp, Y, kernel= "GaussianKernel", regparam=5.0,
                           gamma=0.01)
            kernel = GaussianKernel(X, gamma = 0.01)
            K = kernel.getKM(X)
            m = K.shape[0]
            A = dual_rls.predictor.A
            A2 = np.linalg.solve(K+5.0*np.eye(m), Y)
            assert_allclose(A, A2)
def testRLS(self):
    """Cross-validation check for RLS with a reduced-set (RsetKernel) model.

    Trains a "naive" hold-out model on the complement of three hold-out
    indices using the RsetKernel, then for a sweep of regularization
    parameters compares its predictions with a directly computed reduced-set
    approximation (Nystrom-style) dual solution.
    """
    print("\n\n\n\nTesting the cross-validation routines of the RLS module.\n\n")
    m, n = 100, 300
    Xtrain = random.rand(m, n)
    Y = mat(random.rand(m, 1))
    # indices of the basis vectors defining the reduced set
    basis_vectors = [0,3,7,8]
    #hoindices = [45, 50, 55]
    hoindices = [0, 1, 2]
    # training indices = everything except the hold-out set
    hocompl = list(set(range(m)) - set(hoindices))
    # base Gaussian kernel restricted to the basis vectors
    bk = GaussianKernel(**{'X':Xtrain[basis_vectors], 'gamma':0.001})
    rpool = {}
    rpool['X'] = Xtrain
    # full Gaussian kernel matrix over all training points
    bk2 = GaussianKernel(**{'X':Xtrain, 'gamma':0.001})
    K = np.mat(bk2.getKM(Xtrain))
    Yho = Y[hocompl]
    # NOTE(review): the next two rpool assignments are overwritten below and
    # appear to be leftovers from earlier experiments
    rpool = {}
    rpool['Y'] = Y
    rpool['X'] = Xtrain
    rpool['basis_vectors'] = Xtrain[basis_vectors]
    Xhocompl = Xtrain[hocompl]
    testX = Xtrain[hoindices]
    # actual learner configuration: reduced-set kernel over the hold-out
    # complement
    rpool = {}
    rpool['Y'] = Yho
    rpool['X'] = Xhocompl
    rpool["kernel"] = "RsetKernel"
    rpool["base_kernel"] = bk
    rpool["basis_features"] = Xtrain[basis_vectors]
    #rk = RsetKernel(**{'base_kernel':bk, 'basis_features':Xtrain[basis_vectors], 'X':Xhocompl})
    dualrls_naive = RLS(**rpool)
    rpool = {}
    rpool['Y'] = Yho
    rpool['X'] = Xhocompl
    # reduced-set approximation of the full kernel:
    # K[:, B] K[B, B]^{-1} K[B, :]
    rsaK = K[:, basis_vectors] * la.inv(K[ix_(basis_vectors, basis_vectors)]) * K[basis_vectors]
    rsaKho = rsaK[ix_(hocompl, hocompl)]
    rsa_testkm = rsaK[ix_(hocompl, hoindices)]
    loglambdas = range(-5, 5)
    for j in range(0, len(loglambdas)):
        regparam = 2. ** loglambdas[j]
        print("\nRegparam 2^%1d" % loglambdas[j])
        # direct ("dumb") hold-out prediction from the reduced-set kernel
        print((rsa_testkm.T * la.inv(rsaKho + regparam * eye(rsaKho.shape[0])) * Yho).T, 'Dumb HO (dual)')
        dumbho = np.squeeze(np.array(rsa_testkm.T * la.inv(rsaKho + regparam * eye(rsaKho.shape[0])) * Yho))
        # re-solve the naive learner with the current regparam and predict
        # the held-out points
        dualrls_naive.solve(regparam)
        predho1 = np.squeeze(dualrls_naive.predictor.predict(testX))
        print(predho1.T, 'Naive HO (dual)')
        #dualrls.solve(regparam)
        #predho2 = np.squeeze(dualrls.computeHO(hoindices))
        #print predho2.T, 'Fast HO (dual)'
        for predho in [dumbho, predho1]:#, predho2]:
            self.assertEqual(dumbho.shape, predho.shape)
            for row in range(predho.shape[0]):
                #for col in range(predho.shape[1]):
                #    self.assertAlmostEqual(dumbho[row,col],predho[row,col])
                self.assertAlmostEqual(dumbho[row],predho[row])
def random_data(size, n_features):
    """Generate a reproducible random Kronecker learning problem.

    Parameters
    ----------
    size : int
        Number of samples in each of the two domains.
    n_features : int
        Feature dimensionality of both domains.

    Returns
    -------
    tuple of numpy.ndarray
        (X1, X2, Y) where X1 and X2 have shape (size, n_features) and Y has
        shape (size**2,). The RNG is seeded so repeated calls with the same
        arguments return identical data.
    """
    np.random.seed(77)
    X1 = np.random.randn(size, n_features)
    X2 = np.random.randn(size, n_features)
    Y = np.random.randn(size**2)
    return X1, X2, Y


if __name__=="__main__":
    #trains Kronecker RLS for different sample sizes
    #comparing CPU time and verifying that the learned
    #dual coefficients are same for both methods
    regparam = 1.0
    for size in [10, 20, 40, 60, 80, 100, 500, 1000, 2000, 4000, 6000]:
        X1, X2, y = random_data(size, 100)
        kernel1 = GaussianKernel(X1, gamma=0.01)
        K1 = kernel1.getKM(X1)
        kernel2 = GaussianKernel(X2, gamma=0.01)
        K2 = kernel2.getKM(X2)
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # recommended high-resolution replacement
        start = time.perf_counter()
        rls = KronRLS(K1=K1, K2=K2, Y=y, regparam=regparam)
        dur = time.perf_counter() - start
        print("RLScore pairs: %d, CPU time: %f" %(size**2, dur))
        #forming full Kronecker product kernel matrix becomes fast
        #unfeasible
        if size <=100:
            K = np.kron(K2, K1)
            start = time.perf_counter()
            ridge = KernelRidge(alpha=regparam, kernel="precomputed")
            ridge.fit(K, y)
            dur = time.perf_counter() - start
            print("sklearn pairs: %d, CPU time: %f" %(size**2, dur))