Пример #1
0
 def test_linear(self):
     #Test that learning with linear kernel works correctly both
     #with low and high-dimensional data
     for X in [self.Xtrain1, self.Xtrain2]:
         for Y in [self.Ytrain1, self.Ytrain2]:
             #Basic case
             m = X.shape[0]
             qids, L = generate_qids(m)
             primal_rls = QueryRankRLS(X, Y, qids, regparam=1.0, bias=0.)
             W = primal_rls.predictor.W
             d = X.shape[1]
             W2 = np.linalg.solve(np.dot(X.T, np.dot(L, X)) + np.eye(d), np.dot(X.T, np.dot(L, Y)))
             assert_allclose(W, W2)
             #For RankRLS, bias should have no effect
             primal_rls = QueryRankRLS(X, Y, qids, regparam=1.0, bias=5.)
             W2 = primal_rls.predictor.W
             assert_allclose(W, W2)
             #Fast regularization
             primal_rls.solve(10)
             W = primal_rls.predictor.W
             W2 = np.linalg.solve(np.dot(X.T, np.dot(L, X)) + 10 * np.eye(d), np.dot(X.T, np.dot(L, Y)))
             assert_allclose(W, W2)
             #reduced set approximation
             primal_rls = QueryRankRLS(X, Y, qids, basis_vectors = X[self.bvectors], regparam=5.0)
             W = primal_rls.predictor.W
             K = np.dot(X, X.T)
             Kr = K[:, self.bvectors]
             Krr = K[np.ix_(self.bvectors, self.bvectors)]
             A = np.linalg.solve(np.dot(Kr.T, np.dot(L, Kr))+ 5.0 * Krr, np.dot(Kr.T, np.dot(L, Y)))
             W_reduced = np.dot(X[self.bvectors].T, A)
             #assert_allclose(W, W_reduced)
             #Pre-computed linear kernel, reduced set approximation
             dual_rls = QueryRankRLS(Kr, Y, qids, kernel="PrecomputedKernel", basis_vectors = Krr, regparam=5.0)
             W = np.dot(X[self.bvectors].T, dual_rls.predictor.W)
             assert_allclose(W, W_reduced)
Пример #2
0
def train_rls():
    #Select regparam with k-fold cross-validation,
    #where instances related to a single sentence form
    #together a fold
    X_train =  read_sparse("train_2000_x.txt")
    Y_train =  np.loadtxt("train_2000_y.txt")
    X_test =  read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test =  np.loadtxt("test_2000_y.txt")
    #list of sentence ids
    qids_train =  np.loadtxt("train_2000_qids.txt")
    qids_test = np.loadtxt("test_2000_qids.txt")
    learner = QueryRankRLS(X_train, Y_train, qids_train)
    P_test = learner.predict(X_test)
    folds = map_ids(qids_train)
    perfs = []
    for fold in folds:
        if np.var(Y_train[fold]) != 0:
            P = learner.holdout(fold)
            c = cindex(Y_train[fold], P)
            perfs.append(c)
    perf = np.mean(perfs)
    print("leave-query-out cross-validation cindex %f" %perf)
    partition = map_ids(qids_test)
    test_perfs = []
    #compute the ranking accuracy separately for each test query
    for query in partition:
        #skip such queries, where all instances have the same
        #score, since in this case cindex is undefined
        if np.var(Y_test[query]) != 0:
            perf = cindex(Y_test[query], P_test[query])
            test_perfs.append(perf)
    test_perf = np.mean(test_perfs)
    print("test cindex %f" %test_perf)
Пример #3
0
 def test_kernel(self):
     #tests that learning with kernels works
     for X in [self.Xtrain1, self.Xtrain2]:
         for Y in [self.Ytrain1, self.Ytrain2]:
             m = X.shape[0]
             qids, L = generate_qids(m)
             #Basic case
             dual_rls = QueryRankRLS(X, Y, qids, kernel= "GaussianKernel", regparam=5.0, gamma=0.01)
             kernel = GaussianKernel(X, gamma = 0.01)
             K = kernel.getKM(X)
             m = K.shape[0]
             A = dual_rls.predictor.A
             A2 = np.linalg.solve(np.dot(L, K) +5.0*np.eye(m), np.dot(L, Y) )
             assert_allclose(A, A2)
             #Fast regularization
             dual_rls.solve(1000)
             A = dual_rls.predictor.A
             A2 = np.linalg.solve(np.dot(L, K) + 1000 * np.eye(m), np.dot(L, Y))
             assert_allclose(A, A2)
             #Precomputed kernel
             dual_rls = QueryRankRLS(K, Y, qids, kernel="PrecomputedKernel", regparam = 1000)
             assert_allclose(dual_rls.predictor.W, A2)
             #Reduced set approximation
             kernel = PolynomialKernel(X[self.bvectors], gamma=0.5, coef0 = 1.2, degree = 2)              
             Kr = kernel.getKM(X)
             Krr = kernel.getKM(X[self.bvectors])
             dual_rls = QueryRankRLS(X, Y, qids, kernel="PolynomialKernel", basis_vectors = X[self.bvectors], regparam = 200, gamma=0.5, coef0=1.2, degree = 2)
             A = dual_rls.predictor.A
             A2 = np.linalg.solve(np.dot(Kr.T, np.dot(L, Kr))+ 200 * Krr, np.dot(Kr.T, np.dot(L, Y)))
             assert_allclose(A, A2)
             dual_rls = QueryRankRLS(Kr, Y, qids, kernel="PrecomputedKernel", basis_vectors = Krr, regparam=200)
             A = dual_rls.predictor.W
             assert_allclose(A, A2)
Пример #4
0
    def testLabelRankRLS(self):
        
        print("Testing the cross-validation routines of the QueryRankRLS module.\n")
        
        np.random.seed(100)
        floattype = np.float64
        
        m, n = 100, 400 #data, features
        Xtrain = np.mat(np.random.rand(m, n))
        K = Xtrain * Xtrain.T
        ylen = 1
        Y = np.mat(np.zeros((m, ylen), dtype=floattype))
        Y[:, 0] = np.sum(Xtrain, 1)
        
        
        labelcount = 5
        
        hoindices = range(labelcount)
        hocompl = list(set(range(m)) - set(hoindices))
        
        qidlist = [0 for i in range(100)]
        for h in range(5, 12):
            qidlist[h] = 1
        for h in range(12, 32):
            qidlist[h] = 2
        for h in range(32, 34):
            qidlist[h] = 3
        for h in range(34, 85):
            qidlist[h] = 4
        for h in range(85, 100):
            qidlist[h] = 5
        qidlist_cv = qidlist[5: len(qidlist)]
        
        objcount = max(qidlist) + 1
        P = np.mat(np.zeros((m, objcount), dtype=np.float64))
        for i in range(m):
            qid = qidlist[i]
            P[i, qid] = 1.
        labelcounts = np.sum(P, axis=0)
        P = np.divide(P, np.sqrt(labelcounts))
        D = np.mat(np.ones((1, m), dtype=np.float64))
        L = np.multiply(np.eye(m), D) - P * P.T
        
        Kcv = K[np.ix_(hocompl, hocompl)]
        Lcv = L[np.ix_(hocompl, hocompl)]
        
        Xcv = Xtrain[hocompl]
        Xtest = Xtrain[hoindices]
        Yho = Y[hocompl]
        
        rpool = {}
        rpool["X"] = Xtrain
        rpool["Y"] = Y
        rpool["qids"] = qidlist
        primalrls = QueryRankRLS(**rpool)        
        
        rpool = {}
        rpool["X"] = K
        rpool['kernel'] = 'PrecomputedKernel'
        rpool["Y"] = Y
        rpool["qids"] = qidlist        
        dualrls = QueryRankRLS(**rpool)
        
        rpool = {}
        rpool['X'] = Xcv
        rpool['Y'] = Yho
        rpool['qids'] = qidlist_cv
        primalrls_naive = QueryRankRLS(**rpool)

        rpool = {}
        rpool['X'] = Kcv
        rpool['kernel'] = 'PrecomputedKernel'        
        rpool['Y'] = Yho
        #rpool['X'] = Xcv
        rpool['qids'] = qidlist_cv
        dualrls_naive = QueryRankRLS(**rpool)
        
        testkm = K[np.ix_(hocompl, hoindices)]
        
        loglambdas = range(-5, 5)
        for j in range(0, len(loglambdas)):
            regparam = 2. ** loglambdas[j]
            print
            print("Regparam 2^%1d" % loglambdas[j])
            
            
            print(str(np.squeeze(np.array((testkm.T * la.inv(Lcv * Kcv + regparam * np.eye(Lcv.shape[0])) * Lcv * Yho).T))) + ' Dumb HO')
            
            predhos = []
            primalrls_naive.solve(regparam)
            predho = primalrls_naive.predictor.predict(Xtest)
            print(str(predho.T) + ' Naive HO (primal)')
            predhos.append(predho)
            
            dualrls_naive.solve(regparam)
            predho = dualrls_naive.predictor.predict(testkm.T)
            print(str(predho.T) + ' Naive HO (dual)')
            predhos.append(predho)
            
            primalrls.solve(regparam)
            predho = np.squeeze(primalrls.holdout(hoindices))
            print(str(predho.T) + ' Fast HO (primal)')
            predhos.append(predho)
            
            dualrls.solve(regparam)
            predho = np.squeeze(dualrls.holdout(hoindices))
            print(str(predho.T) + ' Fast HO (dual)')
            predhos.append(predho)
            
            predho0 = predhos.pop(0)
            for predho in predhos:
                self.assertEqual(predho0.shape, predho.shape)
                for row in range(predho.shape[0]):
                    #for col in range(predho.shape[1]):
                    #    self.assertAlmostEqual(predho0[row,col],predho[row,col], places=5)
                        self.assertAlmostEqual(predho0[row],predho[row], places=5)
Пример #5
0
 def test_holdout(self):
     for X in [self.Xtrain1, self.Xtrain2]:
         for Y in [self.Ytrain1, self.Ytrain2]:
             m = X.shape[0]
             qids, L = generate_qids(m)
             qids = np.array(qids)
             hoindices = np.where(qids == 1)[0]
             hocompl = list(set(range(m)) - set(hoindices))
             #Holdout with linear kernel
             rls1 = QueryRankRLS(X, Y, qids)
             rls2 = QueryRankRLS(X[hocompl], Y[hocompl], qids[hocompl])
             P1 = rls1.holdout(hoindices)
             P2 = rls2.predict(X[hoindices])
             assert_allclose(P1, P2)
             #Holdout with bias
             rls1 = QueryRankRLS(X, Y, qids, bias = 3.0)
             rls2 = QueryRankRLS(X[hocompl], Y[hocompl], qids[hocompl], bias = 3.0)
             P1 = rls1.holdout(hoindices)
             P2 = rls2.predict(X[hoindices])
             assert_allclose(P1, P2)
             #Fast regularization
             for i in range(-5, 5):
                 rls1.solve(2**i)
                 rls2.solve(2**i)
                 P1 = rls1.holdout(hoindices)
                 P2 = rls2.predict(X[hoindices])
                 assert_allclose(P1, P2)
             #Kernel holdout
             rls1 = QueryRankRLS(X, Y, qids, kernel = "GaussianKernel", gamma = 0.01)
             rls2 = QueryRankRLS(X[hocompl], Y[hocompl], qids[hocompl], kernel = "GaussianKernel", gamma = 0.01)
             P1 = rls1.holdout(hoindices)
             P2 = rls2.predict(X[hoindices])
             assert_allclose(P1, P2)
             for i in range(-15, 15):
                 rls1.solve(2**i)
                 rls2.solve(2**i)
                 P1 = rls1.holdout(hoindices)
                 P2 = rls2.predict(X[hoindices])
                 assert_allclose(P1, P2)
             #Incorrect indices
             I = [0, 3, 100]
             self.assertRaises(IndexError, rls1.holdout, I)
             I = [-1, 0, 2]
             self.assertRaises(IndexError, rls1.holdout, I)
             I = [1,1,2]
             self.assertRaises(IndexError, rls1.holdout, I)
             I = [0,4,8]
             self.assertRaises(IndexError, rls1.holdout, I)