Example #1
# Imports assumed by the snippets on this page (added here for completeness);
# CF refers to the project's own helper module providing the weight-init and
# model functions, and ComputeComplexity is likewise assumed to come from it.
import time

import numpy as np
import theano
import theano.tensor as T
import lasagne
from sklearn.metrics import roc_auc_score


def soft_cascade_LR_1LNN(trX1, trY1, teX1, teY1, trX2, teX2, lambda_vector,
                         K1):

    (N, D1) = trX2.shape
    D = trX1.shape[1]
    C = 2
    t1 = ComputeComplexity([D1, C])
    t2 = ComputeComplexity([D, K1, C])

    n_it = 10000
    time1 = np.zeros((len(lambda_vector), 1))
    accuracy1 = np.zeros((len(lambda_vector), 1))
    F1 = np.zeros((len(lambda_vector), 1))
    nnz_first = np.zeros((len(lambda_vector), 1))

    for i, plambda in enumerate(lambda_vector):

        X = T.fmatrix()
        F = T.fmatrix()
        Y = T.fvector()

        w_l = CF.init_weights((D1, ))
        b_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01),
                            broadcastable=(True, ))
        # w_l.set_value(np.zeros((D1,)))
        # b_l.set_value(np.zeros((1,)))

        w_h1 = CF.init_weights((D, K1))
        b1 = CF.init_weights((K1, ))
        w_o = CF.init_weights((K1, ))
        bo = theano.shared(CF.floatX(np.random.randn(1) * 0.01),
                           broadcastable=(True, ))

        pygx1 = CF.model00(F, w_l, b_l)
        pygx2 = CF.model3(X, w_h1, w_o, b1, bo, 0, 1)
        pygx_final = pygx1 * pygx2

        yhat1 = (pygx1 > 0.5)
        yhat = (pygx2 > 0.5)

        reg = T.mean(t1 + t2 * pygx1)
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final,
                                                 Y)) + plambda * reg

        params = [w_l, b_l, w_h1, w_o, b1, bo]
        updates = lasagne.updates.rmsprop(cost,
                                          params,
                                          learning_rate=0.001 * 5,
                                          rho=0.9,
                                          epsilon=1e-06)
        # updates = lasagne.updates.adagrad(cost, params, learning_rate=1, epsilon=1e-06)

        train = theano.function(inputs=[X, F, Y],
                                outputs=cost,
                                updates=updates,
                                allow_input_downcast=True)
        reg_value = theano.function(inputs=[F],
                                    outputs=reg,
                                    allow_input_downcast=True)

        predict_first = theano.function(inputs=[F],
                                        outputs=yhat1,
                                        allow_input_downcast=True)
        predict_second = theano.function(inputs=[X],
                                         outputs=yhat,
                                         allow_input_downcast=True)

        max_iter = 300
        for j in range(max_iter):
            c = train(trX1, trX2, trY1)
            r = reg_value(trX2)
            print(c - plambda * r, plambda * r)

        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX2)
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test = np.where(teQ1 == 1)[0]
        nnz_first[i] = inds_test.shape[0]

        # check that we get 100 percent recall from the first stage
        inds_true = np.where(teY1 == 1)[0]
        int_result = np.intersect1d(inds_test, inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_test.shape[0], inds_true.shape[0], int_result.shape[0]))
        r1 = int_result.shape[0] / inds_true.shape[0]
        p1 = int_result.shape[0] / inds_test.shape[0]
        a1 = np.mean(teY1 == teQ1)
        print("first stage: recall = %f, precision = %f, accuracy = %f" %
              (r1, p1, a1))

        teX11 = teX1[inds_test, :]

        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX11)
        end1 = time.clock()
        time1[i] += end1 - start1

        teY2 = np.zeros(teY1.shape, dtype=int)
        teY2.fill(0)
        teY2[inds_test] = teQ2

        inds_second = np.where(teY2 == 1)[0]
        int_result = np.intersect1d(inds_second, inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_second.shape[0], inds_true.shape[0], int_result.shape[0]))
        r2 = int_result.shape[0] / inds_true.shape[0]
        p2 = int_result.shape[0] / inds_second.shape[0]
        a2 = np.mean(teY1 == teY2)
        print("second stage: recall = %f, precision = %f, accuracy = %f" %
              (r2, p2, a2))
        F1[i] = 2 * r2 * p2 / (r2 + p2)
        accuracy1[i] = a2

    return time1, accuracy1, F1, nnz_first
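
The regularizer above reads as an expected test-time cost: ComputeComplexity presumably returns a per-stage cost for the given layer sizes, and the second stage is only charged for examples the first stage forwards. A minimal NumPy sketch of that quantity, with hypothetical costs and probabilities:

import numpy as np

def expected_cascade_cost(t1, t2, p1):
    # t1, t2: per-example costs of the first and second stage (scalars)
    # p1: first-stage probabilities of forwarding an example, shape (N,)
    # the first stage always runs; the second only for forwarded examples
    return np.mean(t1 + t2 * p1)

# hypothetical costs: cheap linear first stage, expensive hidden-layer stage
p1 = np.array([0.9, 0.1, 0.4])
print(expected_cascade_cost(t1=10.0, t2=500.0, p1=p1))  # (460 + 60 + 210) / 3 ~ 243.3
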
Example #2
def cascade_three_stage(trX1, trY1, teX1, teY1, trX2, teX2, trX3, teX3, w_h1, w_h2, w_o, b1, b2, bo, v_h1, v_o, c1, co, plambda, a):
    
    (N,D) = trX3.shape
    lambda_vector = plambda
    
    n_it = 10000
    time1 = np.zeros((len(lambda_vector),1))
    accuracy1 = np.zeros((len(lambda_vector),1))
    F1 = np.zeros((len(lambda_vector),1))
    nnz_first = np.zeros((len(lambda_vector),1))
    nnz_second = np.zeros((len(lambda_vector),1))
    
    for i,plambda in enumerate(lambda_vector):
                    
        X = T.fmatrix()
        F = T.fmatrix()
        E = T.fmatrix()
        Y = T.fvector()
               
        w_l = CF.init_weights((D,))
        b_l  = theano.shared(CF.floatX(np.random.randn(1) * 0.01), broadcastable=(True,))        
        w_l.set_value(np.zeros((D,)))    
        b_l.set_value(np.zeros((1,)))               
               
        pygx1 = CF.model00(E, w_l, b_l)
        pygx2 = CF.model3(F, v_h1, v_o, c1, co, 0, 1)
        pygx = CF.model(X, w_h1, w_h2, w_o, b1, b2, bo, 0, 1)
        
        yhat1 = (pygx1 > 0.5)
        yhat2 = (pygx2 > 0.5)
        yhat = (pygx > 0.5)
        
        f = lambda x, a: 1/(1+T.exp(-a*(x-0.5)))
        
        pygx_final = (1-f(pygx1,a))*pygx1 + (1-f(pygx2,a))*f(pygx1,a)*pygx2 + f(pygx1, a)*f(pygx2, a)*pygx

        reg = T.mean(f(pygx1,a))  
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final, Y)) + plambda*reg
         
        params = [w_l, b_l]
        updates = lasagne.updates.rmsprop(cost, params, learning_rate=0.5, rho=0.9, epsilon=1e-06)
        # updates = lasagne.updates.adagrad(cost, params, learning_rate=1, epsilon=1e-06)
        
        train = theano.function(inputs=[X, F, E, Y], outputs=cost, updates=updates, allow_input_downcast=True)
        reg_value = theano.function(inputs=[E], outputs=reg, allow_input_downcast=True)
        
        predict_first = theano.function(inputs=[E], outputs=yhat1, allow_input_downcast=True)
        predict_second = theano.function(inputs=[F], outputs=yhat2, allow_input_downcast=True)
        predict_third = theano.function(inputs=[X], outputs=yhat, allow_input_downcast=True)
        
        max_iter = 500
        for j in range(max_iter):
            # c = train(trX1, trY1)
            c = train(trX1, trX2, trX3, trY1) 
            # r = reg_value(trX1)
            r = reg_value(trX3) 
            print(c-plambda*r,plambda*r)
            # cost = train(trX1, trY1)
        
        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX3)
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test = np.where(teQ1 == 1)[0]
        nnz_first[i] = inds_test.shape[0]

        # check that we get 100 percent recall from the first stage
        inds_true = np.where( teY1 == 1 )[0]
        int_result = np.intersect1d(inds_test,inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d" %(inds_test.shape[0],inds_true.shape[0],int_result.shape[0]))
        r1 = int_result.shape[0] / inds_true.shape[0]
        p1 = int_result.shape[0] / inds_test.shape[0]
        a1 = np.mean(teY1 == teQ1)
        print("first stage: recall = %f, precision = %f, accuracy = %f" %(r1,p1,a1))
        
        teX22 = teX2[inds_test,:]
                
        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX22)
        end1 = time.clock()
        time1[i] += end1 - start1
        inds_test2 = np.where(teQ2 == 1)[0]
        nnz_second[i] = inds_test2.shape[0]
            
        teY2 = np.zeros(teY1.shape,dtype = int)
        teY2.fill(0)
        teY2[inds_test] = teQ2
        
        inds_second = np.where( teY2 == 1 )[0]            
        int_result = np.intersect1d(inds_second, inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d" %(inds_second.shape[0],inds_true.shape[0],int_result.shape[0]))
        r2 = int_result.shape[0] / inds_true.shape[0]
        p2 = int_result.shape[0] / inds_second.shape[0]
        a2 = np.mean(teY1 == teY2)
        print("second stage: recall = %f, precision = %f, accuracy = %f" %(r2,p2,a2))
            
        # teX1 = teX1[inds_test2,:]
        teX11 = teX1[inds_test[inds_test2],:]
            
        start1 = time.clock()
        for t in range(n_it):
            teQ3 = predict_third(teX11)
        end1 = time.clock()
        time1[i] += end1 - start1            
            
        teY3 = np.zeros(teY1.shape,dtype = int)
        teY3.fill(0)
        teY3[inds_test[inds_test2]] = teQ3
        accuracy1[i] = np.mean(teY1 == teY3)    
        
        inds_third = np.where( teY3 == 1 )[0]
        int_result2 = np.intersect1d(inds_third,inds_true)
        print("third stage nzs:%d,true nzs:%d,intersection:%d" %(inds_third.shape[0],inds_true.shape[0],int_result2.shape[0]))
        r3 = int_result2.shape[0] / inds_true.shape[0]
        p3 = int_result2.shape[0] / inds_third.shape[0]
        print("third stage: recall = %f, precision = %f, accuracy = %f" %(r3, p3, accuracy1[i]))
        F1[i] = 2*r3*p3/(r3 + p3)
        
    return time1, accuracy1, F1, nnz_first, nnz_second
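
The soft gating used above can be sanity-checked outside Theano; a minimal NumPy sketch, assuming per-example stage probabilities p1, p2, p3 and the same sigmoid gate f centred at 0.5:

import numpy as np

def gate(x, a):
    # soft gate around 0.5; larger a approaches a hard threshold
    return 1.0 / (1.0 + np.exp(-a * (x - 0.5)))

def soft_three_stage_prob(p1, p2, p3, a):
    g1, g2 = gate(p1, a), gate(p2, a)
    # stop at stage 1 with weight (1 - g1), at stage 2 with g1 * (1 - g2),
    # otherwise defer to the final stage with weight g1 * g2
    return (1 - g1) * p1 + (1 - g2) * g1 * p2 + g1 * g2 * p3

p1, p2, p3 = np.array([0.2, 0.8]), np.array([0.7, 0.6]), np.array([0.9, 0.3])
print(soft_three_stage_prob(p1, p2, p3, a=20))
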
Example #3
def NN_pretraining_one(trX2, teP, teX1, teY1, K1):

    K1_vector = [K1]

    n_it = 10000
    time1 = np.zeros((len(K1_vector), 1))
    accuracy1 = np.zeros((len(K1_vector), 1))

    inds_true = np.where(teY1 == 1)[0]

    for i, K1 in enumerate(K1_vector):

        (N, D) = trX2.shape

        X = T.fmatrix()
        Y = T.fvector()

        w_h1 = CF.init_weights((D, K1))
        b1 = CF.init_weights((K1, ))
        w_o = CF.init_weights((K1, ))
        bo = theano.shared(CF.floatX(np.random.randn(1) * 0.01),
                           broadcastable=(True, ))

        pygx = CF.model3(X, w_h1, w_o, b1, bo, 0, 1)
        # yhat_second = T.argmax(pygx, axis=1)
        yhat_second = (pygx > 0.5)

        # cost_second = T.mean(T.nnet.categorical_crossentropy(pygx, Y))
        cost_second = T.mean(T.nnet.binary_crossentropy(pygx, Y))
        params_second = [w_h1, w_o, b1, bo]

        # updates_second = lasagne.updates.adagrad(cost_second, params_second, learning_rate=1/4, epsilon=1e-06)
        updates_second = lasagne.updates.rmsprop(cost_second,
                                                 params_second,
                                                 learning_rate=0.01,
                                                 rho=0.9,
                                                 epsilon=1e-06)

        train_second = theano.function(inputs=[X, Y],
                                       outputs=cost_second,
                                       updates=updates_second,
                                       allow_input_downcast=True)
        predict_second = theano.function(inputs=[X],
                                         outputs=yhat_second,
                                         allow_input_downcast=True)

        sgd_iters = 400
        for j in range(sgd_iters):
            # train_second(trX2, teP)
            c = train_second(trX2, teP)
            print(c)
        start1 = time.clock()
        for t in range(n_it):
            teY3 = predict_second(teX1)
        end1 = time.clock()
        time1[i] += (end1 - start1)

        accuracy1[i] = np.mean(teY1 == teY3)

        inds_second = np.where(teY3 == 1)[0]
        int_result2 = np.intersect1d(inds_second, inds_true)
        print("nzs:%d,true nzs:%d,intersection:%d" %
              (inds_second.shape[0], inds_true.shape[0], int_result2.shape[0]))
        r2 = int_result2.shape[0] / inds_true.shape[0]
        p2 = int_result2.shape[0] / inds_second.shape[0]
        print("recall = %f, precision = %f, accuracy = %f" %
              (r2, p2, accuracy1))
        F1 = 2 * r2 * p2 / (r2 + p2)

    return w_h1, w_o, b1, bo, time1, accuracy1, F1
Example #4
def tree_cascade_v1(trX, trY, teX, teY, trX1, teX1, trX2, teX2, w_h1, w_h2,
                    w_o, b1, b2, bo, v_h1, v_o, c1, co, plambda, a):

    lambda_vector = plambda

    n_it = 10000
    time1 = np.zeros((len(lambda_vector), 1))
    accuracy1 = np.zeros((len(lambda_vector), 1))
    F1 = np.zeros((len(lambda_vector), 1))
    nnz = np.zeros((len(lambda_vector), 1))

    for i, plambda in enumerate(lambda_vector):

        (N, D1) = trX1.shape
        (N, D2) = trX2.shape

        X = T.fmatrix()
        Z = T.fmatrix()
        F = T.fmatrix()
        E = T.fmatrix()
        Y = T.fvector()

        w_l = CF.init_weights((D1, ))
        b_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01),
                            broadcastable=(True, ))
        w_l.set_value(np.zeros((D1, )))
        b_l.set_value(np.zeros((1, )))

        v_l = CF.init_weights((D2, ))
        c_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01),
                            broadcastable=(True, ))
        v_l.set_value(np.zeros((D2, )))
        c_l.set_value(np.zeros((1, )))

        pygx1 = CF.model00(F, w_l, b_l)
        pygx2 = CF.model00(E, v_l, c_l)
        pygx3 = CF.model3(Z, v_h1, v_o, c1, co, 0, 1)
        pygx = CF.model(X, w_h1, w_h2, w_o, b1, b2, bo, 0, 1)

        yhat1 = (pygx1 > 0.5)
        yhat2 = (pygx2 > 0.5)
        yhat3 = (pygx3 > 0.5)
        yhat = (pygx > 0.5)

        f = lambda x, a: 1 / (1 + T.exp(-a * (x - 0.5)))

        pygx_final = ((1 - f(pygx1, a) * f(pygx2, a)) * pygx1 * pygx2 +
                      f(pygx1, a) * f(pygx2, a) * (1 - f(pygx3, a)) * pygx3 +
                      f(pygx1, a) * f(pygx2, a) * f(pygx3, a) * pygx)

        kappa1 = 1 / 20
        kappa2 = 1 / 10
        kappa3 = 1

        # reg = T.mean( f(pygx1,a)*f(pygx2,a) )
        reg = T.mean(kappa1 + kappa2 * f(pygx1, a) * f(pygx2, a) +
                     kappa3 * f(pygx1, a) * f(pygx2, a) * f(pygx3, a))
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final,
                                                 Y)) + plambda * reg

        # params = [w_l, b_l, v_l, c_l]
        params = [
            w_l, b_l, v_l, c_l, w_h1, w_h2, w_o, b1, b2, bo, v_h1, v_o, c1, co
        ]
        # params = [w_l, b_l, v_l, c_l, v_h1, v_o, c1, co]
        # params = [w_h1, w_h2, w_o, w_l, b1, b2, bo, b_l]
        # params = [w_h1, w_h2, w_o, w_l, b1, b2, bo]

        # updates = lasagne.updates.adagrad(cost, params, learning_rate=0.1, epsilon=1e-06)
        updates = lasagne.updates.rmsprop(cost,
                                          params,
                                          learning_rate=0.1 / 7,
                                          rho=0.9,
                                          epsilon=1e-06)

        train = theano.function(inputs=[X, Z, F, E, Y],
                                outputs=cost,
                                updates=updates,
                                allow_input_downcast=True)
        reg_value = theano.function(inputs=[Z, F, E],
                                    outputs=reg,
                                    allow_input_downcast=True)
        # reg_value = theano.function(inputs=[F, E], outputs=reg, allow_input_downcast=True)

        predict_first = theano.function(inputs=[F],
                                        outputs=yhat1,
                                        allow_input_downcast=True)
        predict_second = theano.function(inputs=[E],
                                         outputs=yhat2,
                                         allow_input_downcast=True)
        predict_third = theano.function(inputs=[Z],
                                        outputs=yhat3,
                                        allow_input_downcast=True)
        predict_final = theano.function(inputs=[X],
                                        outputs=yhat,
                                        allow_input_downcast=True)

        max_iter = 2000
        for j in range(max_iter):
            c = train(trX, trX, trX1, trX2, trY)
            r = reg_value(trX, trX1, trX2)
            # r = reg_value(trX1, trX2)
            print(c - plambda * r, plambda * r)

        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX1)
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test1 = np.where(teQ1 == 1)[0]
        nnz[i] = inds_test1.shape[0]

        inds_true = np.where(teY == 1)[0]
        int_result1 = np.intersect1d(inds_test1, inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_test1.shape[0], inds_true.shape[0], int_result1.shape[0]))
        r1 = int_result1.shape[0] / inds_true.shape[0]
        p1 = int_result1.shape[0] / inds_test1.shape[0]
        a1 = np.mean(teY == teQ1)
        print("first stage: recall = %f, precision = %f, accuracy = %f" %
              (r1, p1, a1))

        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX2)
        end1 = time.clock()
        time1[i] += end1 - start1  # accumulate on top of the first-stage time
        inds_test2 = np.where(teQ2 == 1)[0]
        nnz[i] = inds_test2.shape[0]

        int_result2 = np.intersect1d(inds_test2, inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_test2.shape[0], inds_true.shape[0], int_result2.shape[0]))
        r2 = int_result2.shape[0] / inds_true.shape[0]
        p2 = int_result2.shape[0] / inds_test2.shape[0]
        a2 = np.mean(teY == teQ2)
        print("second stage: recall = %f, precision = %f, accuracy = %f" %
              (r2, p2, a2))

        inds_test = np.intersect1d(inds_test1, inds_test2)
        tps = np.intersect1d(inds_test, inds_true)
        print("intersects of first-second stages = %d, true positives = %d" %
              (inds_test.shape[0], tps.shape[0]))

        teXX = teX[inds_test, :]

        start1 = time.clock()
        for t in range(n_it):
            teQ3 = predict_third(teXX)
        end1 = time.clock()
        time1[i] += end1 - start1

        inds_test3 = np.where(teQ3 == 1)[0]

        teQ33 = np.zeros(teY.shape, dtype=int)
        teQ33.fill(0)
        teQ33[inds_test] = teQ3

        int_result3 = np.intersect1d(inds_test[inds_test3], inds_true)
        print("third stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_test3.shape[0], inds_true.shape[0], int_result3.shape[0]))
        r3 = int_result3.shape[0] / inds_true.shape[0]
        p3 = int_result3.shape[0] / inds_test3.shape[0]
        a3 = np.mean(teY == teQ33)
        print("third stage: recall = %f, precision = %f, accuracy = %f" %
              (r3, p3, a3))

        # filter into a new array so teX stays intact for later lambdas:
        teX_final = teX[inds_test[inds_test3], :]

        start1 = time.clock()
        for t in range(n_it):
            teP = predict_final(teX_final)
        end1 = time.clock()
        time1[i] += end1 - start1

        teY3 = np.zeros(teY.shape, dtype=int)
        teY3.fill(0)
        teY3[inds_test[inds_test3]] = teP
        accuracy1[i] = np.mean(teY == teY3)

        inds_second = np.where(teY3 == 1)[0]
        int_result = np.intersect1d(inds_second, inds_true)
        print("final stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_second.shape[0], inds_true.shape[0], int_result.shape[0]))
        r = int_result.shape[0] / inds_true.shape[0]
        p = int_result.shape[0] / inds_second.shape[0]
        print("final stage: recall = %f, precision = %f, accuracy = %f" %
              (r, p, accuracy1[i]))
        F1[i] = 2 * r * p / (r + p)

    return time1, accuracy1, F1, nnz
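
As in the other examples, the regularizer in tree_cascade_v1 reads as an expected cost: kappa1, kappa2 and kappa3 act as relative per-stage costs, weighted by the gated probability that an example reaches that stage. A small NumPy sketch of the same expression, with the hypothetical kappas used above and random probabilities:

import numpy as np

def gate(x, a):
    return 1.0 / (1.0 + np.exp(-a * (x - 0.5)))

def tree_cascade_reg(p1, p2, p3, a, kappa1=1 / 20, kappa2=1 / 10, kappa3=1.0):
    g1, g2, g3 = gate(p1, a), gate(p2, a), gate(p3, a)
    # kappa1: always paid; kappa2: paid when both cheap stages fire;
    # kappa3: paid when the third stage fires as well
    return np.mean(kappa1 + kappa2 * g1 * g2 + kappa3 * g1 * g2 * g3)

rng = np.random.default_rng(0)
p1, p2, p3 = rng.random(5), rng.random(5), rng.random(5)
print(tree_cascade_reg(p1, p2, p3, a=20))
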
Example #5
def cascade_rw(trX, trY, teX, teY, trX1, teX1, trX2, teX2, w_h1, w_o, b1, bo,
               plambda, a):

    lambda_vector = plambda

    n_it = 10000
    time1 = np.zeros((len(lambda_vector), 1))
    accuracy1 = np.zeros((len(lambda_vector), 1))
    F1 = np.zeros((len(lambda_vector), 1))
    nnz = np.zeros((len(lambda_vector), 1))

    for i, plambda in enumerate(lambda_vector):

        (N, D1) = trX1.shape
        (N, D2) = trX2.shape

        X = T.fmatrix()
        F = T.fmatrix()
        E = T.fmatrix()
        Y = T.fvector()

        w_l = CF.init_weights((D1, ))
        b_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01),
                            broadcastable=(True, ))
        w_l.set_value(np.zeros((D1, )))
        b_l.set_value(np.zeros((1, )))

        v_l = CF.init_weights((D2, ))
        c_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01),
                            broadcastable=(True, ))
        v_l.set_value(np.zeros((D2, )))
        c_l.set_value(np.zeros((1, )))

        pygx1 = CF.model00(F, w_l, b_l)
        pygx2 = CF.model00(E, v_l, c_l)
        pygx = CF.model3(X, w_h1, w_o, b1, bo, 0, 1)

        yhat1 = (pygx1 > 0.5)
        yhat2 = (pygx2 > 0.5)
        yhat = (pygx > 0.5)

        f = lambda x, a: 1 / (1 + T.exp(-a * (x - 0.5)))

        pygx_final = (1 - f(pygx1, a)) * pygx1 + (1 - f(pygx2, a)) * f(
            pygx1, a) * pygx2 + f(pygx1, a) * f(pygx2, a) * pygx

        kappa2 = 1 / 30
        kappa3 = 1

        # reg = T.mean(f(pygx1,a))
        reg = T.mean(kappa2 * f(pygx1, a) + kappa3 * f(pygx1, a) * f(pygx2, a))
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final,
                                                 Y)) + plambda * reg

        params = [w_l, b_l, v_l, c_l]
        # params = [w_l, b_l, v_l, c_l, w_h1, w_o, b1, bo]
        # params = [w_h1, w_h2, w_o, w_l, b1, b2, bo, b_l]
        # params = [w_h1, w_h2, w_o, w_l, b1, b2, bo]

        # updates = lasagne.updates.adagrad(cost, params, learning_rate=1/2, epsilon=1e-06)
        updates = lasagne.updates.rmsprop(cost,
                                          params,
                                          learning_rate=0.4,
                                          rho=0.9,
                                          epsilon=1e-06)

        train = theano.function(inputs=[X, F, E, Y],
                                outputs=cost,
                                updates=updates,
                                allow_input_downcast=True)
        reg_value = theano.function(inputs=[F, E],
                                    outputs=reg,
                                    allow_input_downcast=True)
        # reg_value = theano.function(inputs=[X, F, E], outputs=reg, allow_input_downcast=True)
        # reg_value = theano.function(inputs=[F], outputs=reg, allow_input_downcast=True)

        predict_first = theano.function(inputs=[F],
                                        outputs=yhat1,
                                        allow_input_downcast=True)
        predict_second = theano.function(inputs=[E],
                                         outputs=yhat2,
                                         allow_input_downcast=True)
        predict_final = theano.function(inputs=[X],
                                        outputs=yhat,
                                        allow_input_downcast=True)
        predict_prob = theano.function(inputs=[X, F, E],
                                       outputs=pygx_final,
                                       allow_input_downcast=True)

        max_iter = 1000
        for j in range(max_iter):
            c = train(trX, trX1, trX2, trY)
            r = reg_value(trX1, trX2)
            # r = reg_value(trX, trX1, trX2)
            # r = reg_value(trX1)
            print(c - plambda * r, plambda * r)

        probs = predict_prob(teX, teX1, teX2)
        AUC = roc_auc_score(teY, probs)
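        # note: AUC is recomputed on every pass through the loop, so the value
        # returned at the end reflects only the last entry of plambda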

        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX1)
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test1 = np.where(teQ1 == 1)[0]
        nnz[i] = inds_test1.shape[0]

        inds_true = np.where(teY == 1)[0]
        int_result1 = np.intersect1d(inds_test1, inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_test1.shape[0], inds_true.shape[0], int_result1.shape[0]))

        # filter into a new array so teX2 stays intact for later lambdas:
        teX22 = teX2[inds_test1, :]

        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX22)
        end1 = time.clock()
        time1[i] += end1 - start1  # accumulate on top of the first-stage time
        inds_test2 = np.where(teQ2 == 1)[0]
        nnz[i] = inds_test2.shape[0]

        int_result2 = np.intersect1d(inds_test1[inds_test2], inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_test2.shape[0], inds_true.shape[0], int_result2.shape[0]))

        # likewise keep teX itself intact; filter for the final stage:
        teX_final = teX[inds_test1[inds_test2], :]

        start1 = time.clock()
        for t in range(n_it):
            teP = predict_final(teX_final)
        end1 = time.clock()
        time1[i] += end1 - start1

        teY3 = np.zeros(teY.shape, dtype=int)
        teY3.fill(0)
        teY3[inds_test1[inds_test2]] = teP
        accuracy1[i] = np.mean(teY == teY3)

        inds_second = np.where(teY3 == 1)[0]
        int_result = np.intersect1d(inds_second, inds_true)
        print("final stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_second.shape[0], inds_true.shape[0], int_result.shape[0]))
        r = int_result.shape[0] / inds_true.shape[0]
        p = int_result.shape[0] / inds_second.shape[0]
        print("final stage: recall = %f, precision = %f, accuracy = %f" %
              (r, p, accuracy1[i]))
        F1[i] = 2 * r * p / (r + p)

    return time1, accuracy1, F1, nnz, AUC
Example #6
def cascade_two_stage(trX1, trY1, teX1, teY1, trX2, teX2, w_h1, w_o, b1, bo,
                      plambda, a):

    lambda_vector = plambda

    # number of iterations for prediction:
    n_it = 10000
    # prediction time:
    time1 = np.zeros((len(lambda_vector), 1))
    # accuracy:
    accuracy1 = np.zeros((len(lambda_vector), 1))
    # F1 score:
    F1 = np.zeros((len(lambda_vector), 1))
    # number of non-zeros sent to the second stage:
    nnz = np.zeros((len(lambda_vector), 1))

    for i, plambda in enumerate(lambda_vector):

        # N: number of training data points, D: number of dimensions/features in first stage data
        (N, D) = trX2.shape

        # second stage training data:
        X = T.fmatrix()
        # first stage training data:
        F = T.fmatrix()
        # labels for training data:
        Y = T.fvector()

        # random initialization of LR parameters:
        w_l = CF.init_weights((D, ))
        b_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01),
                            broadcastable=(True, ))
        # zero initialization of LR parameters:
        w_l.set_value(np.zeros((D, )))
        b_l.set_value(np.zeros((1, )))

        # define LR model:
        pygx1 = CF.model00(F, w_l, b_l)
        # define 2LNN model:
        # pygx = CF.model(X, w_h1, w_h2, w_o, b1, b2, bo, 0, 1)
        # define 1LNN model:
        pygx = CF.model3(X, w_h1, w_o, b1, bo, 0, 1)

        # hard threshold cascade: thresholding of output probabilities
        yhat1 = (pygx1 > 0.5)  # output of first stage
        yhat = (pygx > 0.5)  # output of second stage

        # definition of the gating function:
        f = lambda x, a: 1 / (1 + T.exp(-a * (x - 0.5)))

        # output probability of the cascade:
        pygx_final = (1 - f(pygx1, a)) * pygx1 + f(pygx1, a) * pygx

        # regularization term:
        reg = T.mean(f(pygx1, a))
        # objective function:
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final,
                                                 Y)) + plambda * reg

        # parameters of the optimization problem:
        params = [w_l, b_l]
        # params = [w_h1, w_o, w_l, b1, bo, b_l]
        # params = [w_h1, w_h2, w_o, w_l, b1, b2, bo, b_l]
        # params = [w_h1, w_h2, w_o, w_l, b1, b2, bo]

        # updates = lasagne.updates.rmsprop(cost, params, learning_rate=0.0004, rho=0.9, epsilon=1e-06)
        updates = lasagne.updates.adagrad(cost,
                                          params,
                                          learning_rate=1,
                                          epsilon=1e-06)

        # theano function for training:
        train = theano.function(inputs=[X, F, Y],
                                outputs=cost,
                                updates=updates,
                                allow_input_downcast=True)
        reg_value = theano.function(inputs=[F],
                                    outputs=reg,
                                    allow_input_downcast=True)

        # theano function for prediction in first stage:
        predict_first = theano.function(inputs=[F],
                                        outputs=yhat1,
                                        allow_input_downcast=True)
        # theano function for prediction in second stage:
        predict_second = theano.function(inputs=[X],
                                         outputs=yhat,
                                         allow_input_downcast=True)

        # number of steps in SGD:
        max_iter = 5000

        # iterations for SGD/training:
        for j in range(max_iter):
            c = train(trX1, trX2, trY1)
            r = reg_value(trX2)
            print(c, c - plambda * r, plambda * r)
            # cost = train(trX1, trY1)

        # prediction for first stage:
        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX2)
            # teQ1 = teX1.dot(w_l.get_value()) + b_check >= 0
            # teQ1 = np.dot(teX2,w_l.get_value()) + b_l.get_value() >= 0
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test = np.where(teQ1 == 1)[0]
        nnz[i] = inds_test.shape[0]

        # indices for true positives
        inds_true = np.where(teY1 == 1)[0]
        # intersection of true positives and first-stage prediction
        int_result = np.intersect1d(inds_test, inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_test.shape[0], inds_true.shape[0], int_result.shape[0]))
        # recall from first stage:
        r1 = int_result.shape[0] / inds_true.shape[0]
        # precision from first stage:
        p1 = int_result.shape[0] / inds_test.shape[0]
        # accuracy from first stage:
        a1 = np.mean(teY1 == teQ1)
        print("first stage: recall = %f, precision = %f, accuracy = %f" %
              (r1, p1, a1))

        # only send positive cases from the first stage to the second stage
        # (filter into a new array so teX1 stays intact for later lambdas):
        teX11 = teX1[inds_test, :]

        # prediction for the second stage
        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX11)
        end1 = time.clock()
        time1[i] += end1 - start1

        # output labels from the cascade
        teY3 = np.zeros(teY1.shape, dtype=int)
        teY3.fill(0)
        teY3[inds_test] = teQ2
        # accuracy of the cascade:
        accuracy1[i] = np.mean(teY1 == teY3)

        inds_second = np.where(teY3 == 1)[0]
        int_result2 = np.intersect1d(inds_second, inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_second.shape[0], inds_true.shape[0], int_result2.shape[0]))
        # recall for the cascade:
        r2 = int_result2.shape[0] / inds_true.shape[0]
        # precision for the cascade:
        p2 = int_result2.shape[0] / inds_second.shape[0]
        print("second stage: recall = %f, precision = %f, accuracy = %f" %
              (r2, p2, accuracy1[i]))
        # F1 score for the cascade:
        F1[i] = 2 * r2 * p2 / (r2 + p2)

    return time1, accuracy1, F1, nnz
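
A hedged end-to-end sketch of how these pieces might fit together: pre-train the one-hidden-layer second-stage network with NN_pretraining_one (Example #3), then pass its weights to cascade_two_stage, which only learns the first-stage LR gate. All data below is synthetic and the wiring is an assumption from the signatures above; per the comments in cascade_two_stage, its first data argument carries the second-stage (expensive) features and its fifth the first-stage (cheap) features, and teP in NN_pretraining_one is replaced here by ground-truth labels.

import numpy as np

# hypothetical synthetic data: Xc = cheap first-stage features,
# Xe = expensive second-stage features
N, Ntest, Dc, De, K1 = 1000, 500, 20, 200, 64
rng = np.random.default_rng(0)
trXc, teXc = rng.standard_normal((N, Dc)), rng.standard_normal((Ntest, Dc))
trXe, teXe = rng.standard_normal((N, De)), rng.standard_normal((Ntest, De))
trY = (rng.random(N) > 0.9).astype(int)
teY = (rng.random(Ntest) > 0.9).astype(int)

# 1) pre-train the second-stage network on the expensive features
w_h1, w_o, b1, bo, _, _, _ = NN_pretraining_one(trXe, trY, teXe, teY, K1)

# 2) learn the first-stage LR gate around the frozen second-stage weights
lambdas = [0.01, 0.1, 1.0]
time1, acc, F1, nnz = cascade_two_stage(trXe, trY, teXe, teY, trXc, teXc,
                                        w_h1, w_o, b1, bo, lambdas, a=20)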