Пример #1
0
    def test_spherical_op_take_gradient(self):
        """Differentiate a scalar loss through FactoredSphericalOp and dump the results.

        The factored op is applied to shared copies of the generated model
        parameters, ``T.grad`` of a simple loss is taken with respect to
        ``HT``, the compiled function is run once, and the outputs together
        with ``W`` recomputed from the shared variables are written to stderr.

        NOTE: nothing is asserted here; the output is meant to be inspected
        by hand.
        """

        def log(*items):
            # Writes the same text as the Python 2 ``print >> sys.stderr, a, b``
            # form (space-separated str() of every item, then a newline) but
            # is also valid on Python 3, where the chevron form raises
            # TypeError at runtime.
            sys.stderr.write(" ".join(str(item) for item in items) + "\n")

        log()
        log("=================================================")
        log("====== TEST spherical_op_take_gradient ===========")
        log("=================================================")

        (V_mat, UT_mat, Uinv_mat, QT_mat,
         omega_vec, w_bar_vec) = self.generate_model_params()
        (HT_mat, KindexesT_mat,
         grad_AT_mat, grad_q_vec, grad_s_vec) = self.generate_data()

        # Model state lives in shared variables; they are read back with
        # get_value() after the compiled function has run.
        V = shared(V_mat)
        UT = shared(UT_mat)
        Uinv = shared(Uinv_mat)
        QT = shared(QT_mat)
        omega = shared(omega_vec)
        w_bar = shared(w_bar_vec)

        HT = T.matrix()
        KindexesT = T.imatrix()
        eta_var = T.scalar()

        # W rebuilt with numpy from the initial factors (unfactorized algo).
        W_mat = spherical_op.numpy_recompute_W(V_mat, UT_mat, omega_vec)
        # The numpy reference results are not displayed by this test, so they
        # are not bound to names.  The call itself is kept in case it modifies
        # its arguments in place -- TODO confirm and drop it if it does not.
        spherical_op.numpy_bprop_update(
            W_mat, HT_mat, KindexesT_mat, grad_AT_mat, grad_q_vec, grad_s_vec,
            self.eta)

        # Forward pass with theano (factorized algo).
        forward_op = spherical_op.FactoredSphericalOp(
            eta_var, invup_mode=self.invup_mode)
        AT, q, s, work_d, work_m = forward_op(
            V, UT, Uinv, QT, omega, w_bar, HT, KindexesT)

        def z_loss(AT, q, s, eps=1e-12):
            D = self.D
            mu = s / D
            sigma = T.sqrt((q / D) - mu**2)
            c = T.nnet.softplus((mu - AT[:, 0]) / (sigma + eps))
            return c.sum()

        def simple_loss(AT, q, s, eps=1e-5):
            D = self.D
            mu = s / D
            sigma2 = (q / D) - mu**2 + eps
            c = sigma2 + 0 * AT[:, 0]
            return c.sum()

        def simplest_loss(AT, q, s, eps=1e-5):
            L = -AT[:, 0] + 0.7 * s + 0.3 * q
            return L.sum()

        # Exactly one loss is differentiated.  z_loss and simple_loss are kept
        # as drop-in alternatives for debugging; previously z_loss was built
        # and then immediately overwritten by simplest_loss.
        loss_fn = simplest_loss
        L = loss_fn(AT, q, s)

        grad_HT = T.grad(L, wrt=HT)

        g = function([HT, KindexesT, eta_var], [AT, q, s, L, grad_HT])
        theano_AT, theano_q, theano_s, theano_L, theano_grad_HT = g(
            HT_mat, KindexesT_mat, self.eta)

        log("******** KindexesT ********")
        log(KindexesT_mat)

        log("******** theano AT, q, s, L  ********")
        log(theano_AT, theano_q, theano_s, theano_L)

        log("******** theano grad_HT ********")
        log(theano_grad_HT)

        log("******** old W ********")
        log(W_mat)
        log("******** theano new W ********")
        # W recomputed with numpy from the shared variables' current values.
        new_W_mat = spherical_op.numpy_recompute_W(V.get_value(),
                                                   UT.get_value(),
                                                   omega.get_value())
        log(new_W_mat)
        log("******** consistency of op's new w_bar and W ********")
        log("w_bar before update: ", w_bar_vec)
        log("sum of old W: ", W_mat.sum(axis=0))
        log("w_bar after update:", w_bar.get_value())
        log("sum of numpy new W:", new_W_mat.sum(axis=0))
        log("******** consistency of op's new QT and W ********")
        log("QT before update:")
        log(QT_mat)
        log("QT after update: (consider only upper triangular part)")
        log(QT.get_value())
        log("numpy new W.T W:")
        log(np.dot(new_W_mat.T, new_W_mat))
    def test_spherical_op_take_gradient(self):
        """Differentiate a scalar loss through FactoredSphericalOp and dump the results.

        The factored op is applied to shared copies of the generated model
        parameters, ``T.grad`` of a simple loss is taken with respect to
        ``HT``, the compiled function is run once, and the outputs together
        with ``W`` recomputed from the shared variables are written to stderr.

        NOTE: nothing is asserted here; the output is meant to be inspected
        by hand.
        """

        def log(*items):
            # Writes the same text as the Python 2 ``print >> sys.stderr, a, b``
            # form (space-separated str() of every item, then a newline) but
            # is also valid on Python 3, where the chevron form raises
            # TypeError at runtime.
            sys.stderr.write(" ".join(str(item) for item in items) + "\n")

        log()
        log("=================================================")
        log("====== TEST spherical_op_take_gradient ===========")
        log("=================================================")

        (V_mat, UT_mat, Uinv_mat, QT_mat,
         omega_vec, w_bar_vec) = self.generate_model_params()
        (HT_mat, KindexesT_mat,
         grad_AT_mat, grad_q_vec, grad_s_vec) = self.generate_data()

        # Model state lives in shared variables; they are read back with
        # get_value() after the compiled function has run.
        V = shared(V_mat)
        UT = shared(UT_mat)
        Uinv = shared(Uinv_mat)
        QT = shared(QT_mat)
        omega = shared(omega_vec)
        w_bar = shared(w_bar_vec)

        HT = T.matrix()
        KindexesT = T.imatrix()
        eta_var = T.scalar()

        # W rebuilt with numpy from the initial factors (unfactorized algo).
        W_mat = spherical_op.numpy_recompute_W(V_mat, UT_mat, omega_vec)
        # The numpy reference results are not displayed by this test, so they
        # are not bound to names.  The call itself is kept in case it modifies
        # its arguments in place -- TODO confirm and drop it if it does not.
        spherical_op.numpy_bprop_update(
            W_mat, HT_mat, KindexesT_mat, grad_AT_mat, grad_q_vec, grad_s_vec,
            self.eta)

        # Forward pass with theano (factorized algo).
        forward_op = spherical_op.FactoredSphericalOp(
            eta_var, invup_mode=self.invup_mode)
        AT, q, s, work_d, work_m = forward_op(
            V, UT, Uinv, QT, omega, w_bar, HT, KindexesT)

        def z_loss(AT, q, s, eps=1e-12):
            D = self.D
            mu = s / D
            sigma = T.sqrt((q / D) - mu**2)
            c = T.nnet.softplus((mu - AT[:, 0]) / (sigma + eps))
            return c.sum()

        def simple_loss(AT, q, s, eps=1e-5):
            D = self.D
            mu = s / D
            sigma2 = (q / D) - mu**2 + eps
            c = sigma2 + 0 * AT[:, 0]
            return c.sum()

        def simplest_loss(AT, q, s, eps=1e-5):
            L = -AT[:, 0] + 0.7 * s + 0.3 * q
            return L.sum()

        # Exactly one loss is differentiated.  z_loss and simple_loss are kept
        # as drop-in alternatives for debugging; previously z_loss was built
        # and then immediately overwritten by simplest_loss.
        loss_fn = simplest_loss
        L = loss_fn(AT, q, s)

        grad_HT = T.grad(L, wrt=HT)

        g = function([HT, KindexesT, eta_var], [AT, q, s, L, grad_HT])
        theano_AT, theano_q, theano_s, theano_L, theano_grad_HT = g(
            HT_mat, KindexesT_mat, self.eta)

        log("******** KindexesT ********")
        log(KindexesT_mat)

        log("******** theano AT, q, s, L  ********")
        log(theano_AT, theano_q, theano_s, theano_L)

        log("******** theano grad_HT ********")
        log(theano_grad_HT)

        log("******** old W ********")
        log(W_mat)
        log("******** theano new W ********")
        # W recomputed with numpy from the shared variables' current values.
        new_W_mat = spherical_op.numpy_recompute_W(V.get_value(),
                                                   UT.get_value(),
                                                   omega.get_value())
        log(new_W_mat)
        log("******** consistency of op's new w_bar and W ********")
        log("w_bar before update: ", w_bar_vec)
        log("sum of old W: ", W_mat.sum(axis=0))
        log("w_bar after update:", w_bar.get_value())
        log("sum of numpy new W:", new_W_mat.sum(axis=0))
        log("******** consistency of op's new QT and W ********")
        log("QT before update:")
        log(QT_mat)
        log("QT after update: (consider only upper triangular part)")
        log(QT.get_value())
        log("numpy new W.T W:")
        log(np.dot(new_W_mat.T, new_W_mat))
Пример #3
0
    def test_spherical_op_grad(self):
        """Run FactoredSphericalOpBpropUpdate once and dump it next to the numpy reference.

        The numpy (unfactorized) ``numpy_bprop_update`` and the theano
        (factorized) forward + bprop-update ops are both evaluated on the same
        generated parameters and data; their ``grad_HT`` and resulting ``W``
        are written to stderr, followed by the shared ``w_bar`` / ``QT``
        values read back after the call.

        NOTE: nothing is asserted here; the two results are only printed for
        manual comparison.
        """

        def log(*items):
            # Writes the same text as the Python 2 ``print >> sys.stderr, a, b``
            # form (space-separated str() of every item, then a newline) but
            # is also valid on Python 3, where the chevron form raises
            # TypeError at runtime.
            sys.stderr.write(" ".join(str(item) for item in items) + "\n")

        log()
        log("=================================================")
        log("============  TEST spherical_op_grad  ===========")
        log("=================================================")

        (V_mat, UT_mat, Uinv_mat, QT_mat,
         omega_vec, w_bar_vec) = self.generate_model_params()
        (HT_mat, KindexesT_mat,
         grad_AT_mat, grad_q_vec, grad_s_vec) = self.generate_data()

        # Model state lives in shared variables; they are read back with
        # get_value() after the compiled function has run.
        V = shared(V_mat)
        UT = shared(UT_mat)
        Uinv = shared(Uinv_mat)
        QT = shared(QT_mat)
        omega = shared(omega_vec)
        w_bar = shared(w_bar_vec)

        # Output gradients are fed to the bprop op as shared constants.
        grad_AT = shared(grad_AT_mat)
        grad_q = shared(grad_q_vec)
        grad_s = shared(grad_s_vec)

        HT = T.matrix()
        KindexesT = T.imatrix()
        eta_var = T.scalar()

        # Reference result with numpy (unfactorized algo).
        W_mat = spherical_op.numpy_recompute_W(V_mat, UT_mat, omega_vec)
        numpy_grad_HT, numpy_new_W = spherical_op.numpy_bprop_update(
            W_mat, HT_mat, KindexesT_mat, grad_AT_mat, grad_q_vec, grad_s_vec,
            self.eta)

        # Forward pass with theano (factorized algo); only s and the work
        # buffers are consumed below.
        forward_op = spherical_op.FactoredSphericalOp(
            eta_var, invup_mode=self.invup_mode)
        AT, q, s, work_d, work_m = forward_op(
            V, UT, Uinv, QT, omega, w_bar, HT, KindexesT)

        # The tiny multiple of s makes grad_s depend on the forward output;
        # presumably this just ties the bprop op to the forward op in the
        # graph -- TODO confirm.
        linked_grad_s = grad_s + 1e-9 * s
        bprop_op = spherical_op.FactoredSphericalOpBpropUpdate(
            invup_mode=self.invup_mode)
        grad_HT = bprop_op(V, UT, Uinv, QT, omega, w_bar, HT, KindexesT,
                           work_d, work_m, grad_AT, grad_q, linked_grad_s,
                           eta_var)

        g = function([HT, KindexesT, eta_var], [grad_HT])
        # The outputs were given as a list, so the call returns a one-element
        # list; unpack it so the array prints in the same form as the numpy
        # reference instead of as a list repr.
        (theano_grad_HT,) = g(HT_mat, KindexesT_mat, self.eta)

        log("******** KindexesT ********")
        log(KindexesT_mat)

        log("******** numpy grad_HT ********")
        log(numpy_grad_HT)
        log("******** theano grad_HT ********")
        log(theano_grad_HT)

        log("******** old W ********")
        log(W_mat)
        log("******** numpy new W ********")
        log(numpy_new_W)
        log("******** theano new W ********")
        # W recomputed with numpy from the shared variables' current values.
        new_W_mat = spherical_op.numpy_recompute_W(V.get_value(),
                                                   UT.get_value(),
                                                   omega.get_value())
        log(new_W_mat)
        log("******** consistency of op's new w_bar and W ********")
        log("w_bar before update: ", w_bar_vec)
        log("sum of old W: ", W_mat.sum(axis=0))
        log("w_bar after update:", w_bar.get_value())
        log("sum of numpy new W:", new_W_mat.sum(axis=0))
        log("******** consistency of op's new QT and W ********")
        log("QT before update:")
        log(QT_mat)
        log("QT after update: (consider only upper triangular part)")
        log(QT.get_value())
        log("numpy new W.T W:")
        log(np.dot(new_W_mat.T, new_W_mat))
    def test_spherical_op_grad(self):
        """Run FactoredSphericalOpBpropUpdate once and dump it next to the numpy reference.

        The numpy (unfactorized) ``numpy_bprop_update`` and the theano
        (factorized) forward + bprop-update ops are both evaluated on the same
        generated parameters and data; their ``grad_HT`` and resulting ``W``
        are written to stderr, followed by the shared ``w_bar`` / ``QT``
        values read back after the call.

        NOTE: nothing is asserted here; the two results are only printed for
        manual comparison.
        """

        def log(*items):
            # Writes the same text as the Python 2 ``print >> sys.stderr, a, b``
            # form (space-separated str() of every item, then a newline) but
            # is also valid on Python 3, where the chevron form raises
            # TypeError at runtime.
            sys.stderr.write(" ".join(str(item) for item in items) + "\n")

        log()
        log("=================================================")
        log("============  TEST spherical_op_grad  ===========")
        log("=================================================")

        (V_mat, UT_mat, Uinv_mat, QT_mat,
         omega_vec, w_bar_vec) = self.generate_model_params()
        (HT_mat, KindexesT_mat,
         grad_AT_mat, grad_q_vec, grad_s_vec) = self.generate_data()

        # Model state lives in shared variables; they are read back with
        # get_value() after the compiled function has run.
        V = shared(V_mat)
        UT = shared(UT_mat)
        Uinv = shared(Uinv_mat)
        QT = shared(QT_mat)
        omega = shared(omega_vec)
        w_bar = shared(w_bar_vec)

        # Output gradients are fed to the bprop op as shared constants.
        grad_AT = shared(grad_AT_mat)
        grad_q = shared(grad_q_vec)
        grad_s = shared(grad_s_vec)

        HT = T.matrix()
        KindexesT = T.imatrix()
        eta_var = T.scalar()

        # Reference result with numpy (unfactorized algo).
        W_mat = spherical_op.numpy_recompute_W(V_mat, UT_mat, omega_vec)
        numpy_grad_HT, numpy_new_W = spherical_op.numpy_bprop_update(
            W_mat, HT_mat, KindexesT_mat, grad_AT_mat, grad_q_vec, grad_s_vec,
            self.eta)

        # Forward pass with theano (factorized algo); only s and the work
        # buffers are consumed below.
        forward_op = spherical_op.FactoredSphericalOp(
            eta_var, invup_mode=self.invup_mode)
        AT, q, s, work_d, work_m = forward_op(
            V, UT, Uinv, QT, omega, w_bar, HT, KindexesT)

        # The tiny multiple of s makes grad_s depend on the forward output;
        # presumably this just ties the bprop op to the forward op in the
        # graph -- TODO confirm.
        linked_grad_s = grad_s + 1e-9 * s
        bprop_op = spherical_op.FactoredSphericalOpBpropUpdate(
            invup_mode=self.invup_mode)
        grad_HT = bprop_op(V, UT, Uinv, QT, omega, w_bar, HT, KindexesT,
                           work_d, work_m, grad_AT, grad_q, linked_grad_s,
                           eta_var)

        g = function([HT, KindexesT, eta_var], [grad_HT])
        # The outputs were given as a list, so the call returns a one-element
        # list; unpack it so the array prints in the same form as the numpy
        # reference instead of as a list repr.
        (theano_grad_HT,) = g(HT_mat, KindexesT_mat, self.eta)

        log("******** KindexesT ********")
        log(KindexesT_mat)

        log("******** numpy grad_HT ********")
        log(numpy_grad_HT)
        log("******** theano grad_HT ********")
        log(theano_grad_HT)

        log("******** old W ********")
        log(W_mat)
        log("******** numpy new W ********")
        log(numpy_new_W)
        log("******** theano new W ********")
        # W recomputed with numpy from the shared variables' current values.
        new_W_mat = spherical_op.numpy_recompute_W(V.get_value(),
                                                   UT.get_value(),
                                                   omega.get_value())
        log(new_W_mat)
        log("******** consistency of op's new w_bar and W ********")
        log("w_bar before update: ", w_bar_vec)
        log("sum of old W: ", W_mat.sum(axis=0))
        log("w_bar after update:", w_bar.get_value())
        log("sum of numpy new W:", new_W_mat.sum(axis=0))
        log("******** consistency of op's new QT and W ********")
        log("QT before update:")
        log(QT_mat)
        log("QT after update: (consider only upper triangular part)")
        log(QT.get_value())
        log("numpy new W.T W:")
        log(np.dot(new_W_mat.T, new_W_mat))