def test_spherical_op_take_gradient(self):
    """Differentiate a loss through FactoredSphericalOp and dump diagnostics.

    The forward op is applied symbolically to shared model parameters, a
    scalar loss is built from its outputs (AT, q, s), and ``T.grad`` is taken
    with respect to ``HT``.  The compiled function is evaluated once on the
    data returned by ``self.generate_data()`` and the results are printed to
    ``sys.stderr`` together with the shared parameters read back afterwards.

    This test makes no assertions: the output has to be inspected by hand.
    """
    print >> sys.stderr
    print >> sys.stderr, "================================================="
    print >> sys.stderr, "====== TEST spherical_op_take_gradient ==========="
    print >> sys.stderr, "================================================="

    V_mat, UT_mat, Uinv_mat, QT_mat, omega_vec, w_bar_vec = self.generate_model_params()
    HT_mat, KindexesT_mat, grad_AT_mat, grad_q_vec, grad_s_vec = self.generate_data()

    # Model parameters live in shared variables so that their values can be
    # read back with get_value() after the compiled function has run.
    V = shared(V_mat)
    UT = shared(UT_mat)
    Uinv = shared(Uinv_mat)
    QT = shared(QT_mat)
    omega = shared(omega_vec)
    w_bar = shared(w_bar_vec)

    # Symbolic inputs supplied when the compiled function is called.
    HT = T.matrix()
    KindexesT = T.imatrix()
    eta_var = T.scalar()

    # Unfactorized numpy reference.  Its results are not printed here (it is
    # driven by the fixed grad_AT/grad_q/grad_s from generate_data(), whereas
    # the Theano gradient below comes from the loss -- presumably why the
    # comparison prints were disabled).  The call itself is kept because
    # whether it modifies W_mat in place cannot be determined from this test.
    W_mat = spherical_op.numpy_recompute_W(V_mat, UT_mat, omega_vec)
    spherical_op.numpy_bprop_update(
        W_mat, HT_mat, KindexesT_mat,
        grad_AT_mat, grad_q_vec, grad_s_vec, self.eta)

    # Factorized Theano forward pass.  work_d and work_m are part of the op's
    # output tuple but are not needed when the gradient is obtained via T.grad.
    AT, q, s, work_d, work_m = spherical_op.FactoredSphericalOp(
        eta_var, invup_mode=self.invup_mode)(
            V, UT, Uinv, QT, omega, w_bar, HT, KindexesT)

    # Candidate losses, kept side by side so the one under test can be swapped
    # by changing the single `loss_fn = ...` line below.
    def z_loss(AT, q, s, eps=1e-12):
        D = self.D
        mu = s / D
        sigma = T.sqrt((q / D) - mu**2)
        c = T.nnet.softplus((mu - AT[:, 0]) / (sigma + eps))
        return c.sum()

    def simple_loss(AT, q, s, eps=1e-5):
        D = self.D
        mu = s / D
        sigma2 = (q / D) - mu**2 + eps
        # The zero-weighted AT term only keeps AT connected to the result.
        c = sigma2 + 0 * AT[:, 0]
        return c.sum()

    def simplest_loss(AT, q, s, eps=1e-5):
        # eps is unused; it is accepted to keep the three signatures parallel.
        return (-AT[:, 0] + 0.7 * s + 0.3 * q).sum()

    # Previously z_loss was evaluated first and its result immediately
    # overwritten by simplest_loss; only the loss actually used is built now.
    loss_fn = simplest_loss
    loss = loss_fn(AT, q, s)
    grad_HT = T.grad(loss, wrt=HT)

    g = function([HT, KindexesT, eta_var], [AT, q, s, loss, grad_HT])
    theano_AT, theano_q, theano_s, theano_L, theano_grad_HT = g(
        HT_mat, KindexesT_mat, self.eta)

    print >> sys.stderr, "******** KindexesT ********"
    print >> sys.stderr, KindexesT_mat

    print >> sys.stderr, "******** theano AT, q, s, L ********"
    print >> sys.stderr, theano_AT, theano_q, theano_s, theano_L

    print >> sys.stderr, "******** theano grad_HT ********"
    print >> sys.stderr, theano_grad_HT

    print >> sys.stderr, "******** old W ********"
    print >> sys.stderr, W_mat

    # Rebuild W from the shared variables as they are after the call to g.
    print >> sys.stderr, "******** theano new W ********"
    new_W_mat = spherical_op.numpy_recompute_W(
        V.get_value(), UT.get_value(), omega.get_value())
    print >> sys.stderr, new_W_mat

    print >> sys.stderr, "******** consistency of op's new w_bar and W ********"
    print >> sys.stderr, "w_bar before update: ", w_bar_vec
    print >> sys.stderr, "sum of old W: ", W_mat.sum(axis=0)
    print >> sys.stderr, "w_bar after update:", w_bar.get_value()
    print >> sys.stderr, "sum of numpy new W:", new_W_mat.sum(axis=0)

    print >> sys.stderr, "******** consistency of op's new QT and W ********"
    print >> sys.stderr, "QT before update:"
    print >> sys.stderr, QT_mat
    print >> sys.stderr, "QT after update: (consider only upper triangular part)"
    print >> sys.stderr, QT.get_value()
    print >> sys.stderr, "numpy new W.T W:"
    print >> sys.stderr, np.dot(new_W_mat.T, new_W_mat)
def test_spherical_op_take_gradient(self):
    """Take T.grad of a loss built on FactoredSphericalOp and print the results.

    Steps, in order:
      1. generate model parameters and a data batch via the fixture helpers;
      2. wrap the parameters in shared variables and apply the forward op;
      3. build a scalar loss from (AT, q, s) and differentiate it w.r.t. HT;
      4. compile, evaluate once, and print the outputs plus the shared
         parameters read back afterwards to ``sys.stderr``.

    Nothing is asserted; the printed values are meant for manual inspection.
    """
    print >> sys.stderr
    print >> sys.stderr, "================================================="
    print >> sys.stderr, "====== TEST spherical_op_take_gradient ==========="
    print >> sys.stderr, "================================================="

    V_mat, UT_mat, Uinv_mat, QT_mat, omega_vec, w_bar_vec = self.generate_model_params()
    HT_mat, KindexesT_mat, grad_AT_mat, grad_q_vec, grad_s_vec = self.generate_data()

    # Shared variables let the test read parameter values back after the run.
    V = shared(V_mat)
    UT = shared(UT_mat)
    Uinv = shared(Uinv_mat)
    QT = shared(QT_mat)
    omega = shared(omega_vec)
    w_bar = shared(w_bar_vec)

    # Inputs fed at call time.
    HT = T.matrix()
    KindexesT = T.imatrix()
    eta_var = T.scalar()

    # numpy (unfactorized) reference.  The returned gradient / new W are not
    # reported by this test, so they are no longer bound to names; the call is
    # retained because it is not visible here whether it mutates W_mat.
    W_mat = spherical_op.numpy_recompute_W(V_mat, UT_mat, omega_vec)
    spherical_op.numpy_bprop_update(
        W_mat, HT_mat, KindexesT_mat,
        grad_AT_mat, grad_q_vec, grad_s_vec, self.eta)

    # Theano (factorized) forward pass; work_d / work_m are unpacked only
    # because the op returns five outputs.
    forward_op = spherical_op.FactoredSphericalOp(
        eta_var, invup_mode=self.invup_mode)
    AT, q, s, work_d, work_m = forward_op(
        V, UT, Uinv, QT, omega, w_bar, HT, KindexesT)

    # Alternative losses for debugging; pick one via `loss_fn` below.
    def z_loss(AT, q, s, eps=1e-12):
        D = self.D
        mu = s / D
        sigma = T.sqrt((q / D) - mu**2)
        c = T.nnet.softplus((mu - AT[:, 0]) / (sigma + eps))
        return c.sum()

    def simple_loss(AT, q, s, eps=1e-5):
        D = self.D
        mu = s / D
        sigma2 = (q / D) - mu**2 + eps
        # AT enters with weight zero, only so that it stays in the expression.
        c = sigma2 + 0 * AT[:, 0]
        return c.sum()

    def simplest_loss(AT, q, s, eps=1e-5):
        # Linear in all three outputs; eps is accepted but unused.
        return (-AT[:, 0] + 0.7 * s + 0.3 * q).sum()

    # The original built z_loss and then discarded it by reassigning the same
    # name; the dead assignment has been dropped.
    loss_fn = simplest_loss
    loss = loss_fn(AT, q, s)
    grad_HT = T.grad(loss, wrt=HT)

    g = function([HT, KindexesT, eta_var], [AT, q, s, loss, grad_HT])
    theano_AT, theano_q, theano_s, theano_L, theano_grad_HT = g(
        HT_mat, KindexesT_mat, self.eta)

    print >> sys.stderr, "******** KindexesT ********"
    print >> sys.stderr, KindexesT_mat

    print >> sys.stderr, "******** theano AT, q, s, L ********"
    print >> sys.stderr, theano_AT, theano_q, theano_s, theano_L

    print >> sys.stderr, "******** theano grad_HT ********"
    print >> sys.stderr, theano_grad_HT

    print >> sys.stderr, "******** old W ********"
    print >> sys.stderr, W_mat

    # W recomputed from the shared variables' values after g has run.
    print >> sys.stderr, "******** theano new W ********"
    new_W_mat = spherical_op.numpy_recompute_W(
        V.get_value(), UT.get_value(), omega.get_value())
    print >> sys.stderr, new_W_mat

    print >> sys.stderr, "******** consistency of op's new w_bar and W ********"
    print >> sys.stderr, "w_bar before update: ", w_bar_vec
    print >> sys.stderr, "sum of old W: ", W_mat.sum(axis=0)
    print >> sys.stderr, "w_bar after update:", w_bar.get_value()
    print >> sys.stderr, "sum of numpy new W:", new_W_mat.sum(axis=0)

    print >> sys.stderr, "******** consistency of op's new QT and W ********"
    print >> sys.stderr, "QT before update:"
    print >> sys.stderr, QT_mat
    print >> sys.stderr, "QT after update: (consider only upper triangular part)"
    print >> sys.stderr, QT.get_value()
    print >> sys.stderr, "numpy new W.T W:"
    print >> sys.stderr, np.dot(new_W_mat.T, new_W_mat)
def test_spherical_op_grad(self):
    """Run the factored bprop/update op next to the numpy reference and print both.

    The numpy path (``numpy_bprop_update``) and the Theano path
    (``FactoredSphericalOp`` followed by ``FactoredSphericalOpBpropUpdate``)
    receive the same generated parameters, data and upstream gradients.  The
    resulting gradients w.r.t. HT, the W matrices, and the shared ``w_bar`` /
    ``QT`` values read back after the run are written to ``sys.stderr``.

    No assertion is made; agreement has to be checked by reading the output.
    """
    out = sys.stderr
    print >> out
    print >> out, "================================================="
    print >> out, "============ TEST spherical_op_grad ==========="
    print >> out, "================================================="

    model_params = self.generate_model_params()
    V_mat, UT_mat, Uinv_mat, QT_mat, omega_vec, w_bar_vec = model_params
    batch = self.generate_data()
    HT_mat, KindexesT_mat, grad_AT_mat, grad_q_vec, grad_s_vec = batch

    # Model parameters as shared variables (read back with get_value() below).
    V, UT, Uinv, QT, omega, w_bar = [
        shared(value)
        for value in (V_mat, UT_mat, Uinv_mat, QT_mat, omega_vec, w_bar_vec)
    ]

    # Upstream gradients are shared variables too, not function inputs.
    # Debugging hint: zero the *_mat / *_vec arrays and set a single large
    # entry before wrapping them to isolate one gradient component.
    grad_AT, grad_q, grad_s = [
        shared(value) for value in (grad_AT_mat, grad_q_vec, grad_s_vec)
    ]

    # Symbolic inputs supplied when the compiled function is called.
    HT = T.matrix()
    KindexesT = T.imatrix()
    eta_var = T.scalar()

    # Reference result with numpy (unfactorized algorithm).
    W_before = spherical_op.numpy_recompute_W(V_mat, UT_mat, omega_vec)
    numpy_grad_HT, numpy_new_W = spherical_op.numpy_bprop_update(
        W_before, HT_mat, KindexesT_mat,
        grad_AT_mat, grad_q_vec, grad_s_vec, self.eta)

    # Same computation with Theano (factorized algorithm): forward op first,
    # whose work_d / work_m outputs are handed on to the bprop/update op.
    fprop_op = spherical_op.FactoredSphericalOp(
        eta_var, invup_mode=self.invup_mode)
    AT, q, s, work_d, work_m = fprop_op(
        V, UT, Uinv, QT, omega, w_bar, HT, KindexesT)

    # The s-gradient gets a tiny multiple of the forward output s added.
    # NOTE(review): the reason for this link is not visible in this test.
    grad_s_linked = grad_s + 1e-9 * s

    bprop_op = spherical_op.FactoredSphericalOpBpropUpdate(
        invup_mode=self.invup_mode)
    grad_HT = bprop_op(
        V, UT, Uinv, QT, omega, w_bar,
        HT, KindexesT,
        work_d, work_m,
        grad_AT, grad_q, grad_s_linked,
        eta_var)

    # Outputs are given as a list, so the call returns a one-element list.
    run_bprop = function([HT, KindexesT, eta_var], [grad_HT])
    theano_grad_HT = run_bprop(HT_mat, KindexesT_mat, self.eta)

    print >> out, "******** KindexesT ********"
    print >> out, KindexesT_mat

    print >> out, "******** numpy grad_HT ********"
    print >> out, numpy_grad_HT
    print >> out, "******** theano grad_HT ********"
    print >> out, theano_grad_HT

    print >> out, "******** old W ********"
    print >> out, W_before
    print >> out, "******** numpy new W ********"
    print >> out, numpy_new_W

    # W rebuilt from the shared variables' values after the Theano run.
    print >> out, "******** theano new W ********"
    W_after = spherical_op.numpy_recompute_W(
        V.get_value(), UT.get_value(), omega.get_value())
    print >> out, W_after

    print >> out, "******** consistency of op's new w_bar and W ********"
    print >> out, "w_bar before update: ", w_bar_vec
    print >> out, "sum of old W: ", W_before.sum(axis=0)
    print >> out, "w_bar after update:", w_bar.get_value()
    print >> out, "sum of numpy new W:", W_after.sum(axis=0)

    print >> out, "******** consistency of op's new QT and W ********"
    print >> out, "QT before update:"
    print >> out, QT_mat
    print >> out, "QT after update: (consider only upper triangular part)"
    print >> out, QT.get_value()
    print >> out, "numpy new W.T W:"
    print >> out, np.dot(W_after.T, W_after)
def test_spherical_op_grad(self):
    """Print the numpy and Theano results of one bprop/update step side by side.

    Both implementations are driven by the values returned from
    ``self.generate_model_params()`` and ``self.generate_data()`` and by
    ``self.eta``.  Everything is written to ``sys.stderr``; the test contains
    no assertions, so the comparison is a manual one.
    """
    stream = sys.stderr
    print >> stream
    print >> stream, "================================================="
    print >> stream, "============ TEST spherical_op_grad ==========="
    print >> stream, "================================================="

    params = self.generate_model_params()
    V_mat, UT_mat, Uinv_mat, QT_mat, omega_vec, w_bar_vec = params
    data = self.generate_data()
    HT_mat, KindexesT_mat, grad_AT_mat, grad_q_vec, grad_s_vec = data

    # Shared variables for the model parameters ...
    V = shared(V_mat)
    UT = shared(UT_mat)
    Uinv = shared(Uinv_mat)
    QT = shared(QT_mat)
    omega = shared(omega_vec)
    w_bar = shared(w_bar_vec)

    # ... and for the gradients flowing in from above.
    grad_AT = shared(grad_AT_mat)
    grad_q = shared(grad_q_vec)
    grad_s = shared(grad_s_vec)

    # Placeholders for the values passed to the compiled function.
    HT = T.matrix()
    KindexesT = T.imatrix()
    eta_var = T.scalar()

    # --- numpy, unfactorized ---------------------------------------------
    old_W = spherical_op.numpy_recompute_W(V_mat, UT_mat, omega_vec)
    reference = spherical_op.numpy_bprop_update(
        old_W, HT_mat, KindexesT_mat,
        grad_AT_mat, grad_q_vec, grad_s_vec, self.eta)
    numpy_grad_HT, numpy_new_W = reference

    # --- Theano, factorized ----------------------------------------------
    forward = spherical_op.FactoredSphericalOp(
        eta_var, invup_mode=self.invup_mode)
    forward_outputs = forward(V, UT, Uinv, QT, omega, w_bar, HT, KindexesT)
    AT, q, s, work_d, work_m = forward_outputs

    # grad_s is combined with a 1e-9-scaled copy of the forward output s
    # before being passed on.  NOTE(review): motivation not shown here.
    linked_grad_s = grad_s + 1e-9 * s

    backward = spherical_op.FactoredSphericalOpBpropUpdate(
        invup_mode=self.invup_mode)
    grad_HT = backward(
        V, UT, Uinv, QT, omega, w_bar,
        HT, KindexesT,
        work_d, work_m,
        grad_AT, grad_q, linked_grad_s,
        eta_var)

    compiled = function([HT, KindexesT, eta_var], [grad_HT])
    theano_grad_HT = compiled(HT_mat, KindexesT_mat, self.eta)

    # --- report ----------------------------------------------------------
    print >> stream, "******** KindexesT ********"
    print >> stream, KindexesT_mat

    print >> stream, "******** numpy grad_HT ********"
    print >> stream, numpy_grad_HT
    print >> stream, "******** theano grad_HT ********"
    print >> stream, theano_grad_HT

    print >> stream, "******** old W ********"
    print >> stream, old_W
    print >> stream, "******** numpy new W ********"
    print >> stream, numpy_new_W

    # Recompute W from whatever the shared variables hold after the run.
    print >> stream, "******** theano new W ********"
    new_W = spherical_op.numpy_recompute_W(
        V.get_value(), UT.get_value(), omega.get_value())
    print >> stream, new_W

    print >> stream, "******** consistency of op's new w_bar and W ********"
    print >> stream, "w_bar before update: ", w_bar_vec
    print >> stream, "sum of old W: ", old_W.sum(axis=0)
    print >> stream, "w_bar after update:", w_bar.get_value()
    print >> stream, "sum of numpy new W:", new_W.sum(axis=0)

    print >> stream, "******** consistency of op's new QT and W ********"
    print >> stream, "QT before update:"
    print >> stream, QT_mat
    print >> stream, "QT after update: (consider only upper triangular part)"
    print >> stream, QT.get_value()
    print >> stream, "numpy new W.T W:"
    print >> stream, np.dot(new_W.T, new_W)