def setUp(self):
    """Prepare one Unique op per positional-flag combination under test."""
    super(test_Unique, self).setUp()
    self.op_class = Unique
    # All eight (return_index, return_inverse, return_counts) combinations,
    # with trailing default (False) arguments omitted as in the op's API.
    flag_combos = (
        (),
        (True,),
        (False, True),
        (True, True),
        (False, False, True),
        (True, False, True),
        (False, True, True),
        (True, True, True),
    )
    self.ops = [Unique(*flags) for flags in flag_combos]
def setUp(self):
    """Prepare the Unique ops under test; count-returning variants are only
    exercised when numpy is new enough to support ``return_counts``."""
    super(test_Unique, self).setUp()
    self.op_class = Unique
    self.ops = [
        Unique(),
        Unique(True),
        Unique(False, True),
        Unique(True, True),
    ]
    # np.unique gained return_counts in numpy 1.9.
    if numpy_ver >= [1, 9]:
        self.ops += [
            Unique(False, False, True),
            Unique(True, False, True),
            Unique(False, True, True),
            Unique(True, True, True),
        ]
def setup_method(self):
    """Instantiate one Unique op for every (index, inverse, counts) combo."""
    super().setup_method()
    self.op_class = Unique
    # Enumerate all eight flag combinations; the loop order reproduces the
    # canonical listing (return_index varies fastest, return_counts slowest).
    self.ops = [
        Unique(index, inverse, counts)
        for counts in (False, True)
        for inverse in (False, True)
        for index in (False, True)
    ]
def __theano_train__(self): """ 训练阶段跑一遍训练序列 """ # self.alpha_lambda = ['alpha', 'lambda'] uidxs = T.ivector() pq_idxs_t = T.imatrix() # 0行:正样本s,1行:负样本s。 mask_t = T.ivector() usrs = self.ui[uidxs] # shape=(n, 20) xpqs = self.lt[pq_idxs_t] # shape=(2, n, 20) uiq_pqs = Unique(False, False, False)(pq_idxs_t) # 直接去重 uiq_x = self.lt[uiq_pqs] # 相应的items特征 """ 输入t时刻正负样本,计算当前损失并更新user/正负样本. 公式里省略了时刻t # 根据性质:T.dot((n, ), (n, ))得到(1, 1) uij = user * (xp - xq) upq = log(sigmoid(uij)) """ upq_t = T.sum(usrs * (xpqs[0] - xpqs[1]), axis=1) loss_t = T.log(sigmoid(upq_t)) # shape=(n, ) loss_t *= mask_t # 只在损失这里乘一下0/1向量就可以了 # ---------------------------------------------------------------------------- # cost, gradients, learning rate, L2 regularization lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1] bpr_l2_sqr = (T.sum([T.sum(par**2) for par in [usrs, xpqs]])) upq = T.sum(loss_t) costs = ( -upq + # 这里不需要除以batch_size,跟RNN不一样。 0.5 * l2 * bpr_l2_sqr) # 这样速度很快,但self.lt的计算并不准确,因为xpqs里可能有重复。 # n个user,2n个items,这种更新求导是最快的。直接对sub求导,并非对par求导。 # pars_subs = [(self.ui, usrs), (self.lt, xpqs)] # 不知道行不行,这里用xpqs…… # seq_updates = [(par, T.set_subtensor(sub, sub - lr * T.grad(costs, sub))) # for par, sub in pars_subs] # 以下是准确写法,但比one_by_one要慢。 pars_subs = [(self.ui, usrs)] seq_updates = [(par, T.set_subtensor(sub, sub - lr * T.grad(costs, sub))) for par, sub in pars_subs] pars_subs = [(self.lt, uiq_x, uiq_pqs)] seq_updates.extend([ (par, T.set_subtensor(sub, sub - lr * T.grad(costs, par)[idxs]) ) # 但这里太耗时。 for par, sub, idxs in pars_subs ]) # ---------------------------------------------------------------------------- # 输入用户、正负样本及其它参数后,更新变量,返回损失。 self.bpr_train = theano.function(inputs=[uidxs, pq_idxs_t, mask_t], outputs=-upq, updates=seq_updates)
def __theano_train__(self): """ 训练阶段跑一遍训练序列 """ # self.alpha_lambda = ['alpha', 'lambda'] pidxs_t, qidxs_t = T.ivector(), T.ivector() mask_t, uidxs = T.ivector(), T.ivector() users = self.ux[uidxs] # shape=(n, 20) xps = self.lt[pidxs_t] # shape=(n, 20) xqs = self.lt[qidxs_t] pqs = T.concatenate((pidxs_t, qidxs_t)) # 先拼接 uiq_pqs = Unique(False, False, False)(pqs) # 再去重 uiq_x = self.lt[uiq_pqs] # 相应的items特征 """ 输入t时刻正负样本,计算当前损失并更新user/正负样本. 公式里省略了时刻t # 根据性质:T.dot((n, ), (n, ))得到(1, 1) uij = user * (xp - xq) upq = log(sigmoid(uij)) """ upq_t = T.sum(users * (xps - xqs), axis=1) loss_t = T.log(sigmoid(upq_t)) # shape=(n, ) loss_t *= mask_t # 只在损失这里乘一下0/1向量就可以了 # ---------------------------------------------------------------------------- # cost, gradients, learning rate, L2 regularization lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1] bpr_l2_sqr = (T.sum([T.sum(par**2) for par in [users, xps, xqs]])) upq = T.sum(loss_t) costs = (-upq + 0.5 * l2 * bpr_l2_sqr) pars_subs = [(self.ux, users, uidxs), (self.lt, uiq_x, uiq_pqs)] bpr_updates = [(par, T.set_subtensor(sub, sub - lr * T.grad(costs, par)[idxs])) for par, sub, idxs in pars_subs] # ---------------------------------------------------------------------------- # 输入用户、正负样本及其它参数后,更新变量,返回损失。 self.bpr_train = theano.function( inputs=[pidxs_t, qidxs_t, mask_t, uidxs], outputs=-upq, updates=bpr_updates)
def __theano_trainx__(self, n_in, n_hidden):
    """Compile the mini-batch LSTM training function ``self.seq_trainx``.

    Given a slice of user rows (``start_end``), runs an LSTM over each
    user's purchase sequence, accumulates the masked BPR loss, and applies
    one SGD step to ``self.paramsx`` plus the input/output item tables
    ``self.lt`` and ``self.vyx``.
    """
    # self.alpha_lambda = ['alpha', 'lambda', 'fea_random_zero']
    uix, whx = self.uix, self.whx
    tra_mask = T.imatrix()  # shape=(n, 157)
    actual_batch_size = tra_mask.shape[0]
    seq_length = T.max(T.sum(tra_mask, axis=1))  # longest valid sequence in the mini-batch
    mask = tra_mask.T  # shape=(157, n)
    c0x = T.alloc(self.c0x, actual_batch_size, n_hidden)  # shape=(n, n_hidden)
    h0x = T.alloc(self.h0x, actual_batch_size, n_hidden)
    bix = T.alloc(self.bix, actual_batch_size, 4,
                  n_hidden)  # shape=(n, 4, n_hidden); n_hidden kept last
    bix = bix.dimshuffle(1, 2, 0)  # shape=(4, n_hidden, n)
    # Input side: only the purchased (positive) items feed the recurrence.
    pidxs, qidxs = T.imatrix(), T.imatrix()  # TensorType(int32, matrix)
    ixps = self.lt[pidxs]  # shape=(actual_batch_size, seq_length, n_in)
    ixps = ixps.dimshuffle(1, 0, 2)  # shape=(seq_length, batch_size, n_in)
    uiq_ps = Unique(False, False, False)(pidxs)  # de-duplicate
    uiq_ix = self.lt[uiq_ps]
    # Output side: score = h * w.
    yxps, yxqs = self.vyx[pidxs], self.vyx[qidxs]
    yxps, yxqs = yxps.dimshuffle(1, 0, 2), yxqs.dimshuffle(1, 0, 2)
    pqs = T.concatenate((pidxs, qidxs))         # concatenate first ...
    uiq_pqs = Unique(False, False, False)(pqs)  # ... then de-duplicate
    uiq_yx = self.vyx[uiq_pqs]
    """ 输入t时刻正负样本、t-1时刻隐层,计算当前隐层、当前损失. 公式里省略了时刻t # 根据性质:T.dot((m, n), (n, ))得到shape=(m, ),且是矩阵每行与(n, )相乘 # GRU z = sigmoid(ux_z * xp + wh_z * h_pre1) r = sigmoid(ux_r * xp + wh_r * h_pre1) c = tanh(ux_c * xp + wh_c * (r 点乘 h_pre1)) h = z * h_pre1 + (1.0 - z) * c # 根据性质:T.dot((n, ), (n, ))得到scalar upq = h_pre1 * (xp - xq) loss = log(1.0 + e^(-upq)) """

    def recurrence(ixp_t, yxp_t, yxq_t, mask_t, cx_t_pre1, hx_t_pre1):
        # One LSTM step; features and hidden states handled as
        # shape=(batch_size, n_hidden).
        gatesx = T.dot(uix, ixp_t.T) + T.dot(
            whx, hx_t_pre1.T) + bix  # shape=(4, n_hidden, n)
        ix, fx, gx, ox = sigmoid(gatesx[0]).T, sigmoid(gatesx[1]).T, tanh(
            gatesx[2]).T, sigmoid(gatesx[3]).T
        cx_t = fx * cx_t_pre1 + ix * gx  # cell state, shape=(n, n_hidden)
        hx_t = ox * tanh(cx_t)           # hidden state, shape=(n, n_hidden)
        # Preference error uses the *previous* hidden state.
        upq_t = T.sum(hx_t_pre1 * (yxp_t - yxq_t), axis=1)  # shape=(n, )
        loss_t = T.log(sigmoid(upq_t))  # shape=(n, )
        loss_t *= mask_t  # masking the loss alone is enough
        return [cx_t, hx_t, loss_t]

    [cx, hx, loss], _ = theano.scan(fn=recurrence,
                                    sequences=[ixps, yxps, yxqs, mask],
                                    outputs_info=[c0x, h0x, None],
                                    n_steps=seq_length)  # only loop over valid steps
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    seq_l2_sq = (
        T.sum([T.sum(par**2) for par in [uix, whx, yxps, yxqs, ixps]]) +
        T.sum([T.sum(par**2) for par in [bix]]) / actual_batch_size)
    upq = T.sum(loss)
    seq_costs = (-upq / actual_batch_size + 0.5 * l2 * seq_l2_sq)
    seq_grads = T.grad(seq_costs, self.paramsx)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.paramsx, seq_grads)]
    # Embedding tables are updated through their de-duplicated row subsets.
    update_ix = T.set_subtensor(
        uiq_ix, uiq_ix - lr * T.grad(seq_costs, self.lt)[uiq_ps])
    update_yx = T.set_subtensor(
        uiq_yx, uiq_yx - lr * T.grad(seq_costs, self.vyx)[uiq_pqs])
    seq_updates.append((self.lt, update_ix))
    seq_updates.append((self.vyx, update_yx))  # appended into seq_updates directly
    # ----------------------------------------------------------------------------
    # Compile: data delivered via givens, indexed by the start_end slice.
    start_end = T.ivector()
    self.seq_trainx = theano.function(
        inputs=[start_end],
        outputs=-upq,
        updates=seq_updates,
        givens={
            pidxs: self.tra_buys_masks[start_end],  # TensorType(int32, matrix)
            qidxs: self.tra_buys_neg_masks[start_end],
            tra_mask: self.tra_masks[start_end]
        })
def __theano_train__(self, n_in, n_hidden):
    """Compile the per-user RNN-BPR training function ``self.seq_train``.

    Runs a plain RNN over one user's purchase sequence, accumulates the BPR
    loss, and applies one SGD step to ``self.params`` and the item table
    ``self.lt``.
    """
    ui, wh = self.ui, self.wh
    bi = self.bi
    tra_mask = T.ivector()
    seq_length = T.sum(tra_mask)  # number of valid (unpadded) steps
    pidxs, qidxs = T.ivector(), T.ivector()
    xps, xqs = self.lt[pidxs], self.lt[qidxs]  # shape=(seq_length, n_in)
    pqs = T.concatenate((pidxs, qidxs))         # concatenate first ...
    uiq_pqs = Unique(False, False, False)(pqs)  # ... then de-duplicate
    uiq_x = self.lt[uiq_pqs]  # embeddings of the unique items
    """ 输入t时刻正负样本、t-1时刻隐层,计算当前隐层、当前损失. 公式里省略了时刻t # 根据性质:T.dot((n, n), (n, ))得到shape=(n, ),且是矩阵(n, n)每行与(n, )相乘 # RNN h = sigmoid(ux * xp + wh * h_pre1) # 根据性质:T.dot((n, ), (n, ))得到(1, 1) upq = h_pre1 * (xp - xq) # 注意:这里是用上个隐层、当前正负样本计算偏好,不是当前隐层 loss = log(1.0 + e^(-upq)) """

    def recurrence(xp_t, xq_t, h_t_pre1):
        # Current hidden state, shape=(n, ).
        h_t = sigmoid(T.dot(ui, xp_t) + T.dot(wh, h_t_pre1) + bi)
        # Using h(t) * (xp(t+1) - xq(t+1)) would also be valid.
        # NOTE: the preference uses the *previous* hidden state,
        # i.e. h(t-1) * (xp(t) - xq(t)), not the current one.
        upq_t = T.dot(h_t_pre1, xp_t - xq_t)
        loss_t = T.log(sigmoid(upq_t))  # natural logarithm
        return [h_t, loss_t]

    [h, loss], _ = theano.scan(
        fn=recurrence,
        sequences=[xps, xqs],
        outputs_info=[self.h0, None],
        n_steps=seq_length,
        truncate_gradient=-1)
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    seq_l2_sq = T.sum([T.sum(par ** 2) for par in [xps, xqs, ui, wh, bi]])
    upq = T.sum(loss)
    seq_costs = (
        - upq +
        0.5 * l2 * seq_l2_sq)
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, seq_grads)]
    # Item table updated through the de-duplicated row subset.
    update_x = T.set_subtensor(
        uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
    seq_updates.append((self.lt, update_x))  # appended into seq_updates directly
    # ----------------------------------------------------------------------------
    # Compile: one user index in, loss out, parameters updated in place.
    uidx = T.iscalar()  # TensorType(int32, scalar)
    self.seq_train = theano.function(
        inputs=[uidx],
        outputs=-upq,
        updates=seq_updates,
        givens={
            pidxs: self.tra_buys_masks[uidx],  # one padded row per user
            qidxs: self.tra_buys_neg_masks[uidx],
            tra_mask: self.tra_masks[uidx]})
def __theano_train__(self, n_in, n_hidden):
    """Compile the per-user GRU-BPR training function ``self.seq_train``.

    Runs a GRU over one user's purchase sequence, accumulates the BPR loss,
    and applies one SGD step to ``self.params`` and the item table
    ``self.lt``.
    """
    # self.alpha_lambda = ['alpha', 'lambda']
    ui, wh = self.ui, self.wh
    tra_mask = T.ivector()
    seq_length = T.sum(tra_mask)  # number of valid (unpadded) steps
    h0 = self.h0
    bi = self.bi
    pidxs, qidxs = T.ivector(), T.ivector()
    xps, xqs = self.lt[pidxs], self.lt[qidxs]  # shape=(seq_length, n_in)
    pqs = T.concatenate((pidxs, qidxs))         # concatenate first ...
    uiq_pqs = Unique(False, False, False)(pqs)  # ... then de-duplicate
    uiq_x = self.lt[uiq_pqs]  # embeddings of the unique items
    """ 输入t时刻正负样本、t-1时刻隐层,计算当前隐层、当前损失. 公式里省略了时刻t # 根据性质:T.dot((m, n), (n, ))得到shape=(m, ),且是矩阵每行与(n, )相乘 # GRU z = sigmoid(ux_z * xp + wh_z * h_pre1) r = sigmoid(ux_r * xp + wh_r * h_pre1) c = tanh(ux_c * xp + wh_c * (r 点乘 h_pre1)) h = z * h_pre1 + (1.0 - z) * c # 根据性质:T.dot((n, ), (n, ))得到scalar upq = h_pre1 * (xp - xq) loss = log(1.0 + e^(-upq)) """

    def recurrence(xp_t, xq_t, h_t_pre1):
        # GRU step: rows 0/1 of ui/wh/bi are the update/reset gates,
        # row 2 is the candidate state.
        z_r = sigmoid(T.dot(ui[:2], xp_t) +
                      T.dot(wh[:2], h_t_pre1) + bi[:2])
        z, r = z_r[0], z_r[1]
        c = tanh(T.dot(ui[2], xp_t) + T.dot(wh[2], (r * h_t_pre1)) + bi[2])
        h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c
        # Preference uses the *previous* hidden state.
        upq_t = T.dot(h_t_pre1, xp_t - xq_t)
        loss_t = T.log(sigmoid(upq_t))
        return [h_t, loss_t]

    [h, loss], _ = theano.scan(
        fn=recurrence,
        sequences=[xps, xqs],
        outputs_info=[h0, None],
        n_steps=seq_length,
        truncate_gradient=-1)
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    seq_l2_sq = T.sum([T.sum(par ** 2) for par in [xps, xqs, ui, wh, bi]])
    upq = T.sum(loss)
    seq_costs = (
        - upq +
        0.5 * l2 * seq_l2_sq)
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, seq_grads)]
    # Item table updated through the de-duplicated row subset.
    update_x = T.set_subtensor(
        uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
    seq_updates.append((self.lt, update_x))  # appended into seq_updates directly
    # ----------------------------------------------------------------------------
    # Compile: one user index in, loss out, parameters updated in place.
    uidx = T.iscalar()  # TensorType(int32, scalar)
    self.seq_train = theano.function(
        inputs=[uidx],
        outputs=-upq,
        updates=seq_updates,
        givens={
            pidxs: self.tra_buys_masks[uidx],  # one padded row per user
            qidxs: self.tra_buys_neg_masks[uidx],
            tra_mask: self.tra_masks[uidx]})
def __theano_train2__(self):
    """Compile the confidence-weighted GRU-BPR training function
    ``self.aux_seq_train``.

    Runs a GRU over one user's (item, distance) sequence; the per-step BPR
    loss combines item preference with a distance-interval score and is
    weighted by the user's confidence row ``self.confidence_matrix[uidx]``.
    One call applies a single SGD step and returns the weighted loss.
    """
    ui, wh = self.ui, self.wh
    vs, bs = self.vs, self.bs
    h0, bi = self.h0, self.bi
    wd = self.wd
    tra_mask = T.ivector()
    seq_length = T.sum(tra_mask)  # number of valid (unpadded) steps
    xpidxs, xqidxs = T.ivector(), T.ivector()  # pos/neg item ids
    dpidxs, dqidxs = T.ivector(), T.ivector()  # pos/neg distance-interval ids
    xps = self.lt[xpidxs]  # shape=(seq_length, n_in)
    xqs = self.lt[xqidxs]
    dps = self.di[dpidxs]
    ps = T.concatenate((xps, dps), axis=1)  # item + distance input features
    pqs = T.concatenate((xpidxs, xqidxs))       # concatenate first ...
    uiq_pqs = Unique(False, False, False)(pqs)  # ... then de-duplicate
    uiq_x = self.lt[uiq_pqs]  # embeddings of the unique items
    uiq_ds = Unique(False, False, False)(dpidxs)
    uiq_d = self.di[uiq_ds]

    def recurrence(p_t, xp_t1, xq_t1, dp_t1, dq_t1, h_t_pre1):
        # GRU hidden state.
        z_r = sigmoid(
            T.dot(ui[:2], p_t) + T.dot(wh[:2], h_t_pre1) + bi[:2])
        z, r = z_r[0], z_r[1]
        c = tanh(T.dot(ui[2], p_t) + T.dot(wh[2], (r * h_t_pre1)) + bi[2])
        h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c
        # Scores over the distance intervals for the next step.
        s_t = softrelu(T.dot(vs, h_t) + bs)  # shape=(381, )
        # BPR loss built from the *next* step's samples.
        upq_t = T.dot(h_t, xp_t1 - xq_t1) + wd * (s_t[dp_t1] - s_t[dq_t1])
        loss_t_bpr = log(sigmoid(upq_t))
        return [h_t, loss_t_bpr]

    [h, loss_bpr], _ = theano.scan(
        fn=recurrence,
        sequences=[ps, xps[1:], xqs[1:], dpidxs[1:], dqidxs[1:]],
        outputs_info=[h0, None],
        n_steps=seq_length - 1)
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    # ad = self.adam_default
    seq_l2_sq = T.sum(
        [T.sum(par**2) for par in [xps, xqs, ui, wh, bi, dps, vs, bs, wd]])
    # Per-user confidence row, shape=(item_num,); c_u[xpidxs] picks the
    # confidence of every (masked) visited item.
    c_u = T.dvector()
    c_ul = c_u[xpidxs]
    # Weight each step's BPR loss by the confidence of the item predicted at
    # that step — scan step i predicts xpidxs[i+1], hence the [1:seq_length]
    # slice (length seq_length-1, matching loss_bpr; TODO confirm this
    # alignment against the caller) — then negate the sum.
    # BUGFIX: the previous form `c_ul * -T.sum(loss_bpr)` produced a *vector*
    # cost; T.grad requires a scalar cost and would raise a TypeError.
    bpr = -T.sum(c_ul[1:seq_length] * loss_bpr)
    seq_costs = (bpr + 0.5 * l2 * seq_l2_sq)
    # seq_updates = self.adam(seq_costs, self.params+[self.lt, self.di], lr, ad[0], ad[1], ad[2], ad[3])
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, seq_grads)]
    # Embedding tables updated through their de-duplicated row subsets.
    update_x = T.set_subtensor(
        uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
    update_d = T.set_subtensor(
        uiq_d, uiq_d - lr * T.grad(seq_costs, self.di)[uiq_ds])
    seq_updates.append((self.lt, update_x))  # appended into seq_updates directly
    seq_updates.append((self.di, update_d))
    # ----------------------------------------------------------------------------
    # Compile: one user index in, weighted loss out, parameters updated.
    uidx = T.iscalar()  # TensorType(int32, scalar)
    self.aux_seq_train = theano.function(
        inputs=[uidx],
        outputs=bpr,
        updates=seq_updates,
        givens={
            xpidxs: self.tra_buys_masks[uidx],  # one padded row per user
            xqidxs: self.tra_buys_neg_masks[uidx],  # negative poi
            dpidxs: self.tra_dist_masks[uidx],  # binned distance between places
            dqidxs: self.tra_dist_neg_masks[uidx],
            tra_mask: self.tra_masks[uidx],
            c_u: self.confidence_matrix[uidx]
        })
def __theano_train__(self, n_in, n_hidden):
    """Compile ``self.seq_train``: GRU over one user's (item, distance)
    sequence with a joint BPR + distance-interval loss.

    The two loss terms are mixed by the softmax-normalised
    ``self.loss_weight`` and one SGD step is applied per call.
    """
    ui, wh = self.ui, self.wh
    vs, bs = self.vs, self.bs
    dd = self.dd
    tra_mask = T.ivector()
    seq_length = T.sum(tra_mask)  # number of valid (unpadded) steps
    h0 = self.h0
    bi = self.bi
    pidxs = T.ivector()
    qidxs = T.ivector()
    didxs = T.ivector()
    xps = self.lt[pidxs]  # shape=(seq_length, n_in)
    xqs = self.lt[qidxs]
    xds = self.di[didxs]
    xs = T.concatenate((xps, xds), axis=1)  # item + distance input features
    pqs = T.concatenate((pidxs, qidxs))         # concatenate first ...
    uiq_pqs = Unique(False, False, False)(pqs)  # ... then de-duplicate
    uiq_x = self.lt[uiq_pqs]  # embeddings of the unique items
    uiq_ds = Unique(False, False, False)(didxs)
    uiq_d = self.di[uiq_ds]
    wd = self.wd
    ls = softmax(self.loss_weight)  # mixing weights for the two loss terms
    """ 输入t时刻正负样本、t-1时刻隐层,计算当前隐层、当前损失. 公式里省略了时刻t # 根据性质:T.dot((m, n), (n, ))得到shape=(m, ),且是矩阵每行与(n, )相乘 # GRU z = sigmoid(ux_z * xp + wh_z * h_pre1) r = sigmoid(ux_r * xp + wh_r * h_pre1) c = tanh(ux_c * xp + wh_c * (r 点乘 h_pre1)) h = z * h_pre1 + (1.0 - z) * c # 根据性质:T.dot((n, ), (n, ))得到scalar upq = h_pre1 * (xp - xq) loss = log(1.0 + e^(-upq)) """

    def recurrence(x_t, xp_t1, xq_t1, d_t1, h_t_pre1):
        # GRU hidden state.
        z_r = sigmoid(
            T.dot(ui[:2], x_t) + T.dot(wh[:2], h_t_pre1) + bi[:2])
        z, r = z_r[0], z_r[1]
        c = tanh(T.dot(ui[2], x_t) + T.dot(wh[2], (r * h_t_pre1)) + bi[2])
        h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c
        # Probabilities over the distance intervals for the next step.
        s_t = softmax(T.dot(vs, h_t) + bs)  # shape=(381, )
        # Losses use the *next* step's samples.
        upq_t = T.dot(h_t, xp_t1 - xq_t1) + wd * s_t[d_t1]
        loss_t_bpr = T.log(sigmoid(upq_t))
        # s_t[:d_t1 + 1]: cumulative probability from interval 0 up to the
        # observed distance interval.
        loss_t_sur = T.sum(s_t[:d_t1 + 1]) * dd - T.log(s_t[d_t1])
        return [h_t, loss_t_sur, loss_t_bpr]

    [h, loss_sur, loss_bpr
     ], _ = theano.scan(fn=recurrence,
                        sequences=[xs, xps[1:], xqs[1:], didxs[1:]],
                        outputs_info=[h0, None, None],
                        n_steps=seq_length - 1)
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    seq_l2_sq = T.sum([
        T.sum(par**2)
        for par in [xps, xqs, ui, wh, bi, xds, vs, bs, wd, ls]
    ])
    sur = T.sum(loss_sur)
    upq = -T.sum(loss_bpr)
    los = ls[0] * sur + ls[1] * upq  # learned mixture of the two losses
    seq_costs = (los + 0.5 * l2 * seq_l2_sq)
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, seq_grads)]
    # Embedding tables updated through their de-duplicated row subsets.
    update_x = T.set_subtensor(
        uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
    update_d = T.set_subtensor(
        uiq_d, uiq_d - lr * T.grad(seq_costs, self.di)[uiq_ds])
    seq_updates.append((self.lt, update_x))  # appended into seq_updates directly
    seq_updates.append((self.di, update_d))
    # ----------------------------------------------------------------------------
    # Compile: one user index in, losses out, parameters updated.
    uidx = T.iscalar()  # TensorType(int32, scalar)
    self.seq_train = theano.function(
        inputs=[uidx],
        outputs=[los, sur, upq, ls],
        updates=seq_updates,
        givens={
            pidxs: self.tra_buys_masks[uidx],  # one padded row per user
            qidxs: self.tra_buys_neg_masks[uidx],  # negative poi
            didxs: self.tra_dist_masks[uidx],  # binned distance between places
            tra_mask: self.tra_masks[uidx]
        })
def __theano_train__(self, n_in, n_hidden):
    """Compile ``self.seq_train``: GRU-BPR where item preference and the
    distance-interval score are combined *nonlinearly* via av/ar/ae.

    One call applies a single SGD step on ``self.params`` plus the item and
    distance tables, and returns the negated BPR log-likelihood.
    """
    ui, wh = self.ui, self.wh
    vs, bs = self.vs, self.bs
    h0, bi = self.h0, self.bi
    wd = self.wd
    av, ar, ae = self.av, self.ar, self.ae  # combination weights
    tra_mask = T.ivector()
    seq_length = T.sum(tra_mask)  # number of valid (unpadded) steps
    xpidxs, xqidxs = T.ivector(), T.ivector()  # pos/neg item ids
    dpidxs, dqidxs = T.ivector(), T.ivector()  # pos/neg distance-interval ids
    xps = self.lt[xpidxs]  # shape=(seq_length, n_in)
    xqs = self.lt[xqidxs]
    dps = self.di[dpidxs]
    ps = T.concatenate((xps, dps), axis=1)  # item + distance input features
    pqs = T.concatenate((xpidxs, xqidxs))       # concatenate first ...
    uiq_pqs = Unique(False, False, False)(pqs)  # ... then de-duplicate
    uiq_x = self.lt[uiq_pqs]  # embeddings of the unique items
    uiq_ds = Unique(False, False, False)(dpidxs)
    uiq_d = self.di[uiq_ds]
    """ 输入t时刻正负样本、t-1时刻隐层,计算当前隐层、当前损失. 公式里省略了时刻t # 根据性质:T.dot((m, n), (n, ))得到shape=(m, ),且是矩阵每行与(n, )相乘 # GRU z = sigmoid(ux_z * xp + wh_z * h_pre1) r = sigmoid(ux_r * xp + wh_r * h_pre1) c = tanh(ux_c * xp + wh_c * (r 点乘 h_pre1)) h = z * h_pre1 + (1.0 - z) * c # 根据性质:T.dot((n, ), (n, ))得到scalar upq = h_pre1 * (xp - xq) loss = log(1.0 + e^(-upq)) """

    def recurrence(p_t, xp_t1, xq_t1, dp_t1, dq_t1, h_t_pre1):
        # GRU hidden state.
        z_r = sigmoid(
            T.dot(ui[:2], p_t) + T.dot(wh[:2], h_t_pre1) + bi[:2])
        z, r = z_r[0], z_r[1]
        c = tanh(T.dot(ui[2], p_t) + T.dot(wh[2], (r * h_t_pre1)) + bi[2])
        h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c
        # Scores over the distance intervals for the next step.
        s_t = softrelu(T.dot(vs, h_t) + bs)  # shape=(381, )
        # BPR loss built from the *next* step's samples.
        # Linear combination kept for reference:
        # upq_t = T.dot(h_t, xp_t1 - xq_t1) + wd * (s_t[dp_t1] - s_t[dq_t1])
        # Nonlinear combination of the two preference signals:
        upq_t = T.dot(
            av,
            tanh(ar * T.dot(h_t, xp_t1) + ae * s_t[dp_t1]) -
            tanh(ar * T.dot(h_t, xq_t1) + ae * s_t[dq_t1]))
        loss_t_bpr = log(sigmoid(upq_t))
        return [h_t, loss_t_bpr]

    [h, loss_bpr], _ = theano.scan(
        fn=recurrence,
        sequences=[ps, xps[1:], xqs[1:], dpidxs[1:], dqidxs[1:]],
        outputs_info=[h0, None],
        n_steps=seq_length - 1)
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    # ad = self.adam_default
    seq_l2_sq = T.sum([
        T.sum(par**2)
        for par in [xps, xqs, ui, wh, bi, dps, vs, bs, wd, av, ar, ae]
    ])
    bpr = -T.sum(loss_bpr)
    seq_costs = (bpr + 0.5 * l2 * seq_l2_sq)
    # seq_updates = self.adam(seq_costs, self.params+[self.lt, self.di], lr, ad[0], ad[1], ad[2], ad[3])
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, seq_grads)]
    # Embedding tables updated through their de-duplicated row subsets.
    update_x = T.set_subtensor(
        uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
    update_d = T.set_subtensor(
        uiq_d, uiq_d - lr * T.grad(seq_costs, self.di)[uiq_ds])
    seq_updates.append((self.lt, update_x))  # appended into seq_updates directly
    seq_updates.append((self.di, update_d))
    # ----------------------------------------------------------------------------
    # Compile: one user index in, loss out, parameters updated.
    uidx = T.iscalar()  # TensorType(int32, scalar)
    self.seq_train = theano.function(
        inputs=[uidx],
        outputs=bpr,
        updates=seq_updates,
        givens={
            xpidxs: self.tra_buys_masks[uidx],  # one padded row per user
            xqidxs: self.tra_buys_neg_masks[uidx],  # negative poi
            dpidxs: self.tra_dist_masks[uidx],  # binned distance between places
            dqidxs: self.tra_dist_neg_masks[uidx],
            tra_mask: self.tra_masks[uidx]
        })
def __theano_train__(self, n_in, n_hidden):
    """Compile ``self.seq_train``: recurrent BPR model whose transition
    matrices ``self.wd[...]`` are selected by the distance-interval ids.

    One call applies a single SGD step on ``self.params``, the item table
    ``self.lt`` and the distance-matrix table ``self.wd``.
    """
    M = self.M
    tra_mask = T.ivector()
    seq_length = T.sum(tra_mask)  # number of valid (unpadded) steps
    h0 = self.h0
    xpidxs = T.ivector()  # positive item ids
    xqidxs = T.ivector()  # negative item ids
    dpidxs = T.ivector()  # pos distance-interval ids
    dqidxs = T.ivector()  # neg distance-interval ids
    xps = self.lt[xpidxs]  # shape=(seq_length, n_in)
    xqs = self.lt[xqidxs]
    wdps = self.wd[dpidxs]  # per-step transition matrices (positive)
    wdqs = self.wd[dqidxs]  # per-step transition matrices (negative)
    pqs = T.concatenate((xpidxs, xqidxs))       # concatenate first ...
    uiq_pqs = Unique(False, False, False)(pqs)  # ... then de-duplicate
    uiq_x = self.lt[uiq_pqs]  # embeddings of the unique items
    dpqs = T.concatenate((dpidxs, dqidxs))        # concatenate first ...
    uiq_dpqs = Unique(False, False, False)(dpqs)  # ... then de-duplicate
    uiq_d = self.wd[uiq_dpqs]  # matrices of the unique distance ids

    def recurrence(x_t, xp_t1, xq_t1, wd_t, wdp_t1, wdq_t1, h_t_pre1):
        # Hidden state: input projected by M, history by the distance matrix.
        h_t = sigmoid(T.dot(M, x_t) + T.dot(wd_t, h_t_pre1))
        # Scores of the next positive/negative items.
        yp = T.dot(T.dot(wdp_t1, h_t), T.dot(M, xp_t1).T)
        yq = T.dot(T.dot(wdq_t1, h_t), T.dot(M, xq_t1).T)
        loss_t_bpr = T.log(sigmoid(yp - yq))
        return [h_t, loss_t_bpr]

    [h, loss_bpr], _ = theano.scan(
        fn=recurrence,
        sequences=[xps, xps[1:], xqs[1:], wdps, wdps[1:], wdqs[1:]],
        outputs_info=[h0, None],
        n_steps=seq_length-1)
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    seq_l2_sq = T.sum([T.sum(par ** 2)
                       for par in [xps, xqs, M, wdps, wdqs]])
    los = - T.sum(loss_bpr)
    seq_costs = (
        los +
        0.5 * l2 * seq_l2_sq)
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, seq_grads)]
    # Lookup tables updated through their de-duplicated row subsets.
    update_x = T.set_subtensor(
        uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
    update_d = T.set_subtensor(
        uiq_d, uiq_d - lr * T.grad(seq_costs, self.wd)[uiq_dpqs])
    seq_updates.append((self.lt, update_x))  # appended into seq_updates directly
    seq_updates.append((self.wd, update_d))
    # ----------------------------------------------------------------------------
    # Compile: one user index in, loss out, parameters updated.
    uidx = T.iscalar()  # TensorType(int32, scalar)
    self.seq_train = theano.function(
        inputs=[uidx],
        outputs=los,
        updates=seq_updates,
        givens={
            xpidxs: self.tra_buys_masks[uidx],  # one padded row per user
            xqidxs: self.tra_buys_neg_masks[uidx],  # negative poi
            dpidxs: self.tra_dist_masks[uidx],  # binned distance between places
            dqidxs: self.tra_dist_neg_masks[uidx],
            tra_mask: self.tra_masks[uidx]})
def __theano_train__(self, n_in, n_hidden, n_img, n_txt):
    """Compile the mini-batch multimodal GRU training function
    ``self.seq_train``.

    Items are represented by id embeddings concatenated with a learned
    projection of image (``self.fi``) and text (``self.ft``) features.  The
    cost is masked BPR plus an autoencoder-style reconstruction term for the
    two modalities; one SGD step per call.  When ``self.alpha_lambda[4]``
    (the corruption rate) is nonzero, the projected features are computed
    from randomly corrupted inputs (denoising setup) while reconstruction
    still targets the clean features.
    """
    # self.alpha_lambda = ['alpha', 'lambda', 'lambda_ev', 'lambda_ae', 'fea_random_zero']
    ui, wh = self.ui, self.wh
    ei, vt = self.ei, self.vt  # image / text projection matrices
    tra_mask = T.imatrix()  # shape=(n, 157)
    actual_batch_size = tra_mask.shape[0]
    seq_length = T.max(T.sum(tra_mask, axis=1))  # longest valid sequence in the mini-batch
    mask = tra_mask.T  # shape=(157, n)
    h0 = T.alloc(self.h0, actual_batch_size, n_hidden)  # shape=(n, n_hidden)
    bi = T.alloc(self.bi, actual_batch_size, 3,
                 n_hidden)  # shape=(n, 3, n_hidden); n_hidden kept last
    bi = bi.dimshuffle(1, 2, 0)  # shape=(3, n_hidden, n)
    pidxs, qidxs = T.imatrix(), T.imatrix()  # TensorType(int32, matrix)
    xps, xqs = self.lt[pidxs], self.lt[
        qidxs]  # shape=(actual_batch_size, seq_length, n_in)
    ips, iqs = self.fi[pidxs], self.fi[
        qidxs]  # shape=(actual_batch_size, seq_length, n_img)
    tps, tqs = self.ft[pidxs], self.ft[
        qidxs]  # shape=(actual_batch_size, seq_length, n_txt)
    xps, xqs = xps.dimshuffle(1, 0, 2), xqs.dimshuffle(
        1, 0, 2)  # shape=(seq_len, batch_size, n_in)
    ips, iqs = ips.dimshuffle(1, 0, 2), iqs.dimshuffle(1, 0, 2)
    tps, tqs = tps.dimshuffle(1, 0, 2), tqs.dimshuffle(1, 0, 2)
    pqs = T.concatenate((pidxs, qidxs))         # concatenate first ...
    uiq_pqs = Unique(False, False, False)(pqs)  # ... then de-duplicate
    uiq_x = self.lt[uiq_pqs]  # embeddings of the unique items
    """ 输入t时刻正负样本、t-1时刻隐层,计算当前隐层、当前损失. 公式里省略了时刻t # 根据性质:T.dot((m, n), (n, ))得到shape=(m, ),且是矩阵每行与(n, )相乘 # GRU z = sigmoid(ux_z * xp + wh_z * h_pre1) r = sigmoid(ux_r * xp + wh_r * h_pre1) c = tanh(ux_c * xp + wh_c * (r 点乘 h_pre1)) h = z * h_pre1 + (1.0 - z) * c # 根据性质:T.dot((n, ), (n, ))得到scalar upq = h_pre1 * (xp - xq) loss = log(1.0 + e^(-upq)) """
    zero = self.alpha_lambda[4]
    if 0.0 == zero:
        # Branch 1: use the full (uncorrupted) modality features.
        def recurrence(xp_t, xq_t, ip_t, iq_t, tp_t, tq_t, mask_t,
                       h_t_pre1):
            # Item representation: project image/text into embedding space.
            mp_t = T.dot(ip_t, ei.T) + T.dot(tp_t, vt.T)  # shape=(n, 20)
            mq_t = T.dot(iq_t, ei.T) + T.dot(tq_t, vt.T)
            p_t = T.concatenate((xp_t, mp_t), axis=1)  # shape=(n, 40)
            q_t = T.concatenate((xq_t, mq_t), axis=1)
            # GRU hidden state.
            z_r = sigmoid(
                T.dot(ui[:2], p_t.T) + T.dot(wh[:2], h_t_pre1.T) + bi[:2])
            z, r = z_r[0].T, z_r[1].T  # shape=(n, 40)
            c = tanh(
                T.dot(ui[2], p_t.T) + T.dot(wh[2],
                                            (r * h_t_pre1).T) + bi[2])
            h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c.T
            # Preference error uses the *previous* hidden state.
            upq_t = T.sum(h_t_pre1 * (p_t - q_t), axis=1)  # shape=(n, )
            loss_t = T.log(sigmoid(upq_t))  # shape=(n, )
            loss_t *= mask_t
            # Reconstruction error of the original modality features.
            loss_ae_t_i = (
                T.sum((ip_t - T.dot(mp_t, ei))**2) + T.sum(
                    (iq_t - T.dot(mq_t, ei))**2))
            loss_ae_t_t = (T.sum((tp_t - T.dot(mp_t, vt))**2) + T.sum(
                (tq_t - T.dot(mq_t, vt))**2))
            loss_ae_t_i *= mask_t
            loss_ae_t_t *= mask_t
            return [h_t, loss_t, loss_ae_t_i, loss_ae_t_t]

        [h, loss, loss_ae_i, loss_ae_t
         ], _ = theano.scan(fn=recurrence,
                            sequences=[xps, xqs, ips, iqs, tps, tqs, mask],
                            outputs_info=[h0, None, None, None],
                            n_steps=seq_length,
                            truncate_gradient=-1)
    else:
        # Branch 2: corrupt whole features at random before every sequence
        # (denoising); reconstruction still targets the clean features.
        ipsc = self.get_corrupted_input_whole_minibatch(ips, zero)
        iqsc = self.get_corrupted_input_whole_minibatch(iqs, zero)
        tpsc = self.get_corrupted_input_whole_minibatch(tps, zero)
        tqsc = self.get_corrupted_input_whole_minibatch(tqs, zero)

        def recurrence(xp_t, xq_t, ip_t, iq_t, tp_t, tq_t, ipc_t, iqc_t,
                       tpc_t, tqc_t, mask_t, h_t_pre1):
            # Item representation from the *corrupted* modality features.
            mp_t = T.dot(ipc_t, ei.T) + T.dot(tpc_t, vt.T)  # shape=(n, 20)
            mq_t = T.dot(iqc_t, ei.T) + T.dot(tqc_t, vt.T)
            p_t = T.concatenate((xp_t, mp_t), axis=1)  # shape=(n, 40)
            q_t = T.concatenate((xq_t, mq_t), axis=1)
            # GRU hidden state.
            z_r = sigmoid(
                T.dot(ui[:2], p_t.T) + T.dot(wh[:2], h_t_pre1.T) + bi[:2])
            z, r = z_r[0].T, z_r[1].T  # shape=(n, 40)
            c = tanh(
                T.dot(ui[2], p_t.T) + T.dot(wh[2],
                                            (r * h_t_pre1).T) + bi[2])
            h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c.T
            # Preference error uses the *previous* hidden state.
            upq_t = T.sum(h_t_pre1 * (p_t - q_t), axis=1)  # shape=(n, )
            loss_t = T.log(sigmoid(upq_t))  # shape=(n, )
            loss_t *= mask_t
            # Reconstruction error against the clean modality features.
            loss_ae_t_i = (
                T.sum((ip_t - T.dot(mp_t, ei))**2) + T.sum(
                    (iq_t - T.dot(mq_t, ei))**2))
            loss_ae_t_t = (T.sum((tp_t - T.dot(mp_t, vt))**2) + T.sum(
                (tq_t - T.dot(mq_t, vt))**2))
            loss_ae_t_i *= mask_t
            loss_ae_t_t *= mask_t
            return [h_t, loss_t, loss_ae_t_i, loss_ae_t_t]

        [h, loss, loss_ae_i, loss_ae_t], _ = theano.scan(
            fn=recurrence,
            sequences=[
                xps, xqs, ips, iqs, tps, tqs, ipsc, iqsc, tpsc, tqsc, mask
            ],
            outputs_info=[h0, None, None, None],
            n_steps=seq_length,
            truncate_gradient=-1)
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    l2_ev = self.alpha_lambda[2]
    l2_ae = self.alpha_lambda[3]
    seq_l2_sq = (T.sum([T.sum(par**2) for par in [xps, xqs, ui, wh]]) +
                 T.sum([T.sum(par**2)
                        for par in [bi]]) / actual_batch_size)
    seq_l2_ev = (T.sum([T.sum(par**2) for par in [ei, vt]]))
    upq = T.sum(loss)
    ae = (0.5 * l2_ae * T.sum(loss_ae_i) / n_img +
          0.5 * l2_ae * T.sum(loss_ae_t) / n_txt)
    seq_costs = ((-upq + ae) / actual_batch_size + 0.5 * l2 * seq_l2_sq +
                 0.5 * l2_ev * seq_l2_ev)
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, seq_grads)]
    # Item table updated through the de-duplicated row subset.
    update_x = T.set_subtensor(
        uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
    seq_updates.append((self.lt, update_x))  # appended into seq_updates directly
    # ----------------------------------------------------------------------------
    # Compile: data delivered via givens, indexed by the start_end slice.
    start_end = T.ivector()
    self.seq_train = theano.function(
        inputs=[start_end],
        outputs=-upq + ae,
        updates=seq_updates,
        givens={
            pidxs: self.tra_buys_masks[start_end],  # TensorType(int32, matrix)
            qidxs: self.tra_buys_neg_masks[start_end],
            tra_mask: self.tra_masks[start_end]
        })
def __theano_train__(self, n_in, n_hidden):
    """Compile ``self.seq_train``: GRU-BPR with attention over a sliding
    window of the most recent inputs.

    A fixed-size window of the last ``self.window_input`` item features is
    maintained inside the scan; an attention-weighted context vector enters
    the GRU gates through ``self.vc``.  One SGD step per call.
    """
    # self.alpha_lambda = ['alpha', 'lambda']
    ui, wh, bi = self.ui, self.wh, self.bi
    qx, rx, vc = self.qx, self.rx, self.vc  # attention params, context weights
    winx = self.window_input  # sliding-window length
    tra_mask = T.ivector()
    seq_length = T.sum(tra_mask)  # number of valid (unpadded) steps
    pidxs, qidxs = T.ivector(), T.ivector()
    xps, xqs = self.lt[pidxs], self.lt[qidxs]  # shape=(seq_length, n_in)
    pqs = T.concatenate((pidxs, qidxs))         # concatenate first ...
    uiq_pqs = Unique(False, False, False)(pqs)  # ... then de-duplicate
    uiq_x = self.lt[uiq_pqs]  # embeddings of the unique items
    """ 输入t时刻正负样本、t-1时刻隐层,计算当前隐层、当前损失. 公式里省略了时刻t # 根据性质:T.dot((m, n), (n, ))得到shape=(m, ),且是矩阵每行与(n, )相乘 # GRU z = sigmoid(ux_z * xp + wh_z * h_pre1) r = sigmoid(ux_r * xp + wh_r * h_pre1) c = tanh(ux_c * xp + wh_c * (r 点乘 h_pre1)) h = z * h_pre1 + (1.0 - z) * c # 根据性质:T.dot((n, ), (n, ))得到scalar upq = h_pre1 * (xp - xq) loss = log(1.0 + e^(-upq)) """

    def recurrence(xp_t, xp_t1, xq_t1, h_t_pre1, cx):
        # context_x: fixed-size window; append xp_t at the bottom, drop the
        # first row, so the matrix shape never changes.
        cx = T.concatenate((cx[1:], xp_t.reshape(
            (1, n_in))))  # shape=(winx, n_in)
        # Attention over the window rows.
        ex = T.dot(tanh(T.dot(cx, qx)), rx)  # shape=(winx, 1)
        ax = softmax(ex.T)  # shape=(1, winx)
        xc = (T.dot(cx.T, ax.T)).reshape((n_in, ))  # context vector
        # gru_unit: the context enters every gate through vc.
        z_r = sigmoid(
            T.dot(ui[:2], xp_t) + T.dot(vc[:2], xc) +
            T.dot(wh[:2], h_t_pre1) + bi[:2])
        z, r = z_r[0], z_r[1]
        c = tanh(
            T.dot(ui[2], xp_t) + T.dot(vc[2], xc) + T.dot(
                wh[2], (r * h_t_pre1)) + bi[2])
        h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c  # shape=(n_hidden, )
        # BPR loss on the next step's samples: h(t) * (xp(t+1) - xq(t+1)).
        upq_t = T.dot(h_t, xp_t1 - xq_t1)
        loss_t = T.log(sigmoid(upq_t))
        return [h_t, cx, loss_t]

    cumx = T.alloc(self.lt[-1], winx, n_in)  # initial window (padding row)
    # h covers h1..ht; loss is computed from h0..h(t-1) and x1..xt.
    [_, _, loss], _ = theano.scan(
        fn=recurrence,
        sequences=[xps, xps[1:], xqs[1:]],
        outputs_info=[self.h0, cumx, None],
        n_steps=seq_length - 1,
        truncate_gradient=-1)
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    seq_l2_sq = T.sum(
        [T.sum(par**2) for par in [xps, xqs, ui, wh, bi, qx, rx, vc]])
    upq = T.sum(loss)
    seq_costs = (-upq + 0.5 * l2 * seq_l2_sq)
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, seq_grads)]
    # Item table updated through the de-duplicated row subset.
    update_x = T.set_subtensor(
        uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
    seq_updates.append((self.lt, update_x))  # appended into seq_updates directly
    # ----------------------------------------------------------------------------
    # Compile: one user index in, loss out, parameters updated.
    uidx = T.iscalar()  # TensorType(int32, scalar)
    self.seq_train = theano.function(
        inputs=[uidx],
        outputs=-upq,
        updates=seq_updates,
        givens={
            pidxs: self.tra_buys_masks[uidx],  # one padded row per user
            qidxs: self.tra_buys_neg_masks[uidx],
            tra_mask: self.tra_masks[uidx]
        })
def __theano_traini__(self, n_in, n_hidden):
    """Build the mini-batch training function for the tri-modal GRU model.

    Three parallel GRUs run over latent (x), text (t) and image (i) item
    features; their previous hidden states score positive vs. negative output
    embeddings (BPR loss, masked per step). Compiles ``self.seq_traini``.

    :param n_in: latent item-feature dimensionality.
    :param n_hidden: hidden-state dimensionality of each GRU.
    """
    # self.alpha_lambda = ['alpha', 'lambda', 'fea_random_zero']
    uix, whx = self.uix, self.whx  # latent-channel GRU weights
    uit, wht = self.uit, self.wht  # text-channel GRU weights
    uii, whi = self.uii, self.whi  # image-channel GRU weights
    tra_mask = T.imatrix()         # shape=(n, max_len), 0/1 validity mask
    actual_batch_size = tra_mask.shape[0]
    # Longest valid sequence in the mini-batch bounds the scan length.
    seq_length = T.max(T.sum(tra_mask, axis=1))
    mask = tra_mask.T              # shape=(max_len, n) so scan iterates over time
    h0x = T.alloc(self.h0x, actual_batch_size, n_hidden)  # shape=(n, n_hidden)
    h0t = T.alloc(self.h0t, actual_batch_size, n_hidden)
    h0i = T.alloc(self.h0i, actual_batch_size, n_hidden)
    # Biases broadcast per batch row; n_hidden ends up on the middle axis.
    bix = T.alloc(self.bix, actual_batch_size, 3, n_hidden)  # shape=(n, 3, n_hidden)
    bit = T.alloc(self.bit, actual_batch_size, 3, n_hidden)
    bii = T.alloc(self.bii, actual_batch_size, 3, n_hidden)
    bix = bix.dimshuffle(1, 2, 0)  # shape=(3, n_hidden, n)
    bit = bit.dimshuffle(1, 2, 0)
    bii = bii.dimshuffle(1, 2, 0)
    # Input side: only purchased items are fed in.
    pidxs, qidxs = T.imatrix(), T.imatrix()  # TensorType(int32, matrix)
    ixps = self.lt[pidxs]  # shape=(actual_batch_size, seq_length, n_in)
    itps = self.ft[pidxs]  # shape=(actual_batch_size, seq_length, n_txt)
    iips = self.fi[pidxs]  # shape=(actual_batch_size, seq_length, n_img)
    ixps = ixps.dimshuffle(1, 0, 2)  # shape=(seq_length, batch_size, n_in)
    itps = itps.dimshuffle(1, 0, 2)
    iips = iips.dimshuffle(1, 0, 2)
    # Output side: score = h * y for each modality's output embeddings.
    yxps, yxqs = self.vyx[pidxs], self.vyx[qidxs]
    ytps, ytqs = self.vyt[pidxs], self.vyt[qidxs]
    yips, yiqs = self.vyi[pidxs], self.vyi[qidxs]
    yxps, yxqs = yxps.dimshuffle(1, 0, 2), yxqs.dimshuffle(1, 0, 2)
    ytps, ytqs = ytps.dimshuffle(1, 0, 2), ytqs.dimshuffle(1, 0, 2)
    yips, yiqs = yips.dimshuffle(1, 0, 2), yiqs.dimshuffle(1, 0, 2)
    pqs = T.concatenate((pidxs, qidxs))          # concatenate first,
    uiq_pqs = Unique(False, False, False)(pqs)   # then deduplicate
    uiq_yi = self.vyi[uiq_pqs]                   # rows of vyi to update sparsely
    """
    Per step t: given positive/negative samples at t and hidden states at t-1,
    compute new hidden states and the step loss (time subscripts omitted).
    # Property: T.dot((m, n), (n, )) gives shape=(m, ) — each row dotted with the vector.
    # GRU
        z = sigmoid(ux_z * xp + wh_z * h_pre1)
        r = sigmoid(ux_r * xp + wh_r * h_pre1)
        c = tanh(ux_c * xp + wh_c * (r ⊙ h_pre1))
        h = z * h_pre1 + (1.0 - z) * c
    # Property: T.dot((n, ), (n, )) gives a scalar.
        upq  = h_pre1 * (xp - xq)
        loss = log(1.0 + e^(-upq))
    """

    def recurrence(ixp_t, yxp_t, yxq_t, itp_t, ytp_t, ytq_t, iip_t, yip_t,
                   yiq_t, mask_t, hx_t_pre1, ht_t_pre1, hi_t_pre1):
        # Features and hidden states handled as shape=(batch_size, n_hidden).
        z_rx = sigmoid(T.dot(uix[:2], ixp_t.T) +
                       T.dot(whx[:2], hx_t_pre1.T) + bix[:2])  # shape=(2, n_hidden, n)
        z_rt = sigmoid(T.dot(uit[:2], itp_t.T) +
                       T.dot(wht[:2], ht_t_pre1.T) + bit[:2])
        z_ri = sigmoid(T.dot(uii[:2], iip_t.T) +
                       T.dot(whi[:2], hi_t_pre1.T) + bii[:2])
        zx, rx = z_rx[0].T, z_rx[1].T  # shape=(n, n_hidden)
        zt, rt = z_rt[0].T, z_rt[1].T
        zi, ri = z_ri[0].T, z_ri[1].T
        cx = tanh(T.dot(uix[2], ixp_t.T) +
                  T.dot(whx[2], (rx * hx_t_pre1).T) + bix[2])  # shape=(n_hidden, n)
        ct = tanh(T.dot(uit[2], itp_t.T) +
                  T.dot(wht[2], (rt * ht_t_pre1).T) + bit[2])
        ci = tanh(T.dot(uii[2], iip_t.T) +
                  T.dot(whi[2], (ri * hi_t_pre1).T) + bii[2])
        hx_t = (T.ones_like(zx) - zx) * hx_t_pre1 + zx * cx.T  # shape=(n, n_hidden)
        ht_t = (T.ones_like(zt) - zt) * ht_t_pre1 + zt * ct.T
        hi_t = (T.ones_like(zi) - zi) * hi_t_pre1 + zi * ci.T
        # Preference error: sum of the three modalities' BPR scores,
        # using the PREVIOUS hidden state against the current y embeddings.
        upq_t = (
            T.sum(hx_t_pre1 * (yxp_t - yxq_t), axis=1) +
            T.sum(ht_t_pre1 * (ytp_t - ytq_t), axis=1) +
            T.sum(hi_t_pre1 * (yip_t - yiq_t), axis=1))  # shape=(n, )
        loss_t = T.log(sigmoid(upq_t))  # shape=(n, )
        loss_t *= mask_t  # multiplying the 0/1 mask only on the loss suffices
        return [hx_t, ht_t, hi_t, loss_t]  # shapes: (n, n_hidden) x3, (n, )

    [hx, ht, hi, loss], _ = theano.scan(
        fn=recurrence,
        sequences=[ixps, yxps, yxqs, itps, ytps, ytqs, iips, yips, yiqs, mask],
        outputs_info=[h0x, h0t, h0i, None],
        n_steps=seq_length)  # only scan up to the longest valid position
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    seq_l2_sq = (
        T.sum([T.sum(par ** 2)
               for par in [uix, whx, yxps, yxqs, ixps,
                           uit, wht, ytps, ytqs,
                           uii, whi, yips, yiqs]]) +
        # biases were tiled per batch row, so normalize by batch size
        T.sum([T.sum(par ** 2) for par in [bix, bit, bii]]) / actual_batch_size)
    upq = T.sum(loss)
    seq_costs = (
        - upq / actual_batch_size +
        0.5 * l2 * seq_l2_sq)
    seq_grads = T.grad(seq_costs, self.paramsi)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.paramsi, seq_grads)]
    # Sparse update on vyi: only rows that appeared in this batch.
    update_yi = T.set_subtensor(uiq_yi,
                                uiq_yi - lr * T.grad(seq_costs, self.vyi)[uiq_pqs])
    seq_updates.append((self.vyi, update_yi))  # goes straight into seq_updates
    # ----------------------------------------------------------------------------
    # Feed batch slices via `givens`, apply updates, return the negated loss.
    start_end = T.ivector()
    self.seq_traini = theano.function(
        inputs=[start_end],
        outputs=-upq,
        updates=seq_updates,
        givens={
            pidxs: self.tra_buys_masks[start_end],      # TensorType(int32, matrix)
            qidxs: self.tra_buys_neg_masks[start_end],  # T.ivector() is TensorType(int32, vector)
            tra_mask: self.tra_masks[start_end]})
def __theano_train__(self, n_in, n_hidden):
    """Build the mini-batch training function for the attention-window GRU.

    Batched variant of the single-user attention GRU: each batch row keeps a
    sliding window of its last ``winx`` item embeddings; an attention head
    summarizes the window into a context vector fed to the GRU. BPR loss on
    next-step positive vs. negative items. Compiles ``self.seq_train``.

    :param n_in: item-embedding dimensionality.
    :param n_hidden: GRU hidden-state dimensionality.
    """
    # self.alpha_lambda = ['alpha', 'lambda']
    ui, wh, bi = self.ui, self.wh, self.bi
    qx, rx, vc = self.qx, self.rx, self.vc  # attention projections + context weights
    winx = self.window_input                # attention window length
    tra_mask = T.imatrix()                  # shape=(n, max_len)
    actual_batch_size = tra_mask.shape[0]
    # Longest valid sequence in the mini-batch bounds the scan length.
    seq_length = T.max(T.sum(tra_mask, axis=1))
    mask = tra_mask.T                       # shape=(max_len, n)
    # Tile the bias per batch row (shadows the unpacked self.bi above).
    bi = T.alloc(self.bi, actual_batch_size, 3, n_hidden)  # shape=(n, 3, n_hidden)
    bi = bi.dimshuffle(1, 2, 0)             # shape=(3, n_hidden, n)
    pidxs, qidxs = T.imatrix(), T.imatrix()
    xps, xqs = self.lt[pidxs], self.lt[
        qidxs]  # shape=(actual_batch_size, seq_length, n_in)
    xps = xps.dimshuffle(
        1, 0, 2)  # shape=(seq_length, batch_size, n_in)
    xqs = xqs.dimshuffle(1, 0, 2)
    pqs = T.concatenate((pidxs, qidxs))          # concatenate first,
    uiq_pqs = Unique(False, False, False)(pqs)   # then deduplicate
    uiq_x = self.lt[uiq_pqs]                     # unique item embeddings (sparse update)
    """
    Per step t: given positive/negative samples at t and the hidden state at
    t-1, compute the new hidden state and step loss (time subscripts omitted).
    # Property: T.dot((m, n), (n, )) gives shape=(m, ) — each row dotted with the vector.
    # GRU
        z = sigmoid(ux_z * xp + wh_z * h_pre1)
        r = sigmoid(ux_r * xp + wh_r * h_pre1)
        c = tanh(ux_c * xp + wh_c * (r ⊙ h_pre1))
        h = z * h_pre1 + (1.0 - z) * c
    # Property: T.dot((n, ), (n, )) gives a scalar.
        upq  = h_pre1 * (xp - xq)
        loss = log(1.0 + e^(-upq))
    """

    def recurrence(xp_t, xp_t1, xq_t1, mask_t, h_t_pre1, cxs):
        # Features and hidden states handled as shape=(batch_size, n_hidden).
        # (n, winx, d) = concat(((n, winx-1, d)), ((n, 1, d)), axis=1)
        # context_x: fixed-size window — append xp_t at the bottom, drop the
        # first row; shape stays (n, winx, n_in).
        cxs = T.concatenate(
            (
                cxs[:, 1:, :],               # shape=(n, winx-1, n_in)
                xp_t.dimshuffle(0, 'x', 1)), # shape=(n, 1, n_in)
            axis=1)                          # shape=(n, winx, n_in)
        exs = T.dot(tanh(T.dot(cxs, qx)), rx)  # energies, shape=(n, winx, 1)
        # Rebroadcast on axis=2 so the singleton axis can be dropped.
        exs = T.Rebroadcast((2, True))(exs)
        axs0 = softmax(exs.dimshuffle(
            0, 1))  # shape=(n, winx); drop an axis since softmax is row-wise
        axs = axs0.dimshuffle(0, 1, 'x')  # shape=(n, winx, 1); restore the axis
        axs = T.Rebroadcast((2, True))(axs)  # broadcast axis=2 for the product
        # (n, d) = T.sum((n, winx, d) * (n, winx, 1), axis=1)
        xc = T.sum(cxs * axs, axis=1)  # attended context, shape=(n, n_in)
        # gru unit
        z_r = sigmoid(
            T.dot(ui[:2], xp_t.T) + T.dot(vc[:2], xc.T) +
            T.dot(wh[:2], h_t_pre1.T) + bi[:2])
        z, r = z_r[0].T, z_r[1].T  # shape=(n, n_hidden)
        c = tanh(
            T.dot(ui[2], xp_t.T) + T.dot(vc[2], xc.T) +
            T.dot(wh[2], (r * h_t_pre1).T) + bi[2])
        h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c.T  # shape=(n, n_hidden)
        # loss: BPR score h(t) * (xp(t+1) - xq(t+1)) — next-item prediction.
        upq_t = T.sum(
            h_t * (xp_t1 - xq_t1),
            axis=1)  # shape=(n, )
        loss_t = T.log(sigmoid(upq_t))
        loss_t *= mask_t  # multiplying the 0/1 mask only on the loss suffices
        return [h_t, cxs, loss_t]

    batch_h0 = T.alloc(self.h0, actual_batch_size, n_hidden)
    # Initial window: winx copies of the padding row, per batch row.
    cumx = T.alloc(self.lt[-1], actual_batch_size, winx, n_in)
    # h is h1..ht; loss comes from h0..h(t-1) paired with x1..xt.
    [_, _, loss], _ = theano.scan(
        fn=recurrence,
        sequences=[xps, xps[1:], xqs[1:], mask],
        outputs_info=[batch_h0, cumx, None],
        n_steps=seq_length - 1,
        truncate_gradient=-1)
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    seq_l2_sq = (
        T.sum([T.sum(par**2) for par in [xps, xqs, ui, wh, qx, rx, vc]]) +
        # bias was tiled per batch row, so normalize by batch size
        T.sum([T.sum(par**2) for par in [bi]]) / actual_batch_size)
    upq = T.sum(loss)
    seq_costs = (-upq / actual_batch_size + 0.5 * l2 * seq_l2_sq)
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, seq_grads)]
    # Sparse update: only the item-table rows seen in this batch.
    update_x = T.set_subtensor(
        uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
    seq_updates.append((self.lt, update_x))  # goes straight into seq_updates
    # ----------------------------------------------------------------------------
    # Feed batch slices via `givens`.
    start_end = T.ivector()
    self.seq_train = theano.function(
        inputs=[start_end],
        outputs=-upq,
        updates=seq_updates,
        givens={
            pidxs: self.
            tra_buys_masks[start_end],  # TensorType(int32, matrix)
            qidxs: self.tra_buys_neg_masks[start_end],
            tra_mask: self.tra_masks[start_end]
        })
def __theano_train__(self, n_hidden):
    """Build the mini-batch training function for the plain GRU BPR model.

    A single GRU over item embeddings; the PREVIOUS hidden state scores
    positive vs. negative output embeddings (``self.vy``) with a masked BPR
    loss. Compiles ``self.seq_train`` with sparse updates on both the input
    item table ``self.lt`` and the output table ``self.vy``.

    :param n_hidden: GRU hidden-state dimensionality.
    """
    # self.alpha_lambda = ['alpha', 'lambda']
    ui, wh = self.ui, self.wh
    tra_mask = T.imatrix()  # shape=(n, max_len)
    actual_batch_size = tra_mask.shape[0]
    # Longest valid sequence in the mini-batch bounds the scan length.
    seq_length = T.max(T.sum(tra_mask, axis=1))
    mask = tra_mask.T  # shape=(max_len, n)
    h0 = T.alloc(self.h0, actual_batch_size, n_hidden)  # shape=(n, n_hidden)
    # Tile the bias per batch row; n_hidden ends up on the middle axis.
    bi = T.alloc(self.bi, actual_batch_size, 3, n_hidden)  # shape=(n, 3, n_hidden)
    bi = bi.dimshuffle(1, 2, 0)  # shape=(3, n_hidden, n)
    # Input side: only purchased items are fed in.
    pidxs, qidxs = T.imatrix(), T.imatrix()  # TensorType(int32, matrix)
    xps = self.lt[pidxs]  # shape=(actual_batch_size, seq_length, n_in)
    xps = xps.dimshuffle(1, 0, 2)  # shape=(seq_length, batch_size, n_in)
    uiq_ps = Unique(False, False, False)(pidxs)  # deduplicate input indices
    uiq_x = self.lt[uiq_ps]
    # Output side: score = h * y.
    yps, yqs = self.vy[pidxs], self.vy[qidxs]
    yps, yqs = yps.dimshuffle(1, 0, 2), yqs.dimshuffle(1, 0, 2)
    pqs = T.concatenate((pidxs, qidxs))          # concatenate first,
    uiq_pqs = Unique(False, False, False)(pqs)   # then deduplicate
    uiq_y = self.vy[uiq_pqs]                     # rows of vy for the sparse update
    """
    Per step t: given positive/negative samples at t and the hidden state at
    t-1, compute the new hidden state and step loss (time subscripts omitted).
    # Property: T.dot((m, n), (n, )) gives shape=(m, ) — each row dotted with the vector.
    # GRU
        z = sigmoid(ux_z * xp + wh_z * h_pre1)
        r = sigmoid(ux_r * xp + wh_r * h_pre1)
        c = tanh(ux_c * xp + wh_c * (r ⊙ h_pre1))
        h = z * h_pre1 + (1.0 - z) * c
    # Property: T.dot((n, ), (n, )) gives a scalar.
        upq  = h_pre1 * (xp - xq)
        loss = log(1.0 + e^(-upq))
    """

    def recurrence(xp_t, yp_t, yq_t, mask_t, h_t_pre1):
        # Features and hidden states handled as shape=(batch_size, n_hidden).
        z_r = sigmoid(T.dot(ui[:2], xp_t.T) +
                      T.dot(wh[:2], h_t_pre1.T) + bi[:2])  # shape=(2, n_hidden, n)
        z, r = z_r[0].T, z_r[1].T  # shape=(n, n_hidden)
        c = tanh(T.dot(ui[2], xp_t.T) +
                 T.dot(wh[2], (r * h_t_pre1).T) + bi[2])  # shape=(n_hidden, n)
        h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c.T  # shape=(n, n_hidden)
        # Preference error: previous hidden state vs. current y embeddings.
        upq_t = T.sum(h_t_pre1 * (yp_t - yq_t), axis=1)  # shape=(n, )
        loss_t = T.log(sigmoid(upq_t))  # shape=(n, )
        loss_t *= mask_t  # multiplying the 0/1 mask only on the loss suffices
        return [h_t, loss_t]  # shape=(n, n_hidden), (n, )

    [h, loss], _ = theano.scan(
        fn=recurrence,
        sequences=[xps, yps, yqs, mask],
        outputs_info=[h0, None],
        n_steps=seq_length)  # only scan up to the longest valid position
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    seq_l2_sq = (
        T.sum([T.sum(par ** 2) for par in [xps, ui, wh, yps, yqs]]) +
        # bias was tiled per batch row, so normalize by batch size
        T.sum([T.sum(par ** 2) for par in [bi]]) / actual_batch_size)
    upq = T.sum(loss)
    seq_costs = (
        - upq / actual_batch_size +
        0.5 * l2 * seq_l2_sq)
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, seq_grads)]
    # Sparse updates: only rows that appeared in this batch.
    update_x = T.set_subtensor(uiq_x,
                               uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_ps])
    update_y = T.set_subtensor(uiq_y,
                               uiq_y - lr * T.grad(seq_costs, self.vy)[uiq_pqs])
    seq_updates.append((self.lt, update_x))  # goes straight into seq_updates
    seq_updates.append((self.vy, update_y))
    # ----------------------------------------------------------------------------
    # Feed batch slices via `givens`, apply updates, return the negated loss.
    start_end = T.ivector()  # int32
    self.seq_train = theano.function(
        inputs=[start_end],
        outputs=-upq,
        updates=seq_updates,
        givens={
            pidxs: self.tra_buys_masks[start_end],      # TensorType(int32, matrix)
            qidxs: self.tra_buys_neg_masks[start_end],  # T.ivector() is TensorType(int32, vector)
            tra_mask: self.tra_masks[start_end]})
def __theano_train__(self):
    """Build the per-step training function for the ResNet-attention model.

    Combines three BPR scores for one (user, positive, negative) triple:
    1. direct user/item score ``uij_x``;
    2. an attention summary of the current item set refined through a
       residual network (``uij_c``);
    3. a second residual network applied to that summary (``uij_l``).
    Compiles ``self.train(uidx, pqidx, cidx)`` with dense SGD updates on
    ``self.params`` and sparse sub-tensor updates on the embedding tables.
    """
    # Inputs: user index, (positive, negative) item pair, current item set.
    uidx, pqidx, cidx = T.iscalar(), T.ivector(), T.ivector()
    urx = self.ux[
        uidx]  # shape=(n_in, ) — global user vector
    xpq = self.lx[pqidx]  # shape=(2, n_in) — the pair is never the same item
    # Positive/negative samples at t+1.
    cpt = self.lc[
        cidx]  # shape=(set_size, d) — item-set input at t; unique before updating
    cpq = self.lc[
        pqidx]  # shape=(2, d) — second item representation of the pair
    # One item set per step: deduplicate before the sparse update.
    cpqs = T.concatenate((cidx, pqidx))          # concatenate first,
    uiq_cps = Unique(False, False, False)(cpqs)  # then deduplicate
    uiq_c = self.lc[uiq_cps]                     # embeddings of the unique items
    # Weight matrices. Convention throughout: weight * variable.
    lay = self.layer
    wru, wrc, wrl = self.wru, self.wrc, self.wrl  # resnet
    wa1, wa2, wa3 = self.wa1, self.wa2, self.wa3  # first-order attention
    wb1, wb2 = self.wb1, self.wb2                 # second-order attention
    """
    Given the positive/negative samples at t, compute the loss and update the
    user / sample embeddings (time subscripts omitted).
    # Property: T.dot((n, ), (n, )) gives (1, 1).
        uij = user * (xp - xq)
        upq = log(sigmoid(uij))
    """
    # ==============================================================================================================
    # Score 1: direct user preference.
    uij_x = T.dot(urx, xpq[0] - xpq[1])
    # ==============================================================================================================
    # Score 2: ResNet part.
    # -----------------------------------------------------------
    # # check: vector + matrix, (5, ) + (3, 5) -> (3, 5)
    # rang = 0.5
    # wi = uniform(-rang, rang, (5, 5))    # d * d
    # ii = uniform(-rang, rang, (3, 5))    # 3 items * d
    # wu = uniform(-rang, rang, (5, 5))    # d * d
    # uu = uniform(-rang, rang, (5, ))     # (d, )
    # import numpy as np
    # a = np.dot(wu, uu)           # (5, )
    # b = np.dot(ii, wi)           # (3, 5)
    # c = np.dot(wi.T, ii.T).T     # b = c
    # d = a + b                    # (3, 5) — a broadcasts into every row of b
    # -----------------------------------------------------------
    # Score 2: layer-0 attention over the item set.
    e0 = T.dot(tanh(T.dot(wa2, urx) + T.dot(cpt, wa3)), wa1)  # (set_size, )
    a0 = hsoftmax(e0)  # (set_size, )
    c0 = T.sum(cpt * a0.dimshuffle(0, 'x'), axis=0)  # (d, )

    # Score 2: attention inside each ResNet layer.
    def recurrence1(wrut, wrct, urx_pre1, cpt_pre1):
        # Residual update of the user vector and item-set matrix.
        ur_t = relu(T.dot(wrut, urx_pre1) + urx_pre1)  # (d, )
        cp_t = relu(T.dot(cpt_pre1, wrct) + cpt_pre1)  # (set_size, d)
        # Attention produces this layer's context vector.
        e_t = T.dot(tanh(T.dot(wa2, ur_t) + T.dot(cp_t, wa3)), wa1)
        a_t = hsoftmax(e_t)  # (set_size, )
        c_t = T.sum(cp_t * a_t.dimshuffle(0, 'x'), axis=0)  # (d, )
        return [ur_t, cp_t, c_t]

    [urs, cps, cs], _ = theano.scan(
        fn=recurrence1,
        sequences=[wru, wrc],  # bru, brc
        outputs_info=[urx, cpt, None],
        n_steps=lay,
        truncate_gradient=-1)
    # Score 2: second-order attention over the per-layer context vectors.
    c0 = c0.dimshuffle('x', 0)  # (1, d)
    context = T.concatenate((c0, cs), axis=0)  # shape=(layer+1, d)
    e1 = T.dot(tanh(T.dot(context, wb2)), wb1)  # shape=(layer+1, )
    a1 = hsoftmax(e1)
    c1 = T.sum(context * a1.dimshuffle(0, 'x'), axis=0)  # shape=(d, )
    # Score 2.
    uij_c = T.dot(c1, cpq[0] - cpq[1])

    # ==============================================================================================================
    # Score 3: a fresh ResNet stacked on the attention output c1.
    def recurrence2(wrlt, h_pre1):
        # Residual update.
        hl_t = relu(T.dot(wrlt, h_pre1) + h_pre1)  # (d, )
        return hl_t

    hls, _ = theano.scan(fn=recurrence2,
                         sequences=wrl,
                         outputs_info=c1,
                         n_steps=lay,
                         truncate_gradient=-1)
    # Score 3.
    uij_l = T.dot(hls[-1], cpq[0] - cpq[1])
    # ==============================================================================================================
    # Total score.
    loss = T.log(sigmoid(uij_x + uij_c + uij_l))
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, L2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    l2_sqr = (T.sum([
        T.sum(par**2)
        for par in [urx, xpq, cpt, cpq, wru, wrc, wrl, wa1, wa2, wa3, wb1, wb2]
    ]))
    upq = loss
    costs = (-upq + 0.5 * l2 * l2_sqr)
    # self.params
    grads = T.grad(costs, self.params)
    updates = [(par, par - lr * gra) for par, gra in zip(self.params, grads)]
    # 1 user, 2 items: differentiating w.r.t. the full table and slicing the
    # touched rows keeps the update sparse.
    subs_pars_idxs = [[urx, self.ux, uidx], [xpq, self.lx, pqidx],
                      [uiq_c, self.lc, uiq_cps]]
    tmp = [(par, T.set_subtensor(sub, sub - lr * T.grad(costs, par)[idx]))
           for sub, par, idx in subs_pars_idxs]
    updates.extend(tmp)
    # ----------------------------------------------------------------------------
    # Apply the updates and return the negated loss.
    # FIX: Theano's on_unused_input accepts only 'raise'/'warn'/'ignore';
    # the previous value 'warning' would raise ValueError if ever consulted.
    self.train = theano.function(inputs=[uidx, pqidx, cidx],
                                 outputs=-upq,
                                 updates=updates,
                                 on_unused_input='warn')
def __theano_train__(self):
    """Build the mini-batch training function for the ResNet-attention model.

    Batched variant of the three-score BPR model: for each user in the batch
    it combines (1) a direct user/item score, (2) an attended, residually
    refined item-set score, and (3) a second residual network on that summary.
    Compiles ``self.train(uidxs, pqidxs, cidxs, mask)`` with dense SGD updates
    on ``self.params`` and sparse sub-tensor updates on the embedding tables.
    """
    # self.alpha_lambda = ['alpha', 'lambda']
    # Inputs.
    uidxs = T.ivector()   # n users
    pqidxs = T.imatrix()  # (2, n): row 0 = positives, row 1 = negatives
    cidxs = T.imatrix()   # (n, set_size)
    mask = T.ivector()    # current-step mask marking valid/invalid user actions
    urxs = self.ux[uidxs]   # shape=(n, d)
    xpqs = self.lx[pqidxs]  # shape=(2, n, d)
    cpts = self.lc[cidxs]   # shape=(n, set_size, d)
    cpqs = self.lc[pqidxs]  # shape=(2, n, d)
    actual_batch_size = mask.shape[0]
    # One item set per step: deduplicate before the sparse update.
    ncpqs = T.concatenate((cidxs, pqidxs.T), axis=1)  # shape=(n, set_size+2)
    uiq_cps = Unique(False, False, False)(ncpqs)      # deduplicate
    uiq_c = self.lc[uiq_cps]                          # unique item embeddings
    # Weight matrices. Convention throughout: weight * variable.
    lay = self.layer
    wru, wrc, wrl = self.wru, self.wrc, self.wrl  # resnet
    wa1, wa2, wa3 = self.wa1, self.wa2, self.wa3  # first-order attention
    wb1, wb2 = self.wb1, self.wb2                 # second-order attention
    """
    Given the positive/negative samples at t, compute the loss and update the
    user / sample embeddings (time subscripts omitted).
    # Property: T.dot((n, ), (n, )) gives (1, 1).
        uij = user * (xp - xq)
        upq = log(sigmoid(uij))
    """
    # ==============================================================================================================
    # Score 1: direct user preference.
    uij_x = T.sum(urxs * (xpqs[0] - xpqs[1]), axis=1)  # shape=(n, )
    # ==============================================================================================================
    # Score 2: layer-0 attention — one (batch_size, d) context vector.
    urx_emb = T.dot(wa2, urxs.T).T.dimshuffle(0, 'x', 1)  # shape=(batch_size, 1, d)
    e0 = T.dot(tanh(urx_emb + T.dot(cpts, wa3)), wa1)     # shape=(batch_size, set_size)
    a0 = softmax(e0)                                      # (batch_size, set_size)
    c0 = T.sum(cpts * a0.dimshuffle(0, 1, 'x'), axis=1)   # shape=(batch_size, d), broadcast

    # Score 2: attention inside each ResNet layer.
    def recurrence1(wrut, wrct, urx_pre1, cpt_pre1):
        # Residual update of the user vectors and item-set tensors.
        ur_t = relu(T.dot(wrut, urx_pre1.T).T + urx_pre1)  # (batch_size, d)
        cp_t = relu(T.dot(cpt_pre1, wrct) + cpt_pre1)      # (batch_size, set_size, d)
        # Attention produces this layer's context vectors.
        ur_t_emb = T.dot(wa2, ur_t.T).T.dimshuffle(0, 'x', 1)
        e_t = T.dot(tanh(ur_t_emb + T.dot(cp_t, wa3)), wa1)  # shape=(batch_size, set_size)
        a_t = softmax(e_t)
        c_t = T.sum(cp_t * a_t.dimshuffle(0, 1, 'x'), axis=1)
        return [
            ur_t, cp_t, c_t
        ]  # (batch_size, d), (batch_size, set_size, d), (batch_size, d)

    [urs, cps, cs], _ = theano.scan(  # cs.shape = (layer, batch_size, d)
        fn=recurrence1,
        sequences=[wru, wrc],
        outputs_info=[urxs, cpts, None],
        n_steps=lay,
        truncate_gradient=-1)
    # Score 2: second-order attention over the per-layer context vectors.
    c0 = c0.dimshuffle(0, 'x', 1)  # (batch_size, 1, d)
    cs = cs.dimshuffle(1, 0, 2)    # (batch_size, layer, d)
    context = T.concatenate((c0, cs), axis=1)  # (batch_size, layer+1, d)
    e1 = T.dot(tanh(T.dot(context, wb2)), wb1)  # shape=(batch_size, layer+1)
    a1 = softmax(e1)
    c1 = T.sum(context * a1.dimshuffle(0, 1, 'x'), axis=1)  # shape=(batch_size, d)
    # Score 2.
    uij_c = T.sum(c1 * (cpqs[0] - cpqs[1]), axis=1)  # shape=(n, )

    # ==============================================================================================================
    # Score 3: a fresh ResNet stacked on the attention output c1.
    def recurrence2(wrlt, h_pre1):
        # Residual update.
        hl_t = relu(T.dot(wrlt, h_pre1.T).T + h_pre1)  # shape=(batch_size, d)
        return hl_t

    hls, _ = theano.scan(fn=recurrence2,
                         sequences=wrl,
                         outputs_info=c1,
                         n_steps=lay,
                         truncate_gradient=-1)
    # Score 3.
    uij_l = T.sum(hls[-1] * (cpqs[0] - cpqs[1]), axis=1)  # shape=(n, )
    # ==============================================================================================================
    # Total score.
    loss = T.log(sigmoid(uij_x + uij_c + uij_l))  # shape=(n,)
    # NOTE(review): masking of the loss is disabled below, which leaves the
    # `mask` input unused — confirm whether invalid steps should contribute.
    # loss *= mask
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, L2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    l2_sqr = (T.sum([
        T.sum(par**2)
        for par in
        [urxs, xpqs, cpts, cpqs, wru, wrc, wrl, wa1, wa2, wa3, wb1, wb2]
    ]))
    upq = T.sum(loss) / actual_batch_size
    costs = (-upq + 0.5 * l2 * l2_sqr)
    # self.params
    grads = T.grad(costs, self.params)
    updates = [(par, par - lr * gra) for par, gra in zip(self.params, grads)]
    # Differentiating w.r.t. the full table and slicing the touched rows keeps
    # the embedding updates sparse.
    subs_pars_idxs = [[urxs, self.ux, uidxs],
                      [xpqs, self.lx, pqidxs],
                      [uiq_c, self.lc, uiq_cps]]
    tmp = [(par, T.set_subtensor(sub, sub - lr * T.grad(costs, par)[idx]))
           for sub, par, idx in subs_pars_idxs]
    updates.extend(tmp)
    # ----------------------------------------------------------------------------
    # Apply the updates and return the negated loss.
    # FIX: Theano's on_unused_input accepts only 'raise'/'warn'/'ignore'; the
    # previous value 'warning' is invalid, and since `mask` IS unused here
    # (loss *= mask is commented out) it would be consulted and raise
    # ValueError at compile time.
    self.train = theano.function(inputs=[uidxs, pqidxs, cidxs, mask],
                                 outputs=-upq,
                                 updates=updates,
                                 on_unused_input='warn')
def __theano_train__(self, n_in, n_hidden):
    """Build the training function for one user's full sequence.

    Scores each step by combining pairwise interactions between the input
    embeddings ``self.g`` and target embeddings ``self.h``, weighted by a
    distance kernel ``self.f_d`` over pairwise distances, plus a user-specific
    term ``tu * z``. BPR-style loss over positive vs. negative targets.
    Compiles ``self.seq_train(uidx, dist_pos, dist_neg, msk)``.

    :param n_in: unused here — kept for signature consistency with siblings.
    :param n_hidden: embedding dimensionality used in the reshapes below.
    """
    uidx = T.iscalar()
    msk = T.imatrix()       # pairwise validity mask, shape=(seq_n, seq_len)
    dist_pos = T.fmatrix()  # pairwise distances to positive targets
    dist_neg = T.fmatrix()  # pairwise distances to negative targets
    seq_n, seq_len = msk.shape  # e.g. 315 x 315
    tu = self.t[uidx]  # user vector, shape=(n_hidden, )
    xpidxs = self.tra_buys_masks[uidx]      # positive item indices, e.g. (1264, )
    xqidxs = self.tra_buys_neg_masks[uidx]  # negative item indices, (1264, )
    gps = self.g[xpidxs[:seq_len]]  # input embeddings, (seq_len, n_hidden)
    # Targets are the NEXT items: indices shifted by one.
    hps, hqs = self.h[xpidxs[1:seq_len + 1]], self.h[xqidxs[1:seq_len +
                                                            1]]  # (seq_len, n_hidden)
    zps, zqs = self.z[xpidxs[1:seq_len + 1]], self.z[xqidxs[1:seq_len + 1]]
    # Deduplicate indices so each embedding row is updated once.
    guiq_pqs = Unique(False, False, False)(xpidxs)
    uiq_g = self.g[guiq_pqs]
    pqs = T.concatenate((xpidxs, xqidxs))
    uiq_pqs = Unique(False, False, False)(pqs)
    uiq_h = self.h[uiq_pqs]
    uiq_z = self.z[uiq_pqs]
    t_z = T.sum(tu * zps, 1)  # user-target term, shape=(seq_n, )
    n_h = T.sum(msk, 1)       # number of valid history items per step, (seq_n, )
    # Broadcast history embeddings against the per-step mask:
    # (1, seq_len, n_hidden) * (seq_n, seq_len, 1) -> (seq_n, seq_len, n_hidden)
    expand_g = gps.reshape((1, seq_len, n_hidden)) * msk.reshape(
        (seq_n, seq_len, 1))
    # Distance-weighted average of history/target interactions, plus t_z:
    # [(seq_n, seq_len) * (seq_n, seq_len)] -> (seq_n, ) / (seq_n, ) + (seq_n, )
    sp = T.sum(
        T.sum(expand_g * hps.reshape(
            (seq_n, 1, n_hidden)), 2) * self.f_d(dist_pos), 1
    ) / n_h + t_z
    sq = T.sum(
        T.sum(expand_g * hqs.reshape(
            (seq_n, 1, n_hidden)), 2) * self.f_d(dist_neg), 1) / n_h + t_z
    # Variant without the distance weighting, kept for reference:
    # sp = T.sum(T.sum(expand_g * hps.reshape((seq_n, 1, n_hidden)), 2), 1) / n_h + t_z
    # sq = T.sum(T.sum(expand_g * hqs.reshape((seq_n, 1, n_hidden)), 2), 1) / n_h + t_z
    loss = T.sum(T.log(sigmoid(sp - sq)))
    # ----------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    seq_l2_sq = T.sum([T.sum(par**2) for par in [gps, hps, hqs, zps, zqs]])
    seq_costs = (-loss + 0.5 * l2 * seq_l2_sq)
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, seq_grads)]
    # Sparse sub-tensor updates: only rows that appeared in this sequence.
    update_g = T.set_subtensor(
        uiq_g, uiq_g - lr * T.grad(seq_costs, self.g)[guiq_pqs])
    update_h = T.set_subtensor(
        uiq_h, uiq_h - lr * T.grad(seq_costs, self.h)[uiq_pqs])
    update_t = T.set_subtensor(tu, tu - lr *
                               T.grad(seq_costs, self.t)[uidx])
    update_z = T.set_subtensor(
        uiq_z, uiq_z - lr * T.grad(seq_costs, self.z)[uiq_pqs])
    seq_updates.append((self.g, update_g))
    seq_updates.append((self.h, update_h))
    seq_updates.append((self.t, update_t))
    seq_updates.append((self.z, update_z))
    # ----------------------------------------------------------------------------
    # Apply the updates and return the loss for this user.
    self.seq_train = theano.function(
        inputs=[uidx, dist_pos, dist_neg, msk],
        outputs=loss,
        updates=seq_updates)