Example #1
 def setUp(self):
     super(test_Unique, self).setUp()
     self.op_class = Unique
     self.ops = [Unique(),
                 Unique(True),
                 Unique(False, True),
                 Unique(True, True),
                 Unique(False, False, True),
                 Unique(True, False, True),
                 Unique(False, True, True),
                 Unique(True, True, True)]
Example #2
 def setUp(self):
     super(test_Unique, self).setUp()
     self.op_class = Unique
     self.ops = [Unique(), 
                 Unique(True), 
                 Unique(False, True), 
                 Unique(True, True)]
     if bool(numpy_ver >= [1, 9]):
         self.ops.extend([
                     Unique(False, False, True), 
                     Unique(True, False, True), 
                     Unique(False, True, True), 
                     Unique(True, True, True)])
Example #3
 def setup_method(self):
     super().setup_method()
     self.op_class = Unique
     self.ops = [
         Unique(),
         Unique(True),
         Unique(False, True),
         Unique(True, True),
         Unique(False, False, True),
         Unique(True, False, True),
         Unique(False, True, True),
         Unique(True, True, True),
     ]
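
The positional arguments passed to the Unique constructors above are, in order, return_index, return_inverse and return_counts; the op wraps numpy.unique, so the flags behave the same way (return_counts is why Example #2 gates the last four ops on NumPy >= 1.9). A minimal NumPy sketch of what each flag adds (the array x is illustrative):

    import numpy as np

    x = np.array([3, 1, 3, 2, 1])
    np.unique(x)                          # Unique()                   -> array([1, 2, 3])
    np.unique(x, return_index=True)       # Unique(True)               -> values + first-occurrence indices
    np.unique(x, return_inverse=True)     # Unique(False, True)        -> values + inverse mapping
    np.unique(x, return_counts=True)      # Unique(False, False, True) -> values + counts (NumPy >= 1.9)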
Example #4
    def __theano_train__(self):
        """
        Run through the training sequence once during the training phase.
        """
        # self.alpha_lambda = ['alpha', 'lambda']
        uidxs = T.ivector()
    pq_idxs_t = T.imatrix()  # row 0: positive samples; row 1: negative samples.
        mask_t = T.ivector()
        usrs = self.ui[uidxs]  # shape=(n, 20)
        xpqs = self.lt[pq_idxs_t]  # shape=(2, n, 20)

        uiq_pqs = Unique(False, False, False)(pq_idxs_t)  # deduplicate directly
        uiq_x = self.lt[uiq_pqs]  # features of the corresponding items
        """
        Feed the positive/negative samples at time t, compute the current loss and update the user and the pos/neg samples. The time index t is omitted in the formulas.
        # Property: T.dot((n, ), (n, )) gives (1, 1)
            uij  = user * (xp - xq)
            upq = log(sigmoid(uij))
        """
        upq_t = T.sum(usrs * (xpqs[0] - xpqs[1]), axis=1)
        loss_t = T.log(sigmoid(upq_t))  # shape=(n, )
        loss_t *= mask_t  # multiplying by the 0/1 mask only here, on the loss, is enough

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, L2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        bpr_l2_sqr = (T.sum([T.sum(par**2) for par in [usrs, xpqs]]))
        upq = T.sum(loss_t)
        costs = (
            -upq +  # no need to divide by batch_size here, unlike the RNN.
            0.5 * l2 * bpr_l2_sqr)
        # This is fast, but the update of self.lt is not exact because xpqs may contain duplicates.
        # n users and 2n items: this kind of update is the fastest, differentiating w.r.t. the subtensor directly rather than the full parameter.
        # pars_subs = [(self.ui, usrs), (self.lt, xpqs)]      # not sure whether this works; xpqs is used here...
        # seq_updates = [(par, T.set_subtensor(sub, sub - lr * T.grad(costs, sub)))
        #                for par, sub in pars_subs]
        # The exact version follows, but it is slower than one_by_one.
        pars_subs = [(self.ui, usrs)]
        seq_updates = [(par, T.set_subtensor(sub,
                                             sub - lr * T.grad(costs, sub)))
                       for par, sub in pars_subs]
        pars_subs = [(self.lt, uiq_x, uiq_pqs)]
        seq_updates.extend([
            (par, T.set_subtensor(sub, sub - lr * T.grad(costs, par)[idxs])
             )  # but this step is very time-consuming.
            for par, sub, idxs in pars_subs
        ])
        # ----------------------------------------------------------------------------

        # After feeding the users, positive/negative samples, and other inputs, update the variables and return the loss.
        self.bpr_train = theano.function(inputs=[uidxs, pq_idxs_t, mask_t],
                                         outputs=-upq,
                                         updates=seq_updates)
Example #5
    def __theano_train__(self):
        """
        Run through the training sequence once during the training phase.
        """
        # self.alpha_lambda = ['alpha', 'lambda']
        pidxs_t, qidxs_t = T.ivector(), T.ivector()
        mask_t, uidxs = T.ivector(), T.ivector()
        users = self.ux[uidxs]  # shape=(n, 20)
        xps = self.lt[pidxs_t]  # shape=(n, 20)
        xqs = self.lt[qidxs_t]

        pqs = T.concatenate((pidxs_t, qidxs_t))  # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_x = self.lt[uiq_pqs]  # features of the corresponding items
        """
        Feed the positive/negative samples at time t, compute the current loss and update the user and the pos/neg samples. The time index t is omitted in the formulas.
        # Property: T.dot((n, ), (n, )) gives (1, 1)
            uij  = user * (xp - xq)
            upq = log(sigmoid(uij))
        """
        upq_t = T.sum(users * (xps - xqs), axis=1)
        loss_t = T.log(sigmoid(upq_t))  # shape=(n, )
        loss_t *= mask_t  # multiplying by the 0/1 mask only here, on the loss, is enough

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, L2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        bpr_l2_sqr = (T.sum([T.sum(par**2) for par in [users, xps, xqs]]))
        upq = T.sum(loss_t)
        costs = (-upq + 0.5 * l2 * bpr_l2_sqr)
        pars_subs = [(self.ux, users, uidxs), (self.lt, uiq_x, uiq_pqs)]
        bpr_updates = [(par,
                        T.set_subtensor(sub,
                                        sub - lr * T.grad(costs, par)[idxs]))
                       for par, sub, idxs in pars_subs]
        # ----------------------------------------------------------------------------

        # After feeding the users, positive/negative samples, and other inputs, update the variables and return the loss.
        self.bpr_train = theano.function(
            inputs=[pidxs_t, qidxs_t, mask_t, uidxs],
            outputs=-upq,
            updates=bpr_updates)
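
Examples #4 and #5 (and all of the recurrent examples below) share one update pattern: concatenate the positive and negative index vectors, deduplicate them with Unique, then issue a single set_subtensor update that rewrites only the embedding rows that were actually touched, using the rows of the full gradient selected by the unique indices. A self-contained sketch of just that pattern (the table size, learning rate and the single user vector u are illustrative, not taken from the snippets above):

    import numpy as np
    import theano
    import theano.tensor as T
    from theano.tensor.extra_ops import Unique
    from theano.tensor.nnet import sigmoid

    lt = theano.shared(np.random.rand(100, 20).astype('float32'))  # item embedding table
    u = theano.shared(np.random.rand(20).astype('float32'))        # one user vector, kept fixed here
    lr = np.float32(0.01)

    pidxs, qidxs = T.ivector('pidxs'), T.ivector('qidxs')
    xps, xqs = lt[pidxs], lt[qidxs]
    cost = -T.sum(T.log(sigmoid(T.dot(xps - xqs, u))))             # BPR-style loss

    pqs = T.concatenate((pidxs, qidxs))          # concatenate first
    uiq_pqs = Unique(False, False, False)(pqs)   # then deduplicate
    uiq_x = lt[uiq_pqs]                          # only these rows need updating

    # Differentiate w.r.t. the whole table, but gather and write back only the unique rows.
    update_x = T.set_subtensor(uiq_x, uiq_x - lr * T.grad(cost, lt)[uiq_pqs])
    train = theano.function([pidxs, qidxs], cost, updates=[(lt, update_x)])

    train(np.asarray([1, 2, 2], dtype='int32'), np.asarray([5, 6, 5], dtype='int32'))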
Example #6
    def __theano_trainx__(self, n_in, n_hidden):
        """
        Run through the training sequence once during the training phase.
        """
        # self.alpha_lambda = ['alpha', 'lambda', 'fea_random_zero']
        uix, whx = self.uix, self.whx

        tra_mask = T.imatrix()  # shape=(n, 157)
        actual_batch_size = tra_mask.shape[0]
        seq_length = T.max(T.sum(tra_mask,
                                 axis=1))  # use the max sequence length in the mini-batch as seq_length
        mask = tra_mask.T  # shape=(157, n)

        c0x = T.alloc(self.c0x, actual_batch_size, n_hidden)  # shape=(n, n_hidden)
        h0x = T.alloc(self.h0x, actual_batch_size, n_hidden)  # shape=(n, n_hidden)
        bix = T.alloc(self.bix, actual_batch_size, 4,
                      n_hidden)  # shape=(n, 4, n_hidden); n_hidden goes last
        bix = bix.dimshuffle(1, 2, 0)  # shape=(4, n_hidden, n)

        # Input side: only the purchased items are fed in.
        pidxs, qidxs = T.imatrix(), T.imatrix()  # TensorType(int32, matrix)
        ixps = self.lt[pidxs]  # shape((actual_batch_size, seq_length, n_in))
        ixps = ixps.dimshuffle(1, 0, 2)  # shape=(seq_length, batch_size, n_in)

        uiq_ps = Unique(False, False, False)(pidxs)  # deduplicate
        uiq_ix = self.lt[uiq_ps]

        # Output side: h*w gives the score.
        yxps, yxqs = self.vyx[pidxs], self.vyx[qidxs]
        yxps, yxqs = yxps.dimshuffle(1, 0, 2), yxqs.dimshuffle(1, 0, 2)

        pqs = T.concatenate((pidxs, qidxs))  # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_yx = self.vyx[uiq_pqs]
        """
        Given the positive/negative samples at time t and the hidden state at t-1, compute the current hidden state and the current loss. The time index t is omitted in the formulas.
        # Property: T.dot((m, n), (n, )) has shape=(m, ), i.e. each row of the matrix is dotted with the vector
            # GRU
            z = sigmoid(ux_z * xp + wh_z * h_pre1)
            r = sigmoid(ux_r * xp + wh_r * h_pre1)
            c = tanh(ux_c * xp + wh_c * (r ⊙ h_pre1))
            h = z * h_pre1 + (1.0 - z) * c
        # Property: T.dot((n, ), (n, )) gives a scalar
            upq  = h_pre1 * (xp - xq)
            loss = log(1.0 + e^(-upq))
        """
        def recurrence(ixp_t, yxp_t, yxq_t, mask_t, cx_t_pre1, hx_t_pre1):
            # Features and hidden states are shaped (batch_size, n_hidden) = (n, 20).
            gatesx = T.dot(uix, ixp_t.T) + T.dot(
                whx, hx_t_pre1.T) + bix  # shape=(4, 20, n)
            ix, fx, gx, ox = sigmoid(gatesx[0]).T, sigmoid(gatesx[1]).T, tanh(
                gatesx[2]).T, sigmoid(gatesx[3]).T
            cx_t = fx * cx_t_pre1 + ix * gx  # shape=(n, 20)
            hx_t = ox * tanh(cx_t)  # shape=(n, 20)
            # preference error
            upq_t = T.sum(hx_t_pre1 * (yxp_t - yxq_t), axis=1)  # shape=(n, )
            loss_t = T.log(sigmoid(upq_t))  # shape=(n, )
            loss_t *= mask_t  # multiplying by the 0/1 mask only here, on the loss, is enough
            return [cx_t, hx_t, loss_t]  # shape=(n, 20), (n, )

        [cx, hx, loss], _ = theano.scan(fn=recurrence,
                                        sequences=[ixps, yxps, yxqs, mask],
                                        outputs_info=[c0x, h0x, None],
                                        n_steps=seq_length)  # loop only up to the longest valid position

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        seq_l2_sq = (
            T.sum([T.sum(par**2) for par in [uix, whx, yxps, yxqs, ixps]]) +
            T.sum([T.sum(par**2) for par in [bix]]) / actual_batch_size)
        upq = T.sum(loss)
        seq_costs = (-upq / actual_batch_size + 0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.paramsx)
        seq_updates = [(par, par - lr * gra)
                       for par, gra in zip(self.paramsx, seq_grads)]
        update_ix = T.set_subtensor(
            uiq_ix, uiq_ix - lr * T.grad(seq_costs, self.lt)[uiq_ps])
        update_yx = T.set_subtensor(
            uiq_yx, uiq_yx - lr * T.grad(seq_costs, self.vyx)[uiq_pqs])
        seq_updates.append((self.lt, update_ix))
        seq_updates.append((self.vyx, update_yx))  # appended in place to seq_updates
        # ----------------------------------------------------------------------------

        # After feeding the positive/negative sample sequences and other inputs, update the variables and return the loss.
        # givens supply the data
        start_end = T.ivector()
        self.seq_trainx = theano.function(
            inputs=[start_end],
            outputs=-upq,
            updates=seq_updates,
            givens={
                pidxs: self.
                tra_buys_masks[start_end],  # type: TensorType(int32, matrix)
                qidxs: self.tra_buys_neg_masks[
                    start_end],  # T.ivector() has type TensorType(int32, vector)
                tra_mask: self.tra_masks[start_end]
            })
Example #7
    def __theano_train__(self, n_in, n_hidden):
        """
        Run through the training sequence once during the training phase.
        """
        ui, wh = self.ui, self.wh
        bi = self.bi

        tra_mask = T.ivector()
        seq_length = T.sum(tra_mask)                # effective (valid) length

        pidxs, qidxs = T.ivector(), T.ivector()
        xps, xqs = self.lt[pidxs], self.lt[qidxs]   # shape((seq_length, n_in))

        pqs = T.concatenate((pidxs, qidxs))         # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_x = self.lt[uiq_pqs]                    # features of the corresponding items

        """
        Given the positive/negative samples at time t and the hidden state at t-1, compute the current hidden state and the current loss. The time index t is omitted in the formulas.
        # Property: T.dot((n, n), (n, )) has shape=(n, ), i.e. each row of the (n, n) matrix is dotted with the vector
            # RNN
            h = sigmoid(ux * xp + wh * h_pre1)
        # Property: T.dot((n, ), (n, )) gives (1, 1)
            upq  = h_pre1 * (xp - xq)               # note: the preference uses the previous hidden state and the current pos/neg samples, not the current hidden state
            loss = log(1.0 + e^(-upq))
        """
        def recurrence(xp_t, xq_t, h_t_pre1):
            h_t = sigmoid(T.dot(ui, xp_t) +
                          T.dot(wh, h_t_pre1) + bi)     # shape=(n, ); compute the current hidden state
            # Using h(t) * (xp(t+1) - xq(t+1)) would also work.
            upq_t = T.dot(h_t_pre1, xp_t - xq_t)         # note: based on the previous hidden state, h(t-1)*(xp(t)-xq(t))
            loss_t = T.log(sigmoid(upq_t))              # note: log(x) is the natural logarithm (base e)
            return [h_t, loss_t]
        [h, loss], _ = theano.scan(
            fn=recurrence,
            sequences=[xps, xqs],
            outputs_info=[self.h0, None],
            n_steps=seq_length,
            truncate_gradient=-1)

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        seq_l2_sq = T.sum([T.sum(par ** 2) for par in [xps, xqs, ui, wh, bi]])
        upq = T.sum(loss)
        seq_costs = (
            - upq +
            0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra) for par, gra in zip(self.params, seq_grads)]
        update_x = T.set_subtensor(uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
        seq_updates.append((self.lt, update_x))     # appended in place to seq_updates
        # ----------------------------------------------------------------------------

        # After feeding the positive/negative sample sequences and other inputs, update the variables and return the loss.
        uidx = T.iscalar()                              # T.iscalar() has type TensorType(int32, )
        self.seq_train = theano.function(
            inputs=[uidx],
            outputs=-upq,
            updates=seq_updates,
            givens={
                pidxs: self.tra_buys_masks[uidx],       # type: TensorType(int32, matrix)
                qidxs: self.tra_buys_neg_masks[uidx],
                tra_mask: self.tra_masks[uidx]})
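
Restated in equation form, each step of the recurrence in Example #7 computes

    h_t = \sigma(U_i x^p_t + W_h h_{t-1} + b_i), \qquad
    \widehat{upq}_t = h_{t-1}^{\top}(x^p_t - x^q_t), \qquad
    \mathrm{loss}_t = \log\sigma(\widehat{upq}_t)

and the compiled function minimizes -\sum_t \mathrm{loss}_t + \tfrac{\lambda}{2}\big(\lVert x^p\rVert^2 + \lVert x^q\rVert^2 + \lVert U_i\rVert^2 + \lVert W_h\rVert^2 + \lVert b_i\rVert^2\big), i.e. the BPR objective with the preference scored against the previous hidden state.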
Example #8
    def __theano_train__(self, n_in, n_hidden):
        """
        Run through the training sequence once during the training phase.
        """
        # self.alpha_lambda = ['alpha', 'lambda']
        ui, wh = self.ui, self.wh

        tra_mask = T.ivector()
        seq_length = T.sum(tra_mask)                # effective (valid) length

        h0 = self.h0
        bi = self.bi

        pidxs, qidxs = T.ivector(), T.ivector()
        xps, xqs = self.lt[pidxs], self.lt[qidxs]   # shape((seq_length, n_in))

        pqs = T.concatenate((pidxs, qidxs))         # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_x = self.lt[uiq_pqs]                    # features of the corresponding items

        """
        Given the positive/negative samples at time t and the hidden state at t-1, compute the current hidden state and the current loss. The time index t is omitted in the formulas.
        # Property: T.dot((m, n), (n, )) has shape=(m, ), i.e. each row of the matrix is dotted with the vector
            # GRU
            z = sigmoid(ux_z * xp + wh_z * h_pre1)
            r = sigmoid(ux_r * xp + wh_r * h_pre1)
            c = tanh(ux_c * xp + wh_c * (r ⊙ h_pre1))
            h = z * h_pre1 + (1.0 - z) * c
        # Property: T.dot((n, ), (n, )) gives a scalar
            upq  = h_pre1 * (xp - xq)
            loss = log(1.0 + e^(-upq))
        """
        def recurrence(xp_t, xq_t, h_t_pre1):
            z_r = sigmoid(T.dot(ui[:2], xp_t) +
                          T.dot(wh[:2], h_t_pre1) + bi[:2])
            z, r = z_r[0], z_r[1]
            c = tanh(T.dot(ui[2], xp_t) +
                     T.dot(wh[2], (r * h_t_pre1)) + bi[2])
            h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c
            upq_t = T.dot(h_t_pre1, xp_t - xq_t)
            loss_t = T.log(sigmoid(upq_t))
            return [h_t, loss_t]
        [h, loss], _ = theano.scan(
            fn=recurrence,
            sequences=[xps, xqs],
            outputs_info=[h0, None],
            n_steps=seq_length,
            truncate_gradient=-1)

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        seq_l2_sq = T.sum([T.sum(par ** 2) for par in [xps, xqs, ui, wh, bi]])
        upq = T.sum(loss)
        seq_costs = (
            - upq +
            0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra) for par, gra in zip(self.params, seq_grads)]
        update_x = T.set_subtensor(uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
        seq_updates.append((self.lt, update_x))     # appended in place to seq_updates
        # ----------------------------------------------------------------------------

        # After feeding the positive/negative sample sequences and other inputs, update the variables and return the loss.
        uidx = T.iscalar()                              # T.iscalar() has type TensorType(int32, )
        self.seq_train = theano.function(
            inputs=[uidx],
            outputs=-upq,
            updates=seq_updates,
            givens={
                pidxs: self.tra_buys_masks[uidx],       # type: TensorType(int32, matrix)
                qidxs: self.tra_buys_neg_masks[uidx],
                tra_mask: self.tra_masks[uidx]})
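
The stacked weights in Example #8 store the three GRU gates in one tensor, so ui[:2]/wh[:2]/bi[:2] compute z and r in a single dot product and ui[2]/wh[2]/bi[2] compute the candidate state. Written out (note the code uses h_t = (1 - z) ⊙ h_{t-1} + z ⊙ c, i.e. z gates the candidate, whereas the docstring shows the opposite convention):

    z_t = \sigma(U_z x^p_t + W_z h_{t-1} + b_z), \quad
    r_t = \sigma(U_r x^p_t + W_r h_{t-1} + b_r), \quad
    c_t = \tanh(U_c x^p_t + W_c (r_t \odot h_{t-1}) + b_c)

    h_t = (1 - z_t) \odot h_{t-1} + z_t \odot c_t, \qquad
    \mathrm{loss}_t = \log\sigma\big(h_{t-1}^{\top}(x^p_t - x^q_t)\big)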
Example #9
    def __theano_train2__(self):
        """
        Run through the training sequence once during the training phase.
        """
        ui, wh = self.ui, self.wh
        vs, bs = self.vs, self.bs
        h0, bi = self.h0, self.bi
        wd = self.wd

        tra_mask = T.ivector()
        seq_length = T.sum(tra_mask)  # effective (valid) length

        xpidxs, xqidxs = T.ivector(), T.ivector()
        dpidxs, dqidxs = T.ivector(), T.ivector()

        xps = self.lt[xpidxs]  # shape=(seq_length, n_in)
        xqs = self.lt[xqidxs]
        dps = self.di[dpidxs]
        ps = T.concatenate((xps, dps), axis=1)

        pqs = T.concatenate((xpidxs, xqidxs))  # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_x = self.lt[uiq_pqs]  # features of the corresponding items
        uiq_ds = Unique(False, False, False)(dpidxs)
        uiq_d = self.di[uiq_ds]

        def recurrence(p_t, xp_t1, xq_t1, dp_t1, dq_t1, h_t_pre1):
            # hidden layer
            z_r = sigmoid(
                T.dot(ui[:2], p_t) + T.dot(wh[:2], h_t_pre1) + bi[:2])
            z, r = z_r[0], z_r[1]
            c = tanh(T.dot(ui[2], p_t) + T.dot(wh[2], (r * h_t_pre1)) + bi[2])
            h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c
            # probabilities of each distance interval at the next step
            s_t = softrelu(T.dot(vs, h_t) + bs)  # shape=(381, )
            # loss, using the samples of the next time step.
            upq_t = T.dot(h_t, xp_t1 - xq_t1) + wd * (s_t[dp_t1] - s_t[dq_t1])

            loss_t_bpr = log(sigmoid(upq_t))
            return [h_t, loss_t_bpr]

        [h, loss_bpr], _ = theano.scan(
            fn=recurrence,
            sequences=[ps, xps[1:], xqs[1:], dpidxs[1:], dqidxs[1:]],
            outputs_info=[h0, None],
            n_steps=seq_length - 1)

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        # ad = self.adam_default
        seq_l2_sq = T.sum(
            [T.sum(par**2) for par in [xps, xqs, ui, wh, bi, dps, vs, bs, wd]])

        # TODO
        # Fetch the confidence-matrix row (length item_num) for the user with index uidx.
        c_u = T.dvector()
        # We only need c_ul, the confidence values of the locations that take part in training,
        # i.e. the visited locations xpidxs. Indexing with the full xpidxs raised an out-of-range
        # error because xpidxs still contains mask padding.

        # Idea: loss_bpr has shape (seq_length-1, ); weight it element-wise by the confidence of
        # the corresponding next-step positive item, then take -T.sum as the BPR loss.

        c_ul = c_u[xpidxs[1:seq_length]]  # slice to the valid region first so lengths match loss_bpr
        bpr = -T.sum(c_ul * loss_bpr)

        seq_costs = (bpr + 0.5 * l2 * seq_l2_sq)
        # seq_updates = self.adam(seq_costs, self.params+[self.lt, self.di], lr, ad[0], ad[1], ad[2], ad[3])
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra)
                       for par, gra in zip(self.params, seq_grads)]
        update_x = T.set_subtensor(
            uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
        update_d = T.set_subtensor(
            uiq_d, uiq_d - lr * T.grad(seq_costs, self.di)[uiq_ds])
        seq_updates.append((self.lt, update_x))  # appended in place to seq_updates
        seq_updates.append((self.di, update_d))
        # ----------------------------------------------------------------------------

        # After feeding the positive/negative sample sequences and other inputs, update the variables and return the loss.
        uidx = T.iscalar()  # T.iscalar() has type TensorType(int32, )
        self.aux_seq_train = theano.function(
            inputs=[uidx],
            outputs=bpr,
            updates=seq_updates,
            givens={
                xpidxs:
                self.tra_buys_masks[uidx],  # type: TensorType(int32, matrix)
                xqidxs: self.tra_buys_neg_masks[uidx],  # negative poi
                dpidxs: self.tra_dist_masks[uidx],  # distance between consecutive places, encoded as interval ids
                dqidxs: self.tra_dist_neg_masks[uidx],
                tra_mask: self.tra_masks[uidx],
                c_u: self.confidence_matrix[uidx]
            })
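
With the confidence weighting applied element-wise as the TODO comments describe, the objective of this variant is

    \mathrm{bpr} = -\sum_{t=1}^{T-1} c_{u,\,p_{t+1}} \,\log\sigma\Big(h_t^{\top}(x^p_{t+1} - x^q_{t+1}) + w_d\,\big(s_t[d^p_{t+1}] - s_t[d^q_{t+1}]\big)\Big)

where c_{u,i} is the user's confidence for item i (the confidence_matrix row supplied through givens) and s_t is the softrelu output over distance intervals.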
Example #10
    def __theano_train__(self, n_in, n_hidden):
        """
        Run through the training sequence once during the training phase.
        """
        ui, wh = self.ui, self.wh
        vs, bs = self.vs, self.bs
        dd = self.dd

        tra_mask = T.ivector()
        seq_length = T.sum(tra_mask)  # effective (valid) length

        h0 = self.h0
        bi = self.bi

        pidxs = T.ivector()
        qidxs = T.ivector()
        didxs = T.ivector()
        xps = self.lt[pidxs]  # shape=(seq_length, n_in)
        xqs = self.lt[qidxs]
        xds = self.di[didxs]
        xs = T.concatenate((xps, xds), axis=1)

        pqs = T.concatenate((pidxs, qidxs))  # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_x = self.lt[uiq_pqs]  # features of the corresponding items
        uiq_ds = Unique(False, False, False)(didxs)
        uiq_d = self.di[uiq_ds]

        wd = self.wd
        ls = softmax(self.loss_weight)
        """
        Given the positive/negative samples at time t and the hidden state at t-1, compute the current hidden state and the current loss. The time index t is omitted in the formulas.
        # Property: T.dot((m, n), (n, )) has shape=(m, ), i.e. each row of the matrix is dotted with the vector
            # GRU
            z = sigmoid(ux_z * xp + wh_z * h_pre1)
            r = sigmoid(ux_r * xp + wh_r * h_pre1)
            c = tanh(ux_c * xp + wh_c * (r ⊙ h_pre1))
            h = z * h_pre1 + (1.0 - z) * c
        # Property: T.dot((n, ), (n, )) gives a scalar
            upq  = h_pre1 * (xp - xq)
            loss = log(1.0 + e^(-upq))
        """
        def recurrence(x_t, xp_t1, xq_t1, d_t1, h_t_pre1):
            # hidden layer
            z_r = sigmoid(
                T.dot(ui[:2], x_t) + T.dot(wh[:2], h_t_pre1) + bi[:2])
            z, r = z_r[0], z_r[1]
            c = tanh(T.dot(ui[2], x_t) + T.dot(wh[2], (r * h_t_pre1)) + bi[2])
            h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c
            # probabilities of each distance interval at the next step
            s_t = softmax(T.dot(vs, h_t) + bs)  # shape=(381, )
            # loss, using the samples of the next time step.
            upq_t = T.dot(h_t, xp_t1 - xq_t1) + wd * s_t[d_t1]  # the previous version
            loss_t_bpr = T.log(sigmoid(upq_t))
            loss_t_sur = T.sum(s_t[:d_t1 + 1]) * dd - T.log(s_t[d_t1])
            # s_t[:d_t + 1]: sum of the probabilities of all intervals from interval 0 up to this distance interval.
            return [h_t, loss_t_sur, loss_t_bpr]

        [h, loss_sur, loss_bpr
         ], _ = theano.scan(fn=recurrence,
                            sequences=[xs, xps[1:], xqs[1:], didxs[1:]],
                            outputs_info=[h0, None, None],
                            n_steps=seq_length - 1)

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        seq_l2_sq = T.sum([
            T.sum(par**2)
            for par in [xps, xqs, ui, wh, bi, xds, vs, bs, wd, ls]
        ])
        sur = T.sum(loss_sur)
        upq = -T.sum(loss_bpr)
        los = ls[0] * sur + ls[1] * upq
        seq_costs = (los + 0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra)
                       for par, gra in zip(self.params, seq_grads)]
        update_x = T.set_subtensor(
            uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
        update_d = T.set_subtensor(
            uiq_d, uiq_d - lr * T.grad(seq_costs, self.di)[uiq_ds])
        seq_updates.append((self.lt, update_x))  # appended in place to seq_updates
        seq_updates.append((self.di, update_d))
        # ----------------------------------------------------------------------------

        # After feeding the positive/negative sample sequences and other inputs, update the variables and return the loss.
        uidx = T.iscalar()  # T.iscalar() has type TensorType(int32, )
        self.seq_train = theano.function(
            inputs=[uidx],
            outputs=[los, sur, upq, ls],
            updates=seq_updates,
            givens={
                pidxs:
                self.tra_buys_masks[uidx],  # type: TensorType(int32, matrix)
                qidxs: self.tra_buys_neg_masks[uidx],  # negative poi
                didxs: self.tra_dist_masks[uidx],  # distance between consecutive places, encoded as interval ids
                tra_mask: self.tra_masks[uidx]
            })
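
The two per-step losses returned by the recurrence in Example #10 can be written as (s_t is the softmax over distance intervals, d_{t+1} the observed interval of the next step, dd the fixed interval width self.dd):

    \mathrm{loss}^{sur}_t = dd \cdot \sum_{j=0}^{d_{t+1}} s_t[j] \;-\; \log s_t[d_{t+1}], \qquad
    \mathrm{loss}^{bpr}_t = \log\sigma\big(h_t^{\top}(x^p_{t+1} - x^q_{t+1}) + w_d\, s_t[d_{t+1}]\big)

and the total cost is ls_0 \sum_t \mathrm{loss}^{sur}_t + ls_1\big(-\sum_t \mathrm{loss}^{bpr}_t\big) + \tfrac{\lambda}{2}\lVert\theta\rVert^2, with ls = softmax(self.loss_weight).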
Example #11
    def __theano_train__(self, n_in, n_hidden):
        """
        Run through the training sequence once during the training phase.
        """
        ui, wh = self.ui, self.wh
        vs, bs = self.vs, self.bs
        h0, bi = self.h0, self.bi
        wd = self.wd
        av, ar, ae = self.av, self.ar, self.ae

        tra_mask = T.ivector()
        seq_length = T.sum(tra_mask)  # effective (valid) length

        xpidxs, xqidxs = T.ivector(), T.ivector()
        dpidxs, dqidxs = T.ivector(), T.ivector()
        xps = self.lt[xpidxs]  # shape=(seq_length, n_in)
        xqs = self.lt[xqidxs]
        dps = self.di[dpidxs]
        ps = T.concatenate((xps, dps), axis=1)

        pqs = T.concatenate((xpidxs, xqidxs))  # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_x = self.lt[uiq_pqs]  # features of the corresponding items
        uiq_ds = Unique(False, False, False)(dpidxs)
        uiq_d = self.di[uiq_ds]
        """
        Given the positive/negative samples at time t and the hidden state at t-1, compute the current hidden state and the current loss. The time index t is omitted in the formulas.
        # Property: T.dot((m, n), (n, )) has shape=(m, ), i.e. each row of the matrix is dotted with the vector
            # GRU
            z = sigmoid(ux_z * xp + wh_z * h_pre1)
            r = sigmoid(ux_r * xp + wh_r * h_pre1)
            c = tanh(ux_c * xp + wh_c * (r ⊙ h_pre1))
            h = z * h_pre1 + (1.0 - z) * c
        # Property: T.dot((n, ), (n, )) gives a scalar
            upq  = h_pre1 * (xp - xq)
            loss = log(1.0 + e^(-upq))
        """
        def recurrence(p_t, xp_t1, xq_t1, dp_t1, dq_t1, h_t_pre1):
            # hidden layer
            z_r = sigmoid(
                T.dot(ui[:2], p_t) + T.dot(wh[:2], h_t_pre1) + bi[:2])
            z, r = z_r[0], z_r[1]
            c = tanh(T.dot(ui[2], p_t) + T.dot(wh[2], (r * h_t_pre1)) + bi[2])
            h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c
            # probabilities of each distance interval at the next step
            s_t = softrelu(T.dot(vs, h_t) + bs)  # shape=(381, )
            # loss, using the samples of the next time step.
            # upq_t = T.dot(h_t, xp_t1 - xq_t1) + wd * (s_t[dp_t1] - s_t[dq_t1])

            # Try combining the two preferences in a nonlinear way.
            upq_t = T.dot(
                av,
                tanh(ar * T.dot(h_t, xp_t1) + ae * s_t[dp_t1]) -
                tanh(ar * T.dot(h_t, xq_t1) + ae * s_t[dq_t1]))

            loss_t_bpr = log(sigmoid(upq_t))
            return [h_t, loss_t_bpr]

        [h, loss_bpr], _ = theano.scan(
            fn=recurrence,
            sequences=[ps, xps[1:], xqs[1:], dpidxs[1:], dqidxs[1:]],
            outputs_info=[h0, None],
            n_steps=seq_length - 1)

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        # ad = self.adam_default
        seq_l2_sq = T.sum([
            T.sum(par**2)
            for par in [xps, xqs, ui, wh, bi, dps, vs, bs, wd, av, ar, ae]
        ])
        bpr = -T.sum(loss_bpr)
        seq_costs = (bpr + 0.5 * l2 * seq_l2_sq)
        # seq_updates = self.adam(seq_costs, self.params+[self.lt, self.di], lr, ad[0], ad[1], ad[2], ad[3])
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra)
                       for par, gra in zip(self.params, seq_grads)]
        update_x = T.set_subtensor(
            uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
        update_d = T.set_subtensor(
            uiq_d, uiq_d - lr * T.grad(seq_costs, self.di)[uiq_ds])
        seq_updates.append((self.lt, update_x))  # appended in place to seq_updates
        seq_updates.append((self.di, update_d))
        # ----------------------------------------------------------------------------

        # After feeding the positive/negative sample sequences and other inputs, update the variables and return the loss.
        uidx = T.iscalar()  # T.iscalar() has type TensorType(int32, )
        self.seq_train = theano.function(
            inputs=[uidx],
            outputs=bpr,
            updates=seq_updates,
            givens={
                xpidxs:
                self.tra_buys_masks[uidx],  # type: TensorType(int32, matrix)
                xqidxs: self.tra_buys_neg_masks[uidx],  # negative poi
                dpidxs: self.tra_dist_masks[uidx],  # distance between consecutive places, encoded as interval ids
                dqidxs: self.tra_dist_neg_masks[uidx],
                tra_mask: self.tra_masks[uidx]
            })
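
Instead of adding the item preference and the distance preference linearly (the commented-out upq_t), Example #11 combines them through a tanh layer before taking the positive/negative difference:

    \widehat{upq}_t = a_v^{\top}\Big[\tanh\big(a_r\, h_t^{\top} x^p_{t+1} + a_e\, s_t[d^p_{t+1}]\big) - \tanh\big(a_r\, h_t^{\top} x^q_{t+1} + a_e\, s_t[d^q_{t+1}]\big)\Big]

with a_v, a_r, a_e the extra parameters av, ar, ae; the BPR loss is then applied to this score exactly as before.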
Example #12
    def __theano_train__(self, n_in, n_hidden):
        """
        Run through the training sequence once during the training phase.
        """
        M = self.M

        tra_mask = T.ivector()
        seq_length = T.sum(tra_mask)  # effective (valid) length

        h0 = self.h0

        xpidxs = T.ivector()
        xqidxs = T.ivector()
        dpidxs = T.ivector()
        dqidxs = T.ivector()
        xps = self.lt[xpidxs]    # shape=(seq_length, n_in)
        xqs = self.lt[xqidxs]
        wdps = self.wd[dpidxs]
        wdqs = self.wd[dqidxs]

        pqs = T.concatenate((xpidxs, xqidxs))         # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_x = self.lt[uiq_pqs]                    # features of the corresponding items

        dpqs = T.concatenate((dpidxs, dqidxs))         # concatenate first
        uiq_dpqs = Unique(False, False, False)(dpqs)  # then deduplicate
        uiq_d = self.wd[uiq_dpqs]                    # the corresponding distance-weight rows

        def recurrence(x_t, xp_t1, xq_t1, wd_t, wdp_t1, wdq_t1,
                       h_t_pre1):
            # hidden layer
            h_t = sigmoid(T.dot(M, x_t) + T.dot(wd_t, h_t_pre1))
            yp = T.dot(T.dot(wdp_t1, h_t), T.dot(M, xp_t1).T)
            yq = T.dot(T.dot(wdq_t1, h_t), T.dot(M, xq_t1).T)
            loss_t_bpr = T.log(sigmoid(yp - yq))

            return [h_t, loss_t_bpr]

        [h, loss_bpr], _ = theano.scan(
            fn=recurrence,
            sequences=[xps, xps[1:], xqs[1:], wdps, wdps[1:], wdqs[1:]],
            outputs_info=[h0, None],
            n_steps=seq_length-1)

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        seq_l2_sq = T.sum([T.sum(par ** 2) for par in [xps, xqs, M, wdps, wdqs]])
        los = - T.sum(loss_bpr)
        seq_costs = (
            los +
            0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra) for par, gra in zip(self.params, seq_grads)]
        update_x = T.set_subtensor(uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
        update_d = T.set_subtensor(uiq_d, uiq_d - lr * T.grad(seq_costs, self.wd)[uiq_dpqs])
        seq_updates.append((self.lt, update_x))     # appended in place to seq_updates
        seq_updates.append((self.wd, update_d))
        # ----------------------------------------------------------------------------

        # After feeding the positive/negative sample sequences and other inputs, update the variables and return the loss.
        uidx = T.iscalar()  # T.iscalar() has type TensorType(int32, )
        self.seq_train = theano.function(
            inputs=[uidx],
            outputs=los,
            updates=seq_updates,
            givens={
                xpidxs: self.tra_buys_masks[uidx],  # type: TensorType(int32, matrix)
                xqidxs: self.tra_buys_neg_masks[uidx],  # negative poi
                dpidxs: self.tra_dist_masks[uidx],  # distance between consecutive places, encoded as interval ids
                dqidxs: self.tra_dist_neg_masks[uidx],
                tra_mask: self.tra_masks[uidx]})
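
Example #12 scores transitions with distance-specific matrices: self.wd is indexed by the distance interval and M is a shared projection, so per step

    h_t = \sigma\big(M x^p_t + W_{d_t}\, h_{t-1}\big), \qquad
    y^p = (W_{d^p_{t+1}} h_t)^{\top} (M x^p_{t+1}), \quad
    y^q = (W_{d^q_{t+1}} h_t)^{\top} (M x^q_{t+1}), \qquad
    \mathrm{loss}_t = \log\sigma(y^p - y^q)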
Example #13
    def __theano_train__(self, n_in, n_hidden, n_img, n_txt):
        """
        Run through the training sequence once during the training phase.
        """
        # self.alpha_lambda = ['alpha', 'lambda', 'lambda_ev', 'lambda_ae', 'fea_random_zero']
        ui, wh = self.ui, self.wh
        ei, vt = self.ei, self.vt

        tra_mask = T.imatrix()  # shape=(n, 157)
        actual_batch_size = tra_mask.shape[0]
        seq_length = T.max(T.sum(tra_mask,
                                 axis=1))  # use the max sequence length in the mini-batch as seq_length
        mask = tra_mask.T  # shape=(157, n)

        h0 = T.alloc(self.h0, actual_batch_size, n_hidden)  # shape=(n, 40)
        bi = T.alloc(self.bi, actual_batch_size, 3,
                     n_hidden)  # shape=(n, 3, 40); n_hidden goes last
        bi = bi.dimshuffle(1, 2, 0)  # shape=(3, 40, n)

        pidxs, qidxs = T.imatrix(), T.imatrix()  # TensorType(int32, matrix)
        xps, xqs = self.lt[pidxs], self.lt[
            qidxs]  # shape((actual_batch_size, seq_length, n_in))
        ips, iqs = self.fi[pidxs], self.fi[
            qidxs]  # shape((actual_batch_size, seq_length, n_img))
        tps, tqs = self.ft[pidxs], self.ft[
            qidxs]  # shape((actual_batch_size, seq_length, n_txt))
        xps, xqs = xps.dimshuffle(1, 0, 2), xqs.dimshuffle(
            1, 0, 2)  # shape=(seq_len, batch_size, n_in)
        ips, iqs = ips.dimshuffle(1, 0, 2), iqs.dimshuffle(1, 0, 2)
        tps, tqs = tps.dimshuffle(1, 0, 2), tqs.dimshuffle(1, 0, 2)

        pqs = T.concatenate((pidxs, qidxs))  # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_x = self.lt[uiq_pqs]  # features of the corresponding items
        """
        Given the positive/negative samples at time t and the hidden state at t-1, compute the current hidden state and the current loss. The time index t is omitted in the formulas.
        # Property: T.dot((m, n), (n, )) has shape=(m, ), i.e. each row of the matrix is dotted with the vector
            # GRU
            z = sigmoid(ux_z * xp + wh_z * h_pre1)
            r = sigmoid(ux_r * xp + wh_r * h_pre1)
            c = tanh(ux_c * xp + wh_c * (r ⊙ h_pre1))
            h = z * h_pre1 + (1.0 - z) * c
        # Property: T.dot((n, ), (n, )) gives a scalar
            upq  = h_pre1 * (xp - xq)
            loss = log(1.0 + e^(-upq))
        """
        zero = self.alpha_lambda[4]
        if 0.0 == zero:  # use the full (uncorrupted) features

            def recurrence(xp_t, xq_t, ip_t, iq_t, tp_t, tq_t, mask_t,
                           h_t_pre1):
                # item representation
                mp_t = T.dot(ip_t, ei.T) + T.dot(tp_t, vt.T)  # shape=(n, 20)
                mq_t = T.dot(iq_t, ei.T) + T.dot(tq_t, vt.T)
                p_t = T.concatenate((xp_t, mp_t), axis=1)  # shape=(n, 40)
                q_t = T.concatenate((xq_t, mq_t), axis=1)
                # hidden-layer computation
                z_r = sigmoid(
                    T.dot(ui[:2], p_t.T) + T.dot(wh[:2], h_t_pre1.T) + bi[:2])
                z, r = z_r[0].T, z_r[1].T  # shape=(n, 40)
                c = tanh(
                    T.dot(ui[2], p_t.T) + T.dot(wh[2], (r * h_t_pre1).T) +
                    bi[2])
                h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c.T
                # preference error
                upq_t = T.sum(h_t_pre1 * (p_t - q_t), axis=1)  # shape=(n, )
                loss_t = T.log(sigmoid(upq_t))  # shape=(n, )
                loss_t *= mask_t
                # reconstruction error
                loss_ae_t_i = (
                    T.sum((ip_t - T.dot(mp_t, ei))**2) + T.sum(
                        (iq_t - T.dot(mq_t, ei))**2)
                )  # T.sum(shape=(n, 1024), axis=1); final shape=(n,)
                loss_ae_t_t = (T.sum((tp_t - T.dot(mp_t, vt))**2) + T.sum(
                    (tq_t - T.dot(mq_t, vt))**2))
                loss_ae_t_i *= mask_t
                loss_ae_t_t *= mask_t
                return [h_t, loss_t, loss_ae_t_i,
                        loss_ae_t_t]  # shape=(n, 20), (n, ), (n, )

            [h, loss, loss_ae_i, loss_ae_t
             ], _ = theano.scan(fn=recurrence,
                                sequences=[xps, xqs, ips, iqs, tps, tqs, mask],
                                outputs_info=[h0, None, None, None],
                                n_steps=seq_length,
                                truncate_gradient=-1)
        else:
            # Randomly corrupt whole features before training on each sequence.
            ipsc = self.get_corrupted_input_whole_minibatch(ips, zero)
            iqsc = self.get_corrupted_input_whole_minibatch(iqs, zero)
            tpsc = self.get_corrupted_input_whole_minibatch(tps, zero)
            tqsc = self.get_corrupted_input_whole_minibatch(tqs, zero)

            def recurrence(xp_t, xq_t, ip_t, iq_t, tp_t, tq_t, ipc_t, iqc_t,
                           tpc_t, tqc_t, mask_t, h_t_pre1):
                # item representation
                mp_t = T.dot(ipc_t, ei.T) + T.dot(tpc_t, vt.T)  # shape=(n, 20)
                mq_t = T.dot(iqc_t, ei.T) + T.dot(tqc_t, vt.T)
                p_t = T.concatenate((xp_t, mp_t), axis=1)  # shape=(n, 40)
                q_t = T.concatenate((xq_t, mq_t), axis=1)
                # hidden-layer computation
                z_r = sigmoid(
                    T.dot(ui[:2], p_t.T) + T.dot(wh[:2], h_t_pre1.T) + bi[:2])
                z, r = z_r[0].T, z_r[1].T  # shape=(n, 40)
                c = tanh(
                    T.dot(ui[2], p_t.T) + T.dot(wh[2], (r * h_t_pre1).T) +
                    bi[2])
                h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c.T
                # preference error
                upq_t = T.sum(h_t_pre1 * (p_t - q_t), axis=1)  # shape=(n, )
                loss_t = T.log(sigmoid(upq_t))  # shape=(n, )
                loss_t *= mask_t
                # reconstruction error
                loss_ae_t_i = (
                    T.sum((ip_t - T.dot(mp_t, ei))**2) + T.sum(
                        (iq_t - T.dot(mq_t, ei))**2)
                )  # T.sum(shape=(n, 1024), axis=1); final shape=(n,)
                loss_ae_t_t = (T.sum((tp_t - T.dot(mp_t, vt))**2) + T.sum(
                    (tq_t - T.dot(mq_t, vt))**2))
                loss_ae_t_i *= mask_t
                loss_ae_t_t *= mask_t
                return [h_t, loss_t, loss_ae_t_i,
                        loss_ae_t_t]  # shape=(n, 20), (n, ), (n, )

            [h, loss, loss_ae_i,
             loss_ae_t], _ = theano.scan(fn=recurrence,
                                         sequences=[
                                             xps, xqs, ips, iqs, tps, tqs,
                                             ipsc, iqsc, tpsc, tqsc, mask
                                         ],
                                         outputs_info=[h0, None, None, None],
                                         n_steps=seq_length,
                                         truncate_gradient=-1)

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        l2_ev = self.alpha_lambda[2]
        l2_ae = self.alpha_lambda[3]
        seq_l2_sq = (T.sum([T.sum(par**2) for par in [xps, xqs, ui, wh]]) +
                     T.sum([T.sum(par**2)
                            for par in [bi]]) / actual_batch_size)
        seq_l2_ev = (T.sum([T.sum(par**2) for par in [ei, vt]]))
        upq = T.sum(loss)
        ae = (0.5 * l2_ae * T.sum(loss_ae_i) / n_img +
              0.5 * l2_ae * T.sum(loss_ae_t) / n_txt)
        seq_costs = ((-upq + ae) / actual_batch_size + 0.5 * l2 * seq_l2_sq +
                     0.5 * l2_ev * seq_l2_ev)
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra)
                       for par, gra in zip(self.params, seq_grads)]
        update_x = T.set_subtensor(
            uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
        seq_updates.append((self.lt, update_x))  # appended in place to seq_updates
        # ----------------------------------------------------------------------------

        # After feeding the positive/negative sample sequences and other inputs, update the variables and return the loss.
        # givens supply the data
        start_end = T.ivector()
        self.seq_train = theano.function(
            inputs=[start_end],
            outputs=-upq + ae,
            updates=seq_updates,
            givens={
                pidxs: self.
                tra_buys_masks[start_end],  # type: TensorType(int32, matrix)
                qidxs: self.tra_buys_neg_masks[
                    start_end],  # T.ivector() has type TensorType(int32, vector)
                tra_mask: self.tra_masks[start_end]
            })
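
Example #13 fuses the visual and textual features into the item representation and adds an autoencoder-style reconstruction penalty next to the BPR loss. Per step and per positive/negative item (E = ei, V = vt, [·;·] is concatenation):

    m^p_t = i^p_t E^{\top} + t^p_t V^{\top}, \qquad p_t = [x^p_t;\, m^p_t], \qquad
    \mathrm{loss}_t = \log\sigma\big(h_{t-1}^{\top}(p_t - q_t)\big)

    \mathrm{loss}^{ae,img}_t = \lVert i^p_t - m^p_t E\rVert^2 + \lVert i^q_t - m^q_t E\rVert^2, \qquad
    \mathrm{loss}^{ae,txt}_t = \lVert t^p_t - m^p_t V\rVert^2 + \lVert t^q_t - m^q_t V\rVert^2

In the final cost, the image and text reconstruction terms are scaled by 0.5·l2_ae/n_img and 0.5·l2_ae/n_txt respectively, and the whole sum is divided by the actual batch size.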
Example #14
    def __theano_train__(self, n_in, n_hidden):
        """
        Run through the training sequence once during the training phase.
        """
        # self.alpha_lambda = ['alpha', 'lambda']
        ui, wh, bi = self.ui, self.wh, self.bi
        qx, rx, vc = self.qx, self.rx, self.vc
        winx = self.window_input

        tra_mask = T.ivector()
        seq_length = T.sum(tra_mask)

        pidxs, qidxs = T.ivector(), T.ivector()
        xps, xqs = self.lt[pidxs], self.lt[qidxs]  # shape((seq_length, n_in))

        pqs = T.concatenate((pidxs, qidxs))  # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_x = self.lt[uiq_pqs]  # features of the corresponding items
        """
        Given the positive/negative samples at time t and the hidden state at t-1, compute the current hidden state and the current loss. The time index t is omitted in the formulas.
        # Property: T.dot((m, n), (n, )) has shape=(m, ), i.e. each row of the matrix is dotted with the vector
            # GRU
            z = sigmoid(ux_z * xp + wh_z * h_pre1)
            r = sigmoid(ux_r * xp + wh_r * h_pre1)
            c = tanh(ux_c * xp + wh_c * (r ⊙ h_pre1))
            h = z * h_pre1 + (1.0 - z) * c
        # Property: T.dot((n, ), (n, )) gives a scalar
            upq  = h_pre1 * (xp - xq)
            loss = log(1.0 + e^(-upq))
        """
        def recurrence(xp_t, xp_t1, xq_t1, h_t_pre1, cx):
            # context_x
            # Fixed-length window matrix: append xp_t at the bottom and drop the first row, so the shape stays the same.
            cx = T.concatenate((cx[1:], xp_t.reshape(
                (1, n_in))))  # shape=(winx, 20)
            ex = T.dot(tanh(T.dot(cx, qx)), rx)  # shape=(winx, 1)
            ax = softmax(ex.T)  # shape=(1, winx)
            xc = (T.dot(cx.T, ax.T)).reshape((n_in, ))  # shape=(20, )
            # gru_unit
            z_r = sigmoid(
                T.dot(ui[:2], xp_t) + T.dot(vc[:2], xc) +
                T.dot(wh[:2], h_t_pre1) + bi[:2])
            z, r = z_r[0], z_r[1]
            c = tanh(
                T.dot(ui[2], xp_t) + T.dot(vc[2], xc) +
                T.dot(wh[2], (r * h_t_pre1)) + bi[2])
            h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c  # shape=(20, )
            # loss
            upq_t = T.dot(h_t,
                          xp_t1 - xq_t1)  # train on pos/neg samples: h(t) * (xp(t+1) - xq(t+1))
            loss_t = T.log(sigmoid(upq_t))
            return [h_t, cx, loss_t]

        cumx = T.alloc(self.lt[-1], winx, n_in)  # concatenate
        [_, _, loss], _ = theano.scan(  # h is h1..ht; loss is computed from h0..h(t-1) and x1..xt.
            fn=recurrence,
            sequences=[xps, xps[1:], xqs[1:]],
            outputs_info=[self.h0, cumx, None],
            n_steps=seq_length - 1,
            truncate_gradient=-1)

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        seq_l2_sq = T.sum(
            [T.sum(par**2) for par in [xps, xqs, ui, wh, bi, qx, rx, vc]])
        upq = T.sum(loss)
        seq_costs = (-upq + 0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra)
                       for par, gra in zip(self.params, seq_grads)]
        update_x = T.set_subtensor(
            uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
        seq_updates.append((self.lt, update_x))  # appended in place to seq_updates
        # ----------------------------------------------------------------------------

        # After feeding the positive/negative sample sequences and other inputs, update the variables and return the loss.
        uidx = T.iscalar()  # T.iscalar() has type TensorType(int32, )
        self.seq_train = theano.function(
            inputs=[uidx],
            outputs=-upq,
            updates=seq_updates,
            givens={
                pidxs:
                self.tra_buys_masks[uidx],  # type: TensorType(int32, matrix)
                qidxs: self.tra_buys_neg_masks[uidx],
                tra_mask: self.tra_masks[uidx]
            })
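
The context attention in Example #14 keeps a sliding window C of the last winx inputs and condenses it into one context vector x_c that enters every GRU gate through vc. With Q = qx and R = rx:

    e = \tanh(C Q)\,R \in \mathbb{R}^{winx \times 1}, \qquad
    a = \mathrm{softmax}(e^{\top}), \qquad
    x_c = C^{\top} a^{\top}, \qquad
    z = \sigma(U_z x^p_t + V_z x_c + W_z h_{t-1} + b_z) \ \text{(and likewise for r and c)}

Example #16 below is the mini-batch version of the same mechanism, where C has shape (n, winx, n_in) and the Rebroadcast calls keep the attention weights broadcastable over the feature axis.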
Example #15
    def __theano_traini__(self, n_in, n_hidden):
        """
        Run through the training sequence once during the training phase.
        """
        # self.alpha_lambda = ['alpha', 'lambda', 'fea_random_zero']
        uix, whx = self.uix, self.whx
        uit, wht = self.uit, self.wht
        uii, whi = self.uii, self.whi

        tra_mask = T.imatrix()                          # shape=(n, 157)
        actual_batch_size = tra_mask.shape[0]
        seq_length = T.max(T.sum(tra_mask, axis=1))     # use the max sequence length in the mini-batch as seq_length
        mask = tra_mask.T                               # shape=(157, n)

        h0x = T.alloc(self.h0x, actual_batch_size, n_hidden)      # shape=(n, 40)
        h0t = T.alloc(self.h0t, actual_batch_size, n_hidden)
        h0i = T.alloc(self.h0i, actual_batch_size, n_hidden)
        bix = T.alloc(self.bix, actual_batch_size, 3, n_hidden)   # shape=(n, 3, 40); n_hidden goes last
        bit = T.alloc(self.bit, actual_batch_size, 3, n_hidden)
        bii = T.alloc(self.bii, actual_batch_size, 3, n_hidden)
        bix = bix.dimshuffle(1, 2, 0)                             # shape=(3, 40, n)
        bit = bit.dimshuffle(1, 2, 0)
        bii = bii.dimshuffle(1, 2, 0)

        # Input side: only the purchased items are fed in.
        pidxs, qidxs = T.imatrix(), T.imatrix()     # TensorType(int32, matrix)
        ixps = self.lt[pidxs]       # shape((actual_batch_size, seq_length, n_in))
        itps = self.ft[pidxs]       # shape((actual_batch_size, seq_length, n_txt))
        iips = self.fi[pidxs]       # shape((actual_batch_size, seq_length, n_img))
        ixps = ixps.dimshuffle(1, 0, 2)               # shape=(seq_length, batch_size, n_in)
        itps = itps.dimshuffle(1, 0, 2)
        iips = iips.dimshuffle(1, 0, 2)

        # Output side: h*w gives the score.
        yxps, yxqs = self.vyx[pidxs], self.vyx[qidxs]
        ytps, ytqs = self.vyt[pidxs], self.vyt[qidxs]
        yips, yiqs = self.vyi[pidxs], self.vyi[qidxs]
        yxps, yxqs = yxps.dimshuffle(1, 0, 2), yxqs.dimshuffle(1, 0, 2)
        ytps, ytqs = ytps.dimshuffle(1, 0, 2), ytqs.dimshuffle(1, 0, 2)
        yips, yiqs = yips.dimshuffle(1, 0, 2), yiqs.dimshuffle(1, 0, 2)

        pqs = T.concatenate((pidxs, qidxs))         # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_yi = self.vyi[uiq_pqs]

        """
        Given the positive/negative samples at time t and the hidden state at t-1, compute the current hidden state and the current loss. The time index t is omitted in the formulas.
        # Property: T.dot((m, n), (n, )) has shape=(m, ), i.e. each row of the matrix is dotted with the vector
            # GRU
            z = sigmoid(ux_z * xp + wh_z * h_pre1)
            r = sigmoid(ux_r * xp + wh_r * h_pre1)
            c = tanh(ux_c * xp + wh_c * (r ⊙ h_pre1))
            h = z * h_pre1 + (1.0 - z) * c
        # Property: T.dot((n, ), (n, )) gives a scalar
            upq  = h_pre1 * (xp - xq)
            loss = log(1.0 + e^(-upq))
        """
        def recurrence(ixp_t, yxp_t, yxq_t,
                       itp_t, ytp_t, ytq_t,
                       iip_t, yip_t, yiq_t,
                       mask_t, hx_t_pre1, ht_t_pre1, hi_t_pre1):
            # Features and hidden states are shaped (batch_size, n_hidden) = (n, 20).
            z_rx = sigmoid(T.dot(uix[:2], ixp_t.T) + T.dot(whx[:2], hx_t_pre1.T) + bix[:2])   # shape=(2, 20, n)
            z_rt = sigmoid(T.dot(uit[:2], itp_t.T) + T.dot(wht[:2], ht_t_pre1.T) + bit[:2])
            z_ri = sigmoid(T.dot(uii[:2], iip_t.T) + T.dot(whi[:2], hi_t_pre1.T) + bii[:2])
            zx, rx = z_rx[0].T, z_rx[1].T                           # shape=(n, 20)
            zt, rt = z_rt[0].T, z_rt[1].T
            zi, ri = z_ri[0].T, z_ri[1].T
            cx = tanh(T.dot(uix[2], ixp_t.T) + T.dot(whx[2], (rx * hx_t_pre1).T) + bix[2])    # shape=(20, n)
            ct = tanh(T.dot(uit[2], itp_t.T) + T.dot(wht[2], (rt * ht_t_pre1).T) + bit[2])
            ci = tanh(T.dot(uii[2], iip_t.T) + T.dot(whi[2], (ri * hi_t_pre1).T) + bii[2])
            hx_t = (T.ones_like(zx) - zx) * hx_t_pre1 + zx * cx.T     # shape=(n, 20)
            ht_t = (T.ones_like(zt) - zt) * ht_t_pre1 + zt * ct.T
            hi_t = (T.ones_like(zi) - zi) * hi_t_pre1 + zi * ci.T
            # preference error
            upq_t = (
                T.sum(hx_t_pre1 * (yxp_t - yxq_t), axis=1) +
                T.sum(ht_t_pre1 * (ytp_t - ytq_t), axis=1) +
                T.sum(hi_t_pre1 * (yip_t - yiq_t), axis=1))     # shape=(n, )
            loss_t = T.log(sigmoid(upq_t))                      # shape=(n, )
            loss_t *= mask_t                                    # multiplying by the 0/1 mask only here, on the loss, is enough
            return [hx_t, ht_t, hi_t, loss_t]                         # shape=(n, 20), (n, )
        [hx, ht, hi, loss], _ = theano.scan(
            fn=recurrence,
            sequences=[ixps, yxps, yxqs,
                       itps, ytps, ytqs,
                       iips, yips, yiqs, mask],
            outputs_info=[h0x, h0t, h0i, None],
            n_steps=seq_length)     # loop only up to the longest valid position

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        seq_l2_sq = (
            T.sum([T.sum(par ** 2) for par in [uix, whx, yxps, yxqs, ixps,
                                               uit, wht, ytps, ytqs,
                                               uii, whi, yips, yiqs]]) +
            T.sum([T.sum(par ** 2) for par in [bix, bit, bii]]) / actual_batch_size)
        upq = T.sum(loss)
        seq_costs = (
            - upq / actual_batch_size +
            0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.paramsi)
        seq_updates = [(par, par - lr * gra) for par, gra in zip(self.paramsi, seq_grads)]
        update_yi = T.set_subtensor(uiq_yi, uiq_yi - lr * T.grad(seq_costs, self.vyi)[uiq_pqs])
        seq_updates.append((self.vyi, update_yi))   # appended in place to seq_updates
        # ----------------------------------------------------------------------------

        # After feeding the positive/negative sample sequences and other inputs, update the variables and return the loss.
        # givens supply the data
        start_end = T.ivector()
        self.seq_traini = theano.function(
            inputs=[start_end],
            outputs=-upq,
            updates=seq_updates,
            givens={
                pidxs: self.tra_buys_masks[start_end],       # type: TensorType(int32, matrix)
                qidxs: self.tra_buys_neg_masks[start_end],   # T.ivector() has type TensorType(int32, vector)
                tra_mask: self.tra_masks[start_end]})
Example #16
    def __theano_train__(self, n_in, n_hidden):
        """
        Run through the training sequence once during the training phase.
        """
        # self.alpha_lambda = ['alpha', 'lambda']
        ui, wh, bi = self.ui, self.wh, self.bi
        qx, rx, vc = self.qx, self.rx, self.vc
        winx = self.window_input

        tra_mask = T.imatrix()  # shape=(n, 157)
        actual_batch_size = tra_mask.shape[0]
        seq_length = T.max(T.sum(tra_mask,
                                 axis=1))  # use the max sequence length in the mini-batch as seq_length
        mask = tra_mask.T  # shape=(157, n)

        bi = T.alloc(self.bi, actual_batch_size, 3,
                     n_hidden)  # shape=(n, 3, 20); the original dims go last
        bi = bi.dimshuffle(1, 2, 0)  # shape=(3, 20, n)

        pidxs, qidxs = T.imatrix(), T.imatrix()
        xps, xqs = self.lt[pidxs], self.lt[
            qidxs]  # shape((actual_batch_size, seq_length, n_in))
        xps = xps.dimshuffle(
            1, 0, 2)  # shape=(seq_length, batch_size, n_in)=(157, n, 20)
        xqs = xqs.dimshuffle(1, 0, 2)

        pqs = T.concatenate((pidxs, qidxs))  # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_x = self.lt[uiq_pqs]  # features of the corresponding items
        """
        Given the positive/negative samples at time t and the hidden state at t-1, compute the current hidden state and the current loss. The time index t is omitted in the formulas.
        # Property: T.dot((m, n), (n, )) has shape=(m, ), i.e. each row of the matrix is dotted with the vector
            # GRU
            z = sigmoid(ux_z * xp + wh_z * h_pre1)
            r = sigmoid(ux_r * xp + wh_r * h_pre1)
            c = tanh(ux_c * xp + wh_c * (r ⊙ h_pre1))
            h = z * h_pre1 + (1.0 - z) * c
        # Property: T.dot((n, ), (n, )) gives a scalar
            upq  = h_pre1 * (xp - xq)
            loss = log(1.0 + e^(-upq))
        """
        def recurrence(xp_t, xp_t1, xq_t1, mask_t, h_t_pre1, cxs):
            # Features and hidden states are shaped (batch_size, n_hidden) = (n, 20).
            # (n, winx, 20) = T.concatenate((((n, winx-1, 20)), ((n, 1, 20))), axis=1)
            # context_x
            # Fixed-length window matrix: append xp_t at the bottom and drop the first row, so the shape stays the same.
            cxs = T.concatenate(
                (
                    cxs[:, 1:, :],  # shape=(n, winx-1, 20)
                    xp_t.dimshuffle(0, 'x', 1)),  # shape=(n, 1, 20)
                axis=1)  # shape=(n, winx, 20)
            exs = T.dot(tanh(T.dot(cxs, qx)), rx)  # shape=(n, winx, 1)
            exs = T.Rebroadcast((2, True))(exs)  # make axis=2 broadcastable so it can be dropped
            axs0 = softmax(exs.dimshuffle(
                0, 1))  # shape=(n, winx); drop one dim because softmax works on rows
            axs = axs0.dimshuffle(0, 1, 'x')  # shape=(n, winx, 1); add the dim back
            axs = T.Rebroadcast((2, True))(axs)  # make axis=2 broadcastable so the multiplication works
            # (n, 20) = T.sum((n, winx, 20) * (n, winx, 1), axis=1)
            xc = T.sum(cxs * axs, axis=1)  # shape=(n, 20)
            # gru unit
            z_r = sigmoid(
                T.dot(ui[:2], xp_t.T) + T.dot(vc[:2], xc.T) +
                T.dot(wh[:2], h_t_pre1.T) + bi[:2])
            z, r = z_r[0].T, z_r[1].T  # shape=(n, 20)
            c = tanh(
                T.dot(ui[2], xp_t.T) + T.dot(vc[2], xc.T) +
                T.dot(wh[2], (r * h_t_pre1).T) + bi[2])
            h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c.T  # shape=(n, 20)
            # loss
            upq_t = T.sum(
                h_t * (xp_t1 - xq_t1),
                axis=1)  # shape=(n, ); h(t) * (xp(t+1) - xq(t+1)); train on pos/neg samples
            loss_t = T.log(sigmoid(upq_t))
            loss_t *= mask_t  # multiplying by the 0/1 mask only here, on the loss, is enough
            return [h_t, cxs, loss_t]

        batch_h0 = T.alloc(self.h0, actual_batch_size, n_hidden)
        cumx = T.alloc(self.lt[-1], actual_batch_size, winx,
                       n_in)  # concatenate
        [_, _, loss], _ = theano.scan(  # h is h1..ht; loss is computed from h0..h(t-1) and x1..xt.
            fn=recurrence,
            sequences=[xps, xps[1:], xqs[1:], mask],
            outputs_info=[batch_h0, cumx, None],
            n_steps=seq_length - 1,
            truncate_gradient=-1)

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        seq_l2_sq = (
            T.sum([T.sum(par**2) for par in [xps, xqs, ui, wh, qx, rx, vc]]) +
            T.sum([T.sum(par**2) for par in [bi]]) / actual_batch_size)
        upq = T.sum(loss)
        seq_costs = (-upq / actual_batch_size + 0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra)
                       for par, gra in zip(self.params, seq_grads)]
        update_x = T.set_subtensor(
            uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_pqs])
        seq_updates.append((self.lt, update_x))  # appended in place to seq_updates
        # ----------------------------------------------------------------------------

        # givens supply the data
        start_end = T.ivector()
        self.seq_train = theano.function(
            inputs=[start_end],
            outputs=-upq,
            updates=seq_updates,
            givens={
                pidxs: self.
                tra_buys_masks[start_end],  # type: TensorType(int32, matrix)
                qidxs: self.tra_buys_neg_masks[start_end],
                tra_mask: self.tra_masks[start_end]
            })
Example #17
    def __theano_train__(self, n_hidden):
        """
        Run through the training sequence once during the training phase.
        """
        # self.alpha_lambda = ['alpha', 'lambda']
        ui, wh = self.ui, self.wh

        tra_mask = T.imatrix()                          # shape=(n, 157)
        actual_batch_size = tra_mask.shape[0]
        seq_length = T.max(T.sum(tra_mask, axis=1))     # use the max sequence length in the mini-batch as seq_length
        mask = tra_mask.T                               # shape=(157, n)

        h0 = T.alloc(self.h0, actual_batch_size, n_hidden)      # shape=(n, 20)
        bi = T.alloc(self.bi, actual_batch_size, 3, n_hidden)   # shape=(n, 3, 20); n_hidden goes last
        bi = bi.dimshuffle(1, 2, 0)                             # shape=(3, 20, n)

        # Input side: only the purchased items are fed in.
        pidxs, qidxs = T.imatrix(), T.imatrix()         # TensorType(int32, matrix)
        xps = self.lt[pidxs]       # shape((actual_batch_size, seq_length, n_in))
        xps = xps.dimshuffle(1, 0, 2)     # shape=(seq_length, batch_size, n_in)

        uiq_ps = Unique(False, False, False)(pidxs)  # deduplicate
        uiq_x = self.lt[uiq_ps]

        # Output side: h*w gives the score.
        yps, yqs = self.vy[pidxs], self.vy[qidxs]
        yps, yqs = yps.dimshuffle(1, 0, 2), yqs.dimshuffle(1, 0, 2)

        pqs = T.concatenate((pidxs, qidxs))         # concatenate first
        uiq_pqs = Unique(False, False, False)(pqs)  # then deduplicate
        uiq_y = self.vy[uiq_pqs]                    # features of the corresponding items

        """
        Given the positive/negative samples at time t and the hidden state at t-1, compute the current hidden state and the current loss. The time index t is omitted in the formulas.
        # Property: T.dot((m, n), (n, )) has shape=(m, ), i.e. each row of the matrix is dotted with the vector
            # GRU
            z = sigmoid(ux_z * xp + wh_z * h_pre1)
            r = sigmoid(ux_r * xp + wh_r * h_pre1)
            c = tanh(ux_c * xp + wh_c * (r ⊙ h_pre1))
            h = z * h_pre1 + (1.0 - z) * c
        # Property: T.dot((n, ), (n, )) gives a scalar
            upq  = h_pre1 * (xp - xq)
            loss = log(1.0 + e^(-upq))
        """
        def recurrence(xp_t, yp_t, yq_t, mask_t, h_t_pre1):
            # Features and hidden states are shaped (batch_size, n_hidden) = (n, 20).
            z_r = sigmoid(T.dot(ui[:2], xp_t.T) +
                          T.dot(wh[:2], h_t_pre1.T) + bi[:2])   # shape=(2, 20, n)
            z, r = z_r[0].T, z_r[1].T                           # shape=(n, 20)
            c = tanh(T.dot(ui[2], xp_t.T) +
                     T.dot(wh[2], (r * h_t_pre1).T) + bi[2])    # shape=(20, n)
            h_t = (T.ones_like(z) - z) * h_t_pre1 + z * c.T     # shape=(n, 20)
            # preference error
            upq_t = T.sum(h_t_pre1 * (yp_t - yq_t), axis=1)     # shape=(n, )
            loss_t = T.log(sigmoid(upq_t))                      # shape=(n, )
            loss_t *= mask_t                            # multiplying by the 0/1 mask only here, on the loss, is enough
            return [h_t, loss_t]                        # shape=(n, 20), (n, )
        [h, loss], _ = theano.scan(
            fn=recurrence,
            sequences=[xps, yps, yqs, mask],
            outputs_info=[h0, None],
            n_steps=seq_length)     # iterate only up to the longest valid position

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        seq_l2_sq = (
            T.sum([T.sum(par ** 2) for par in [xps, ui, wh, yps, yqs]]) +
            T.sum([T.sum(par ** 2) for par in [bi]]) / actual_batch_size)
        upq = T.sum(loss)
        seq_costs = (
            - upq / actual_batch_size +
            0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra) for par, gra in zip(self.params, seq_grads)]
        update_x = T.set_subtensor(uiq_x, uiq_x - lr * T.grad(seq_costs, self.lt)[uiq_ps])
        update_y = T.set_subtensor(uiq_y, uiq_y - lr * T.grad(seq_costs, self.vy)[uiq_pqs])
        seq_updates.append((self.lt, update_x))     # append the embedding-row update directly into seq_updates
        seq_updates.append((self.vy, update_y))
        # ----------------------------------------------------------------------------

        # After feeding the positive/negative sample sequences and the other parameters, update the variables and return the loss.
        # feed the data through givens
        start_end = T.ivector()     # int32
        self.seq_train = theano.function(
            inputs=[start_end],
            outputs=-upq,
            updates=seq_updates,
            givens={
                pidxs: self.tra_buys_masks[start_end],       # type: TensorType(int32, matrix)
                qidxs: self.tra_buys_neg_masks[start_end],   # T.ivector() is TensorType(int32, vector)
                tra_mask: self.tra_masks[start_end]})
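The givens mechanism used above keeps the full training matrices on the device as shared variables and feeds only an index vector per call. A self-contained sketch of that mechanism (the names data, rows and take_rows are illustrative, not from the original class):

import numpy as np
import theano
import theano.tensor as T

data = theano.shared(np.arange(20, dtype='int32').reshape(5, 4))  # full training matrix
rows = T.imatrix()         # symbolic placeholder, never fed directly
start_end = T.ivector()    # row indices of the mini-batch

row_sum = T.sum(rows)      # toy computation over the mini-batch
take_rows = theano.function(
    inputs=[start_end],
    outputs=row_sum,
    givens={rows: data[start_end]})   # rows is substituted by a slice of the shared matrix

print(take_rows(np.array([0, 2], dtype='int32')))   # sums rows 0 and 2 of data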
Example #18
0
    def __theano_train__(self):
        """
        Run one pass over the training sequences (training phase).
        """
        # user/item inputs
        uidx, pqidx, cidx = T.iscalar(), T.ivector(), T.ivector()
        urx = self.ux[uidx]    # shape=(n_in, ), global user vector
        xpq = self.lx[pqidx]   # shape=(2, n_in), guaranteed to be two different items; positive/negative samples at t+1
        cpt = self.lc[cidx]    # shape=(set_size, d), item-set input at time t; unique before updating
        cpq = self.lc[pqidx]   # shape=(2, d), positive/negative samples at t+1 (this method keeps two item representations)
        # an item set is fed at each step, so deduplicate it
        cpqs = T.concatenate((cidx, pqidx))          # concatenate first
        uiq_cps = Unique(False, False, False)(cpqs)  # then deduplicate
        uiq_c = self.lc[uiq_cps]                     # embeddings of those items
        # weight matrices. [Note: unified convention, weight * variable]
        lay = self.layer
        wru, wrc, wrl = self.wru, self.wrc, self.wrl  # ResNet
        wa1, wa2, wa3 = self.wa1, self.wa2, self.wa3  # first-order attention
        wb1, wb2 = self.wb1, self.wb2                 # second-order attention
        """
        Given the positive/negative samples at step t, compute the current loss and update the user / positive / negative samples (the index t is omitted below).
        # Property: T.dot((n, ), (n, )) yields a scalar
            uij  = user * (xp - xq)
            upq = log(sigmoid(uij))
        """
        # ==============================================================================================================
        # score 1
        uij_x = T.dot(urx, xpq[0] - xpq[1])

        # ==============================================================================================================
        # score 2, the ResNet part.
        # -----------------------------------------------------------
        # # check: vector + matrix, (5, ) + (3, 5) -> (3, 5)
        # rang = 0.5
        # wi = uniform(-rang, rang, (5, 5))    # d * d
        # ii = uniform(-rang, rang, (3, 5))    # 3 itm * d
        # wu = uniform(-rang, rang, (5, 5))    # d * d
        # uu = uniform(-rang, rang, (5, ))     # (d, )
        # import numpy as np
        # a = np.dot(wu, uu)          # (5, )
        # b = np.dot(ii, wi)          # (3, 5)
        # c = np.dot(wi.T, ii.T).T    # b = c
        # d = a + b   # (3, 5), a is broadcast onto every row of b
        # -----------------------------------------------------------
        # score 2   # attention at layer 0
        e0 = T.dot(tanh(T.dot(wa2, urx) + T.dot(cpt, wa3)), wa1)  # (size, )
        a0 = hsoftmax(e0)  # (size, )
        c0 = T.sum(cpt * a0.dimshuffle(0, 'x'), axis=0)  # (d, )

        # score 2   # attention inside the ResNet
        def recurrence1(wrut, wrct, urx_pre1, cpt_pre1):
            # ResNet update
            ur_t = relu(T.dot(wrut, urx_pre1) + urx_pre1)  # (d, )
            cp_t = relu(T.dot(cpt_pre1, wrct) + cpt_pre1)  # (size, d)
            # attention produces the context vector
            e_t = T.dot(tanh(T.dot(wa2, ur_t) + T.dot(cp_t, wa3)), wa1)
            a_t = hsoftmax(e_t)  # (size, )
            c_t = T.sum(cp_t * a_t.dimshuffle(0, 'x'), axis=0)  # (d, )

            return [ur_t, cp_t, c_t]

        [urs, cps, cs], _ = theano.scan(
            fn=recurrence1,
            sequences=[wru, wrc],  # bru, brc
            outputs_info=[urx, cpt, None],
            n_steps=lay,
            truncate_gradient=-1)
        # score 2   # second-order attention
        c0 = c0.dimshuffle('x', 0)  # (1, d)
        context = T.concatenate((c0, cs), axis=0)  # shape=(layer+1, d)
        e1 = T.dot(tanh(T.dot(context, wb2)), wb1)  # shape=(layer+1, )
        a1 = hsoftmax(e1)
        c1 = T.sum(context * a1.dimshuffle(0, 'x'), axis=0)  # shape=(d, )
        # score 2
        uij_c = T.dot(c1, cpq[0] - cpq[1])

        # ==============================================================================================================
        # score 3   # run a new ResNet on c1, the output of the ResNet above
        def recurrence2(wrlt, h_pre1):
            # ResNet update
            hl_t = relu(T.dot(wrlt, h_pre1) + h_pre1)  # (d, )
            return hl_t

        hls, _ = theano.scan(fn=recurrence2,
                             sequences=wrl,
                             outputs_info=c1,
                             n_steps=lay,
                             truncate_gradient=-1)
        # score 3
        uij_l = T.dot(hls[-1], cpq[0] - cpq[1])

        # ==============================================================================================================
        # total score
        loss = T.log(sigmoid(uij_x + uij_c + uij_l))

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, L2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        l2_sqr = (T.sum([
            T.sum(par**2) for par in
            [urx, xpq, cpt, cpq, wru, wrc, wrl, wa1, wa2, wa3, wb1, wb2]
        ]))
        upq = loss
        costs = (-upq + 0.5 * l2 * l2_sqr)
        # self.params
        grads = T.grad(costs, self.params)
        updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, grads)]
        # 1 user and 2 items: this kind of update is the fastest, differentiating w.r.t. the sub-tensor rather than the whole parameter.
        subs_pars_idxs = [[urx, self.ux, uidx], [xpq, self.lx, pqidx],
                          [uiq_c, self.lc, uiq_cps]]
        tmp = [(par, T.set_subtensor(sub, sub - lr * T.grad(costs, par)[idx]))
               for sub, par, idx in subs_pars_idxs]
        updates.extend(tmp)
        # ----------------------------------------------------------------------------

        # After feeding the user, the positive/negative samples and the other parameters, update the variables and return the loss.
        self.train = theano.function(inputs=[uidx, pqidx, cidx],
                                     outputs=-upq,
                                     updates=updates,
                                     on_unused_input='warning')
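The layer-0 attention above reduces the item set cpt to a single context vector c0. The following NumPy shape check mirrors that step with toy sizes; hsoftmax is replaced by a plain softmax, which is an assumption about its behaviour, and all names and values are illustrative.

import numpy as np

d, set_size = 5, 3
rng = np.random.RandomState(0)
urx = rng.uniform(-0.5, 0.5, (d,))           # user vector
cpt = rng.uniform(-0.5, 0.5, (set_size, d))  # item set at time t
wa1 = rng.uniform(-0.5, 0.5, (d,))
wa2 = rng.uniform(-0.5, 0.5, (d, d))
wa3 = rng.uniform(-0.5, 0.5, (d, d))

e0 = np.dot(np.tanh(np.dot(wa2, urx) + np.dot(cpt, wa3)), wa1)  # (set_size, )
a0 = np.exp(e0) / np.exp(e0).sum()                              # attention weights, sum to 1
c0 = (cpt * a0[:, None]).sum(axis=0)                            # context vector, (d, )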
Example #19
0
    def __theano_train__(self):
        """
        Run one pass over the training sequences (training phase).
        """
        # self.alpha_lambda = ['alpha', 'lambda']
        # user/item inputs
        uidxs = T.ivector()   # n users
        pqidxs = T.imatrix()  # (2, n): row 0 holds the n positive samples, row 1 the negatives
        cidxs = T.imatrix()   # (n, set_size)
        mask = T.ivector()    # mask for the current step, marking which users' actions are valid
        urxs = self.ux[uidxs]   # shape=(n, d)
        xpqs = self.lx[pqidxs]  # shape=(2, n, d)
        cpts = self.lc[cidxs]   # shape=(n, set_size, d)
        cpqs = self.lc[pqidxs]  # shape=(2, n, d)
        actual_batch_size = mask.shape[0]
        # an item set is fed at each step, so deduplicate it
        ncpqs = T.concatenate((cidxs, pqidxs.T), axis=1)  # concatenate first, shape=(n, set_size+2)
        uiq_cps = Unique(False, False, False)(ncpqs)      # then deduplicate
        uiq_c = self.lc[uiq_cps]                          # embeddings of those items
        # weight matrices. [Note: unified convention, weight * variable]
        lay = self.layer
        wru, wrc, wrl = self.wru, self.wrc, self.wrl  # ResNet
        wa1, wa2, wa3 = self.wa1, self.wa2, self.wa3  # first-order attention
        wb1, wb2 = self.wb1, self.wb2                 # second-order attention
        """
        Given the positive/negative samples at step t, compute the current loss and update the user / positive / negative samples (the index t is omitted below).
        # Property: T.dot((n, ), (n, )) yields a scalar
            uij  = user * (xp - xq)
            upq = log(sigmoid(uij))
        """
        # ==============================================================================================================
        # score 1
        uij_x = T.sum(urxs * (xpqs[0] - xpqs[1]), axis=1)  # shape=(n, )

        # ==============================================================================================================
        # score 2   # attention at layer 0, producing a (batch_size, d) attention vector
        urx_emb = T.dot(wa2, urxs.T).T.dimshuffle(0, 'x', 1)  # shape=(batch_size, 1, d)
        e0 = T.dot(tanh(urx_emb + T.dot(cpts, wa3)), wa1)     # shape=(batch_size, set_size)
        a0 = softmax(e0)                                       # (batch_size, set_size)
        c0 = T.sum(cpts * a0.dimshuffle(0, 1, 'x'), axis=1)   # shape=(batch_size, d), broadcast

        # score 2   # attention inside the ResNet
        def recurrence1(wrut, wrct, urx_pre1, cpt_pre1):
            # ResNet update
            ur_t = relu(T.dot(wrut, urx_pre1.T).T + urx_pre1)  # (batch_size, d)
            cp_t = relu(T.dot(cpt_pre1, wrct) + cpt_pre1)      # (batch_size, set_size, d)
            # attention produces the context vector
            ur_t_emb = T.dot(wa2, ur_t.T).T.dimshuffle(0, 'x', 1)
            e_t = T.dot(tanh(ur_t_emb + T.dot(cp_t, wa3)), wa1)  # shape=(batch_size, set_size)
            a_t = softmax(e_t)
            c_t = T.sum(cp_t * a_t.dimshuffle(0, 1, 'x'), axis=1)

            return [ur_t, cp_t, c_t]  # (batch_size, d), (batch_size, set_size, d), (batch_size, d)

        [urs, cps, cs], _ = theano.scan(  # cs.shape = (layer, batch_size, d)
            fn=recurrence1,
            sequences=[wru, wrc],
            outputs_info=[urxs, cpts, None],
            n_steps=lay,
            truncate_gradient=-1)
        # score 2   # second-order attention
        c0 = c0.dimshuffle(0, 'x', 1)               # (batch_size, 1, d)
        cs = cs.dimshuffle(1, 0, 2)                 # (batch_size, layer, d)
        context = T.concatenate((c0, cs), axis=1)   # (batch_size, layer+1, d)
        e1 = T.dot(tanh(T.dot(context, wb2)), wb1)  # shape=(batch_size, layer+1)
        a1 = softmax(e1)
        c1 = T.sum(context * a1.dimshuffle(0, 1, 'x'), axis=1)  # shape=(batch_size, d)
        # score 2
        uij_c = T.sum(c1 * (cpqs[0] - cpqs[1]), axis=1)  # shape=(n, )

        # ==============================================================================================================
        # score 3   # run a new ResNet on c1, the output of the ResNet above
        def recurrence2(wrlt, h_pre1):
            # ResNet update
            hl_t = relu(T.dot(wrlt, h_pre1.T).T + h_pre1)  # shape=(batch_size, d)
            return hl_t

        hls, _ = theano.scan(fn=recurrence2,
                             sequences=wrl,
                             outputs_info=c1,
                             n_steps=lay,
                             truncate_gradient=-1)
        # score 3
        uij_l = T.sum(hls[-1] * (cpqs[0] - cpqs[1]), axis=1)  # shape=(n, )

        # ==============================================================================================================
        # total score
        loss = T.log(sigmoid(uij_x + uij_c + uij_l))  # shape=(n, )
        loss *= mask  # multiplying the loss by the 0/1 mask here is sufficient

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, L2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        l2_sqr = (T.sum([
            T.sum(par**2) for par in
            [urxs, xpqs, cpts, cpqs, wru, wrc, wrl, wa1, wa2, wa3, wb1, wb2]
        ]))
        upq = T.sum(loss) / actual_batch_size
        costs = (-upq + 0.5 * l2 * l2_sqr)
        # self.params
        grads = T.grad(costs, self.params)
        updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, grads)]
        # 1 user and 2 items: this kind of update is the fastest, differentiating w.r.t. the sub-tensor rather than the whole parameter.
        subs_pars_idxs = [[urxs, self.ux, uidxs], [xpqs, self.lx, pqidxs],
                          [uiq_c, self.lc, uiq_cps]]
        tmp = [(par, T.set_subtensor(sub, sub - lr * T.grad(costs, par)[idx]))
               for sub, par, idx in subs_pars_idxs]
        updates.extend(tmp)
        # ----------------------------------------------------------------------------

        # After feeding the users, the positive/negative samples and the other parameters, update the variables and return the loss.
        self.train = theano.function(inputs=[uidxs, pqidxs, cidxs, mask],
                                     outputs=-upq,
                                     updates=updates,
                                     on_unused_input='warning')
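In this batched variant the 0/1 mask removes padded users from the objective without any branching. A toy NumPy check (the numbers are arbitrary):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

scores = np.array([2.0, -1.0, 0.5])      # uij_x + uij_c + uij_l per user
mask = np.array([1, 1, 0])               # the third user is padding at this step
loss = np.log(sigmoid(scores)) * mask    # padded entries contribute exactly 0
upq = loss.sum() / len(mask)             # matches upq = T.sum(loss) / actual_batch_size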
Example #20
0
    def __theano_train__(self, n_in, n_hidden):
        """
        Run one pass over the training sequences (training phase).
        """

        uidx = T.iscalar()
        msk = T.imatrix()
        dist_pos = T.fmatrix()
        dist_neg = T.fmatrix()

        seq_n, seq_len = msk.shape  # 315 x 315
        tu = self.t[uidx]  # (20, )
        xpidxs = self.tra_buys_masks[uidx]  # (1264, )
        xqidxs = self.tra_buys_neg_masks[uidx]  # (1264, )
        gps = self.g[xpidxs[:seq_len]]  # (315, 20)
        hps = self.h[xpidxs[1:seq_len + 1]]  # (315, 20)
        hqs = self.h[xqidxs[1:seq_len + 1]]  # (315, 20)
        zps, zqs = self.z[xpidxs[1:seq_len + 1]], self.z[xqidxs[1:seq_len + 1]]

        guiq_pqs = Unique(False, False, False)(xpidxs)
        uiq_g = self.g[guiq_pqs]

        pqs = T.concatenate((xpidxs, xqidxs))
        uiq_pqs = Unique(False, False, False)(pqs)
        uiq_h = self.h[uiq_pqs]
        uiq_z = self.z[uiq_pqs]

        t_z = T.sum(tu * zps, 1)  # (315, )
        n_h = T.sum(msk, 1)  # (315, )
        expand_g = gps.reshape((1, seq_len, n_hidden)) * msk.reshape(
            (seq_n, seq_len, 1))  # (315, 315, 20)
        sp = T.sum(
            T.sum(expand_g * hps.reshape(
                (seq_n, 1, n_hidden)), 2) * self.f_d(dist_pos), 1
        ) / n_h + t_z  # [(315, 315) * (315, 315)] -> (315, ) / (315, ) + (315, )
        sq = T.sum(
            T.sum(expand_g * hqs.reshape(
                (seq_n, 1, n_hidden)), 2) * self.f_d(dist_neg), 1) / n_h + t_z

        # sp = T.sum(T.sum(expand_g * hps.reshape((seq_n, 1, n_hidden)), 2), 1) / n_h + t_z
        # sq = T.sum(T.sum(expand_g * hqs.reshape((seq_n, 1, n_hidden)), 2), 1) / n_h + t_z

        loss = T.sum(T.log(sigmoid(sp - sq)))
        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        seq_l2_sq = T.sum([T.sum(par**2) for par in [gps, hps, hqs, zps, zqs]])
        seq_costs = (-loss + 0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra)
                       for par, gra in zip(self.params, seq_grads)]
        update_g = T.set_subtensor(
            uiq_g, uiq_g - lr * T.grad(seq_costs, self.g)[guiq_pqs])
        update_h = T.set_subtensor(
            uiq_h, uiq_h - lr * T.grad(seq_costs, self.h)[uiq_pqs])
        update_t = T.set_subtensor(tu,
                                   tu - lr * T.grad(seq_costs, self.t)[uidx])
        update_z = T.set_subtensor(
            uiq_z, uiq_z - lr * T.grad(seq_costs, self.z)[uiq_pqs])
        seq_updates.append((self.g, update_g))
        seq_updates.append((self.h, update_h))
        seq_updates.append((self.t, update_t))
        seq_updates.append((self.z, update_z))
        # ----------------------------------------------------------------------------

        # After feeding the positive/negative sample sequences and the other parameters, update the variables and return the loss.
        self.seq_train = theano.function(
            inputs=[uidx, dist_pos, dist_neg, msk],
            outputs=loss,
            updates=seq_updates)
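The score in this last example couples every target step with every masked history step. A small NumPy sketch of that broadcast with toy sizes; the lower-triangular validity mask is an assumption, and the distance weighting f_d used in the original is omitted here.

import numpy as np

seq_n = seq_len = 4       # toy sizes; the shape comments above use 315
n_hidden = 6
rng = np.random.RandomState(1)
gps = rng.randn(seq_len, n_hidden)    # history item embeddings g
hps = rng.randn(seq_n, n_hidden)      # target item embeddings h
msk = np.tril(np.ones((seq_n, seq_len), dtype='int32'))  # assumed 0/1 validity mask

expand_g = gps.reshape((1, seq_len, n_hidden)) * msk.reshape((seq_n, seq_len, 1))
inner = (expand_g * hps.reshape((seq_n, 1, n_hidden))).sum(axis=2)  # (seq_n, seq_len)
n_h = msk.sum(axis=1)                       # number of valid history steps per target
sp_without_dist = inner.sum(axis=1) / n_h   # (seq_n, ), score before adding t_z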