def sample_sticky_only(model_matrix, sample_kwargs=None):

    # load the data
    x_sc = model_matrix['x_sc']
    subj_idx = model_matrix['subj_idx']
    y = model_matrix['y']
    n_subj = model_matrix['n_subj']

    n, d = model_matrix['x_mu_kal'].shape  # trial and option counts (any predictor matrix gives the same shape)
    if sample_kwargs is None:
        sample_kwargs = dict(draws=2000,
                             njobs=2,
                             tune=2000,
                             init='advi+adapt_diag')

    with pm.Model() as hier_sticky:

        mu_1 = pm.Normal('mu_beta_stick', mu=0., sd=100.)
        sigma_1 = pm.HalfCauchy('sigma_stick', beta=100)

        b_1 = pm.Normal('beta_sticky', mu=mu_1, sd=sigma_1, shape=n_subj)

        rho = tt.tile(tt.reshape(b_1[subj_idx], (n, 1)), d) * x_sc

        p_hat = softmax(rho)

        # Data likelihood
        yl = pm.Categorical('yl', p=p_hat, observed=y)

        # inference!
        trace_sticky = pm.sample(**sample_kwargs)

    return hier_sticky, trace_sticky
Example #2
def forward(x_t, c_tm1, s_tm1, Wxi, Wsi, Wxf, Wsf, Wxo, Wso, Wxg, Wsg,
            Wsy, bi, bf, bo, bg, by):
    i = sigmoid(T.dot(x_t, Wxi) + T.dot(s_tm1, Wsi) + bi)
    f = sigmoid(T.dot(x_t, Wxf) + T.dot(s_tm1, Wsf) + bf)
    o = sigmoid(T.dot(x_t, Wxo) + T.dot(s_tm1, Wso) + bo)
    g = tanh(T.dot(x_t, Wxg) + T.dot(s_tm1, Wsg) + bg)
    c = c_tm1 * f + g * i
    s = tanh(c) * o
    y = softmax(T.dot(s, Wsy) + by)
    return [c, s, y]
Example #3
def forward(x_t, c_tm1, s_tm1, Wx, Ws, Wy, b, by):
    preact = T.dot(x_t, Wx) + T.dot(s_tm1, Ws) + b
    i = sigmoid(_slice(preact, 0))
    f = sigmoid(_slice(preact, 1))
    o = sigmoid(_slice(preact, 2))
    g = tanh(_slice(preact, 3))
    c = c_tm1 * f + g * i
    s = tanh(c) * o
    y = softmax(T.dot(s, Wy) + by)
    return [c, s, y]
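Examples #2 and #3 assume a _slice helper (plus elementwise sigmoid/tanh aliases) that carves the fused pre-activation into per-gate blocks. A minimal sketch of what such helpers might look like, assuming a 2-D (batch, 4 * hidden) pre-activation and a hidden size closed over from the enclosing scope; the definitions below are illustrative, not from the original source:

import theano.tensor as T

# Assumed helpers for the fused-gate LSTM step above (hypothetical definitions).
sigmoid = T.nnet.sigmoid
tanh = T.tanh
hidden_dim = 128  # assumed width of each gate block

def _slice(preact, idx, dim=hidden_dim):
    # Return the idx-th dim-wide column block of the fused pre-activation,
    # i.e. the pre-activation for a single gate.
    return preact[:, idx * dim:(idx + 1) * dim]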
Example #4
def forward(x_t, h_tm1, Wx, Wh, bh, am, ax, ah, Wy, by):
    # h_t starts at the multiplicative identity; the activated slices of the
    # pre-activation are multiplied together, one per order (self.order, act
    # and _slice come from the enclosing class, which is not shown here).
    h_t = 1
    preact = am * T.dot(x_t, Wx) * T.dot(h_tm1, Wh) \
        + ax * T.dot(x_t, Wx) \
        + ah * T.dot(h_tm1, Wh) \
        + bh
    for i in range(self.order):
        h_t = h_t * act(_slice(preact, i))
    y_t = softmax(T.dot(h_t, Wy) + by)
    return h_t, y_t, preact
def sample_heir_rbf_kal(model_matrix, sample_kwargs=None):

    # load the data
    x_mu_rbf = model_matrix['x_mu_rbf']
    x_sd_rbf = model_matrix['x_sd_rbf']
    x_mu_kal = model_matrix['x_mu_kal']
    x_sd_kal = model_matrix['x_sd_kal']
    x_sc = model_matrix['x_sc']
    subj_idx = model_matrix['subj_idx']
    y = model_matrix['y']
    n_subj = model_matrix['n_subj']

    n, d = x_mu_rbf.shape
    if sample_kwargs is None:
        sample_kwargs = dict(draws=2000,
                             njobs=2,
                             tune=2000,
                             init='advi+adapt_diag')

    with pm.Model() as hier_rbf_kal:

        mu_1 = pm.Normal('mu_beta_rbf_mean', mu=0., sd=100.)
        mu_2 = pm.Normal('mu_beta_rbf_stdv', mu=0., sd=100.)
        mu_3 = pm.Normal('mu_beta_kal_mean', mu=0., sd=100.)
        mu_4 = pm.Normal('mu_beta_kal_stdv', mu=0., sd=100.)
        mu_5 = pm.Normal('mu_beta_stick', mu=0., sd=100.)

        sigma_1 = pm.HalfCauchy('sigma_rbf_means', beta=100)
        sigma_2 = pm.HalfCauchy('sigma_rbf_stdev', beta=100)
        sigma_3 = pm.HalfCauchy('sigma_kal_means', beta=100)
        sigma_4 = pm.HalfCauchy('sigma_kal_stdev', beta=100)
        sigma_5 = pm.HalfCauchy('sigma_stick', beta=100)

        b_1 = pm.Normal('beta_rbf_mu', mu=mu_1, sd=sigma_1, shape=n_subj)
        b_2 = pm.Normal('beta_rbf_std', mu=mu_2, sd=sigma_2, shape=n_subj)
        b_3 = pm.Normal('beta_kal_mu', mu=mu_3, sd=sigma_3, shape=n_subj)
        b_4 = pm.Normal('beta_kal_std', mu=mu_4, sd=sigma_4, shape=n_subj)
        b_5 = pm.Normal('beta_sc', mu=mu_5, sd=sigma_5, shape=n_subj)

        rho = \
            tt.tile(tt.reshape(b_1[subj_idx], (n, 1)), d) * x_mu_rbf + \
            tt.tile(tt.reshape(b_2[subj_idx], (n, 1)), d) * x_sd_rbf + \
            tt.tile(tt.reshape(b_3[subj_idx], (n, 1)), d) * x_mu_kal + \
            tt.tile(tt.reshape(b_4[subj_idx], (n, 1)), d) * x_sd_kal + \
            tt.tile(tt.reshape(b_5[subj_idx], (n, 1)), d) * x_sc

        p_hat = softmax(rho)

        # Data likelihood
        yl = pm.Categorical('yl', p=p_hat, observed=y)

        # inference!
        trace_gprbf_kal = pm.sample(**sample_kwargs)

    return hier_rbf_kal, trace_gprbf_kal
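A hedged usage sketch for the sampler above. The model_matrix dict and its keys are taken from the loads at the top of the function; building it is not shown in these snippets, and the draw counts and summary call below assume the older PyMC3 API used throughout this page:

# hypothetical call; model_matrix construction is not part of these examples
model, trace = sample_heir_rbf_kal(model_matrix,
                                   sample_kwargs=dict(draws=500, tune=500, njobs=1,
                                                      init='advi+adapt_diag'))
pm.summary(trace, varnames=['mu_beta_rbf_mean', 'mu_beta_kal_mean', 'mu_beta_stick'])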
Example #6
        def recurrence1(wrut, wrct, urx_pre1, cpt_pre1):
            # ResNet update
            ur_t = relu(T.dot(wrut, urx_pre1.T).T +
                        urx_pre1)  # (batch_size, d)
            cp_t = relu(T.dot(cpt_pre1, wrct) +
                        cpt_pre1)  # (batch_size, set_size, d)
            # attention: compute the context vector
            ur_t_emb = T.dot(wa2, ur_t.T).T.dimshuffle(0, 'x', 1)
            e_t = T.dot(tanh(ur_t_emb + T.dot(cp_t, wa3)),
                        wa1)  # shape=(batch_size, set_size)
            a_t = softmax(e_t)
            c_t = T.sum(cp_t * a_t.dimshuffle(0, 1, 'x'), axis=1)

            return [
                ur_t, cp_t, c_t
            ]  # (batch_size, d), (batch_size, set_size, d), (batch_size, d)
Example #7
def forward_propagation(self, x):
    # The total number of time steps
    T = len(x)
    # During forward propagation we save all hidden states in s because we need them later.
    # We add one additional element for the initial hidden state, which we set to 0.
    s = np.zeros((T + 1, self.hidden_dim))
    s[-1] = np.zeros(self.hidden_dim)
    # The outputs at each time step. Again, we save them for later.
    o = np.zeros((T, self.word_dim))
    # For each time step...
    for t in np.arange(T):
        # Note that we are indexing U by x[t]. This is the same as multiplying U with a one-hot vector.
        s[t] = np.tanh(self.U[:, x[t]] + self.W.dot(s[t - 1]))
        print(self.V.dot(s[t]))
        o[t] = softmax(self.V.dot(s[t]))
    return [o, s]
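The forward pass above relies on a NumPy-level softmax helper applied to a 1-D activation vector (the symbolic Theano softmax imported in other examples would not work here). A minimal, numerically stable sketch of such a helper, offered as an assumption rather than the original author's definition:

import numpy as np

def softmax(x):
    # Shift by the max before exponentiating to avoid overflow.
    e = np.exp(x - np.max(x))
    return e / e.sum()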
Example #8
def test_softmax_optimizations():
    from theano.tensor.nnet.nnet import softmax, crossentropy_categorical_1hot
    x = tensor.fmatrix('x')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot

    xe = op(x, one_of_n)

    env = theano.gof.Env([x, one_of_n], [op(softmax(x), one_of_n)])
    assert env.outputs[0].owner.op == op

    mode_with_gpu.optimizer.optimize(env)

    assert str(env.outputs[0].owner.op) == 'OutputGuard'
    assert env.outputs[0].owner.inputs[0].owner.op == cuda.host_from_gpu
    assert env.outputs[0].owner.inputs[0].owner.inputs[
        0].owner.op == cuda.nnet.gpu_crossentropy_softmax_argmax_1hot_with_bias
def sample_heir_scram_kal(model_matrix, sample_kwargs=None):

    # load the data + scramble Kalman filter data
    x_mu_kal_scrambled = np.random.permutation(model_matrix['x_mu_kal'])
    x_sd_kal_scrambled = np.random.permutation(model_matrix['x_sd_kal'])
    x_sc = model_matrix['x_sc']
    subj_idx = model_matrix['subj_idx']
    y = model_matrix['y']
    n_subj = model_matrix['n_subj']

    n, d = x_mu_kal_scrambled.shape
    if sample_kwargs is None:
        sample_kwargs = dict(draws=2000,
                             njobs=2,
                             tune=2000,
                             init='advi+adapt_diag')

    with pm.Model() as hier_kal_scrambled:

        mu_1 = pm.Normal('mu_beta_kal_sc_mean', mu=0., sd=100.)
        mu_2 = pm.Normal('mu_beta_kal_sc_stdv', mu=0., sd=100.)
        mu_3 = pm.Normal('mu_beta_stick', mu=0., sd=100.)

        sigma_1 = pm.HalfCauchy('sigma_kal_sc_means', beta=100)
        sigma_2 = pm.HalfCauchy('sigma_kal_sc_stdev', beta=100)
        sigma_3 = pm.HalfCauchy('sigma_stick', beta=100)

        b_1 = pm.Normal('beta_kal_sc_mu', mu=mu_1, sd=sigma_1, shape=n_subj)
        b_2 = pm.Normal('beta_kal_sc_std', mu=mu_2, sd=sigma_2, shape=n_subj)
        b_3 = pm.Normal('beta_sc', mu=mu_3, sd=sigma_3, shape=n_subj)

        rho = \
            tt.tile(tt.reshape(b_1[subj_idx], (n, 1)), d) * x_mu_kal_scrambled + \
            tt.tile(tt.reshape(b_2[subj_idx], (n, 1)), d) * x_sd_kal_scrambled + \
            tt.tile(tt.reshape(b_3[subj_idx], (n, 1)), d) * x_sc

        p_hat = softmax(rho)

        # Data likelihood
        yl = pm.Categorical('yl', p=p_hat, observed=y)

        # inference!
        trace_kal_scram = pm.sample(**sample_kwargs)

    return hier_kal_scrambled, trace_kal_scram
Example #10
def test_softmax_optimizations():
    from theano.tensor.nnet.nnet import softmax, crossentropy_categorical_1hot
    x = tensor.fmatrix('x')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot

    xe = op(x, one_of_n)

    env = theano.gof.Env(
        [x, one_of_n],
        [op(softmax(x), one_of_n)])
    assert env.outputs[0].owner.op == op

    mode_with_gpu.optimizer.optimize(env)

    assert str(env.outputs[0].owner.op) == 'OutputGuard'
    assert env.outputs[0].owner.inputs[0].owner.op == cuda.host_from_gpu
    assert env.outputs[0].owner.inputs[0].owner.inputs[0].owner.op == cuda.nnet.gpu_crossentropy_softmax_argmax_1hot_with_bias
Example #11
def test_softmax_optimizations():
    from theano.tensor.nnet.nnet import softmax, crossentropy_categorical_1hot

    x = tensor.fmatrix("x")
    one_of_n = tensor.lvector("one_of_n")
    op = crossentropy_categorical_1hot

    xe = op(x, one_of_n)

    fgraph = theano.gof.FunctionGraph([x, one_of_n], [op(softmax(x), one_of_n)])
    assert fgraph.outputs[0].owner.op == op

    mode_with_gpu.optimizer.optimize(fgraph)

    assert str(fgraph.outputs[0].owner.op) == "OutputGuard"
    assert fgraph.outputs[0].owner.inputs[0].owner.op == cuda.host_from_gpu
    assert (
        fgraph.outputs[0].owner.inputs[0].owner.inputs[0].owner.op
        == cuda.nnet.gpu_crossentropy_softmax_argmax_1hot_with_bias
    )
Example #12
    def __init__(self, rng, input, n_in, n_out, W=None, b=None, layer_index=0):

        self.layername = 'Softmax' + str(layer_index)
        self.input = input

        # W
        if W is None:
            W_bound = numpy.sqrt(6. / (n_in + n_out))
            self.W = theano.shared(numpy.asarray(rng.uniform(low=-W_bound,
                                                             high=W_bound,
                                                             size=(n_in,
                                                                   n_out)),
                                                 dtype=theano.config.floatX),
                                   borrow=True)
        else:
            self.W = theano.shared(value=W.astype(theano.config.floatX),
                                   borrow=True)
        self.W.name = self.layername + '#W'

        # b
        if b is None:
            self.b = theano.shared(numpy.zeros((n_out, ),
                                               dtype=theano.config.floatX),
                                   borrow=True)
        else:
            self.b = theano.shared(value=b.astype(theano.config.floatX),
                                   borrow=True)
        self.b.name = self.layername + '#b'

        output = relu(T.dot(input, self.W) + self.b)

        self.softmax_output = softmax(output)

        self.pred = self.softmax_output.argmax(axis=1)

        # store parameters of this layer
        self.params = [self.W, self.b]
Example #13
    def __theano_train__(self):
        """
        训练阶段跑一遍训练序列
        """
        # self.alpha_lambda = ['alpha', 'lambda']
        # 各种usr_itm输入
        uidxs = T.ivector()  # n个用户
        pqidxs = T.imatrix()  # (2, n) 0行: n个正样本。1行: 负样本s。
        cidxs = T.imatrix()  # (n, set_size)
        mask = T.ivector()  # 当前时刻的mask,标明哪些用户的行为有效/无效。
        urxs = self.ux[uidxs]  # shape=(n, d)
        xpqs = self.lx[pqidxs]  # shape=(2, n, d)
        cpts = self.lc[cidxs]  # shape=(n, set_size, d)
        cpqs = self.lc[pqidxs]  # shape=(2, n, d)
        actual_batch_size = mask.shape[0]
        # one item set is fed per time step; take the unique items
        ncpqs = T.concatenate((cidxs, pqidxs.T),
                              axis=1)  # concatenate first, shape=(n, set_size+2)
        uiq_cps = Unique(False, False, False)(ncpqs)  # deduplicate
        uiq_c = self.lc[uiq_cps]  # features of the corresponding items
        # the weight matrices [note: consistent convention, weight * variable]
        lay = self.layer
        wru, wrc, wrl = self.wru, self.wrc, self.wrl  # resnet
        wa1, wa2, wa3 = self.wa1, self.wa2, self.wa3  # first-order attention
        wb1, wb2 = self.wb1, self.wb2  # second-order attention
        """
        输入t时刻正负样本,计算当前损失并更新user/正负样本. 公式里省略了时刻t
        # 根据性质:T.dot((n, ), (n, ))得到(1, 1)
            uij  = user * (xp - xq)
            upq = log(sigmoid(uij))
        """
        # ==============================================================================================================
        # score 1
        uij_x = T.sum(urxs * (xpqs[0] - xpqs[1]), axis=1)  # shape=(n, )

        # ==============================================================================================================
        # score 2: layer-0 attention, producing a (batch_size, d) attention vector
        urx_emb = T.dot(wa2,
                        urxs.T).T.dimshuffle(0, 'x',
                                             1)  # shape=(batch_size, 1, d)
        e0 = T.dot(tanh(urx_emb + T.dot(cpts, wa3)),
                   wa1)  # shape=(batch_size, set_size)
        a0 = softmax(e0)  # (batch_size, set_size)
        c0 = T.sum(cpts * a0.dimshuffle(0, 1, 'x'),
                   axis=1)  # shape=(batch_size, d), broadcast

        # score 2: the attention inside the ResNet
        def recurrence1(wrut, wrct, urx_pre1, cpt_pre1):
            # ResNet update
            ur_t = relu(T.dot(wrut, urx_pre1.T).T +
                        urx_pre1)  # (batch_size, d)
            cp_t = relu(T.dot(cpt_pre1, wrct) +
                        cpt_pre1)  # (batch_size, set_size, d)
            # attention: compute the context vector
            ur_t_emb = T.dot(wa2, ur_t.T).T.dimshuffle(0, 'x', 1)
            e_t = T.dot(tanh(ur_t_emb + T.dot(cp_t, wa3)),
                        wa1)  # shape=(batch_size, set_size)
            a_t = softmax(e_t)
            c_t = T.sum(cp_t * a_t.dimshuffle(0, 1, 'x'), axis=1)

            return [
                ur_t, cp_t, c_t
            ]  # (batch_size, d), (batch_size, set_size, d), (batch_size, d)

        [urs, cps, cs], _ = theano.scan(  # cs.shape = (layer, batch_size, d)
            fn=recurrence1,
            sequences=[wru, wrc],
            outputs_info=[urxs, cpts, None],
            n_steps=lay,
            truncate_gradient=-1)
        # score 2: second-order attention
        c0 = c0.dimshuffle(0, 'x', 1)  # (batch_size, 1, d)
        cs = cs.dimshuffle(1, 0, 2)  # (batch_size, layer, d)
        context = T.concatenate((c0, cs), axis=1)  # (batch_size, layer+1, d)
        e1 = T.dot(tanh(T.dot(context, wb2)),
                   wb1)  # shape=(batch_size, layer+1)
        a1 = softmax(e1)
        c1 = T.sum(context * a1.dimshuffle(0, 1, 'x'),
                   axis=1)  # shape=(batch_size, d)
        # score 2
        uij_c = T.sum(c1 * (cpqs[0] - cpqs[1]), axis=1)  # shape=(n, )

        # ==============================================================================================================
        # score 3: run a new ResNet starting from the ResNet output c1
        def recurrence2(wrlt, h_pre1):
            # ResNet update
            hl_t = relu(T.dot(wrlt, h_pre1.T).T +
                        h_pre1)  # shape=(batch_size, d)
            return hl_t

        hls, _ = theano.scan(fn=recurrence2,
                             sequences=wrl,
                             outputs_info=c1,
                             n_steps=lay,
                             truncate_gradient=-1)
        # score 3
        uij_l = T.sum(hls[-1] * (cpqs[0] - cpqs[1]), axis=1)  # shape=(n, )

        # ==============================================================================================================
        # the total score
        loss = T.log(sigmoid(uij_x + uij_c + uij_l))  # shape=(n,)    #
        loss *= mask  # multiplying by the 0/1 mask here, on the loss, is all that is needed

        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, L2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        l2_sqr = (T.sum([
            T.sum(par**2) for par in
            [urxs, xpqs, cpts, cpqs, wru, wrc, wrl, wa1, wa2, wa3, wb1, wb2]
        ]))
        upq = T.sum(loss) / actual_batch_size
        costs = (-upq + 0.5 * l2 * l2_sqr)
        # self.params
        grads = T.grad(costs, self.params)
        updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, grads)]
        # With 1 user and 2 items, this update scheme is the fastest: take the gradient
        # with respect to each subtensor directly rather than the full parameter.
        subs_pars_idxs = [[urxs, self.ux, uidxs], [xpqs, self.lx, pqidxs],
                          [uiq_c, self.lc, uiq_cps]]
        tmp = [(par, T.set_subtensor(sub, sub - lr * T.grad(costs, par)[idx]))
               for sub, par, idx in subs_pars_idxs]
        updates.extend(tmp)
        # ----------------------------------------------------------------------------

        # Given the users, positive/negative samples and other inputs, update the variables and return the loss.
        self.train = theano.function(inputs=[uidxs, pqidxs, cidxs, mask],
                                     outputs=-upq,
                                     updates=updates,
                                     on_unused_input='warning')
Example #14
    def __theano_predict__(self):
        """
        测试阶段再跑一遍训练序列得到各个隐层。用全部数据一次性得出所有用户的表达
        """
        # 各种权重矩阵。【注意:统一格式,权重 * 变量】
        lay = self.layer
        wru, wrc, wrl = self.wru, self.wrc, self.wrl  # resnet
        wa1, wa2, wa3 = self.wa1, self.wa2, self.wa3  # first-order attention
        wb1, wb2 = self.wb1, self.wb2  # second-order attention

        # data is supplied via givens
        start_end = T.ivector()
        tra_mask = T.imatrix()  # shape=(n, 157)
        actual_batch_size = tra_mask.shape[0]
        # user vector
        urxs = T.fmatrix()  # shape=(batch_size, d)
        cps_idxs = T.itensor3()  # shape=(batch_size, each user's sequence of item sets)
        cpt_idxs = cps_idxs[  # shape=(batch_size, set_size)
            T.arange(actual_batch_size),  # fancy indexing: take the last set of item_idxs in each user's sequence
            T.sum(tra_mask, axis=1) - 1]
        # item vectors (one set per user)
        cpts = self.lc[cpt_idxs]  # shape=(batch_size, set_size, d)

        # ==============================================================================================================
        # score 2: layer-0 attention, producing a (batch_size, d) attention vector
        urx_emb = T.dot(wa2,
                        urxs.T).T.dimshuffle(0, 'x',
                                             1)  # shape=(batch_size, 1, d)
        e0 = T.dot(tanh(urx_emb + T.dot(cpts, wa3)),
                   wa1)  # shape=(batch_size, set_size)
        a0 = softmax(e0)  # (batch_size, set_size)
        c0 = T.sum(cpts * a0.dimshuffle(0, 1, 'x'),
                   axis=1)  # shape=(batch_size, d), broadcast

        # score 2: the attention inside the ResNet
        def recurrence1(wrut, wrct, urx_pre1, cpt_pre1):
            # ResNet update
            ur_t = relu(T.dot(wrut, urx_pre1.T).T +
                        urx_pre1)  # (batch_size, d)
            cp_t = relu(T.dot(cpt_pre1, wrct) +
                        cpt_pre1)  # (batch_size, set_size, d)
            # attention: compute the context vector
            ur_t_emb = T.dot(wa2, ur_t.T).T.dimshuffle(0, 'x', 1)
            e_t = T.dot(tanh(ur_t_emb + T.dot(cp_t, wa3)),
                        wa1)  # shape=(batch_size, set_size)
            a_t = softmax(e_t)
            c_t = T.sum(cp_t * a_t.dimshuffle(0, 1, 'x'), axis=1)

            return [
                ur_t, cp_t, c_t
            ]  # (batch_size, d), (batch_size, set_size, d), (batch_size, d)

        [urs, cps, cs], _ = theano.scan(  # cs.shape = (layer, batch_size, d)
            fn=recurrence1,
            sequences=[wru, wrc],
            outputs_info=[urxs, cpts, None],
            n_steps=lay,
            truncate_gradient=-1)

        # score 2: second-order attention
        c0 = c0.dimshuffle(0, 'x', 1)  # (batch_size, 1, d)
        cs = cs.dimshuffle(1, 0, 2)  # (batch_size, layer, d)
        context = T.concatenate((c0, cs), axis=1)  # (batch_size, layer+1, d)
        e1 = T.dot(tanh(T.dot(context, wb2)),
                   wb1)  # shape=(batch_size, layer+1)
        a1 = softmax(e1)
        c1 = T.sum(context * a1.dimshuffle(0, 1, 'x'),
                   axis=1)  # shape=(batch_size, d)

        # ==============================================================================================================
        # score 3: run a new ResNet starting from the ResNet output c1
        def recurrence2(wrlt, h_pre1):
            # ResNet update
            hl_t = relu(T.dot(wrlt, h_pre1.T).T +
                        h_pre1)  # shape=(batch_size, d)
            return hl_t

        hls, _ = theano.scan(fn=recurrence2,
                             sequences=wrl,
                             outputs_info=c1,
                             n_steps=lay,
                             truncate_gradient=-1)

        # ==============================================================================================================
        # the final overall user vector: the part computed via the ResNet
        usr_vec_c = c1
        usr_vec_l = hls[-1]

        self.seq_predict = theano.function(
            inputs=[start_end],
            outputs=[usr_vec_c, usr_vec_l],  # shape=(batch_size, d)
            givens={
                urxs: self.trained_usr_x[start_end],  # shape=(batch_size, d)
                tra_mask: self.tra_masks[start_end],
                cps_idxs: self.tra_set_masks[start_end]
            })
Example #15
def exp_shifted_mKalman(sample_kwargs=None):

    clustering_data = pd.read_pickle(
        'Data/exp_shifted/exp_shifted_clustering_means_std.pkl')
    clustering_data.index = range(len(clustering_data))

    lin_gp_data = pd.read_csv('Data/exp_shifted/gplinshifted.csv')
    lin_gp_data.index = range(len(lin_gp_data))

    rbf_gp_data = pd.read_csv('Data/exp_shifted/gprbfshifted.csv')
    rbf_gp_data.index = range(len(rbf_gp_data))

    kalman_data = pd.read_pickle('Data/exp_shifted/kalmanshifted.pkl')
    kalman_data.index = range(len(kalman_data))

    bayes_gp_data = pd.read_pickle('Data/exp_shifted/bayes_gp_exp_shifted.pkl')
    bayes_gp_data.index = range(len(bayes_gp_data))

    raw_data = pd.read_csv('Data/exp_shifted/datashifted_withoffset.csv',
                           header=0)

    # the GP-RBF can fail if a subject always chooses the same response. For simplicity,
    # we drop those subjects
    subjects_to_drop = set()
    for s in set(raw_data.id):
        if s not in set(rbf_gp_data.id):
            subjects_to_drop.add(s)

    for s in subjects_to_drop:
        clustering_data = clustering_data[
            clustering_data['Subject'] != s].copy()
        lin_gp_data = lin_gp_data[lin_gp_data.id != s].copy()
        raw_data = raw_data[raw_data.id != s].copy()
        kalman_data = kalman_data[kalman_data.Subject != s].copy()
        bayes_gp_data = bayes_gp_data[bayes_gp_data['Subject'] != s].copy()

    # construct a sticky choice predictor. This is the same for all of the models
    x_sc = construct_sticky_choice(raw_data)

    # PYMC3 doesn't care about the actual subject numbers, so remap these to a sequential list
    subj_idx = construct_subj_idx(lin_gp_data)
    n_subj = len(set(subj_idx))

    intercept = raw_data['int'].values

    # prep the predictor vectors
    x_mu_cls = np.array(
        [clustering_data.loc[:, 'mu_%d' % ii].values for ii in range(8)]).T
    x_sd_cls = np.array(
        [clustering_data.loc[:, 'std_%d' % ii].values for ii in range(8)]).T

    x_mu_bayes_gp = np.array(
        [bayes_gp_data.loc[:, 'mu_%d' % ii].values for ii in range(8)]).T
    x_sd_bayes_gp = np.array(
        [bayes_gp_data.loc[:, 'std_%d' % ii].values for ii in range(8)]).T

    x_mu_lin = np.array([
        lin_gp_data.loc[:, 'mu_%d' % ii].values + intercept for ii in range(8)
    ]).T
    x_sd_lin = np.array(
        [lin_gp_data.loc[:, 'std_%d' % ii].values for ii in range(8)]).T

    x_mu_rbf = np.array([
        rbf_gp_data.loc[:, 'mu_%d' % ii].values + intercept for ii in range(8)
    ]).T
    x_sd_rbf = np.array(
        [rbf_gp_data.loc[:, 'std_%d' % ii].values for ii in range(8)]).T

    x_mu_kal = np.array([
        kalman_data.loc[:, 'mu_%d' % ii].values + intercept for ii in range(8)
    ]).T
    x_sd_kal = np.array(
        [kalman_data.loc[:, 'std_%d' % ii].values for ii in range(8)]).T

    y = raw_data['arm'].values - 1  # convert to 0 indexing

    n, d = x_mu_kal.shape
    if sample_kwargs is None:
        sample_kwargs = dict(draws=2000,
                             njobs=2,
                             tune=2000,
                             init='advi+adapt_diag')

    with pm.Model() as hier_kal:
        mu_1 = pm.Normal('mu_beta_kal_mean', mu=0., sd=100.)
        mu_2 = pm.Normal('mu_beta_kal_stdv', mu=0., sd=100.)
        mu_3 = pm.Normal('mu_beta_stick', mu=0., sd=100.)

        sigma_1 = pm.HalfCauchy('sigma_rbf_means', beta=100)
        sigma_2 = pm.HalfCauchy('sigma_rbf_stdev', beta=100)
        sigma_3 = pm.HalfCauchy('sigma_stick', beta=100)

        b_1 = pm.Normal('beta_rbf_mu', mu=mu_1, sd=sigma_1, shape=n_subj)
        b_2 = pm.Normal('beta_rbf_std', mu=mu_2, sd=sigma_2, shape=n_subj)
        b_3 = pm.Normal('beta_sc', mu=mu_3, sd=sigma_3, shape=n_subj)

        rho = \
            tt.tile(tt.reshape(b_1[subj_idx], (n, 1)), d) * x_mu_kal + \
            tt.tile(tt.reshape(b_2[subj_idx], (n, 1)), d) * x_sd_kal + \
            tt.tile(tt.reshape(b_3[subj_idx], (n, 1)), d) * x_sc
        p_hat = softmax(rho)

        # Data likelihood
        yl = pm.Categorical('yl', p=p_hat, observed=y)

        # inference!
        trace_kal = pm.sample(**sample_kwargs)

    ppc = pm.sample_ppc(trace_kal, samples=500, model=hier_kal)

    for ii in range(500):
        sim_draws = raw_data.copy()
        sim_draws['arm_sim'] = ppc['yl'][ii, :] + 1
        sim_draws.to_pickle('./Data/PPC/exp_shifted/sim_kal_%d.pkl' % ii)
Example #16
def forward(x_t, h_tm1, Wx, Wh, bh, am, ax, ah, Wy, by):
    preact = T.dot(x_t, Wx) + T.dot(h_tm1, Wh) + bh
    h_t = act(preact)
    y_t = softmax(T.dot(h_t, Wy) + by)
    return h_t, y_t, preact
Example #17
# standardize the x's
x_s = (x_s - x_s.mean(0)) / x_s.std(0)
# get the number of groups
groups_number = len(np.unique(iris["species"]))

# --------------- specify the probabilistic model ------------------------- #

with pm.Model() as softmax_model:
    alpha = pm.Normal("alpha", mu=0, sd=10, shape=groups_number - 1)
    beta = pm.Normal("beta",
                     mu=0,
                     sd=10,
                     shape=(x_s.shape[1], groups_number - 1))
    alpha_f = tt.tensor.concatenate([[0], alpha])
    beta_f = tt.tensor.concatenate([np.zeros((x_s.shape[1], 1)), beta], axis=1)
    # get the mu
    mu = pm.Deterministic("mu", alpha_f + pm.math.dot(x_s, beta_f))
    # apply the softmax function to the mu
    theta = softmax(mu)
    # specify the likelihood of the data
    y_obs = pm.Categorical("y_obs", p=theta, observed=y_s)
    # inference step
    trace = pm.sample()

# -------------- check how many cases are classified correctly ----------- #

data_pred = trace["mu"].mean(0)
log.info("The data pred is: %s", data_pred)
y_pred = [np.exp(point) / np.sum(np.exp(point), axis=0) for point in data_pred]
print(f'{np.sum(y_s == np.argmax(y_pred, axis=1)) / len(y_s):.2f}')
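Because the softmax is monotone within each row, the per-row argmax of mu already identifies the predicted class; the explicit exponentiation above is only needed when calibrated probabilities are wanted. An equivalent shortcut, reusing the trace and y_s names from the snippet above (a sketch, not part of the original):

import numpy as np

# argmax is invariant under softmax, so class predictions can be read off mu directly
y_pred_class = np.argmax(trace["mu"].mean(0), axis=1)
print(f'{np.sum(y_s == y_pred_class) / len(y_s):.2f}')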
def sample_hier_rbf(model_matrix, sample_kwargs=None):

    # load the data
    x_mu_rbf = model_matrix['x_mu_rbf']
    x_sd_rbf = model_matrix['x_sd_rbf']
    x_sc = model_matrix['x_sc']
    subj_idx = model_matrix['subj_idx']
    y = model_matrix['y']
    n_subj = model_matrix['n_subj']

    # fit the first model
    n, d = x_mu_rbf.shape
    if sample_kwargs is None:
        # Here, we specify NUTS as our sampler (implicitly, this is the default)
        # and use variational inference to initialize
        sample_kwargs = dict(draws=2000,
                             njobs=2,
                             tune=2000,
                             init='advi+adapt_diag')

    # To do inference, all we have to do is write down the model in our
    # probabilistic programming language (PyMC3) and the software will
    # do inference over it. We can control how this happens (e.g. with
    # Gibbs sampling, other MCMC schemes, or variational inference), but
    # PyMC3 defaults to Hamiltonian MCMC with the No-U-Turn Sampler ("NUTS").

    with pm.Model() as hier_rbf:
        # here, we write down the model

        # Define hierarchical parameters
        # (normal means and standard deviation for regression weights)
        mu_1 = pm.Normal('mu_beta_rbf_mean', mu=0., sd=100.)
        mu_2 = pm.Normal('mu_beta_rbf_stdv', mu=0., sd=100.)
        mu_3 = pm.Normal('mu_beta_stick', mu=0., sd=100.)

        sigma_1 = pm.HalfCauchy('sigma_rbf_means', beta=100)
        sigma_2 = pm.HalfCauchy('sigma_rbf_stdev', beta=100)
        sigma_3 = pm.HalfCauchy('sigma_stick', beta=100)

        # define the subject-level coefficients (i.e. regression parameters,
        # one per subject per predictor, with a hierarchical prior)
        b_1 = pm.Normal('beta_rbf_mu', mu=mu_1, sd=sigma_1, shape=n_subj)
        b_2 = pm.Normal('beta_rbf_std', mu=mu_2, sd=sigma_2, shape=n_subj)
        b_3 = pm.Normal('beta_sc', mu=mu_3, sd=sigma_3, shape=n_subj)

        # linearly combine the predictors with the subject-specific coefficients
        # as scaling factors. In practice, each coefficient has to be broadcast
        # into an N x D matrix via Theano for element-wise multiplication
        # (see the NumPy sketch after this function)
        rho = \
            tt.tile(tt.reshape(b_1[subj_idx], (n, 1)), d) * x_mu_rbf + \
            tt.tile(tt.reshape(b_2[subj_idx], (n, 1)), d) * x_sd_rbf + \
            tt.tile(tt.reshape(b_3[subj_idx], (n, 1)), d) * x_sc

        # pass each row of the resulting matrix through a softmax to convert it to a
        # probability distribution over options. Note, we don't need an additional noise
        # parameter as that would be collinear with the coefficients.
        p_hat = softmax(rho)

        # Data likelihood
        yl = pm.Categorical('yl', p=p_hat, observed=y)

        # inference!
        trace_rbf = pm.sample(**sample_kwargs)

    return hier_rbf, trace_rbf
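The tt.tile(tt.reshape(...)) pattern used throughout these samplers simply broadcasts each subject's scalar coefficient across the d options before the element-wise multiply. A small NumPy sketch of the same operation with made-up shapes (illustrative only, not taken from the model code):

import numpy as np

n_subj, n, d = 3, 10, 8
b = np.random.randn(n_subj)                   # one coefficient per subject
subj_idx = np.random.randint(n_subj, size=n)  # subject label for each trial
x = np.random.randn(n, d)                     # one row of predictors per trial

# explicit reshape + tile, mirroring the Theano code above ...
rho_tiled = np.tile(b[subj_idx].reshape(n, 1), d) * x
# ... is equivalent to plain NumPy broadcasting
rho_broadcast = b[subj_idx][:, None] * x
assert np.allclose(rho_tiled, rho_broadcast)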