Пример #1
0
    def log_prob(self, x):
        r""" Computes the log-probability of each action $\mathbf u$, in addition to caching first and second order derivatives

        $$
        \log p(\mathbf u|\mathbf v, \mathbf u_{t-1}) = \big(\beta \mathbf v + \beta^\rho \mathbf u_{t-1}\big) - \log \sum_{v_i} e^{\beta \mathbf v_i + \beta^\rho u_{t-1}^{(i)}}
        $$

        Side effects: stores the logits in `self.logits` and writes entries
        into `self.d_logits`, `self.d_logprob` and `self.hess_logprob`.

        Arguments:

            x: Action value vector of type `ndarray((nactions,))`

        Returns:

            Array of log-probabilities with the same shape as the logits
            (one entry per action), not a scalar
        """
        # Compute logits: action values scaled by the inverse softmax
        # temperature plus the perseveration term weighted by `self.a_last`
        Bx = self.inverse_softmax_temp*x
        stickiness = self.perseveration*self.a_last
        self.logits = Bx + stickiness

        # Hessians (the fifth value returned by the helper is not used here)
        HB, Hp, HBp, Hx, _ = hess.log_stickysoftmax(self.inverse_softmax_temp,
                                                    self.perseveration,
                                                    x,
                                                    self.a_last)
        self.hess_logprob['inverse_softmax_temp'] = HB
        self.hess_logprob['perseveration'] = Hp
        self.hess_logprob['action_values'] = Hx
        self.hess_logprob['inverse_softmax_temp_perseveration'] = HBp

        # Derivatives
        #  Grad LSE wrt Logits
        Dlse = grad.logsumexp(self.logits)

        # Grad logprob wrt logits: identity minus the LSE gradient,
        # which is broadcast across the rows
        self.d_logprob['logits'] = np.eye(x.size) - Dlse

        #  Partial derivatives of the logits with respect to each parameter
        self.d_logits['inverse_softmax_temp'] = x
        self.d_logits['perseveration'] = self.a_last

        #  Chain rule through the logits for each parameter
        self.d_logprob['inverse_softmax_temp'] = x - np.dot(Dlse, x)
        self.d_logprob['perseveration'] = self.a_last - np.dot(Dlse, self.a_last)

        # Gradient with respect to x
        B = np.eye(x.size)*self.inverse_softmax_temp
        Dlsetile = np.tile(self.inverse_softmax_temp*Dlse, [x.size, 1])
        self.d_logprob['action_values'] = B - Dlsetile

        # Normalize the logits by their log-sum-exp
        LSE = fu.logsumexp(self.logits)
        # NOTE(review): a non-finite normalizer is replaced by 0, in which
        # case the unnormalized logits are returned -- looks like a
        # deliberate guard; confirm with callers
        if not np.isfinite(LSE):
            LSE = 0.
        return self.logits - LSE
Пример #2
0
    def log_prob(self, x):
        r""" Computes the log-probability of each action $\mathbf u$, in addition to computing derivatives up to second order

        $$
        \log p(\mathbf u|\mathbf v) = \beta \mathbf v - \log \sum_{v_i} e^{\beta \mathbf v_i}
        $$

        Side effects: stores the logits in `self.logits` and writes entries
        into `self.d_logits`, `self.d_logprob` and `self.hess_logprob`.

        Arguments:

            x: 1-D `ndarray` of action values (one entry per action)

        Returns:

            Array of log-probabilities with the same shape as `x`
            (one entry per action), not a scalar
        """
        # Compute logits
        self.logits = self.inverse_softmax_temp*x

        # Hessians
        HB, Hx = hess.log_softmax(self.inverse_softmax_temp, x)
        self.hess_logprob['inverse_softmax_temp'] = HB
        self.hess_logprob['action_values'] = Hx

        # Derivatives
        #  Grad LSE wrt Logits
        Dlse = grad.logsumexp(self.logits)

        # Grad logprob wrt logits: identity minus the LSE gradient,
        # which is broadcast across the rows
        self.d_logprob['logits'] = np.eye(x.size) - Dlse

        #  Grad logprob wrt inverse softmax temp (chain rule through logits)
        self.d_logits['inverse_softmax_temp'] = x
        self.d_logprob['inverse_softmax_temp'] = np.dot(self.d_logprob['logits'], self.d_logits['inverse_softmax_temp'])

        # Grad logprob wrt action values `x`
        B = np.eye(x.size)*self.inverse_softmax_temp
        Dlsetile = np.tile(self.inverse_softmax_temp*Dlse, [x.size, 1])
        self.d_logprob['action_values'] = B - Dlsetile

        # Compute log-probability of actions
        LSE = fu.logsumexp(self.logits)
        # NOTE(review): a non-finite normalizer is replaced by 0, in which
        # case the unnormalized logits are returned -- looks like a
        # deliberate guard; confirm with callers
        if not np.isfinite(LSE):
            LSE = 0.
        return self.logits - LSE
Пример #3
0
def test_logsumexp():
    """Check `grad.logsumexp` against a reference gradient at one point.

    The reference comes from `gradient(utils.logsumexp)` (presumably
    automatic differentiation -- confirm against the file's imports). The
    Euclidean norm of the difference must be below 1e-6.
    """
    point = np.array([1., 0., 0.])
    analytic = grad.logsumexp(point)
    reference = gradient(utils.logsumexp)(point)
    mismatch = np.linalg.norm(analytic - reference)
    assert mismatch < 1e-6
Пример #4
0
# Hand-written derivative bookkeeping for a two-softmax choice model.
# NOTE(review): this fragment relies on names defined outside the visible
# code (env, R, X, U, X_, U_, Qmf, Qmb, B1, B2, persev, fu, grad) and the
# loop body may continue past the last visible line.

# Action-by-state value table, initialised to zero
Q = np.zeros((env.nactions, env.nstates))
# Scalar accumulator (presumably the log-likelihood -- confirm where it is updated)
L = 0
# Vector weighted by `persev` in the first set of logits; starts at zero
a_last = np.zeros(env.nactions)
# One iteration per entry of R
for t in range(R.size):
    # Per-trial data; presumably first-step state/action, reward and
    # second-step state/action -- verify against where these arrays are built
    x = X[t]
    u = U[t]
    r = R[t]
    x_ = X_[t]
    u_ = U_[t]
    # Action values for each step: matrix-vector products Q @ x and Qmf @ x_
    q = np.einsum('ij,j->i', Q, x)
    q_ = np.einsum('ij,j->i', Qmf, x_)
    # Logits: scaled values, with the `persev * a_last` term on the first only
    logits1 = B1 * q + persev * a_last
    logits2 = B2 * q_
    # Action probabilities
    pu1 = fu.softmax(logits1)
    pu2 = fu.softmax(logits2)
    # Jacobian of log-prob wrt logits: identity minus the LSE gradient
    # repeated on every row
    Dlp_logit1 = np.eye(q.size) - np.tile(grad.logsumexp(logits1), [q.size, 1])
    Dlp_logit2 = np.eye(q_.size) - np.tile(grad.logsumexp(logits2),
                                           [q_.size, 1])
    # Partial derivatives of the logits wrt the values and each parameter
    Dlogit1_q1 = B1
    Dlogit2_q2 = B2
    Dlogit1_B1 = q
    Dlogit2_B2 = q_
    Dlogit1_persev = a_last
    # Jacobian of log-prob wrt the action values (scaled by B1 / B2)
    Dlp_q1 = B1 * np.eye(q.size) - np.tile(B1 * grad.logsumexp(logits1),
                                           [q.size, 1])
    Dlp_q2 = B2 * np.eye(q_.size) - np.tile(B2 * grad.logsumexp(logits2),
                                            [q_.size, 1])
    # Derivatives of the action values wrt the value tables
    Dq1_Q = x
    Dq2_Q = x_
    Dq2_Qmf = x_
    # Presumably d/dw of a mixture Q = w*Qmb + (1 - w)*Qmf -- confirm
    DQ_w = Qmb - Qmf