Example #1
import numpy as np

# `utils`, `unpack`, `pack`, and `gradient_KL` are project-local helpers.


def gradient(samples,
             params,
             Q,
             c_bar,
             mu_bar,
             Sigma_bar,
             operator,
             n_samples,
             phi,
             psi,
             n_weights,
             lambda_,
             max_iter_ukl,
             C,
             K,
             precision=None,
             t_step=0,
             ukl_tight_freq=1):
    """Computes the objective function gradient"""
    c, mu, L = unpack(params, C, K)
    # The mixture weights c are not optimized here: grad_c stays zero and
    # the KL gradient wrt c computed below is discarded.
    grad_c = np.zeros(c.shape)

    # Draw standard-normal noise, then map it through each component's
    # Cholesky factor so that ws[c] ~ N(mu[c], L[c] L[c]^T)
    _, vs = utils.sample_mvn(n_weights * C, mu[0, :], L[0, :, :])

    ws = np.matmul(vs.reshape(C, n_weights, K), np.transpose(
        L, (0, 2, 1))) + mu[:, np.newaxis]
    be_grad = operator.gradient_be(Q, samples,
                                   ws.reshape(C * n_weights,
                                              K)).reshape(C, n_weights, K)
    # Gradient of the expected Bellman error wrt mu
    ebe_grad_mu = np.average(be_grad, axis=1)
    # Gradient of the expected Bellman error wrt L.
    ebe_grad_L = np.average(
        np.matmul(be_grad[:, :, :, np.newaxis],
                  vs.reshape(C, n_weights, K)[:, :, np.newaxis]),
        axis=1)
    # Weight each component's gradient by its mixture coefficient
    ebe_grad_mu = c[:, np.newaxis] * ebe_grad_mu
    ebe_grad_L = c[:, np.newaxis, np.newaxis] * ebe_grad_L

    kl_grad_c, kl_grad_mu, kl_grad_L, phi, psi = gradient_KL(
        c,
        mu,
        L,
        c_bar,
        mu_bar,
        Sigma_bar,
        phi,
        psi,
        max_iter_ukl,
        C,
        K,
        precision=precision,
        tight_bound=(t_step % ukl_tight_freq == 0))
    # Total gradient: expected Bellman error plus the scaled KL penalty
    grad_mu = ebe_grad_mu + lambda_ * kl_grad_mu / n_samples
    grad_L = ebe_grad_L + lambda_ * kl_grad_L / n_samples

    return pack(grad_c, grad_mu, grad_L)
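These examples rely on project-local helpers (unpack, pack, utils.sample_mvn, gradient_KL) whose definitions are not shown. Purely as an illustration, here is a minimal sketch of what utils.sample_mvn might look like, inferred only from its call sites above; the parameter names are assumptions. It draws standard-normal noise and reparameterizes it through the Cholesky factor L, returning both the samples and the raw noise:

import numpy as np

def sample_mvn(n, mu, L):
    # Hypothetical stand-in for utils.sample_mvn, inferred from usage.
    # vs ~ N(0, I); ws = mu + vs @ L.T is then distributed as N(mu, L L^T)
    # (the reparameterization trick). Both are returned because the
    # gradient code above also needs the raw noise vs.
    vs = np.random.randn(n, mu.size)
    ws = mu + np.dot(vs, L.T)
    return ws, vs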
Example #2
import numpy as np


def objective(samples, params, Q, mu_bar, Sigma_bar_inv, operator, n_samples,
              lambda_, n_weights):
    """Computes the negative ELBO"""
    mu, L = unpack(params, Q._w.size)
    # Reconstruct the covariance from its Cholesky factor
    Sigma = np.dot(L, L.T)
    weights, _ = utils.sample_mvn(n_weights, mu, L)
    likelihood = operator.expected_bellman_error(Q, samples, weights)
    assert likelihood >= 0
    kl = utils.KL(mu, Sigma, mu_bar, Sigma_bar_inv)
    assert kl >= 0
    return likelihood + lambda_ * kl / n_samples
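utils.KL above receives the prior covariance as its inverse (Sigma_bar_inv). Assuming it implements the standard closed-form KL divergence between two multivariate Gaussians, KL(N(mu, Sigma) || N(mu_bar, Sigma_bar)), a minimal sketch (not the project's actual implementation) could be:

import numpy as np

def KL(mu, Sigma, mu_bar, Sigma_bar_inv):
    # Hypothetical stand-in for utils.KL. Closed form:
    # 0.5 * (tr(S1^-1 S0) + (m1-m0)^T S1^-1 (m1-m0) - k
    #        + ln(det S1 / det S0))
    k = mu.size
    diff = mu_bar - mu
    _, logdet_Sigma = np.linalg.slogdet(Sigma)
    _, logdet_Sigma_bar_inv = np.linalg.slogdet(Sigma_bar_inv)
    return 0.5 * (np.trace(np.dot(Sigma_bar_inv, Sigma))
                  + np.dot(diff, np.dot(Sigma_bar_inv, diff))
                  - k - logdet_Sigma_bar_inv - logdet_Sigma)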
Example #3
import numpy as np


def gradient(samples, params, Q, mu_bar, Sigma_bar_inv, operator, n_samples,
             lambda_, n_weights):
    """Computes the objective function gradient"""
    mu, L = unpack(params, Q._w.size)
    # ws: weight samples from N(mu, L L^T); vs: the standard-normal noise,
    # reused below for the gradient wrt L
    ws, vs = utils.sample_mvn(n_weights, mu, L)
    be_grad = operator.gradient_be(Q, samples, ws)
    # Gradient of the expected Bellman error wrt mu
    ebe_grad_mu = np.average(be_grad, axis=0)
    # Gradient of the expected Bellman error wrt L.
    ebe_grad_L = np.average(be_grad[:, :, np.newaxis] * vs[:, np.newaxis, :],
                            axis=0)
    kl_grad_mu, kl_grad_L = utils.gradient_KL(mu, L, mu_bar, Sigma_bar_inv)
    grad_mu = ebe_grad_mu + lambda_ * kl_grad_mu / n_samples
    grad_L = ebe_grad_L + lambda_ * kl_grad_L / n_samples

    return pack(grad_mu, grad_L)
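utils.gradient_KL presumably returns the gradients of the same KL term with respect to mu and the Cholesky factor L. Differentiating the closed form above gives grad_mu = Sigma_bar_inv (mu - mu_bar) and grad_L = Sigma_bar_inv L - inv(L).T, so a minimal sketch under that assumption is:

import numpy as np

def gradient_KL(mu, L, mu_bar, Sigma_bar_inv):
    # Hypothetical stand-in for utils.gradient_KL.
    # d KL / d mu = Sigma_bar^-1 (mu - mu_bar)
    # d KL / d L  = Sigma_bar^-1 L - (L^-1)^T
    grad_mu = np.dot(Sigma_bar_inv, mu - mu_bar)
    grad_L = np.dot(Sigma_bar_inv, L) - np.linalg.inv(L).T
    return grad_mu, grad_L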