示例#1
0
        agent1 = agents[i]
        agent2 = agents[j]
        if i == j:
            cov_mat[i][j] = variances[agents[i]]
        elif (i != j) and (agent1 == agent2):
            cov_mat[i][j] = intra_covs[agent1 + agent2]
            cov_mat[j][i] = intra_covs[agent1 + agent2]
        else:
            cov_mat[i][j] = inter_covs[''.join(sorted(agent1 + agent2))]
            cov_mat[j][i] = inter_covs[''.join(sorted(agent1 + agent2))]

# Replace cov_mat by the product of itself with its own transpose
# (cov_mat . cov_mat^T); such a product is always symmetric.
cov_mat = np.dot(np.matrix(cov_mat), np.matrix(cov_mat).T)
print(cov_mat)
# One mean entry per agent, every entry equal to V.
means = [V for i in range(num_agents)]

# NOTE(review): multi_norm is presumably numpy.random.multivariate_normal
# (it is called with a mean vector and a covariance matrix) -- confirm
# against this file's imports.
draw = multi_norm(means, cov_mat)

# Convert the np.matrix into nested Python lists so the entries below can be
# read with plain [row][col] indexing.
cov_mat = cov_mat.tolist()

# Diagonal entries taken from the first row ("A") and the last row ("B").
# NOTE(review): assumes type-A agents come first and type-B agents last --
# the ordering is decided by code outside this fragment.
variances = {
    "A": cov_mat[0][0],
    "B": cov_mat[len(cov_mat) - 1][len(cov_mat) - 1]
}

# Off-diagonal entries next to the first and last diagonal elements.
# NOTE(review): indexes [0][1] and [-1][-2] assume at least two rows.
intra_covs = {
    "AA": cov_mat[0][1],
    "BB": cov_mat[len(cov_mat) - 1][len(cov_mat) - 2]
}

inter_covs = {
    "AB": cov_mat[type_populations["A"] - 1][type_populations["A"] + 1]
示例#2
0
from numpy.random import multivariate_normal as multi_norm

# Symmetric 3x3 matrix handed to the sampler as the covariance.
cov = [
    [0.9, 0.7, 0.4],
    [0.7, 0.8, 0.3],
    [0.4, 0.3, 0.7],
]
# Zero mean in each of the three dimensions.
means = [0] * len(cov)
# Draw a single sample and print it.
print(multi_norm(means, cov))
def smc2(actions, rewards, idx_blocks, choices, subj_idx, show_progress,
         apply_rep, apply_weber, beta_softmax, temperature,
         observational_noise):
    """Sequentially infer the parameters of the noisy RL model.

    A population of ``n_theta`` parameter vectors is maintained, each one
    carrying ``N_samples`` state particles that are updated by the compiled
    ``smc_c`` routines.  After every trial the parameter weights are updated;
    when the effective sample size falls below ``coefficient * n_theta`` the
    population is resampled and every member is moved with an independent
    Metropolis-Hastings proposal drawn from a Gaussian fitted to the current
    weighted population (rejection-sampled to stay inside the prior bounds).

    Parameters
    ----------
    actions : array-like of int
        One action per trial.  Must contain both 0 and 1 and must not
        contain 2.  Converted to ``np.intc``.
    rewards : array-like
        Indexed as ``rewards[:, t]``, i.e. trials are on the second axis.
    idx_blocks : array-like of int
        Passed to the ``smc_c`` routines (converted to ``np.intc``).
    choices : sequence
        Indexed per trial; when ``choices[t] == 1`` the action of trial
        ``t`` becomes the remembered previous action.
    subj_idx
        Not used by this function.
    show_progress : bool
        When true, diagnostic figures are drawn with matplotlib.
    apply_rep : {0, 1}
        When 1 a fifth parameter (column 4) is sampled in
        ``(-upp_bound_eta, upp_bound_eta)``.
    apply_weber : {0, 1}
        Selects the upper bound of column 3 (1.0 when 1, 0.5 otherwise) and
        is forwarded to ``smc_c``.
    beta_softmax : float
        When ``> 0`` column 2 is fixed to this value, excluded from the
        proposal, and ``temperature`` is forced to ``False``.
    temperature : bool
        Selects the upper bound of column 2 when it is sampled and is
        forwarded to ``smc_c``.
    observational_noise
        Forwarded unchanged to ``smc_c``.

    Returns
    -------
    list
        ``[samples, noisy_history, acceptance_list, normalisedThetaWeights,
        logThetalkd, marg_loglkd]``.

    Raises
    ------
    AssertionError
        If the ``actions`` / ``apply_rep`` / ``apply_weber`` preconditions
        above are violated, or if the remembered previous action disagrees
        with ``actions``.
    """
    assert (2 not in actions)
    assert (0 in actions)
    assert (1 in actions)
    assert (apply_rep == 0 or apply_rep == 1)
    assert (apply_weber == 0 or apply_weber == 1)

    # Extract parameters from task description
    actions = np.asarray(actions, dtype=np.intc)
    rewards = np.ascontiguousarray(rewards)
    idx_blocks = np.asarray(idx_blocks, dtype=np.intc)
    N_samples = 1000
    n_theta = 1000
    coefficient = .5
    T = actions.shape[0]
    prev_action = -1
    upp_bound_eta = 10.

    n_param = 5 if apply_rep else 4
    upp_bound_eps = 1. if apply_weber == 1 else .5

    # Initial parameter population.  Columns 0 and 1 stay uniform on (0, 1);
    # the order of the random draws is kept so seeded runs are reproducible.
    samples = np.random.rand(n_theta, n_param)
    if beta_softmax > 0:
        # Column 2 is fixed: it is neither sampled nor proposed later on.
        temperature = False
        samples[:, 2] = beta_softmax
        sample_beta = False
        upp_bound_beta = beta_softmax
    else:
        if temperature:
            upp_bound_beta = np.sqrt(6) / (np.pi * 5)
        else:
            upp_bound_beta = 2.
        samples[:, 2] = np.random.rand(n_theta) * upp_bound_beta
        sample_beta = True
    samples[:, 3] = np.random.rand(n_theta) * upp_bound_eps
    if apply_rep:
        samples[:, 4] = (2 * np.random.rand(n_theta) - 1) * upp_bound_eta

    # variable memory
    noisy_descendants = np.zeros([n_theta, N_samples, 2])
    noisy_ancestors = np.zeros([n_theta, N_samples, 2])
    weights_norm = np.zeros([n_theta, N_samples])
    log_weights_a = np.zeros([n_theta])
    ancestorsIndexes = np.ascontiguousarray(np.zeros(n_theta, dtype=np.intc))
    logThetaWeights = np.zeros(n_theta)
    logThetalkd = np.zeros(n_theta)
    log_lkd = np.zeros(n_theta)
    essList = np.zeros(T)
    acceptance_list = []
    marg_loglkd = 0

    # move step variables
    ancestors_indexes_p = np.ascontiguousarray(
        np.zeros(N_samples, dtype=np.intc))
    samples_new = np.zeros([n_theta, n_param])
    weights_new = np.zeros([n_theta, N_samples])
    states_new = np.zeros([n_theta, N_samples, 2])
    logThetalkd_new = np.zeros(n_theta)
    state_candidates = np.zeros([N_samples, 2])
    state_candidates_a = np.zeros([N_samples, 2])
    weights_candidates = np.zeros(N_samples)

    # history of samples
    noisy_history = np.zeros([T, 2])

    if show_progress:
        plt.figure(figsize=(15, 9))
        plt.suptitle("noisy rl", fontsize=14)
        plt.ion()

    for t_idx in range(T):

        # Print progress
        if (t_idx + 1) % 10 == 0:
            sys.stdout.write(' ' + str(t_idx + 1))
            sys.stdout.flush()
            # print() with one argument behaves the same on Python 2 and 3.
            print(' marg_loglkd ' + str(marg_loglkd))

        prev_rew = np.ascontiguousarray(rewards[:, max(0, t_idx - 1)])
        # Snapshot of the weights before this trial's update; needed for the
        # incremental marginal likelihood below.
        log_weights_a[:] = logThetaWeights
        if t_idx > 0 and choices[t_idx - 1]:
            assert (actions[max(0, t_idx - 1)] == prev_action)

        # The arrays passed here are filled in by the compiled routine
        # (their values are read back right after the call).
        smc_c.smc_update_2q_c(log_lkd, logThetalkd, noisy_descendants,
                              noisy_ancestors, weights_norm, logThetaWeights,
                              ancestorsIndexes, samples, idx_blocks, choices,
                              prev_action, actions, prev_rew, t_idx,
                              apply_rep, apply_weber, 2, temperature,
                              observational_noise)

        # save and update
        marg_loglkd += logsumexp(log_weights_a +
                                 log_lkd) - logsumexp(log_weights_a)
        normalisedThetaWeights = uf.to_normalized_weights(logThetaWeights)
        noisy_history[t_idx] = np.sum((normalisedThetaWeights * np.sum(
            np.transpose(weights_norm * noisy_descendants.T), axis=1).T),
                                      axis=1)

        # Degeneracy criterion: ESS = (sum w)^2 / sum w^2, in log space.
        logEss = 2 * uf.log_sum(logThetaWeights) - uf.log_sum(
            2 * logThetaWeights)
        essList[t_idx] = np.exp(logEss)

        # update repetition action
        if choices[t_idx] == 1:
            prev_action = actions[t_idx]

        # Move step
        if (essList[t_idx] < coefficient * n_theta):
            acceptance_proba = 0
            # Fit the Gaussian proposal to the weighted population, leaving
            # out column 2 when it is fixed.
            if sample_beta:
                samples_fit = samples
            else:
                samples_fit = np.delete(samples, 2, axis=1)
            mu_p = np.sum(samples_fit.T * normalisedThetaWeights, axis=1)
            Sigma_p = np.dot((samples_fit - mu_p).T * normalisedThetaWeights,
                             (samples_fit - mu_p))

            ancestorsIndexes[:] = uf.stratified_resampling(
                normalisedThetaWeights)

            for theta_idx in range(n_theta):
                idx_traj = ancestorsIndexes[theta_idx]

                # Current value in the proposal's coordinates; it does not
                # depend on the proposal, so compute it once per particle.
                sample_cand = np.array(samples[idx_traj])
                if not sample_beta:
                    sample_cand = np.delete(sample_cand, 2)

                # Rejection-sample the proposal until it is inside the
                # prior bounds.
                while True:
                    sample_p = multi_norm(mu_p, Sigma_p)
                    sample_p_copy = np.array(sample_p)
                    if (not sample_beta) and apply_rep:
                        # Re-insert the fixed value at column 2.
                        sample_p = np.array([
                            sample_p[0], sample_p[1], beta_softmax,
                            sample_p[2], sample_p[3]
                        ])
                    elif not sample_beta:
                        sample_p = np.array([
                            sample_p[0], sample_p[1], beta_softmax, sample_p[2]
                        ])

                    in_bounds = (0 < sample_p[0] < 1
                                 and 0 < sample_p[1] < 1.
                                 and 0 < sample_p[2] <= upp_bound_beta
                                 and 0 < sample_p[3] < upp_bound_eps)
                    if apply_rep:
                        in_bounds = in_bounds and (
                            -upp_bound_eta < sample_p[4] < upp_bound_eta)
                    if in_bounds:
                        break

                # Launch SMC on the proposed parameters over trials 0..t_idx
                logmarglkd_p = smc_c.smc_2q_c(
                    state_candidates, state_candidates_a, weights_candidates,
                    sample_p, ancestors_indexes_p, idx_blocks, actions,
                    rewards, choices, t_idx + 1, apply_rep, apply_weber, 2,
                    temperature, observational_noise)

                # Independent-proposal MH ratio (log scale, capped at 0).
                logAlpha = np.minimum(
                    0, logmarglkd_p - logThetalkd[idx_traj] +
                    get_logtruncnorm(sample_cand, mu_p, Sigma_p) -
                    get_logtruncnorm(sample_p_copy, mu_p, Sigma_p))

                # accept or reject
                if np.log(np.random.rand()) < logAlpha:
                    acceptance_proba += 1.
                    samples_new[theta_idx] = sample_p
                    weights_new[theta_idx] = weights_candidates
                    states_new[theta_idx] = state_candidates
                    logThetalkd_new[theta_idx] = logmarglkd_p
                else:
                    samples_new[theta_idx] = samples[idx_traj]
                    weights_new[theta_idx] = weights_norm[idx_traj]
                    states_new[theta_idx] = noisy_descendants[idx_traj]
                    logThetalkd_new[theta_idx] = logThetalkd[idx_traj]

            print('\n')
            print('acceptance ratio is ')
            print(acceptance_proba / n_theta)
            print('\n')
            acceptance_list.append(acceptance_proba / n_theta)

            # After resample-move every member has equal weight.
            weights_norm[:] = weights_new
            logThetalkd[:] = logThetalkd_new
            logThetaWeights[:] = np.zeros(n_theta)
            noisy_descendants[:] = states_new
            samples[:] = samples_new
            normalisedThetaWeights = uf.to_normalized_weights(logThetaWeights)

        # NOTE(review): `t_idx % 10` is true on every trial that is NOT a
        # multiple of 10; `t_idx % 10 == 0` may have been intended. Behaviour
        # is left unchanged here -- confirm before altering.
        if show_progress and t_idx % 10:
            # plt.hold() was removed in Matplotlib 3.0.  Clearing each axes
            # before redrawing reproduces the old hold(False)/hold(True)
            # sequence and works on every Matplotlib version.
            plt.subplot(3, 2, 1)
            plt.cla()
            plt.plot(range(t_idx), noisy_history[:t_idx, 0], 'r')
            plt.plot(range(t_idx), noisy_history[:t_idx, 1], 'b')
            plt.xlabel('trials')
            plt.ylabel('Q-value 0 (red), and 1 (blue)')

            plt.subplot(3, 2, 4)
            plt.cla()
            plt.plot(range(t_idx), essList[:t_idx], 'b', linewidth=2)
            plt.plot(plt.gca().get_xlim(), [n_theta / 2, n_theta / 2],
                     'b--',
                     linewidth=2)
            plt.axis([0, t_idx - 1, 0, n_theta])  # For speed
            plt.xlabel('trials')
            plt.ylabel('ess')

            if temperature:
                mean_beta = np.sum(normalisedThetaWeights *
                                   (1. / samples[:, 2]))
                std_beta = np.sqrt(
                    np.sum(normalisedThetaWeights * (1. / samples[:, 2])**2) -
                    mean_beta**2)
                x = np.linspace(0., 200, 5000)
            else:
                mean_beta = np.sum(normalisedThetaWeights *
                                   (10**samples[:, 2]))
                std_beta = np.sqrt(
                    np.sum(normalisedThetaWeights * (10**samples[:, 2])**2) -
                    mean_beta**2)
                x = np.linspace(0., 10**upp_bound_beta, 5000)
            plt.subplot(3, 2, 3)
            plt.cla()
            plt.plot(x, norm.pdf(x, mean_beta, std_beta), 'g')
            plt.plot([mean_beta, mean_beta],
                     plt.gca().get_ylim(),
                     'g',
                     linewidth=2)
            plt.xlabel('beta softmax')
            plt.ylabel('pdf')

            mean_alpha_0 = np.sum(normalisedThetaWeights * samples[:, 0])
            std_alpha_0 = np.sqrt(
                np.sum(normalisedThetaWeights * samples[:, 0]**2) -
                mean_alpha_0**2)
            mean_alpha_1 = np.sum(normalisedThetaWeights * samples[:, 1])
            std_alpha_1 = np.sqrt(
                np.sum(normalisedThetaWeights * samples[:, 1]**2) -
                mean_alpha_1**2)
            plt.subplot(3, 2, 2)
            plt.cla()
            x = np.linspace(0., 1., 5000)
            plt.plot(x, norm.pdf(x, mean_alpha_0, std_alpha_0), 'm')
            plt.plot([mean_alpha_0, mean_alpha_0], plt.gca().get_ylim(), 'm')
            plt.plot(x, norm.pdf(x, mean_alpha_1, std_alpha_1), 'c')
            plt.plot([mean_alpha_1, mean_alpha_1], plt.gca().get_ylim(), 'c')
            plt.xlabel('learning rates')
            plt.ylabel('pdf')

            mean_epsilon = np.sum(normalisedThetaWeights * samples[:, 3])
            std_epsilon = np.sqrt(
                np.sum(normalisedThetaWeights * samples[:, 3]**2) -
                mean_epsilon**2)
            plt.subplot(3, 2, 6)
            plt.cla()
            x = np.linspace(0., upp_bound_eps, 5000)
            if apply_rep == 1:
                mean_rep = np.sum(normalisedThetaWeights * samples[:, 4])
                std_rep = np.sqrt(
                    np.sum(normalisedThetaWeights * samples[:, 4]**2) -
                    mean_rep**2)
                # This grid is also used for the epsilon curve below.
                x = np.linspace(-2., 2., 5000)
                plt.plot(x, norm.pdf(x, mean_rep, std_rep), 'y')
                plt.plot([mean_rep, mean_rep],
                         plt.gca().get_ylim(),
                         'y',
                         linewidth=2)
            plt.plot(x, norm.pdf(x, mean_epsilon, std_epsilon), 'g')
            plt.plot([mean_epsilon, mean_epsilon],
                     plt.gca().get_ylim(),
                     'g',
                     linewidth=2)
            plt.xlabel('epsilon std (green), rep_bias (yellow)')
            plt.ylabel('pdf')
            plt.draw()
            plt.show()
            plt.pause(0.05)

    return [
        samples, noisy_history, acceptance_list, normalisedThetaWeights,
        logThetalkd, marg_loglkd
    ]
示例#4
0
def ibis(actions, rewards, choices, idx_blocks, subj_idx, apply_rep_bias,
         apply_weber_decision_noise, curiosity_bias, show_progress,
         temperature):
    """Sequentially infer the parameters of the noiseless RL model.

    ``nb_samples`` parameter vectors are reweighted trial by trial with the
    likelihood of the observed action.  When the effective sample size falls
    below ``coefficient * nb_samples`` the population is resampled and each
    member is moved with an independent Metropolis-Hastings proposal drawn
    from a Gaussian fitted to the weighted population (rejection-sampled to
    stay inside the prior bounds).

    Parameter columns of ``samples``: 0 and 1 are the two learning rates
    (chosen / unchosen), 2 is the softmax parameter, 3 is the Weber scaling
    ``k`` when ``apply_weber_decision_noise`` is set, and the last column is
    the bias ``eta`` when ``apply_rep_bias`` or ``curiosity_bias`` is set.

    Parameters
    ----------
    actions : array-like of int
        One action per trial; must contain 0 and 1 and must not contain 2.
    rewards : array-like
        Indexed as ``rewards[option, t]``.
    choices : array-like of int
        ``choices[t] == 1`` marks trials whose action enters the likelihood.
    idx_blocks : array-like of int
        A truthy entry resets the Q-values to 0.5 and the previous action
        to -1 at that trial.
    subj_idx
        Not used by this function.
    apply_rep_bias, apply_weber_decision_noise, curiosity_bias : {0, 1}
        Model switches selecting the likelihood and the number of parameters.
    show_progress : bool
        When true, diagnostic figures are drawn every 10 trials.
    temperature : bool
        When true column 2 is read as ``1 / beta``; otherwise as
        ``log10(beta)``.

    Returns
    -------
    list
        ``[samples, mean_Q, esslist, acceptance_l, log_weights, p_loglkd,
        marg_loglkd_l]``.

    Raises
    ------
    AssertionError
        If the preconditions on ``actions`` are violated or an internal
        consistency check fails.
    Exception
        If a per-sample log-likelihood evaluates to NaN.
    """
    assert (2 not in actions)
    assert (0 in actions)
    assert (1 in actions)

    actions = np.asarray(actions, dtype=np.intc)
    rewards = np.ascontiguousarray(rewards)
    choices = np.asarray(choices, dtype=np.intc)
    idx_blocks = np.asarray(idx_blocks, dtype=np.intc)
    nb_samples = 1000
    T = actions.shape[0]
    upp_bound_eta = 10.

    # The bound on column 2 is the same for every model variant.
    if temperature:
        upp_bound_beta = np.sqrt(6) / (np.pi * 5)
    else:
        upp_bound_beta = 2.

    # sample initialisation (the order of the random draws is kept so that
    # seeded runs are reproducible)
    if (apply_rep_bias or curiosity_bias) and apply_weber_decision_noise == 0:
        samples = np.random.rand(nb_samples, 4)
        samples[:, 2] = np.random.rand(nb_samples) * upp_bound_beta
        samples[:, 3] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.)
    elif apply_weber_decision_noise == 0:
        samples = np.random.rand(nb_samples, 3)
        samples[:, 2] = np.random.rand(nb_samples) * upp_bound_beta
    elif apply_weber_decision_noise == 1:
        if apply_rep_bias:
            samples = np.random.rand(nb_samples, 5)
            samples[:, 4] = upp_bound_eta * (
                np.random.rand(nb_samples) * 2. - 1.)
        else:
            samples = np.random.rand(nb_samples, 4)

        upp_bound_k = 10
        samples[:, 2] = np.random.rand(
            nb_samples) * upp_bound_beta  # bound on the beta
        samples[:, 3] = np.random.rand(nb_samples) * upp_bound_k

    Q_samples = np.zeros([nb_samples, 2])
    prev_action = np.zeros(nb_samples) - 1

    # ibis param
    esslist = np.zeros(T)
    log_weights = np.zeros(nb_samples)
    weights_a = np.zeros(nb_samples)
    p_loglkd = np.zeros(nb_samples)
    loglkd = np.zeros(nb_samples)
    marg_loglkd = 0
    coefficient = .5
    marg_loglkd_l = np.zeros(T)
    acceptance_l = []

    # move step param: same shape as the parameter population
    move_samples = np.zeros(samples.shape)
    move_p_loglkd = np.zeros(nb_samples)
    Q_samples_move = np.zeros([nb_samples, 2])
    mean_Q = np.zeros([T, 2])
    prediction_err = np.zeros(nb_samples)
    prediction_err[:] = -np.inf
    prediction_err_move = np.zeros(nb_samples)

    if show_progress:
        plt.figure(figsize=(15, 9))
        plt.suptitle("noiseless rl", fontsize=14)
        plt.ion()

    # loop
    for t_idx in range(T):

        if (t_idx + 1) % 10 == 0:
            sys.stdout.write(' ' + str(t_idx + 1) + ' ')
            # print() with one argument behaves the same on Python 2 and 3.
            print('marg_loglkd ' + str(marg_loglkd))
        if (t_idx + 1) % 100 == 0:
            print('\n')
        # Every sample sees the same action sequence, so the remembered
        # previous action must be identical across samples.
        assert (len(np.unique(prev_action)) == 1)

        # update step
        weights_a[:] = log_weights
        if idx_blocks[t_idx]:
            Q_samples[:] = 0.5
            prev_action[:] = -1

        # loop over samples
        for n_idx in range(nb_samples):
            alpha_c = samples[n_idx, 0]
            alpha_u = samples[n_idx, 1]

            if temperature:
                beta = 1. / samples[n_idx, 2]
            else:
                beta = 10**samples[n_idx, 2]

            if apply_rep_bias or curiosity_bias:
                eta = samples[n_idx, -1]
            if apply_weber_decision_noise:
                k_beta = samples[n_idx, 3]

            # reweighting
            if choices[t_idx] == 1 and prev_action[n_idx] != -1 and (
                    apply_rep_bias == 1
                    or curiosity_bias) and apply_weber_decision_noise == 0:
                if apply_rep_bias:
                    value = 1. / (
                        1. +
                        np.exp(beta *
                               (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) -
                               np.sign(prev_action[n_idx] - .5) * eta))
                    loglkd[n_idx] = np.log((value**actions[t_idx]) *
                                           (1 - value)**((1 - actions[t_idx])))
                    prev_action[n_idx] = actions[t_idx]
                elif curiosity_bias:
                    # Length of the current run of identical actions.  When
                    # no different action exists yet the index lookup on the
                    # empty array raises IndexError.
                    try:
                        count_samples = t_idx - 1 - np.where(
                            actions[:t_idx] != actions[t_idx - 1])[0][-1]
                    except IndexError:
                        count_samples = t_idx
                    assert (count_samples > 0)
                    value = 1. / (1. + np.exp(
                        beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) +
                        np.sign(prev_action[n_idx] - .5) * eta * count_samples)
                                  )
                    loglkd[n_idx] = np.log((value**actions[t_idx]) *
                                           (1 - value)**((1 - actions[t_idx])))
                    prev_action[n_idx] = actions[t_idx]

            elif choices[t_idx] == 1 and apply_weber_decision_noise == 0:
                value = 1. / (
                    1. + np.exp(beta *
                                (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])))
                loglkd[n_idx] = np.log((value**actions[t_idx]) *
                                       (1 - value)**((1 - actions[t_idx])))
                prev_action[n_idx] = actions[t_idx]
            elif (choices[t_idx] == 1 and apply_weber_decision_noise == 1
                  and apply_rep_bias == 0):
                beta_modified = beta / (1. + k_beta * prediction_err[n_idx])
                value = 1. / (
                    1. + np.exp(beta_modified *
                                (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])))
                loglkd[n_idx] = np.log((value**actions[t_idx]) *
                                       (1 - value)**((1 - actions[t_idx])))
                prev_action[n_idx] = actions[t_idx]
            elif (choices[t_idx] == 1 and apply_weber_decision_noise == 1
                  and apply_rep_bias == 1):
                beta_modified = beta / (1. + k_beta * prediction_err[n_idx])
                value = 1. / (
                    1. + np.exp(beta_modified *
                                (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) -
                                np.sign(prev_action[n_idx] - .5) * eta))
                loglkd[n_idx] = np.log((value**actions[t_idx]) *
                                       (1 - value)**((1 - actions[t_idx])))
                prev_action[n_idx] = actions[t_idx]
            else:
                # Trial does not enter the likelihood.
                value = 1.
                loglkd[n_idx] = 0.

            if np.isnan(loglkd[n_idx]):
                print(t_idx)
                print(n_idx)
                print(beta)
                print(value)
                raise Exception(
                    'NaN log-likelihood at trial %s, sample %s' %
                    (t_idx, n_idx))

            p_loglkd[n_idx] = p_loglkd[n_idx] + loglkd[n_idx]

            log_weights[n_idx] = log_weights[n_idx] + loglkd[n_idx]

            # update step: chosen option uses alpha_c, the other alpha_u
            if actions[t_idx] == 0:
                prediction_err[n_idx] = np.abs(Q_samples[n_idx, 0] -
                                               rewards[0, t_idx])

                Q_samples[n_idx, 0] = (1 - alpha_c) * Q_samples[
                    n_idx, 0] + alpha_c * rewards[0, t_idx]
                if not curiosity_bias:
                    Q_samples[n_idx, 1] = (1 - alpha_u) * Q_samples[
                        n_idx, 1] + alpha_u * rewards[1, t_idx]
            else:
                prediction_err[n_idx] = np.abs(Q_samples[n_idx, 1] -
                                               rewards[1, t_idx])
                if not curiosity_bias:
                    Q_samples[n_idx, 0] = (1 - alpha_u) * Q_samples[
                        n_idx, 0] + alpha_u * rewards[0, t_idx]
                Q_samples[n_idx, 1] = (1 - alpha_c) * Q_samples[
                    n_idx, 1] + alpha_c * rewards[1, t_idx]

        # weights_a still holds the log-weights from before this trial.
        marg_loglkd += logsumexp(weights_a + loglkd) - logsumexp(weights_a)
        marg_loglkd_l[t_idx] = marg_loglkd
        ess = np.exp(2 * logsumexp(log_weights) - logsumexp(2 * log_weights))
        esslist[t_idx] = ess

        weights_a[:] = uf.to_normalized_weights(log_weights)
        mean_Q[t_idx] = np.sum((Q_samples.T * weights_a).T, axis=0)

        # move step
        if ess < coefficient * nb_samples:
            idxTrajectories = uf.stratified_resampling(weights_a)
            mu_p = np.sum(samples.T * weights_a, axis=1)
            Sigma_p = np.dot((samples - mu_p).T * weights_a, (samples - mu_p))
            nb_acceptance = 0.

            for n_idx in range(nb_samples):
                idx_traj = idxTrajectories[n_idx]
                # Rejection-sample the proposal until it is inside the prior
                # bounds of the active model variant.
                # NOTE(review): with curiosity_bias set and apply_rep_bias
                # unset, the eta column is not bounds-checked here -- confirm
                # whether that is intended.
                while True:
                    sample_p = multi_norm(mu_p, Sigma_p)
                    if not apply_rep_bias and not apply_weber_decision_noise:
                        if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[
                                1] > 0 and sample_p[1] < 1 and sample_p[
                                    2] > 0 and sample_p[2] <= upp_bound_beta:
                            break
                    elif not apply_rep_bias and apply_weber_decision_noise:
                        if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1 \
                                and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta and sample_p[3] > 0 and sample_p[3] <= upp_bound_k:
                            break
                    elif apply_rep_bias and not apply_weber_decision_noise:
                        if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1 \
                                and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta and sample_p[3] > -upp_bound_eta and sample_p[3] < upp_bound_eta:
                            break
                    else:
                        if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1 \
                                and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta and sample_p[3] > 0 and sample_p[3] < upp_bound_k \
                                and sample_p[-1] > -upp_bound_eta and sample_p[-1] < upp_bound_eta:
                            break

                # Likelihood of trials 0..t_idx under the proposed parameters.
                [loglkd_prop, Q_prop, prev_action_prop, prediction_err_prop
                 ] = get_loglikelihood(sample_p, rewards, actions, choices,
                                       idx_blocks, t_idx + 1, apply_rep_bias,
                                       apply_weber_decision_noise,
                                       curiosity_bias, temperature)

                # Independent-proposal MH ratio (log scale, capped at 0).
                log_ratio = loglkd_prop - p_loglkd[idx_traj] \
                    + get_logtruncnorm(samples[idx_traj], mu_p, Sigma_p) \
                    - get_logtruncnorm(sample_p, mu_p, Sigma_p)

                log_ratio = np.minimum(log_ratio, 0)
                if (np.log(np.random.rand()) < log_ratio):
                    nb_acceptance += 1.
                    move_samples[n_idx] = sample_p
                    move_p_loglkd[n_idx] = loglkd_prop
                    Q_samples_move[n_idx] = Q_prop
                    prediction_err_move[n_idx] = prediction_err_prop
                else:
                    move_samples[n_idx] = samples[idx_traj]
                    move_p_loglkd[n_idx] = p_loglkd[idx_traj]
                    Q_samples_move[n_idx] = Q_samples[idx_traj]
                    prediction_err_move[n_idx] = prediction_err[idx_traj]

            print('acceptance ratio %s' % str(nb_acceptance / nb_samples))
            assert (prev_action_prop == prev_action[0])

            acceptance_l.append(nb_acceptance / nb_samples)
            # move samples; after resample-move all weights are equal
            samples[:] = move_samples
            p_loglkd[:] = move_p_loglkd

            log_weights[:] = 0.
            Q_samples[:] = Q_samples_move
            prediction_err[:] = prediction_err_move

        if show_progress and t_idx % 10 == 0:
            weights_a[:] = uf.to_normalized_weights(log_weights)

            # plt.hold() was removed in Matplotlib 3.0.  Clearing each axes
            # before redrawing reproduces the old hold(False)/hold(True)
            # sequence and works on every Matplotlib version.
            plt.subplot(3, 2, 1)
            plt.cla()
            plt.plot(range(t_idx), mean_Q[:t_idx], 'm', linewidth=2)
            plt.xlabel('trials')
            plt.ylabel('Q values')

            if apply_rep_bias == 1:
                # eta is stored in the LAST column; column 3 holds k when
                # Weber noise is enabled as well.
                mean_rep = np.sum(weights_a * samples[:, -1])
                std_rep = np.sqrt(
                    np.sum(weights_a * samples[:, -1]**2) - mean_rep**2)
                plt.subplot(3, 2, 2)
                plt.cla()
                x = np.linspace(-2., 2., 5000)
                plt.plot(x, norm.pdf(x, mean_rep, std_rep), 'g')
                plt.plot([mean_rep, mean_rep],
                         plt.gca().get_ylim(),
                         'g',
                         linewidth=2)
                plt.xlabel('trials')
                plt.ylabel('rep param')

            if temperature:
                mean_beta = np.sum(weights_a * 1. / samples[:, 2])
                std_beta = np.sqrt(
                    np.sum(weights_a * ((1. / samples[:, 2])**2)) -
                    mean_beta**2)
            else:
                mean_beta = np.sum(weights_a * 10**samples[:, 2])
                std_beta = np.sqrt(
                    np.sum(weights_a * ((10**samples[:, 2])**2)) -
                    mean_beta**2)

            plt.subplot(3, 2, 3)
            plt.cla()
            x = np.linspace(0.01, 200., 5000)
            plt.plot(x, norm.pdf(x, mean_beta, std_beta), 'g', linewidth=2)
            plt.plot([mean_beta, mean_beta],
                     plt.gca().get_ylim(),
                     'g',
                     linewidth=2)
            plt.xlabel('beta softmax')
            plt.ylabel('pdf')

            mean_alpha_0 = np.sum(weights_a * samples[:, 0])
            std_alpha_0 = np.sqrt(
                np.sum(weights_a * (samples[:, 0]**2)) - mean_alpha_0**2)
            mean_alpha_1 = np.sum(weights_a * samples[:, 1])
            std_alpha_1 = np.sqrt(
                np.sum(weights_a * (samples[:, 1]**2)) - mean_alpha_1**2)
            plt.subplot(3, 2, 4)
            plt.cla()
            x = np.linspace(0., 1., 5000)
            plt.plot(x,
                     norm.pdf(x, mean_alpha_0, std_alpha_0),
                     'm',
                     linewidth=2)
            plt.plot([mean_alpha_0, mean_alpha_0],
                     plt.gca().get_ylim(),
                     'm',
                     linewidth=2)
            plt.plot(x,
                     norm.pdf(x, mean_alpha_1, std_alpha_1),
                     'c',
                     linewidth=2)
            plt.plot([mean_alpha_1, mean_alpha_1],
                     plt.gca().get_ylim(),
                     'c',
                     linewidth=2)
            plt.xlabel('learning rate chosen (majenta) an unchosen (cyan)')
            plt.ylabel('pdf')

            plt.subplot(3, 2, 5)
            plt.cla()
            plt.plot(range(t_idx), esslist[:t_idx], 'b', linewidth=2)
            plt.plot(plt.gca().get_xlim(), [nb_samples / 2, nb_samples / 2],
                     'b--',
                     linewidth=2)
            plt.axis([0, t_idx - 1, 0, nb_samples])
            plt.xlabel('trials')
            plt.ylabel('ess')

            # The k panel only exists for the Weber model; without this
            # guard mean_k / std_k would be undefined (NameError).
            if apply_weber_decision_noise:
                mean_k = np.sum(weights_a * samples[:, 3])
                std_k = np.sqrt(
                    np.sum(weights_a * (samples[:, 3]**2)) - mean_k**2)
                plt.subplot(3, 2, 6)
                plt.cla()
                x = np.linspace(0.01, 10., 5000)
                plt.plot(x, norm.pdf(x, mean_k, std_k), 'k', linewidth=2)
                plt.plot([mean_k, mean_k],
                         plt.gca().get_ylim(),
                         'k',
                         linewidth=2)
                plt.xlabel('scaling parameter for softmax 1/[0 1]')
                plt.ylabel('pdf')

            plt.draw()
            plt.show()
            plt.pause(0.05)

    return [
        samples, mean_Q, esslist, acceptance_l, log_weights, p_loglkd,
        marg_loglkd_l
    ]
示例#5
0
# Evaluation grid: 100 points from 0 to 2 (exclusive) in steps of 0.02.
# np.arange already returns an ndarray, so no list round-trip is needed.
px = np.arange(0, 2, 0.02)
for j in range(0, m):
    # NOTE(review): under Python 2 with an integer m this is floor division;
    # confirm that a fractional centre 2*j/m is what is intended.
    mj = 2 * j / m
    # Column j holds the grid shifted by mj and scaled by s.
    pphi[:, j] = ((px - mj) / s).reshape(100)

# Pass both design matrices through the logistic sigmoid.
phi = 1 / (1 + np.exp(-phi))
pphi = 1 / (1 + np.exp(-pphi))

alpha = 10**(-6)
# NOTE(review): the hard-coded 3 must equal the number of columns of phi for
# the sum below to be well-formed -- presumably m == 3; confirm.
s0_inv = alpha * np.identity(3)
beta = 1

# These two updates have the form of the Gaussian posterior of a linear model
# with a zero-mean prior of precision s0_inv and noise precision beta.
sn_inv = s0_inv + beta * phi.T.dot(phi)
sn = inv(sn_inv)
mn = sn.dot(beta * phi.T.dot(t[0:N]))
# Five weight vectors drawn from N(mn, sn).
w = multi_norm(np.squeeze(mn), sn, 5)
plt.figure(figsize=(10, 8))
for i in range(5):
    # Curve produced by the i-th sampled weight vector on the grid.
    pt = pphi.dot(w[i, :])
    plt.plot(px, pt, 'r')
plt.plot(x[0:N], t[0:N], 'ok', markerfacecolor='none')
# Curve produced by the mean weights.
pt_m = pphi.dot(mn)
plt.plot(px, pt_m, color='r')
plt.ylim(-1, 4)

sigma2 = 1 / beta + pphi.dot(sn).dot(pphi.T)
# Only the diagonal (per-grid-point variance) is needed.  Take it first and
# then the square root: this gives the same values as sqrt-then-diag but
# avoids evaluating sqrt on the discarded off-diagonal entries, which may be
# negative and would trigger NaN warnings.
sigma = np.sqrt(np.diag(sigma2))

plt.figure(figsize=(10, 8))
plt.plot(px, pt_m, 'r')
示例#6
0
def ibis(actions, rewards, tau, subj_idx, apply_rep_bias, show_progress = True, temperature = True, model_id = 0):

	'''
	Sequential particle inference for a two-action Q-learning model
	(the in-code comments call the scheme "ibis"; presumably iterated batch
	importance sampling -- confirm).

	Each of the 1000 particles is one parameter vector laid out as
	[alpha_0 .. alpha_{n_alpha-1}, beta_0 .. beta_{n_beta-1}, (eta)].
	Trial by trial, every particle is re-weighted by the likelihood of the
	observed action under a logistic choice rule on Q[0] - Q[1]. When the
	effective sample size drops below half the particle count, the particles
	are resampled and moved with an independent Gaussian proposal fitted to
	the weighted particle cloud.

	model_id = 0 : 1 alpha, 1 beta
	model_id = 1 : n alpha, 1 beta
	model_id = 2 : n alpha, n beta
	(n is hard-coded to 6. NOTE(review): presumably tau is expected to take
	at most 6 distinct values -- confirm with the caller.)

	Parameters
	----------
	actions : sequence of 0/1 choices, one per trial (cast to C int).
	rewards : per-trial reward of the chosen action; the unchosen action is
	          updated with 1 - reward.
	tau : per-trial label; its rank among the unique labels selects which
	      alpha (and, for model_id 2, which beta) column applies on a trial.
	subj_idx : not used by this function.
	apply_rep_bias : if truthy, a repetition-bias parameter eta, drawn
	      uniformly in (-10, 10), is appended as the last column and enters
	      the choice rule signed by the previous action.
	show_progress : if truthy, redraw diagnostic plots every 10 trials.
	temperature : if truthy the beta columns store 1/beta in (0, 0.6];
	      otherwise they store log10(beta) in (0, 2].
	model_id : see above.

	Returns
	-------
	list : [samples, Q_samples, mean_Q, esslist, acceptance_l, log_weights,
	        p_loglkd, marg_loglkd_l]

	Raises
	------
	Exception : if a particle's log-likelihood evaluates to NaN.
	'''

	actions       = np.asarray(actions, dtype=np.intc)
	rewards       = np.ascontiguousarray(rewards)
	nb_samples    = 1000
	T             = actions.shape[0]
	upp_bound_eta = 10.

	# sample initialisation: column index of the alpha / beta used on each trial
	if model_id in (1, 2):
		n_alpha    = 6
		tau_unique = np.unique(tau)
		x_coor_a   = np.array([np.where(tau_unique == t)[0][0] for t in tau])
		if model_id == 2:
			n_beta   = 6
			x_coor_b = x_coor_a + n_alpha
		else:
			n_beta   = 1
			x_coor_b = np.zeros(len(tau), dtype=np.int8) + n_alpha
	else:
		n_alpha    = 1
		n_beta     = 1
		x_coor_a   = np.zeros(len(tau), dtype=np.int8)
		x_coor_b   = np.zeros(len(tau), dtype=np.int8) + n_alpha

	n_theta = n_alpha + n_beta

	if apply_rep_bias:
		n_theta += 1

	# alphas start uniform in (0, 1); betas and eta are rescaled below
	samples = np.random.rand(nb_samples, n_theta)
	if temperature:
		upp_bound_beta = .6
	else:
		upp_bound_beta = 2.

	samples[:, n_alpha:(n_beta + n_alpha)] = np.random.rand(nb_samples, n_beta) * upp_bound_beta

	if apply_rep_bias:
		samples[:, -1] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.)

	Q_samples   = np.zeros([nb_samples, 2]) + .5
	prev_action = np.zeros(nb_samples) - 1   # -1 marks "no previous action yet"

	# ibis param
	esslist       = np.zeros(T)
	log_weights   = np.zeros(nb_samples)
	weights_a     = np.zeros(nb_samples)
	p_loglkd      = np.zeros(nb_samples)
	loglkd        = np.zeros(nb_samples)
	marg_loglkd   = 0
	coefficient   = .5
	marg_loglkd_l = np.zeros(T)
	acceptance_l  = []

	# move step param
	move_samples     = np.zeros([nb_samples, n_theta])
	move_p_loglkd    = np.zeros(nb_samples)
	Q_samples_move   = np.zeros([nb_samples, 2])
	prev_action_move = np.zeros(nb_samples)
	mean_Q           = np.zeros([T, 2])

	if show_progress:
		plt.figure(figsize=(15,9))
		plt.suptitle("noiseless rl", fontsize=14)
		plt.ion()

	# loop
	for t_idx in range(T):

		if (t_idx+1) % 10 == 0:
			sys.stdout.write(' ' + str(t_idx+1) + ' ')
			print('marg_loglkd ' + str(marg_loglkd))
		if (t_idx+1) % 100 == 0:
			print('\n')

		# all particles share the same observed action history
		assert(len(np.unique(prev_action)) == 1)

		# update step: keep the pre-update log-weights for the marginal likelihood
		weights_a[:] = log_weights

		for n_idx in range(nb_samples):
			alpha = samples[n_idx, x_coor_a[t_idx]]
			if temperature:
				beta = 1./samples[n_idx, x_coor_b[t_idx]]
			else:
				beta = 10**samples[n_idx, x_coor_b[t_idx]]
			if apply_rep_bias:
				eta = samples[n_idx, -1]

			# value is the model probability of choosing action 1
			if prev_action[n_idx] != -1 and apply_rep_bias:
				value = 1./(1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) - np.sign(prev_action[n_idx] - .5) * eta))
			else:
				value = 1./(1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])))
			loglkd[n_idx]      = np.log(((value)**actions[t_idx]) * (1 - value)**((1 - actions[t_idx])))
			prev_action[n_idx] = actions[t_idx]

			if np.isnan(loglkd[n_idx]):
				print(t_idx)
				print(n_idx)
				print(beta)
				print(value)
				raise Exception('NaN log-likelihood at trial %d, particle %d' % (t_idx, n_idx))

			p_loglkd[n_idx]    = p_loglkd[n_idx] + loglkd[n_idx]
			log_weights[n_idx] = log_weights[n_idx] + loglkd[n_idx]

			# chosen action learns from the reward, the other one from 1 - reward
			if actions[t_idx] == 0:
				Q_samples[n_idx, 0] = (1 - alpha) * Q_samples[n_idx, 0] + alpha * rewards[t_idx]
				Q_samples[n_idx, 1] = (1 - alpha) * Q_samples[n_idx, 1] + alpha * (1 - rewards[t_idx])
			else:
				Q_samples[n_idx, 0] = (1 - alpha) * Q_samples[n_idx, 0] + alpha * (1 - rewards[t_idx])
				Q_samples[n_idx, 1] = (1 - alpha) * Q_samples[n_idx, 1] + alpha * rewards[t_idx]

		marg_loglkd         += logsumexp(weights_a + loglkd) - logsumexp(weights_a)
		marg_loglkd_l[t_idx] = marg_loglkd
		ess                  = np.exp(2 * logsumexp(log_weights) - logsumexp(2 * log_weights))
		esslist[t_idx]       = ess

		weights_a[:]         = uf.to_normalized_weights(log_weights)
		mean_Q[t_idx]        = np.sum((Q_samples.T * weights_a).T, axis=0)

		# move step
		if ess < coefficient * nb_samples:
			idxTrajectories = uf.stratified_resampling(weights_a)
			mu_p            = np.sum(samples.T * weights_a, axis=1)
			Sigma_p         = np.dot((samples - mu_p).T * weights_a, (samples - mu_p))
			nb_acceptance   = 0.

			for n_idx in range(nb_samples):
				idx_traj = idxTrajectories[n_idx]

				# redraw until the proposal lies inside the parameter bounds
				while True:
					sample_p = multi_norm(mu_p, Sigma_p)
					alphas_p = sample_p[:n_alpha]
					betas_p  = sample_p[n_alpha:(n_beta + n_alpha)]
					in_bounds = np.all(alphas_p > 0) and np.all(alphas_p < 1) and np.all(betas_p > 0) and np.all(betas_p <= upp_bound_beta)
					if apply_rep_bias:
						in_bounds = in_bounds and sample_p[-1] > -upp_bound_eta and sample_p[-1] < upp_bound_eta
					if in_bounds:
						break

				# the proposal is re-scored on all trials seen so far
				[loglkd_prop, Q_prop, prev_action_prop] = get_loglikelihood(sample_p, x_coor_a, x_coor_b, rewards, actions, t_idx + 1, apply_rep_bias, temperature)
				log_ratio = (loglkd_prop - p_loglkd[idx_traj]
					+ get_logtruncnorm(samples[idx_traj], mu_p, Sigma_p)
					- get_logtruncnorm(sample_p, mu_p, Sigma_p))

				log_ratio = np.minimum(log_ratio, 0)
				if (np.log(np.random.rand()) < log_ratio):
					nb_acceptance        += 1.
					move_samples[n_idx]   = sample_p
					move_p_loglkd[n_idx]  = loglkd_prop
					Q_samples_move[n_idx] = Q_prop
				else:
					move_samples[n_idx]   = samples[idx_traj]
					move_p_loglkd[n_idx]  = p_loglkd[idx_traj]
					Q_samples_move[n_idx] = Q_samples[idx_traj]

			print('acceptance ratio %s'%str(nb_acceptance/nb_samples))
			assert(prev_action_prop == prev_action[0])

			acceptance_l.append(nb_acceptance/nb_samples)
			# move samples; weights are reset to uniform after resampling
			samples[:]     = move_samples
			p_loglkd[:]    = move_p_loglkd
			log_weights[:] = 0.
			Q_samples[:]   = Q_samples_move

		if show_progress and t_idx%10==0 :
			weights_a[:] = uf.to_normalized_weights(log_weights)

			# Each panel is cleared explicitly with cla(); pyplot.hold() was
			# removed in matplotlib 3.0, and cla() works on old and new versions.
			plt.subplot(3,2,1)
			plt.cla()
			plt.plot(range(t_idx), mean_Q[:t_idx], 'm', linewidth=2)
			plt.xlabel('trials')
			plt.ylabel('Q values')

			if apply_rep_bias == 1:
				# eta is always the last column, whatever model_id is
				mean_rep = np.sum(weights_a * samples[:, -1])
				std_rep  = np.sqrt(np.sum(weights_a * samples[:, -1]**2) - mean_rep**2)
				plt.subplot(3,2,2)
				plt.cla()
				x = np.linspace(-2.,2.,5000)
				plt.plot(x, norm.pdf(x, mean_rep, std_rep), 'g')
				plt.plot([mean_rep, mean_rep], plt.gca().get_ylim(),'g', linewidth=2)
				plt.xlabel('trials')
				plt.ylabel('rep param')

			# first beta column sits right after the n_alpha alpha columns
			if temperature:
				mean_beta = np.sum(weights_a * 1./samples[:, n_alpha])
				std_beta  = np.sqrt(np.sum(weights_a * ((1./samples[:, n_alpha])**2)) - mean_beta**2)
			else:
				mean_beta = np.sum(weights_a * 10**samples[:, n_alpha])
				std_beta  = np.sqrt(np.sum(weights_a * ((10**samples[:, n_alpha])**2)) - mean_beta**2)
			plt.subplot(3,2,3)
			plt.cla()
			x = np.linspace(0.01,200.,5000)
			plt.plot(x, norm.pdf(x, mean_beta, std_beta), 'g', linewidth=2)
			plt.plot([mean_beta, mean_beta], plt.gca().get_ylim(), 'g', linewidth=2)
			plt.xlabel('beta softmax')
			plt.ylabel('pdf')

			mean_alpha_0 = np.sum(weights_a * samples[:, 0])
			std_alpha_0  = np.sqrt(np.sum(weights_a * (samples[:, 0]**2)) - mean_alpha_0**2)
			plt.subplot(3,2,4)
			plt.cla()
			x = np.linspace(0.,1.,5000)
			plt.plot(x, norm.pdf(x, mean_alpha_0, std_alpha_0), 'm', linewidth=2)
			plt.plot([mean_alpha_0, mean_alpha_0], plt.gca().get_ylim(), 'm', linewidth=2)
			plt.xlabel('learning rate (majenta)')
			plt.ylabel('pdf')

			plt.subplot(3,2,5)
			plt.cla()
			plt.plot(range(t_idx), esslist[:t_idx], 'b', linewidth=2)
			plt.plot(plt.gca().get_xlim(), [nb_samples/2,  nb_samples/2],'b--', linewidth=2)
			plt.axis([0, t_idx-1, 0, nb_samples]) # For speed
			plt.xlabel('trials')
			plt.ylabel('ess')

			plt.draw()
			plt.show()
			plt.pause(0.05)

	return [samples, Q_samples, mean_Q, esslist, acceptance_l, log_weights, p_loglkd, marg_loglkd_l]
def ibis(actions, rewards, tau, subj_idx, apply_rep_bias, show_progress = True, temperature = True, n_alpha_model=False):

	'''
	Single-chain Metropolis-style random-walk sampler over the parameters
	[alpha_0 .. alpha_{n_alpha-1}, beta, (eta)], scored on all T trials with
	get_loglikelihood.

	Proposals are drawn from a Gaussian centred on the current state with a
	fixed diagonal covariance (1e-2, and 1e-3 for the last parameter) and are
	redrawn until they fall inside the parameter bounds. A proposal is
	accepted with probability min(1, exp(loglkd_prop - loglkd_current)).

	NOTE: this definition shares its name with another `ibis` in the file and
	therefore replaces it at import time.

	Parameters
	----------
	actions : sequence of per-trial choices (cast to C int).
	rewards : per-trial rewards, passed through to get_loglikelihood.
	tau : per-trial label; when n_alpha_model is truthy its rank among the
	      unique labels selects the alpha used on that trial.
	subj_idx, show_progress : not used by this function.
	apply_rep_bias : if truthy, a repetition-bias parameter eta bounded to
	      (-10, 10) is appended after beta.
	temperature : selects the upper bound of the beta parameter
	      (sqrt(6)/(5*pi) if truthy, else 2) and is forwarded to
	      get_loglikelihood.
	n_alpha_model : if truthy use 6 alphas indexed by tau, otherwise one.

	Returns
	-------
	list : [samples, all_samples, lkd, acceptance_ratio]
	    samples          -- final state of the chain (1-D parameter vector)
	    all_samples      -- (1000, n_param) array, chain state after each step
	    lkd              -- log-likelihood of the final state
	    acceptance_ratio -- fraction of proposals accepted

	NOTE(review): the previous return statement listed names (Q_samples,
	mean_Q, esslist, acceptance_l, log_weights, p_loglkd, marg_loglkd_l)
	that are never defined in this function, so every call ended in a
	NameError. The list above returns what this sampler actually computes;
	confirm the layout with any caller.
	'''

	actions       = np.asarray(actions, dtype=np.intc)
	rewards       = np.ascontiguousarray(rewards)
	nb_samples    = 1000
	T             = actions.shape[0]
	upp_bound_eta = 10.

	# index of the alpha used on each trial
	if n_alpha_model:
		n_alpha    = 6
		tau_unique = np.unique(tau)
		x_coor     = np.array([np.where(tau_unique == t)[0][0] for t in tau])
	else:
		n_alpha = 1
		x_coor  = np.zeros(len(tau), dtype=np.int8)

	if temperature:
		upp_bound_beta = np.sqrt(6)/(np.pi * 5)
	else:
		upp_bound_beta = 2.

	# starting point of the chain; beta always starts at the middle of its range
	if apply_rep_bias:
		samples              = np.random.rand(n_alpha + 2)
		samples[n_alpha]     = upp_bound_beta/2.
		samples[n_alpha + 1] = upp_bound_eta * (np.random.rand() * 2. - 1.)
	else:
		samples     = np.zeros(n_alpha + 1) + .5
		samples[-1] = upp_bound_beta/2.

	all_samples = np.zeros([nb_samples, len(samples)])
	lkd         = get_loglikelihood(samples, x_coor, rewards, actions, T, apply_rep_bias, temperature)[0]

	# proposal covariance is the same at every step, so build it once
	Sigma_p         = 1e-2 * np.eye(len(samples))
	Sigma_p[-1][-1] = 1e-3
	nb_acceptance   = 0.

	# loop
	for n_idx in range(nb_samples):
		# redraw until the proposal lies inside the parameter bounds
		while True:
			sample_p  = multi_norm(samples, Sigma_p)
			in_bounds = np.all(sample_p[:n_alpha] > 0) and np.all(sample_p[:n_alpha] < 1) and sample_p[n_alpha] > 0 and sample_p[n_alpha] <= upp_bound_beta
			if apply_rep_bias:
				in_bounds = in_bounds and sample_p[n_alpha + 1] > -upp_bound_eta and sample_p[n_alpha + 1] < upp_bound_eta
			if in_bounds:
				break

		loglkd_prop = get_loglikelihood(sample_p, x_coor, rewards, actions, T, apply_rep_bias, temperature)[0]
		log_ratio   = np.minimum(loglkd_prop - lkd, 0)

		if (np.log(np.random.rand()) < log_ratio):
			nb_acceptance += 1.
			lkd            = loglkd_prop
			samples        = sample_p

		# record the (possibly unchanged) current state
		all_samples[n_idx] = samples

	acceptance_ratio = nb_acceptance/nb_samples
	print('acception ratio is {0}'.format(acceptance_ratio))

	return [samples, all_samples, lkd, acceptance_ratio]