# Fragment: build a symmetric covariance matrix over agents, force it to be
# positive semi-definite, then read variance/covariance entries back by agent
# type. `agents`, `num_agents`, `V` (the common mean), `type_populations` and
# the initial *_covs dictionaries come from the surrounding context; the
# enclosing loop over index pairs is implied by the fragment.
for i in range(num_agents):
    for j in range(i, num_agents):  # upper triangle; mirrored into [j][i]
        agent1 = agents[i]
        agent2 = agents[j]
        if i == j:
            cov_mat[i][j] = variances[agents[i]]
        elif agent1 == agent2:
            cov_mat[i][j] = intra_covs[agent1 + agent2]
            cov_mat[j][i] = intra_covs[agent1 + agent2]
        else:
            cov_mat[i][j] = inter_covs[''.join(sorted(agent1 + agent2))]
            cov_mat[j][i] = inter_covs[''.join(sorted(agent1 + agent2))]

cov_mat = np.asarray(cov_mat)
cov_mat = cov_mat.dot(cov_mat.T)  # A.A^T guarantees positive semi-definiteness
print(cov_mat)

means = [V] * num_agents
draw = multi_norm(means, cov_mat)

# read the (now PSD) entries back into the dictionaries
cov_mat = cov_mat.tolist()
variances = {"A": cov_mat[0][0], "B": cov_mat[-1][-1]}
intra_covs = {"AA": cov_mat[0][1], "BB": cov_mat[-1][-2]}
inter_covs = {"AB": cov_mat[type_populations["A"] - 1][type_populations["A"] + 1]}
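# A runnable sketch of the construction above for two agent types, "A" and
# "B". The dictionary keys and the A.A^T trick follow the fragment; the
# concrete numbers and the three-agent population are illustrative only.
import numpy as np
from numpy.random import multivariate_normal as multi_norm

agents = ["A", "A", "B"]
num_agents = len(agents)
V = 0.0                              # common mean
variances = {"A": 1.0, "B": 0.8}
intra_covs = {"AA": 0.5, "BB": 0.3}  # same-type covariance
inter_covs = {"AB": 0.1}             # cross-type covariance

cov_mat = [[0.0] * num_agents for _ in range(num_agents)]
for i in range(num_agents):
    for j in range(num_agents):
        if i == j:
            cov_mat[i][j] = variances[agents[i]]
        elif agents[i] == agents[j]:
            cov_mat[i][j] = intra_covs[agents[i] + agents[j]]
        else:
            cov_mat[i][j] = inter_covs[''.join(sorted(agents[i] + agents[j]))]

cov_mat = np.asarray(cov_mat)
cov_mat = cov_mat.dot(cov_mat.T)     # force positive semi-definiteness
draw = multi_norm([V] * num_agents, cov_mat)
print(draw)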
from numpy.random import multivariate_normal as multi_norm

# Draw one sample from a 3-dimensional Gaussian.
cov = [[0.9, 0.7, 0.4],
       [0.7, 0.8, 0.3],
       [0.4, 0.3, 0.7]]
means = [0, 0, 0]
print(multi_norm(means, cov))
# Assumed imports for this file: numpy as np, sys, matplotlib.pyplot as plt,
# scipy.special.logsumexp, scipy.stats.norm,
# numpy.random.multivariate_normal as multi_norm, plus the project modules
# `smc_c` (compiled SMC kernels) and `uf` (weight/resampling helpers).
def smc2(actions, rewards, idx_blocks, choices, subj_idx, show_progress,
         apply_rep, apply_weber, beta_softmax, temperature,
         observational_noise):
    assert 2 not in actions
    assert 0 in actions
    assert 1 in actions
    assert apply_rep == 0 or apply_rep == 1
    assert apply_weber == 0 or apply_weber == 1

    # Extract parameters from task description
    actions = np.asarray(actions, dtype=np.intc)
    rewards = np.ascontiguousarray(rewards)
    idx_blocks = np.asarray(idx_blocks, dtype=np.intc)
    N_samples = 1000
    n_theta = 1000
    coefficient = .5
    T = actions.shape[0]
    prev_action = -1
    upp_bound_eta = 10.

    if apply_rep:
        n_param = 5
    else:
        n_param = 4
    if apply_weber == 1:
        upp_bound_eps = 1.
    else:
        upp_bound_eps = .5

    # samples
    samples = np.random.rand(n_theta, n_param)
    if beta_softmax > 0:
        temperature = False
        samples[:, 2] = beta_softmax
        sample_beta = False
        upp_bound_beta = beta_softmax
    else:
        if temperature:
            upp_bound_beta = np.sqrt(6) / (np.pi * 5)
        else:
            upp_bound_beta = 2.
        samples[:, 2] = np.random.rand(n_theta) * upp_bound_beta
        sample_beta = True
    samples[:, 3] = np.random.rand(n_theta) * upp_bound_eps
    if apply_rep:
        samples[:, 4] = (2 * np.random.rand(n_theta) - 1) * upp_bound_eta

    # variable memory
    noisy_descendants = np.zeros([n_theta, N_samples, 2])
    noisy_ancestors = np.zeros([n_theta, N_samples, 2])
    weights_norm = np.zeros([n_theta, N_samples])
    log_weights_a = np.zeros([n_theta])
    ancestorsIndexes = np.ascontiguousarray(np.zeros(n_theta, dtype=np.intc))
    logThetaWeights = np.zeros(n_theta)
    logThetalkd = np.zeros(n_theta)
    log_lkd = np.zeros(n_theta)
    essList = np.zeros(T)
    acceptance_list = []
    marg_loglkd = 0

    # move step variables
    ancestors_indexes_p = np.ascontiguousarray(np.zeros(N_samples, dtype=np.intc))
    samples_new = np.zeros([n_theta, n_param])
    weights_new = np.zeros([n_theta, N_samples])
    states_new = np.zeros([n_theta, N_samples, 2])
    logThetalkd_new = np.zeros(n_theta)
    state_candidates = np.zeros([N_samples, 2])
    state_candidates_a = np.zeros([N_samples, 2])
    weights_candidates = np.zeros(N_samples)

    # history of samples
    noisy_history = np.zeros([T, 2])

    if show_progress:
        plt.figure(figsize=(15, 9))
        plt.suptitle("noisy rl", fontsize=14)
        plt.ion()

    for t_idx in range(T):
        # Print progress
        if (t_idx + 1) % 10 == 0:
            sys.stdout.write(' ' + str(t_idx + 1))
            sys.stdout.flush()
            print(' marg_loglkd ' + str(marg_loglkd))

        prev_rew = np.ascontiguousarray(rewards[:, max(0, t_idx - 1)])
        log_weights_a[:] = logThetaWeights
        if t_idx > 0 and choices[t_idx - 1]:
            assert actions[max(0, t_idx - 1)] == prev_action

        smc_c.smc_update_2q_c(log_lkd, logThetalkd, noisy_descendants,
                              noisy_ancestors, weights_norm, logThetaWeights,
                              ancestorsIndexes, samples, idx_blocks, choices,
                              prev_action, actions, prev_rew, t_idx,
                              apply_rep, apply_weber, 2, temperature,
                              observational_noise)

        # save and update
        marg_loglkd += logsumexp(log_weights_a + log_lkd) - logsumexp(log_weights_a)
        normalisedThetaWeights = uf.to_normalized_weights(logThetaWeights)
        noisy_history[t_idx] = np.sum((normalisedThetaWeights * np.sum(
            np.transpose(weights_norm * noisy_descendants.T), axis=1).T), axis=1)

        # Degeneracy criterion
        logEss = 2 * uf.log_sum(logThetaWeights) - uf.log_sum(2 * logThetaWeights)
        essList[t_idx] = np.exp(logEss)

        # update repetition action
        if choices[t_idx] == 1:
            prev_action = actions[t_idx]

        # Move step
        if essList[t_idx] < coefficient * n_theta:
            acceptance_proba = 0
            if not sample_beta:
                samples_tmp = np.delete(samples, 2, axis=1)
                mu_p = np.sum(samples_tmp.T * normalisedThetaWeights, axis=1)
                Sigma_p = np.dot((samples_tmp - mu_p).T * normalisedThetaWeights,
                                 (samples_tmp - mu_p))
            else:
                mu_p = np.sum(samples.T * normalisedThetaWeights, axis=1)
                Sigma_p = np.dot((samples - mu_p).T * normalisedThetaWeights,
                                 (samples - mu_p))
            ancestorsIndexes[:] = uf.stratified_resampling(normalisedThetaWeights)

            for theta_idx in range(n_theta):
                idx_traj = ancestorsIndexes[theta_idx]
                # draw proposals until one lands inside the parameter bounds
                while True:
                    sample_cand = np.array(samples[idx_traj])
                    sample_p = multi_norm(mu_p, Sigma_p)
                    sample_p_copy = np.array(sample_p)
                    if (not sample_beta) and apply_rep:
                        sample_p = np.array([sample_p[0], sample_p[1],
                                             beta_softmax, sample_p[2],
                                             sample_p[3]])
                        sample_cand = np.delete(sample_cand, 2)
                    elif not sample_beta:
                        sample_p = np.array([sample_p[0], sample_p[1],
                                             beta_softmax, sample_p[2]])
                        sample_cand = np.delete(sample_cand, 2)
                    if apply_rep:
                        if 0 < sample_p[0] < 1 and 0 < sample_p[1] < 1. \
                                and 0 < sample_p[2] <= upp_bound_beta \
                                and 0 < sample_p[3] < upp_bound_eps \
                                and -upp_bound_eta < sample_p[4] < upp_bound_eta:
                            break
                    else:
                        if 0 < sample_p[0] < 1 and 0 < sample_p[1] < 1. \
                                and 0 < sample_p[2] <= upp_bound_beta \
                                and 0 < sample_p[3] < upp_bound_eps:
                            break

                # Launch SMC
                logmarglkd_p = smc_c.smc_2q_c(state_candidates,
                                              state_candidates_a,
                                              weights_candidates, sample_p,
                                              ancestors_indexes_p, idx_blocks,
                                              actions, rewards, choices,
                                              t_idx + 1, apply_rep,
                                              apply_weber, 2, temperature,
                                              observational_noise)
                logAlpha = np.minimum(0, logmarglkd_p - logThetalkd[idx_traj]
                                      + get_logtruncnorm(sample_cand, mu_p, Sigma_p)
                                      - get_logtruncnorm(sample_p_copy, mu_p, Sigma_p))

                # accept or reject
                if np.log(np.random.rand()) < logAlpha:
                    acceptance_proba += 1.
                    samples_new[theta_idx] = sample_p
                    weights_new[theta_idx] = weights_candidates
                    states_new[theta_idx] = state_candidates
                    logThetalkd_new[theta_idx] = logmarglkd_p
                else:
                    samples_new[theta_idx] = samples[idx_traj]
                    weights_new[theta_idx] = weights_norm[idx_traj]
                    states_new[theta_idx] = noisy_descendants[idx_traj]
                    logThetalkd_new[theta_idx] = logThetalkd[idx_traj]

            print('\n')
            print('acceptance ratio is ')
            print(acceptance_proba / n_theta)
            print('\n')
            acceptance_list.append(acceptance_proba / n_theta)

            weights_norm[:] = weights_new
            logThetalkd[:] = logThetalkd_new
            logThetaWeights[:] = np.zeros(n_theta)
            noisy_descendants[:] = states_new
            samples[:] = samples_new
            normalisedThetaWeights = uf.to_normalized_weights(logThetaWeights)

        if show_progress and t_idx % 10 == 0:
            plt.subplot(3, 2, 1)
            plt.plot(range(t_idx), noisy_history[:t_idx, 0], 'r')
            plt.hold(True)
            plt.plot(range(t_idx), noisy_history[:t_idx, 1], 'b')
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('Q-value 0 (red), and 1 (blue)')

            plt.subplot(3, 2, 4)
            plt.plot(range(t_idx), essList[:t_idx], 'b', linewidth=2)
            plt.hold(True)
            plt.plot(plt.gca().get_xlim(), [n_theta / 2, n_theta / 2],
                     'b--', linewidth=2)
            plt.axis([0, t_idx - 1, 0, n_theta])  # For speed
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('ess')

            if temperature:
                mean_beta = np.sum(normalisedThetaWeights * (1. / samples[:, 2]))
                std_beta = np.sqrt(np.sum(normalisedThetaWeights
                                          * (1. / samples[:, 2])**2) - mean_beta**2)
                x = np.linspace(0., 200, 5000)
            else:
                mean_beta = np.sum(normalisedThetaWeights * (10**samples[:, 2]))
                std_beta = np.sqrt(np.sum(normalisedThetaWeights
                                          * (10**samples[:, 2])**2) - mean_beta**2)
                x = np.linspace(0., 10**upp_bound_beta, 5000)

            plt.subplot(3, 2, 3)
            plt.plot(x, norm.pdf(x, mean_beta, std_beta), 'g')
            plt.hold(True)
            plt.plot([mean_beta, mean_beta], plt.gca().get_ylim(), 'g', linewidth=2)
            plt.hold(False)
            plt.xlabel('beta softmax')
            plt.ylabel('pdf')

            mean_alpha_0 = np.sum(normalisedThetaWeights * samples[:, 0])
            std_alpha_0 = np.sqrt(np.sum(normalisedThetaWeights
                                         * samples[:, 0]**2) - mean_alpha_0**2)
            mean_alpha_1 = np.sum(normalisedThetaWeights * samples[:, 1])
            std_alpha_1 = np.sqrt(np.sum(normalisedThetaWeights
                                         * samples[:, 1]**2) - mean_alpha_1**2)

            plt.subplot(3, 2, 2)
            x = np.linspace(0., 1., 5000)
            plt.plot(x, norm.pdf(x, mean_alpha_0, std_alpha_0), 'm')
            plt.hold(True)
            plt.plot([mean_alpha_0, mean_alpha_0], plt.gca().get_ylim(), 'm')
            plt.plot(x, norm.pdf(x, mean_alpha_1, std_alpha_1), 'c')
            plt.plot([mean_alpha_1, mean_alpha_1], plt.gca().get_ylim(), 'c')
            plt.hold(False)
            plt.xlabel('learning rates')
            plt.ylabel('pdf')

            mean_epsilon = np.sum(normalisedThetaWeights * samples[:, 3])
            std_epsilon = np.sqrt(np.sum(normalisedThetaWeights
                                         * samples[:, 3]**2) - mean_epsilon**2)

            plt.subplot(3, 2, 6)
            x = np.linspace(0., upp_bound_eps, 5000)
            if apply_rep == 1:
                mean_rep = np.sum(normalisedThetaWeights * samples[:, 4])
                std_rep = np.sqrt(np.sum(normalisedThetaWeights
                                         * samples[:, 4]**2) - mean_rep**2)
                x = np.linspace(-2., 2., 5000)
                plt.plot(x, norm.pdf(x, mean_rep, std_rep), 'y')
                plt.hold(True)
                plt.plot([mean_rep, mean_rep], plt.gca().get_ylim(), 'y', linewidth=2)
            plt.plot(x, norm.pdf(x, mean_epsilon, std_epsilon), 'g')
            plt.hold(True)
            plt.plot([mean_epsilon, mean_epsilon], plt.gca().get_ylim(), 'g', linewidth=2)
            plt.hold(False)
            plt.xlabel('epsilon std (green), rep_bias (yellow)')
            plt.ylabel('pdf')

            plt.draw()
            plt.show()
            plt.pause(0.05)

    return [samples, noisy_history, acceptance_list, normalisedThetaWeights,
            logThetalkd, marg_loglkd]
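# Hypothetical invocation of smc2 on synthetic data. It assumes the compiled
# `smc_c` kernels and the `uf` helper module referenced above are importable;
# `rewards` carries one row per action, and a non-positive beta_softmax makes
# the softmax parameter a sampled quantity rather than a fixed one.
import numpy as np

T = 120
actions = np.tile([0, 1], T // 2)      # binary action sequence
rewards = np.random.rand(2, T)         # reward traces for both actions
idx_blocks = np.zeros(T, dtype=int)
idx_blocks[0] = 1                      # a single block starting at t = 0
choices = np.ones(T, dtype=int)        # every trial is a free choice

samples, noisy_history, acceptance_list, theta_weights, logThetalkd, marg_loglkd = \
    smc2(actions, rewards, idx_blocks, choices, subj_idx=0,
         show_progress=False, apply_rep=0, apply_weber=0,
         beta_softmax=-1, temperature=True, observational_noise=0)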
def ibis(actions, rewards, choices, idx_blocks, subj_idx, apply_rep_bias,
         apply_weber_decision_noise, curiosity_bias, show_progress,
         temperature):
    assert 2 not in actions
    assert 0 in actions
    assert 1 in actions
    actions = np.asarray(actions, dtype=np.intc)
    rewards = np.ascontiguousarray(rewards)
    choices = np.asarray(choices, dtype=np.intc)
    idx_blocks = np.asarray(idx_blocks, dtype=np.intc)
    nb_samples = 1000
    T = actions.shape[0]
    upp_bound_eta = 10.

    # sample initialisation
    if (apply_rep_bias or curiosity_bias) and apply_weber_decision_noise == 0:
        samples = np.random.rand(nb_samples, 4)
        if temperature:
            upp_bound_beta = np.sqrt(6) / (np.pi * 5)
        else:
            upp_bound_beta = 2.
        samples[:, 2] = np.random.rand(nb_samples) * upp_bound_beta
        samples[:, 3] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.)
    elif apply_weber_decision_noise == 0:
        samples = np.random.rand(nb_samples, 3)
        if temperature:
            upp_bound_beta = np.sqrt(6) / (np.pi * 5)
        else:
            upp_bound_beta = 2.
        samples[:, 2] = np.random.rand(nb_samples) * upp_bound_beta
    elif apply_weber_decision_noise == 1:
        if apply_rep_bias:
            samples = np.random.rand(nb_samples, 5)
            if temperature:
                upp_bound_beta = np.sqrt(6) / (np.pi * 5)
            else:
                upp_bound_beta = 2.
            samples[:, 4] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.)
        else:
            samples = np.random.rand(nb_samples, 4)
            if temperature:
                upp_bound_beta = np.sqrt(6) / (np.pi * 5)
            else:
                upp_bound_beta = 2.
        upp_bound_k = 10
        samples[:, 2] = np.random.rand(nb_samples) * upp_bound_beta  # bound on the beta
        samples[:, 3] = np.random.rand(nb_samples) * upp_bound_k

    Q_samples = np.zeros([nb_samples, 2])
    prev_action = np.zeros(nb_samples) - 1

    # ibis param
    esslist = np.zeros(T)
    log_weights = np.zeros(nb_samples)
    weights_a = np.zeros(nb_samples)
    p_loglkd = np.zeros(nb_samples)
    loglkd = np.zeros(nb_samples)
    marg_loglkd = 0
    coefficient = .5
    marg_loglkd_l = np.zeros(T)
    acceptance_l = []

    # move step param
    if apply_rep_bias and apply_weber_decision_noise:
        move_samples = np.zeros([nb_samples, 5])
    elif apply_rep_bias or curiosity_bias:
        move_samples = np.zeros([nb_samples, 4])
    elif apply_weber_decision_noise:
        move_samples = np.zeros([nb_samples, 4])
    else:
        move_samples = np.zeros([nb_samples, 3])
    move_p_loglkd = np.zeros(nb_samples)
    Q_samples_move = np.zeros([nb_samples, 2])
    prev_action_move = np.zeros(nb_samples)
    mean_Q = np.zeros([T, 2])
    prediction_err = np.zeros(nb_samples)
    prediction_err[:] = -np.inf
    prediction_err_move = np.zeros(nb_samples)

    if show_progress:
        plt.figure(figsize=(15, 9))
        plt.suptitle("noiseless rl", fontsize=14)
        plt.ion()

    # loop
    for t_idx in range(T):
        if (t_idx + 1) % 10 == 0:
            sys.stdout.write(' ' + str(t_idx + 1) + ' ')
            print('marg_loglkd ' + str(marg_loglkd))
        if (t_idx + 1) % 100 == 0:
            print('\n')

        assert len(np.unique(prev_action)) == 1

        # update step
        weights_a[:] = log_weights
        if idx_blocks[t_idx]:
            Q_samples[:] = 0.5
            prev_action[:] = -1

        # loop over samples
        for n_idx in range(nb_samples):
            alpha_c = samples[n_idx, 0]
            alpha_u = samples[n_idx, 1]
            if temperature:
                beta = 1. / samples[n_idx, 2]
            else:
                beta = 10**samples[n_idx, 2]
            if apply_rep_bias or curiosity_bias:
                eta = samples[n_idx, -1]
            if apply_weber_decision_noise:
                k_beta = samples[n_idx, 3]

            # reweighting
            if choices[t_idx] == 1 and prev_action[n_idx] != -1 and \
                    (apply_rep_bias == 1 or curiosity_bias) and \
                    apply_weber_decision_noise == 0:
                if apply_rep_bias:
                    value = 1. / (1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])
                                              - np.sign(prev_action[n_idx] - .5) * eta))
                    loglkd[n_idx] = np.log((value**actions[t_idx])
                                           * (1 - value)**(1 - actions[t_idx]))
                    prev_action[n_idx] = actions[t_idx]
                elif curiosity_bias:
                    try:
                        count_samples = t_idx - 1 - np.where(
                            actions[:t_idx] != actions[t_idx - 1])[0][-1]
                    except IndexError:
                        count_samples = t_idx
                    assert count_samples > 0
                    value = 1. / (1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])
                                              + np.sign(prev_action[n_idx] - .5) * eta * count_samples))
                    loglkd[n_idx] = np.log((value**actions[t_idx])
                                           * (1 - value)**(1 - actions[t_idx]))
                    prev_action[n_idx] = actions[t_idx]
            elif choices[t_idx] == 1 and apply_weber_decision_noise == 0:
                value = 1. / (1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])))
                loglkd[n_idx] = np.log((value**actions[t_idx])
                                       * (1 - value)**(1 - actions[t_idx]))
                prev_action[n_idx] = actions[t_idx]
            elif choices[t_idx] == 1 and apply_weber_decision_noise == 1 \
                    and apply_rep_bias == 0:
                beta_modified = beta / (1. + k_beta * prediction_err[n_idx])
                value = 1. / (1. + np.exp(beta_modified
                                          * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])))
                loglkd[n_idx] = np.log((value**actions[t_idx])
                                       * (1 - value)**(1 - actions[t_idx]))
                prev_action[n_idx] = actions[t_idx]
            elif choices[t_idx] == 1 and apply_weber_decision_noise == 1 \
                    and apply_rep_bias == 1:
                beta_modified = beta / (1. + k_beta * prediction_err[n_idx])
                value = 1. / (1. + np.exp(beta_modified
                                          * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])
                                          - np.sign(prev_action[n_idx] - .5) * eta))
                loglkd[n_idx] = np.log((value**actions[t_idx])
                                       * (1 - value)**(1 - actions[t_idx]))
                prev_action[n_idx] = actions[t_idx]
            else:
                value = 1.
                loglkd[n_idx] = 0.

            if np.isnan(loglkd[n_idx]):
                print(t_idx)
                print(n_idx)
                print(beta)
                print(value)
                raise Exception

            p_loglkd[n_idx] = p_loglkd[n_idx] + loglkd[n_idx]
            log_weights[n_idx] = log_weights[n_idx] + loglkd[n_idx]

            # update step
            if actions[t_idx] == 0:
                prediction_err[n_idx] = np.abs(Q_samples[n_idx, 0] - rewards[0, t_idx])
                Q_samples[n_idx, 0] = (1 - alpha_c) * Q_samples[n_idx, 0] \
                    + alpha_c * rewards[0, t_idx]
                if not curiosity_bias:
                    Q_samples[n_idx, 1] = (1 - alpha_u) * Q_samples[n_idx, 1] \
                        + alpha_u * rewards[1, t_idx]
            else:
                prediction_err[n_idx] = np.abs(Q_samples[n_idx, 1] - rewards[1, t_idx])
                if not curiosity_bias:
                    Q_samples[n_idx, 0] = (1 - alpha_u) * Q_samples[n_idx, 0] \
                        + alpha_u * rewards[0, t_idx]
                Q_samples[n_idx, 1] = (1 - alpha_c) * Q_samples[n_idx, 1] \
                    + alpha_c * rewards[1, t_idx]

        marg_loglkd += logsumexp(weights_a + loglkd) - logsumexp(weights_a)
        marg_loglkd_l[t_idx] = marg_loglkd
        ess = np.exp(2 * logsumexp(log_weights) - logsumexp(2 * log_weights))
        esslist[t_idx] = ess
        weights_a[:] = uf.to_normalized_weights(log_weights)
        mean_Q[t_idx] = np.sum((Q_samples.T * weights_a).T, axis=0)

        # move step
        if ess < coefficient * nb_samples:
            idxTrajectories = uf.stratified_resampling(weights_a)
            mu_p = np.sum(samples.T * weights_a, axis=1)
            Sigma_p = np.dot((samples - mu_p).T * weights_a, (samples - mu_p))
            nb_acceptance = 0.
            for n_idx in range(nb_samples):
                idx_traj = idxTrajectories[n_idx]
                # draw proposals until one lands inside the parameter bounds
                while True:
                    sample_p = multi_norm(mu_p, Sigma_p)
                    if not apply_rep_bias and not apply_weber_decision_noise:
                        if 0 < sample_p[0] < 1 and 0 < sample_p[1] < 1 \
                                and 0 < sample_p[2] <= upp_bound_beta:
                            break
                    elif not apply_rep_bias and apply_weber_decision_noise:
                        if 0 < sample_p[0] < 1 and 0 < sample_p[1] < 1 \
                                and 0 < sample_p[2] <= upp_bound_beta \
                                and 0 < sample_p[3] <= upp_bound_k:
                            break
                    elif apply_rep_bias and not apply_weber_decision_noise:
                        if 0 < sample_p[0] < 1 and 0 < sample_p[1] < 1 \
                                and 0 < sample_p[2] <= upp_bound_beta \
                                and -upp_bound_eta < sample_p[3] < upp_bound_eta:
                            break
                    else:
                        if 0 < sample_p[0] < 1 and 0 < sample_p[1] < 1 \
                                and 0 < sample_p[2] <= upp_bound_beta \
                                and 0 < sample_p[3] < upp_bound_k \
                                and -upp_bound_eta < sample_p[-1] < upp_bound_eta:
                            break

                [loglkd_prop, Q_prop, prev_action_prop, prediction_err_prop] = \
                    get_loglikelihood(sample_p, rewards, actions, choices,
                                      idx_blocks, t_idx + 1, apply_rep_bias,
                                      apply_weber_decision_noise,
                                      curiosity_bias, temperature)
                log_ratio = loglkd_prop - p_loglkd[idx_traj] \
                    + get_logtruncnorm(samples[idx_traj], mu_p, Sigma_p) \
                    - get_logtruncnorm(sample_p, mu_p, Sigma_p)
                log_ratio = np.minimum(log_ratio, 0)
                if np.log(np.random.rand()) < log_ratio:
                    nb_acceptance += 1.
                    move_samples[n_idx] = sample_p
                    move_p_loglkd[n_idx] = loglkd_prop
                    Q_samples_move[n_idx] = Q_prop
                    prediction_err_move[n_idx] = prediction_err_prop
                else:
                    move_samples[n_idx] = samples[idx_traj]
                    move_p_loglkd[n_idx] = p_loglkd[idx_traj]
                    Q_samples_move[n_idx] = Q_samples[idx_traj]
                    prediction_err_move[n_idx] = prediction_err[idx_traj]

            print('acceptance ratio %s' % str(nb_acceptance / nb_samples))
            assert prev_action_prop == prev_action[0]
            acceptance_l.append(nb_acceptance / nb_samples)

            # move samples
            samples[:] = move_samples
            p_loglkd[:] = move_p_loglkd
            log_weights[:] = 0.
            Q_samples[:] = Q_samples_move
            prediction_err[:] = prediction_err_move

        if show_progress and t_idx % 10 == 0:
            weights_a[:] = uf.to_normalized_weights(log_weights)
            plt.subplot(3, 2, 1)
            plt.plot(range(t_idx), mean_Q[:t_idx], 'm', linewidth=2)
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('Q values')
            if apply_rep_bias == 1:
                mean_rep = np.sum(weights_a * samples[:, 3])
                std_rep = np.sqrt(np.sum(weights_a * samples[:, 3]**2) - mean_rep**2)
                plt.subplot(3, 2, 2)
                x = np.linspace(-2., 2., 5000)
                plt.plot(x, norm.pdf(x, mean_rep, std_rep), 'g')
                plt.hold(True)
                plt.plot([mean_rep, mean_rep], plt.gca().get_ylim(), 'g', linewidth=2)
                plt.hold(False)
                plt.xlabel('trials')
                plt.ylabel('rep param')
            if temperature:
                mean_beta = np.sum(weights_a * 1. / samples[:, 2])
                std_beta = np.sqrt(np.sum(weights_a
                                          * ((1. / samples[:, 2])**2)) - mean_beta**2)
            else:
                mean_beta = np.sum(weights_a * 10**samples[:, 2])
                std_beta = np.sqrt(np.sum(weights_a
                                          * ((10**samples[:, 2])**2)) - mean_beta**2)
            if apply_weber_decision_noise:
                mean_k = np.sum(weights_a * samples[:, 3])
                std_k = np.sqrt(np.sum(weights_a * (samples[:, 3]**2)) - mean_k**2)
            plt.subplot(3, 2, 3)
            x = np.linspace(0.01, 200., 5000)
            plt.plot(x, norm.pdf(x, mean_beta, std_beta), 'g', linewidth=2)
            plt.hold(True)
            plt.plot([mean_beta, mean_beta], plt.gca().get_ylim(), 'g', linewidth=2)
            plt.hold(False)
            plt.xlabel('beta softmax')
            plt.ylabel('pdf')
            mean_alpha_0 = np.sum(weights_a * samples[:, 0])
            std_alpha_0 = np.sqrt(np.sum(weights_a * (samples[:, 0]**2)) - mean_alpha_0**2)
            mean_alpha_1 = np.sum(weights_a * samples[:, 1])
            std_alpha_1 = np.sqrt(np.sum(weights_a * (samples[:, 1]**2)) - mean_alpha_1**2)
            plt.subplot(3, 2, 4)
            x = np.linspace(0., 1., 5000)
            plt.plot(x, norm.pdf(x, mean_alpha_0, std_alpha_0), 'm', linewidth=2)
            plt.hold(True)
            plt.plot([mean_alpha_0, mean_alpha_0], plt.gca().get_ylim(), 'm', linewidth=2)
            plt.plot(x, norm.pdf(x, mean_alpha_1, std_alpha_1), 'c', linewidth=2)
            plt.plot([mean_alpha_1, mean_alpha_1], plt.gca().get_ylim(), 'c', linewidth=2)
            plt.hold(False)
            plt.xlabel('learning rate chosen (magenta) and unchosen (cyan)')
            plt.ylabel('pdf')
            plt.subplot(3, 2, 5)
            plt.plot(range(t_idx), esslist[:t_idx], 'b', linewidth=2)
            plt.hold(True)
            plt.plot(plt.gca().get_xlim(), [nb_samples / 2, nb_samples / 2],
                     'b--', linewidth=2)
            plt.axis([0, t_idx - 1, 0, nb_samples])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('ess')
            if apply_weber_decision_noise:
                # plot for the scaling parameter k
                plt.subplot(3, 2, 6)
                x = np.linspace(0.01, 10., 5000)
                plt.plot(x, norm.pdf(x, mean_k, std_k), 'k', linewidth=2)
                plt.hold(True)
                plt.plot([mean_k, mean_k], plt.gca().get_ylim(), 'k', linewidth=2)
                plt.hold(False)
                plt.xlabel('scaling parameter for softmax 1/[0 1]')
                plt.ylabel('pdf')
            plt.draw()
            plt.show()
            plt.pause(0.05)

    return [samples, mean_Q, esslist, acceptance_l, log_weights, p_loglkd,
            marg_loglkd_l]
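# A minimal, self-contained sketch of the IBIS pattern implemented above:
# sequential reweighting of parameter samples, an effective-sample-size
# trigger, and a resample-move step with an independent Gaussian proposal.
# The Bernoulli toy model and all names here are illustrative, not from the
# source (which uses stratified resampling and a truncated-normal correction).
import numpy as np
from scipy.special import logsumexp

rng = np.random.default_rng(0)
data = rng.binomial(1, 0.7, size=200)       # toy Bernoulli observations
n = 500                                      # number of parameter samples
theta = rng.uniform(size=n)                  # prior draws of the rate
log_w = np.zeros(n)                          # incremental log-weights
log_post = np.zeros(n)                       # cumulative log-likelihoods

def loglkd(th, t):
    # Bernoulli log-likelihood of the first t observations
    k = data[:t].sum()
    return k * np.log(th) + (t - k) * np.log(1. - th)

for t in range(1, len(data) + 1):
    step = np.log(theta) if data[t - 1] else np.log(1. - theta)
    log_w += step
    log_post += step
    ess = np.exp(2 * logsumexp(log_w) - logsumexp(2 * log_w))
    if ess < 0.5 * n:                        # degeneracy: resample and move
        w = np.exp(log_w - logsumexp(log_w))
        idx = rng.choice(n, size=n, p=w)     # multinomial resampling
        mu, var = theta[idx].mean(), theta[idx].var()
        log_q = lambda v: -0.5 * (v - mu)**2 / var
        new_theta, new_post = theta[idx].copy(), log_post[idx].copy()
        for i in range(n):
            while True:                      # redraw until inside (0, 1)
                prop = rng.normal(mu, np.sqrt(var))
                if 0. < prop < 1.:
                    break
            lp = loglkd(prop, t)
            log_ratio = lp - new_post[i] + log_q(new_theta[i]) - log_q(prop)
            if np.log(rng.uniform()) < log_ratio:
                new_theta[i], new_post[i] = prop, lp
        theta, log_post, log_w = new_theta, new_post, np.zeros(n)

w = np.exp(log_w - logsumexp(log_w))
print('posterior mean of the rate:', np.sum(w * theta))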
# Bayesian linear regression with sigmoidal basis functions. `x`, `t` (data),
# `N`, `m`, `s` and the training design matrix `phi` (pre-activation) are
# defined earlier in the file; `inv` is numpy.linalg.inv.
px = np.arange(0, 2, 0.02)                    # prediction grid (100 points)
pphi = np.zeros([len(px), m])                 # basis expansion on the grid
for j in range(0, m):
    mj = 2. * j / m                           # basis-function centre
    pphi[:, j] = (px - mj) / s
phi = 1 / (1 + np.exp(-phi))                  # sigmoid of the training design matrix
pphi = 1 / (1 + np.exp(-pphi))                # sigmoid of the prediction design matrix

alpha = 10**(-6)                              # prior precision
s0_inv = alpha * np.identity(3)               # (assumes m == 3 basis functions)
beta = 1                                      # noise precision
sn_inv = s0_inv + beta * phi.T.dot(phi)       # posterior precision
sn = inv(sn_inv)
mn = sn.dot(beta * phi.T.dot(t[0:N]))         # posterior mean

# draw 5 weight vectors from the posterior and plot the functions they induce
w = multi_norm(np.squeeze(mn), sn, 5)
plt.figure(figsize=(10, 8))
for i in range(5):
    pt = pphi.dot(w[i, :])
    plt.plot(px, pt, 'r')
plt.plot(x[0:N], t[0:N], 'ok', markerfacecolor='none')
pt_m = pphi.dot(mn)                           # posterior-mean prediction
plt.plot(px, pt_m, color='r')
plt.ylim(-1, 4)

# predictive standard deviation on the grid
sigma2 = 1 / beta + pphi.dot(sn).dot(pphi.T)
sigma = np.sqrt(np.diag(sigma2))
plt.figure(figsize=(10, 8))
plt.plot(px, pt_m, 'r')
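# The snippet stops right after re-plotting the posterior-mean curve on the
# second figure. A typical continuation (an assumption, not part of the
# source) shades the +/- one-standard-deviation predictive band computed
# just above:
plt.fill_between(px, np.squeeze(pt_m) - sigma, np.squeeze(pt_m) + sigma,
                 color='r', alpha=0.2)        # predictive uncertainty band
plt.plot(x[0:N], t[0:N], 'ok', markerfacecolor='none')
plt.ylim(-1, 4)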
def ibis(actions, rewards, tau, subj_idx, apply_rep_bias, show_progress=True,
         temperature=True, model_id=0):
    '''
    model_id = 0 : 1 alpha, 1 beta
    model_id = 1 : n alpha, 1 beta
    model_id = 2 : n alpha, n beta
    '''
    actions = np.asarray(actions, dtype=np.intc)
    rewards = np.ascontiguousarray(rewards)
    nb_samples = 1000
    T = actions.shape[0]
    upp_bound_eta = 10.

    # sample initialisation
    if model_id == 2:
        n_alpha = 6
        n_beta = 6
        tau_unique = np.unique(tau)
        x_coor_a = np.array([np.where(tau_unique == t)[0][0] for t in tau])
        x_coor_b = np.array([np.where(tau_unique == t)[0][0] for t in tau]) + n_alpha
    elif model_id == 1:
        n_alpha = 6
        n_beta = 1
        tau_unique = np.unique(tau)
        x_coor_a = np.array([np.where(tau_unique == t)[0][0] for t in tau])
        x_coor_b = np.zeros(len(tau), dtype=np.int8) + n_alpha
    else:
        n_alpha = 1
        n_beta = 1
        x_coor_a = np.zeros(len(tau), dtype=np.int8)
        x_coor_b = np.zeros(len(tau), dtype=np.int8) + n_alpha

    n_theta = n_alpha + n_beta
    if apply_rep_bias:
        n_theta += 1
    samples = np.random.rand(nb_samples, n_theta)
    if temperature:
        upp_bound_beta = .6
    else:
        upp_bound_beta = 2.
    samples[:, n_alpha:(n_beta + n_alpha)] = np.random.rand(nb_samples, n_beta) * upp_bound_beta
    if apply_rep_bias:
        samples[:, -1] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.)

    Q_samples = np.zeros([nb_samples, 2]) + .5
    prev_action = np.zeros(nb_samples) - 1

    # ibis param
    esslist = np.zeros(T)
    log_weights = np.zeros(nb_samples)
    weights_a = np.zeros(nb_samples)
    p_loglkd = np.zeros(nb_samples)
    loglkd = np.zeros(nb_samples)
    marg_loglkd = 0
    coefficient = .5
    marg_loglkd_l = np.zeros(T)
    acceptance_l = []

    # move step param
    move_samples = np.zeros([nb_samples, n_theta])
    move_p_loglkd = np.zeros(nb_samples)
    Q_samples_move = np.zeros([nb_samples, 2])
    prev_action_move = np.zeros(nb_samples)
    mean_Q = np.zeros([T, 2])

    if show_progress:
        plt.figure(figsize=(15, 9))
        plt.suptitle("noiseless rl", fontsize=14)
        plt.ion()

    # loop
    for t_idx in range(T):
        if (t_idx + 1) % 10 == 0:
            sys.stdout.write(' ' + str(t_idx + 1) + ' ')
            print('marg_loglkd ' + str(marg_loglkd))
        if (t_idx + 1) % 100 == 0:
            print('\n')

        assert len(np.unique(prev_action)) == 1

        # update step
        weights_a[:] = log_weights
        for n_idx in range(nb_samples):
            alpha = samples[n_idx, x_coor_a[t_idx]]
            if temperature:
                beta = 1. / samples[n_idx, x_coor_b[t_idx]]
            else:
                beta = 10**samples[n_idx, x_coor_b[t_idx]]
            if apply_rep_bias:
                eta = samples[n_idx, -1]
            if prev_action[n_idx] != -1 and apply_rep_bias:
                value = 1. / (1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])
                                          - np.sign(prev_action[n_idx] - .5) * eta))
                loglkd[n_idx] = np.log((value**actions[t_idx])
                                       * (1 - value)**(1 - actions[t_idx]))
                prev_action[n_idx] = actions[t_idx]
            else:
                value = 1. / (1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])))
                loglkd[n_idx] = np.log((value**actions[t_idx])
                                       * (1 - value)**(1 - actions[t_idx]))
                prev_action[n_idx] = actions[t_idx]
            if np.isnan(loglkd[n_idx]):
                print(t_idx)
                print(n_idx)
                print(beta)
                print(value)
                raise Exception
            p_loglkd[n_idx] = p_loglkd[n_idx] + loglkd[n_idx]
            log_weights[n_idx] = log_weights[n_idx] + loglkd[n_idx]
            if actions[t_idx] == 0:
                Q_samples[n_idx, 0] = (1 - alpha) * Q_samples[n_idx, 0] + alpha * rewards[t_idx]
                Q_samples[n_idx, 1] = (1 - alpha) * Q_samples[n_idx, 1] + alpha * (1 - rewards[t_idx])
            else:
                Q_samples[n_idx, 0] = (1 - alpha) * Q_samples[n_idx, 0] + alpha * (1 - rewards[t_idx])
                Q_samples[n_idx, 1] = (1 - alpha) * Q_samples[n_idx, 1] + alpha * rewards[t_idx]

        marg_loglkd += logsumexp(weights_a + loglkd) - logsumexp(weights_a)
        marg_loglkd_l[t_idx] = marg_loglkd
        ess = np.exp(2 * logsumexp(log_weights) - logsumexp(2 * log_weights))
        esslist[t_idx] = ess
        weights_a[:] = uf.to_normalized_weights(log_weights)
        mean_Q[t_idx] = np.sum((Q_samples.T * weights_a).T, axis=0)

        # move step
        if ess < coefficient * nb_samples:
            idxTrajectories = uf.stratified_resampling(weights_a)
            mu_p = np.sum(samples.T * weights_a, axis=1)
            Sigma_p = np.dot((samples - mu_p).T * weights_a, (samples - mu_p))
            nb_acceptance = 0.
            for n_idx in range(nb_samples):
                idx_traj = idxTrajectories[n_idx]
                # draw proposals until one lands inside the parameter bounds
                while True:
                    sample_p = multi_norm(mu_p, Sigma_p)
                    if not apply_rep_bias:
                        if np.all(sample_p[:n_alpha] > 0) and np.all(sample_p[:n_alpha] < 1) \
                                and np.all(sample_p[n_alpha:(n_beta + n_alpha)] > 0) \
                                and np.all(sample_p[n_alpha:(n_beta + n_alpha)] <= upp_bound_beta):
                            break
                    else:
                        if np.all(sample_p[:n_alpha] > 0) and np.all(sample_p[:n_alpha] < 1) \
                                and np.all(sample_p[n_alpha:(n_beta + n_alpha)] > 0) \
                                and np.all(sample_p[n_alpha:(n_beta + n_alpha)] <= upp_bound_beta) \
                                and -upp_bound_eta < sample_p[-1] < upp_bound_eta:
                            break
                [loglkd_prop, Q_prop, prev_action_prop] = get_loglikelihood(
                    sample_p, x_coor_a, x_coor_b, rewards, actions, t_idx + 1,
                    apply_rep_bias, temperature)
                log_ratio = loglkd_prop - p_loglkd[idx_traj] \
                    + get_logtruncnorm(samples[idx_traj], mu_p, Sigma_p) \
                    - get_logtruncnorm(sample_p, mu_p, Sigma_p)
                log_ratio = np.minimum(log_ratio, 0)
                if np.log(np.random.rand()) < log_ratio:
                    nb_acceptance += 1.
                    move_samples[n_idx] = sample_p
                    move_p_loglkd[n_idx] = loglkd_prop
                    Q_samples_move[n_idx] = Q_prop
                else:
                    move_samples[n_idx] = samples[idx_traj]
                    move_p_loglkd[n_idx] = p_loglkd[idx_traj]
                    Q_samples_move[n_idx] = Q_samples[idx_traj]

            print('acceptance ratio %s' % str(nb_acceptance / nb_samples))
            assert prev_action_prop == prev_action[0]
            acceptance_l.append(nb_acceptance / nb_samples)

            # move samples
            samples[:] = move_samples
            p_loglkd[:] = move_p_loglkd
            log_weights[:] = 0.
            Q_samples[:] = Q_samples_move

        if show_progress and t_idx % 10 == 0:
            weights_a[:] = uf.to_normalized_weights(log_weights)
            plt.subplot(3, 2, 1)
            plt.plot(range(t_idx), mean_Q[:t_idx], 'm', linewidth=2)
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('Q values')
            if apply_rep_bias == 1:
                # the repetition parameter always sits in the last column
                mean_rep = np.sum(weights_a * samples[:, -1])
                std_rep = np.sqrt(np.sum(weights_a * samples[:, -1]**2) - mean_rep**2)
                plt.subplot(3, 2, 2)
                x = np.linspace(-2., 2., 5000)
                plt.plot(x, norm.pdf(x, mean_rep, std_rep), 'g')
                plt.hold(True)
                plt.plot([mean_rep, mean_rep], plt.gca().get_ylim(), 'g', linewidth=2)
                plt.hold(False)
                plt.xlabel('trials')
                plt.ylabel('rep param')
            # the first beta sits at column n_alpha
            if temperature:
                mean_beta = np.sum(weights_a * 1. / samples[:, n_alpha])
                std_beta = np.sqrt(np.sum(weights_a
                                          * ((1. / samples[:, n_alpha])**2)) - mean_beta**2)
            else:
                mean_beta = np.sum(weights_a * 10**samples[:, n_alpha])
                std_beta = np.sqrt(np.sum(weights_a
                                          * ((10**samples[:, n_alpha])**2)) - mean_beta**2)
            plt.subplot(3, 2, 3)
            x = np.linspace(0.01, 200., 5000)
            plt.plot(x, norm.pdf(x, mean_beta, std_beta), 'g', linewidth=2)
            plt.hold(True)
            plt.plot([mean_beta, mean_beta], plt.gca().get_ylim(), 'g', linewidth=2)
            plt.hold(False)
            plt.xlabel('beta softmax')
            plt.ylabel('pdf')
            mean_alpha_0 = np.sum(weights_a * samples[:, 0])
            std_alpha_0 = np.sqrt(np.sum(weights_a * (samples[:, 0]**2)) - mean_alpha_0**2)
            plt.subplot(3, 2, 4)
            x = np.linspace(0., 1., 5000)
            plt.plot(x, norm.pdf(x, mean_alpha_0, std_alpha_0), 'm', linewidth=2)
            plt.hold(True)
            plt.plot([mean_alpha_0, mean_alpha_0], plt.gca().get_ylim(), 'm', linewidth=2)
            plt.hold(False)
            plt.xlabel('learning rate (magenta)')
            plt.ylabel('pdf')
            plt.subplot(3, 2, 5)
            plt.plot(range(t_idx), esslist[:t_idx], 'b', linewidth=2)
            plt.hold(True)
            plt.plot(plt.gca().get_xlim(), [nb_samples / 2, nb_samples / 2],
                     'b--', linewidth=2)
            plt.axis([0, t_idx - 1, 0, nb_samples])  # For speed
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('ess')
            plt.draw()
            plt.show()
            plt.pause(0.05)

    return [samples, Q_samples, mean_Q, esslist, acceptance_l, log_weights,
            p_loglkd, marg_loglkd_l]
def ibis(actions, rewards, tau, subj_idx, apply_rep_bias, show_progress=True,
         temperature=True, n_alpha_model=False):
    actions = np.asarray(actions, dtype=np.intc)
    rewards = np.ascontiguousarray(rewards)
    nb_samples = 1000
    T = actions.shape[0]
    upp_bound_eta = 10.
    nb_acceptance = 0

    # sample initialisation
    if n_alpha_model:
        n_alpha = 6
        tau_unique = np.unique(tau)
        x_coor = np.array([np.where(tau_unique == t)[0][0] for t in tau])
    else:
        n_alpha = 1
        x_coor = np.zeros(len(tau), dtype=np.int8)

    if apply_rep_bias:
        samples = np.random.rand(n_alpha + 2)
        if temperature:
            upp_bound_beta = np.sqrt(6) / (np.pi * 5)
        else:
            upp_bound_beta = 2.
        n_index_beta = n_alpha
        samples[n_index_beta] = upp_bound_beta / 2.
        samples[n_index_beta + 1] = upp_bound_eta * (np.random.rand() * 2. - 1.)
    else:
        samples = np.zeros(n_alpha + 1) + .5
        if temperature:
            upp_bound_beta = np.sqrt(6) / (np.pi * 5)
        else:
            upp_bound_beta = 2.
        n_index_beta = n_alpha
        samples[-1] = upp_bound_beta / 2.

    all_samples = np.zeros([nb_samples, len(samples)])
    all_samples[0] = samples
    lkd = get_loglikelihood(samples, x_coor, rewards, actions, T,
                            apply_rep_bias, temperature)[0]

    # random-walk proposal covariance (smaller step on the last coordinate)
    Sigma_p = 1e-2 * np.eye(len(samples))
    Sigma_p[-1][-1] = 1e-3

    # Metropolis-Hastings loop
    for n_idx in range(nb_samples):
        # draw proposals until one lands inside the parameter bounds
        while True:
            sample_p = multi_norm(samples, Sigma_p)
            if not apply_rep_bias:
                if np.all(sample_p[:n_alpha] > 0) and np.all(sample_p[:n_alpha] < 1) \
                        and 0 < sample_p[n_alpha] <= upp_bound_beta:
                    break
            else:
                if np.all(sample_p[:n_alpha] > 0) and np.all(sample_p[:n_alpha] < 1) \
                        and 0 < sample_p[n_alpha] <= upp_bound_beta \
                        and -upp_bound_eta < sample_p[n_alpha + 1] < upp_bound_eta:
                    break
        [loglkd_prop, Q_prop, prev_action_prop] = get_loglikelihood(
            sample_p, x_coor, rewards, actions, T, apply_rep_bias, temperature)
        log_ratio = np.minimum(loglkd_prop - lkd, 0)
        if np.log(np.random.rand()) < log_ratio:
            nb_acceptance += 1.
            all_samples[n_idx] = sample_p
            lkd = loglkd_prop
            samples = sample_p
        else:
            all_samples[n_idx] = samples

    print('acceptance ratio is {0}'.format(nb_acceptance / nb_samples))
    # return the full MH chain and the log-likelihood of the last sample
    return [all_samples, lkd]
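# Hypothetical call of the Metropolis-Hastings variant above, given actions,
# rewards and tau arrays shaped as in the IBIS functions (get_loglikelihood
# is the helper the surrounding code already uses):
chain, final_loglkd = ibis(actions, rewards, tau, subj_idx=0,
                           apply_rep_bias=False, show_progress=False,
                           temperature=True, n_alpha_model=False)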