def test_passreward(env_name='PerceptualDecisionMaking-v0', num_steps=1000,
                    verbose=False):
    """
    Test pass-reward wrapper.

    The environment is wrapped with PassReward and driven with randomly
    sampled actions. At every step the last element of the observation is
    compared with the reward returned by that same step.

    Parameters
    ----------
    env_name : str, optional
        environment to wrap. The default is 'PerceptualDecisionMaking-v0'.
    num_steps : int, optional
        number of steps to run the environment (1000)
    verbose : boolean, optional
        whether to print observation and reward (False)

    Returns
    -------
    None.

    Raises
    ------
    AssertionError
        if the last observation element differs from the step reward.

    """
    wrapped = PassReward(gym.make(env_name))
    wrapped.reset()
    for _ in range(num_steps):
        sampled = wrapped.action_space.sample()
        obs, rew, done, _info = wrapped.step(sampled)
        assert obs[-1] == rew, 'Previous reward is not part of observation'
        if verbose:
            for item in (obs, rew, '--------'):
                print(item)
        if done:
            wrapped.reset()
def test_passreward(env_name, num_steps=10000, verbose=False, **envArgs):
    """
    Run an environment wrapped with PassReward using random actions.

    This variant performs no assertion: it only steps the wrapped
    environment and optionally prints what it returns.

    NOTE(review): another function named test_passreward appears earlier in
    this file; the later definition replaces the earlier one at import time
    -- confirm this is intended.

    Parameters
    ----------
    env_name : str
        environment to wrap.
    num_steps : int, optional
        number of steps to run the environment (10000)
    verbose : boolean, optional
        whether to print observation and reward (False)
    **envArgs :
        extra keyword arguments forwarded to gym.make.

    Returns
    -------
    None.

    """
    wrapped = PassReward(gym.make(env_name, **envArgs))
    wrapped.reset()
    for _ in range(num_steps):
        sampled = wrapped.action_space.sample()
        obs, rew, done, _info = wrapped.step(sampled)
        if verbose:
            for item in (obs, rew, '--------'):
                print(item)
        if done:
            wrapped.reset()
def test_concat_wrpprs_th_vch_pssr_pssa(env_name, num_steps=100000, probs=0.8,
                                        num_blocks=16, verbose=False, num_ch=6,
                                        variable_nch=True):
    """
    Test TrialHistory, Variable_nch, PassReward and PassAction stacked.

    The wrapped environment is driven with random actions. On every new
    trial (only when ``verbose`` is True) the current block, ground truth
    and number of choices are recorded, and ground-truth transitions are
    counted per block once the block (and, with ``variable_nch``, the number
    of choices) has been stable over the last recorded trials.

    Parameters
    ----------
    env_name : str
        environment to wrap; it is created with ``n_ch=num_ch``.
    num_steps : int, optional
        number of steps to run the environment (100000)
    probs : float, optional
        forwarded to TrialHistory as ``probs`` (0.8)
    num_blocks : int, optional
        size of the first axis of the transition-count array and value
        passed to check_blk_id (16)
    verbose : boolean, optional
        whether to record trial data, print the block ids and plot (False).
        When False the returned lists stay empty and the counts stay zero.
    num_ch : int, optional
        number of choices (6)
    variable_nch : boolean, optional
        whether to add the Variable_nch wrapper (True)

    Returns
    -------
    data : dict
        keys 'transitions', 'blk', 'blk_id', 'gt' and 'nch'.

    """
    env = gym.make(env_name, n_ch=num_ch)
    # using random blocks!
    env = TrialHistory(env, probs=probs, rand_blcks=True, blk_ch_prob=0.001)
    if variable_nch:
        env = Variable_nch(env, block_nch=1000,
                           blocks_probs=[0.2, 0.2, 0.2, 0.2, 0.2])
    transitions = np.zeros((num_blocks, num_ch, num_ch))
    env = PassReward(env)
    env = PassAction(env)
    env.reset()
    num_tr_blks = np.zeros((num_blocks,))
    blk_id = []
    blk = []
    gt = []
    nch = []
    prev_gt = 1
    for _ in range(num_steps):
        action = env.action_space.sample()
        _, _, done, info = env.step(action)
        if done:
            env.reset()
        if info['new_trial'] and verbose:
            blk_id, indx = check_blk_id(blk_id, info['curr_block'], num_blocks)
            blk.append(info['curr_block'])
            gt.append(info['gt'])
            if variable_nch:
                nch.append(info['nch'])
                # the two previous trials must share block and nch
                stable = (len(nch) > 2 and
                          2*[nch[-1]] == nch[-3:-1] and
                          2*[blk[-1]] == blk[-3:-1])
            else:
                nch.append(num_ch)
                # guard the first recorded trial: there is no blk[-2] yet
                stable = len(blk) > 1 and blk[-1] == blk[-2]
            if stable and indx != -1:
                num_tr_blks[indx] += 1
                transitions[indx, prev_gt, info['gt']-1] += 1
            prev_gt = info['gt']-1
    if verbose:
        print(blk_id)
        _, ax = plt.subplots(nrows=2, ncols=1, sharex=True)
        ax[0].plot(np.array(blk[:20000])/(10**(num_ch-1)), '-+')
        ax[0].plot(nch[:20000], '-+')
        ax[1].plot(gt[:20000], '-+')
        num_cols_rows = int(np.sqrt(num_blocks))
        _, ax1 = plt.subplots(ncols=num_cols_rows, nrows=num_cols_rows)
        ax1 = ax1.flatten()
        _, ax2 = plt.subplots(ncols=num_cols_rows, nrows=num_cols_rows)
        ax2 = ax2.flatten()
        # only blocks that were actually seen have an entry in blk_id
        for ind_blk in range(min(len(blk_id), num_blocks)):
            title = str(blk_id[ind_blk]) +\
                ' (N='+str(num_tr_blks[ind_blk])+')'
            norm_counts = transitions[ind_blk, :, :]
            ax1[ind_blk].imshow(norm_counts)
            ax1[ind_blk].set_title(title, fontsize=6)
            # normalise each row by its total; rows without any counted
            # transition divide by zero and come out as NaN
            nxt_tr_counts = np.sum(norm_counts, axis=1).reshape((-1, 1))
            norm_counts = norm_counts / nxt_tr_counts
            ax2[ind_blk].imshow(norm_counts)
            ax2[ind_blk].set_title(title, fontsize=6)
    data = {'transitions': transitions, 'blk': blk, 'blk_id': blk_id,
            'gt': gt, 'nch': nch}
    return data
def test_concat_wrpprs_th_vch_pssr_pssa(env_name, num_steps=100000, probs=0.8,
                                        num_blocks=16, verbose=False, num_ch=8,
                                        variable_nch=True, env_args=None):
    """
    Test TrialHistoryEvolution, Variable_nch, PassReward and PassAction.

    The wrapped environment is driven with random actions. Every step the
    observation and current block are stored. On every new trial (only when
    ``verbose`` is True) block, ground truth, selected choices and number of
    choices are recorded and ground-truth transitions are counted per block
    once block and number of choices were stable over the last trials.

    NOTE(review): this file contains several functions with this name; the
    last definition replaces the earlier ones at import time -- confirm this
    is intended.

    Parameters
    ----------
    env_name : str
        environment to wrap.
    num_steps : int, optional
        number of steps to run the environment (100000)
    probs : float, optional
        forwarded to TrialHistoryEvolution as ``probs`` (0.8)
    num_blocks : int, optional
        size of the first axis of the transition-count array and value
        passed to check_blk_id (16)
    verbose : boolean, optional
        whether to record trial data, print summaries and plot (False).
        When False the per-trial lists stay empty.
    num_ch : int, optional
        number of choices, passed to the environment as ``n_ch`` (8)
    variable_nch : boolean, optional
        unused in this version; Variable_nch is always applied (True)
    env_args : dict, optional
        extra keyword arguments for gym.make. The keys 'n_ch',
        'zero_irrelevant_stim' and 'ob_histblock' are overwritten. The dict
        passed in is not modified.

    Returns
    -------
    data : dict
        keys 'transitions', 'blk', 'blk_id', 'gt', 'nch', 's_ch', 'obs_mat'
        and 'blk_stp'. 'obs_mat' is an array when ``verbose`` is True and a
        list of observations otherwise.

    """
    # Copy so neither a shared default nor the caller's dict gets mutated.
    env_args = dict(env_args) if env_args else {}
    env_args['n_ch'] = num_ch
    env_args['zero_irrelevant_stim'] = True
    env_args['ob_histblock'] = True
    env = gym.make(env_name, **env_args)
    env = TrialHistoryEvolution(env, probs=probs, ctx_ch_prob=0.005,
                                predef_tr_mats=True, balanced_probs=True,
                                num_contexts=1)
    env = Variable_nch(env, block_nch=5000000000, prob_12=0.05,
                       sorted_ch=True)
    transitions = np.zeros((num_blocks, num_ch, num_ch))
    env = PassReward(env)
    env = PassAction(env)
    env.reset()
    num_tr_blks = np.zeros((num_blocks, ))
    blk_id = []
    s_chs = []
    blk = []
    blk_stp = []
    gt = []
    nch = []
    obs_mat = []
    prev_gt = 1
    for _ in range(num_steps):
        action = env.action_space.sample()
        obs, _, done, info = env.step(action)
        obs_mat.append(obs)
        blk_stp.append(info['curr_block'])
        if done:
            env.reset()
        if info['new_trial'] and verbose:
            blk.append(info['curr_block'])
            gt.append(info['gt'])
            # 'sel_chs' is a string such as '1-2'; turn it into 0-based ints
            sel_chs = [int(x) - 1 for x in info['sel_chs'].replace('-', '')]
            blk_id, indx = check_blk_id(blk_id, info['curr_block'],
                                        num_blocks, sel_chs)
            s_chs.append(info['sel_chs'])
            nch.append(info['nch'])
            # count only when the two previous trials share block and nch
            if len(nch) > 2 and 2*[nch[-1]] == nch[-3:-1] and\
               2*[blk[-1]] == blk[-3:-1] and\
               indx != -1:
                num_tr_blks[indx] += 1
                transitions[indx, prev_gt, info['gt'] - 1] += 1
            prev_gt = info['gt'] - 1
    if verbose:
        print(blk_id)
        sel_choices, counts = np.unique(s_chs, return_counts=True)
        print('\nSelected choices and frequencies:')
        print(sel_choices)
        print(counts / np.sum(counts))
        tr_blks, counts =\
            np.unique(np.array(blk)[np.array(s_chs) == '1-2'],
                      return_counts=True)
        print('\n2AFC task transition matrices and frequencies:')
        print(tr_blks)
        print(counts / np.sum(counts))
        _, ax = plt.subplots(nrows=1, ncols=1)
        obs_mat = np.array(obs_mat)
        # NOTE(review): fixed window; empty when num_steps <= 10000
        ax.imshow(obs_mat[10000:20000, :].T, aspect='auto')
        _, ax = plt.subplots(nrows=2, ncols=1, sharex=True)
        blk_int = [int(x.replace('-', '')) for x in blk]
        ax[0].plot(np.array(blk_int[:20000]) / (10**(num_ch - 1)), '-+',
                   label='tr-blck')
        ax[0].plot(nch[:20000], '-+', label='num choices')
        ax[1].plot(gt[:20000], '-+', label='correct side')
        ax[1].set_xlabel('Trials')
        ax[0].legend()
        ax[1].legend()
        num_cols_rows = int(np.sqrt(num_blocks))
        _, ax1 = plt.subplots(ncols=num_cols_rows, nrows=num_cols_rows)
        ax1 = ax1.flatten()
        _, ax2 = plt.subplots(ncols=num_cols_rows, nrows=num_cols_rows)
        ax2 = ax2.flatten()
        for ind_blk in range(len(blk_id)):
            title = str(blk_id[ind_blk]) +\
                ' (N=' + str(num_tr_blks[ind_blk]) + ')'
            norm_counts = transitions[ind_blk, :, :]
            ax1[ind_blk].imshow(norm_counts)
            ax1[ind_blk].set_title(title, fontsize=6)
            # normalise each row by its total; rows without any counted
            # transition divide by zero and come out as NaN
            nxt_tr_counts = np.sum(norm_counts, axis=1).reshape((-1, 1))
            norm_counts = norm_counts / nxt_tr_counts
            ax2[ind_blk].imshow(norm_counts)
            ax2[ind_blk].set_title(title, fontsize=6)
    data = {
        'transitions': transitions,
        'blk': blk,
        'blk_id': blk_id,
        'gt': gt,
        'nch': nch,
        's_ch': s_chs,
        'obs_mat': obs_mat,
        'blk_stp': blk_stp,
    }
    return data
def test_concat_wrpprs_th_vch_pssr_pssa(
        env_name='NAltPerceptualDecisionMaking-v0', num_steps=10000,
        probs=0.8, num_blocks=16, verbose=True, num_ch=6, variable_nch=True,
        th=0.5, rand_pretr=False, env_args=None):
    """
    Test TrialHistoryEvolution, Variable_nch, PassReward and PassAction.

    The wrapped environment is driven by a simple accumulator: within a
    trial it sums ``obs[1:num_ch + 1]`` (presumably the stimulus channels --
    confirm against the environment) and, as soon as one accumulated value
    exceeds the mean of all of them by more than ``th``, selects that
    channel (action index + 1); otherwise it takes action 0. The
    accumulator is reset on every new trial.

    Every step the observation and current block are stored. On every new
    trial (only when ``verbose`` is True) block, ground truth, selected
    choices and number of choices are recorded and ground-truth transitions
    are counted per block once block and number of choices were stable over
    the last trials.

    NOTE(review): this file contains several functions with this name; the
    last definition replaces the earlier ones at import time -- confirm this
    is intended.

    Parameters
    ----------
    env_name : str, optional
        environment to wrap. The default is
        'NAltPerceptualDecisionMaking-v0'.
    num_steps : int, optional
        number of steps to run the environment (10000)
    probs : float, optional
        forwarded to TrialHistoryEvolution as ``probs`` (0.8)
    num_blocks : int, optional
        size of the first axis of the transition-count array, value passed
        to check_blk_id and ``num_contexts`` of TrialHistoryEvolution (16)
    verbose : boolean, optional
        whether to record trial data, print summaries and plot (True)
    num_ch : int, optional
        number of choices, passed to the environment as ``n_ch`` (6)
    variable_nch : boolean, optional
        unused in this version; Variable_nch is always applied (True)
    th : float, optional
        decision threshold of the accumulator (0.5)
    rand_pretr : boolean, optional
        forwarded to TrialHistoryEvolution as ``rand_pretrain`` (False)
    env_args : dict, optional
        extra keyword arguments for gym.make. The keys 'n_ch',
        'zero_irrelevant_stim' and 'ob_histblock' are overwritten. The dict
        passed in is not modified.

    Returns
    -------
    data : dict
        keys 'transitions', 'blk', 'blk_id', 'gt', 'nch', 's_ch', 'obs_mat'
        and 'blk_stp'. 'obs_mat' is an array when ``verbose`` is True and a
        list of observations otherwise.

    """
    var_nch_block = 100
    var_nch_perf_th = 0.8
    tr_hist_block = 20
    tr_hist_perf_th = 0.5
    # Copy so neither a shared default nor the caller's dict gets mutated.
    env_args = dict(env_args) if env_args else {}
    env_args['n_ch'] = num_ch
    env_args['zero_irrelevant_stim'] = True
    env_args['ob_histblock'] = False
    env = gym.make(env_name, **env_args)
    env = TrialHistoryEvolution(env, probs=probs, ctx_ch_prob=0.05,
                                predef_tr_mats=True, balanced_probs=True,
                                num_contexts=num_blocks,
                                rand_pretrain=rand_pretr)
    env = Variable_nch(env, block_nch=var_nch_block, prob_12=0.5,
                       sorted_ch=True)
    # Performance wrappers are currently disabled; the 'phase' and
    # 'performance' traces plotted below therefore stay empty.
    # env = PerfPhases(env, start_ph=3, step_ph=1, wait=100,
    #                  flag_key='above_perf_th_vnch')
    # env = ComputeMeanPerf(env, perf_th=[var_nch_perf_th, tr_hist_perf_th],
    #                       perf_w=[var_nch_block, tr_hist_block],
    #                       key=['vnch', 'trh'],
    #                       cond_on_coh=[False, True])
    transitions = np.zeros((num_blocks, num_ch, num_ch))
    env = PassReward(env)
    env = PassAction(env)
    env.reset()
    num_tr_blks = np.zeros((num_blocks, ))
    blk_id = []
    s_chs = []
    blk = []
    blk_stp = []
    gt = []
    nch = []
    obs_mat = []
    perf_vnch = []
    perf_trh = []
    phase = []
    prev_gt = 1
    obs_cum = np.zeros((num_ch, ))
    for _ in range(num_steps):
        evidence = obs_cum - np.mean(obs_cum)
        if (evidence > th).any():
            action = np.argmax(evidence) + 1
        else:
            action = 0
        obs, _, done, info = env.step(action)
        obs_mat.append(obs)
        blk_stp.append(info['curr_block'])
        if done:
            env.reset()
        if info['new_trial']:
            # The agent's accumulator restarts on every trial, whether or
            # not trial statistics are being recorded.
            obs_cum = np.zeros((num_ch, ))
            if verbose:
                # perf_vnch.append(info['mean_perf_'+str(var_nch_perf_th) +
                #                       '_'+str(var_nch_block)+'_vnch'])
                # perf_trh.append(info['mean_perf_'+str(tr_hist_perf_th) +
                #                      '_'+str(tr_hist_block)+'_trh'])
                # phase.append(info['phase'])
                blk.append(info['curr_block'])
                gt.append(info['gt'])
                # 'sel_chs' is a string such as '1-2'; make 0-based ints
                sel_chs = [int(x) - 1
                           for x in info['sel_chs'].replace('-', '')]
                blk_id, indx = check_blk_id(blk_id, info['curr_block'],
                                            num_blocks, sel_chs)
                s_chs.append(info['sel_chs'])
                nch.append(info['nch'])
                # count only when the two previous trials share block/nch
                if len(nch) > 2 and 2*[nch[-1]] == nch[-3:-1] and\
                   2*[blk[-1]] == blk[-3:-1] and\
                   indx != -1:
                    num_tr_blks[indx] += 1
                    transitions[indx, prev_gt, info['gt'] - 1] += 1
                prev_gt = info['gt'] - 1
        else:
            obs_cum += obs[1:num_ch + 1]
    if verbose:
        sel_choices, counts = np.unique(s_chs, return_counts=True)
        print('\nSelected choices and frequencies:')
        print(sel_choices)
        print(counts / np.sum(counts))
        blocks, counts = np.unique(blk, return_counts=True)
        print('\nTransition matrices and frequencies:')
        print(blocks)
        print(counts / np.sum(counts))
        tr_blks, counts = np.unique(
            np.array(blk)[np.array(s_chs) == '1-2'], return_counts=True)
        print('\n2AFC task transition matrices and frequencies:')
        print(tr_blks)
        print(counts / np.sum(counts))
        _, ax = plt.subplots(nrows=1, ncols=1)
        obs_mat = np.array(obs_mat)
        ax.imshow(obs_mat.T, aspect='auto')
        _, ax = plt.subplots(nrows=3, ncols=1, sharex=True)
        blk_int = [int(x.replace('-', '')) for x in blk]
        ax[0].plot(np.array(blk_int[:20000]) / (10**(num_ch - 1)), '-+',
                   label='tr-blck')
        ax[0].plot(nch[:20000], '-+', label='num choices')
        ax[0].plot(phase[:20000], '-+', label='phase')
        ax[1].plot(gt[:20000], '-+', label='correct side')
        ax[2].set_xlabel('Trials')
        ax[2].plot(perf_vnch[:20000], '-+',
                   label='performance vnch (w=' + str(var_nch_block) +
                   ', th=' + str(var_nch_perf_th) + ')')
        ax[2].plot(perf_trh[:20000], '-+',
                   label='performance trh (w=' + str(tr_hist_block) +
                   ', th=' + str(tr_hist_perf_th) + ')')
        ax[0].legend()
        ax[1].legend()
        ax[2].legend()
        num_cols_rows = int(np.sqrt(num_blocks))
        _, ax1 = plt.subplots(ncols=num_cols_rows, nrows=num_cols_rows)
        ax1 = ax1.flatten()
        _, ax2 = plt.subplots(ncols=num_cols_rows, nrows=num_cols_rows)
        ax2 = ax2.flatten()
        for ind_blk in range(len(blk_id)):
            title = str(blk_id[ind_blk]) +\
                ' (N=' + str(num_tr_blks[ind_blk]) + ')'
            norm_counts = transitions[ind_blk, :, :]
            ax1[ind_blk].imshow(norm_counts)
            ax1[ind_blk].set_title(title, fontsize=6)
            # normalise each row by its total; rows without any counted
            # transition divide by zero and come out as NaN
            nxt_tr_counts = np.sum(norm_counts, axis=1).reshape((-1, 1))
            norm_counts = norm_counts / nxt_tr_counts
            ax2[ind_blk].imshow(norm_counts)
            ax2[ind_blk].set_title(title, fontsize=6)
    data = {
        'transitions': transitions,
        'blk': blk,
        'blk_id': blk_id,
        'gt': gt,
        'nch': nch,
        's_ch': s_chs,
        'obs_mat': obs_mat,
        'blk_stp': blk_stp,
    }
    return data
def test_stim_acc_signal(env='NAltPerceptualDecisionMaking-v0', num_steps=100,
                         verbose=True):
    """
    Run an environment wrapped with StimAccSignal using random actions.

    The environment is created with fixed timing (fixation 100, stimulus
    300, decision 100) and 4 choices, then wrapped with StimAccSignal,
    PassAction and PassReward. No assertion is made; with ``verbose`` the
    observations, actions, ground truth and rewards are plotted, with a
    dashed vertical line at every step flagged as a new trial.

    Parameters
    ----------
    env : str, optional
        environment to wrap. The default is
        'NAltPerceptualDecisionMaking-v0'.
    num_steps : int, optional
        number of steps to run the environment (100)
    verbose : boolean, optional
        whether to record and plot the run (True)

    Returns
    -------
    None.

    """
    task_kwargs = {
        'timing': {'fixation': 100, 'stimulus': 300, 'decision': 100},
        'n_ch': 4,
    }
    wrapped = gym.make(env, **task_kwargs)
    wrapped = PassReward(PassAction(StimAccSignal(wrapped)))
    wrapped.reset()
    obs_log, rew_log, act_log, gt_log, trial_flags = [], [], [], [], []
    for _ in range(num_steps):
        chosen = wrapped.action_space.sample()
        ob, rew, done, info = wrapped.step(chosen)
        if verbose:
            obs_log.append(ob)
            act_log.append(chosen)
            rew_log.append(rew)
            trial_flags.append(info['new_trial'])
            gt_log.append(info['gt'])
        if done:
            wrapped.reset()
    if not verbose:
        return
    obs_arr = np.array(obs_log)
    _, panels = plt.subplots(nrows=3, ncols=1, sharex=True)
    panels = panels.flatten()
    obs_panel, act_panel, rew_panel = panels[0], panels[1], panels[2]
    obs_panel.imshow(obs_arr.T, aspect='auto')
    act_panel.plot(act_log, label='Actions')
    act_panel.plot(gt_log, '--', label='gt')
    act_panel.set_xlim([-.5, len(act_log) - 0.5])
    act_panel.legend()
    rew_panel.plot(rew_log)
    trial_ends = np.where(trial_flags)[0]
    for panel in panels:
        # read the limits once, before the markers are drawn
        y_range = panel.get_ylim()
        for step_idx in trial_ends:
            panel.plot([step_idx, step_idx], y_range, '--c')
    rew_panel.set_xlim([-.5, len(act_log) - 0.5])
def test_learn_trans_matrix(env='NAltPerceptualDecisionMaking-v0',
                            num_steps=100, verbose=True, n_ch=2, th=0.01):
    """
    Run an environment wrapped with LearnTransMatrix using a simple agent.

    The environment is created with fixed timing (fixation 100, stimulus
    300, decision 100) and ``n_ch`` choices, then wrapped with
    TrialHistoryEvolution, LearnTransMatrix, PassAction and PassReward.
    The agent sums ``obs[1:n_ch + 1]`` within a trial and, as soon as one
    accumulated value exceeds the mean of all of them by more than ``th``,
    selects that channel (index + 1); otherwise it takes action 0. The
    accumulator is reset on every new trial. No assertion is made; with
    ``verbose`` the run is plotted.

    Parameters
    ----------
    env : str, optional
        environment to wrap. The default is
        'NAltPerceptualDecisionMaking-v0'.
    num_steps : int, optional
        number of steps to run the environment (100)
    verbose : boolean, optional
        whether to record and plot the run (True)
    n_ch : int, optional
        number of choices (2)
    th : float, optional
        decision threshold of the accumulator (0.01)

    Returns
    -------
    None.

    """
    task_kwargs = {
        'timing': {'fixation': 100, 'stimulus': 300, 'decision': 100},
        'n_ch': n_ch,
    }
    wrapped = gym.make(env, **task_kwargs)
    wrapped = TrialHistoryEvolution(wrapped, probs=0.9, predef_tr_mats=True,
                                    num_contexts=1)
    wrapped = PassReward(PassAction(LearnTransMatrix(wrapped)))
    wrapped.reset()
    obs_log, rew_log, act_log, gt_log, trial_flags = [], [], [], [], []
    evidence = np.zeros((n_ch, ))
    for _ in range(num_steps):
        centred = evidence - np.mean(evidence)
        chosen = np.argmax(centred) + 1 if (centred > th).any() else 0
        ob, rew, done, info = wrapped.step(chosen)
        if info['new_trial']:
            evidence = np.zeros((n_ch, ))
        else:
            evidence += ob[1:n_ch + 1]
        if verbose:
            obs_log.append(ob)
            act_log.append(chosen)
            rew_log.append(rew)
            trial_flags.append(info['new_trial'])
            gt_log.append(info['gt'])
        if done:
            wrapped.reset()
    if not verbose:
        return
    obs_arr = np.array(obs_log)
    _, panels = plt.subplots(nrows=3, ncols=1, sharex=True)
    panels = panels.flatten()
    obs_panel, act_panel, rew_panel = panels[0], panels[1], panels[2]
    obs_panel.imshow(obs_arr.T, aspect='auto')
    act_panel.plot(act_log, label='Actions')
    act_panel.plot(gt_log, '--', label='gt')
    act_panel.set_xlim([-.5, len(act_log) - 0.5])
    act_panel.legend()
    rew_panel.plot(rew_log)
    trial_ends = np.where(trial_flags)[0]
    for panel in panels:
        # read the limits once, before the markers are drawn
        y_range = panel.get_ylim()
        for step_idx in trial_ends:
            panel.plot([step_idx, step_idx], y_range, '--c')
    rew_panel.set_xlim([-.5, len(act_log) - 0.5])