Python PassReward示例，neurogym.wrappers.PassReward Python示例

示例#1

0

显示文件

def test_passreward(env_name='PerceptualDecisionMaking-v0',
                    num_steps=1000,
                    verbose=False):
    """
    Check that PassReward exposes the reward inside the observation.

    The environment is driven with randomly sampled actions. After every
    step the last element of the returned observation is compared with the
    reward returned by that same step; any mismatch fails the test.

    Parameters
    ----------
    env_name : str, optional
        id of the environment handed to gym.make
        ('PerceptualDecisionMaking-v0')
    num_steps : int, optional
        number of steps to run the environment (1000)
    verbose : boolean, optional
        whether to print observation and reward (False)

    Returns
    -------
    None.

    """
    env = PassReward(gym.make(env_name))
    env.reset()
    for _ in range(num_steps):
        obs, rew, done, info = env.step(env.action_space.sample())
        assert obs[-1] == rew, 'Previous reward is not part of observation'
        if verbose:
            for item in (obs, rew, '--------'):
                print(item)
        if done:
            # start a fresh episode so that stepping can continue
            env.reset()

示例#2

0

显示文件

文件： test_wrappers.py 项目： lqzhou/neurogym

def test_passreward(env_name, num_steps=10000, verbose=False, **envArgs):
    """
    Smoke-test a PassReward-wrapped environment with random actions.

    Parameters
    ----------
    env_name : str
        id of the environment handed to gym.make
    num_steps : int, optional
        number of steps to run the environment (10000)
    verbose : boolean, optional
        whether to print observation and reward (False)
    **envArgs :
        extra keyword arguments forwarded to gym.make

    Returns
    -------
    None.

    """
    env = PassReward(gym.make(env_name, **envArgs))
    env.reset()
    for _ in range(num_steps):
        obs, rew, done, info = env.step(env.action_space.sample())
        if verbose:
            for item in (obs, rew, '--------'):
                print(item)
        if done:
            # start a fresh episode so that stepping can continue
            env.reset()

示例#3

0

显示文件

文件： test_wrappers.py 项目： lqzhou/neurogym

def test_concat_wrpprs_th_vch_pssr_pssa(env_name, num_steps=100000, probs=0.8,
                                        num_blocks=16, verbose=False, num_ch=6,
                                        variable_nch=True):
    """
    Test TrialHistory (+ Variable_nch) combined with PassReward/PassAction.

    The environment is driven with random actions and, for every new trial,
    the transition between the previous and the current ground truth is
    counted separately for each trial-history block.

    NOTE: trial information is only recorded when verbose is True; with
    verbose=False the returned lists are empty and 'transitions' is all
    zeros.

    Parameters
    ----------
    env_name : str
        id of the environment handed to gym.make
    num_steps : int, optional
        number of steps to run the environment (100000)
    probs : float, optional
        forwarded to TrialHistory as `probs` (0.8)
    num_blocks : int, optional
        maximum number of blocks for which transitions are counted (16)
    verbose : boolean, optional
        whether to record trial information and plot the results (False)
    num_ch : int, optional
        number of choices, passed to the environment as 'n_ch' (6)
    variable_nch : boolean, optional
        whether to add the Variable_nch wrapper (True)

    Returns
    -------
    data : dict
        keys 'transitions', 'blk', 'blk_id', 'gt' and 'nch'.

    """
    env = gym.make(env_name, **{'n_ch': num_ch})
    env = TrialHistory(env, probs=probs,
                       rand_blcks=True, blk_ch_prob=0.001)  # using random blocks!
    if variable_nch:
        env = Variable_nch(env, block_nch=1000, blocks_probs=[0.2, 0.2, 0.2,
                                                              0.2, 0.2])
    transitions = np.zeros((num_blocks, num_ch, num_ch))
    env = PassReward(env)
    env = PassAction(env)
    env.reset()
    num_tr_blks = np.zeros((num_blocks,))
    blk_id = []
    blk = []
    gt = []
    nch = []
    prev_gt = 1
    for stp in range(num_steps):
        action = env.action_space.sample()
        obs, rew, done, info = env.step(action)
        if done:
            env.reset()
        if info['new_trial'] and verbose:
            blk_id, indx = check_blk_id(blk_id, info['curr_block'], num_blocks)
            blk.append(info['curr_block'])
            gt.append(info['gt'])
            if variable_nch:
                nch.append(info['nch'])
                # count only if block and number of choices were unchanged
                # over the last three trials
                stable = (len(nch) > 2 and 2*[nch[-1]] == nch[-3:-1] and
                          2*[blk[-1]] == blk[-3:-1])
            else:
                nch.append(num_ch)
                # the length guard avoids an IndexError on the first trial,
                # when there is no previous block to compare with
                stable = len(blk) > 1 and blk[-1] == blk[-2]
            if stable and indx != -1:
                num_tr_blks[indx] += 1
                transitions[indx, prev_gt, info['gt']-1] += 1
            # info['gt'] is shifted by -1 to index the matrix
            # (presumably ground truth is 1-based - TODO confirm)
            prev_gt = info['gt']-1
    if verbose:
        print(blk_id)
        _, ax = plt.subplots(nrows=2, ncols=1, sharex=True)
        ax[0].plot(np.array(blk[:20000])/(10**(num_ch-1)), '-+')
        ax[0].plot(nch[:20000], '-+')
        ax[1].plot(gt[:20000], '-+')
        num_cols_rows = int(np.sqrt(num_blocks))
        _, ax1 = plt.subplots(ncols=num_cols_rows, nrows=num_cols_rows)
        ax1 = ax1.flatten()
        _, ax2 = plt.subplots(ncols=num_cols_rows, nrows=num_cols_rows)
        ax2 = ax2.flatten()
        # only blocks that were actually registered have an id to show;
        # indexing blk_id beyond its length would raise an IndexError
        for ind_blk in range(min(num_blocks, len(blk_id))):
            norm_counts = transitions[ind_blk, :, :]
            title = str(blk_id[ind_blk]) + ' (N='+str(num_tr_blks[ind_blk])+')'
            # raw counts
            ax1[ind_blk].imshow(norm_counts)
            ax1[ind_blk].set_title(title, fontsize=6)
            # counts normalized by the total of each row
            nxt_tr_counts = np.sum(norm_counts, axis=1).reshape((-1, 1))
            norm_counts = norm_counts / nxt_tr_counts
            ax2[ind_blk].imshow(norm_counts)
            ax2[ind_blk].set_title(title, fontsize=6)
    data = {'transitions': transitions, 'blk': blk, 'blk_id': blk_id, 'gt': gt,
            'nch': nch}
    return data

示例#4

0

显示文件

def test_concat_wrpprs_th_vch_pssr_pssa(env_name,
                                        num_steps=100000,
                                        probs=0.8,
                                        num_blocks=16,
                                        verbose=False,
                                        num_ch=8,
                                        variable_nch=True,
                                        env_args=None):
    """
    Test TrialHistoryEvolution + Variable_nch + PassReward + PassAction.

    The environment is driven with random actions and, for every new trial,
    the transition between the previous and the current ground truth is
    counted separately for each trial-history block.

    NOTE: trial information is only recorded when verbose is True; with
    verbose=False only 'obs_mat' and 'blk_stp' are filled.

    Parameters
    ----------
    env_name : str
        id of the environment handed to gym.make
    num_steps : int, optional
        number of steps to run the environment (100000)
    probs : float, optional
        forwarded to TrialHistoryEvolution as `probs` (0.8)
    num_blocks : int, optional
        maximum number of blocks for which transitions are counted (16)
    verbose : boolean, optional
        whether to record trial information and plot the results (False)
    num_ch : int, optional
        number of choices, passed to the environment as 'n_ch' (8)
    variable_nch : boolean, optional
        not used by this function; Variable_nch is always applied (True)
    env_args : dict, optional
        extra keyword arguments for gym.make. The dict is copied, so the
        caller's object is never modified (None)

    Returns
    -------
    data : dict
        keys 'transitions', 'blk', 'blk_id', 'gt', 'nch', 's_ch', 'obs_mat'
        and 'blk_stp'. 'obs_mat' is a numpy array when verbose is True and
        a list of observations otherwise.

    """
    # work on a copy: a mutable default (or the caller's dict) must not be
    # modified in place, otherwise the keys set below leak across calls
    env_args = dict(env_args) if env_args is not None else {}
    env_args['n_ch'] = num_ch
    env_args['zero_irrelevant_stim'] = True
    env_args['ob_histblock'] = True
    env = gym.make(env_name, **env_args)
    env = TrialHistoryEvolution(env,
                                probs=probs,
                                ctx_ch_prob=0.005,
                                predef_tr_mats=True,
                                balanced_probs=True,
                                num_contexts=1)
    env = Variable_nch(env, block_nch=5000000000, prob_12=0.05, sorted_ch=True)
    transitions = np.zeros((num_blocks, num_ch, num_ch))
    env = PassReward(env)
    env = PassAction(env)
    env.reset()
    num_tr_blks = np.zeros((num_blocks, ))
    blk_id = []
    s_chs = []
    blk = []
    blk_stp = []
    gt = []
    nch = []
    obs_mat = []
    prev_gt = 1
    for stp in range(num_steps):
        action = env.action_space.sample()
        obs, rew, done, info = env.step(action)
        obs_mat.append(obs)
        blk_stp.append(info['curr_block'])
        if done:
            env.reset()
        if info['new_trial'] and verbose:
            blk.append(info['curr_block'])
            gt.append(info['gt'])
            # every character of info['sel_chs'] (dashes removed) is turned
            # into an int and shifted by -1
            sel_chs = list(info['sel_chs'].replace('-', ''))
            sel_chs = [int(x) - 1 for x in sel_chs]
            blk_id, indx = check_blk_id(blk_id, info['curr_block'], num_blocks,
                                        sel_chs)
            s_chs.append(info['sel_chs'])
            nch.append(info['nch'])
            # count only if block and number of choices were unchanged over
            # the last three trials
            if len(nch) > 2 and 2*[nch[-1]] == nch[-3:-1] and\
               2*[blk[-1]] == blk[-3:-1] and\
               indx != -1:
                num_tr_blks[indx] += 1
                transitions[indx, prev_gt, info['gt'] - 1] += 1
            # info['gt'] is shifted by -1 to index the matrix
            # (presumably ground truth is 1-based - TODO confirm)
            prev_gt = info['gt'] - 1
    if verbose:
        print(blk_id)
        sel_choices, counts = np.unique(s_chs, return_counts=1)
        print('\nSelected choices and frequencies:')
        print(sel_choices)
        print(counts / np.sum(counts))
        tr_blks, counts =\
            np.unique(np.array(blk)[np.array(s_chs) == '1-2'],
                      return_counts=1)
        print('\n2AFC task transition matrices and frequencies:')
        print(tr_blks)
        print(counts / np.sum(counts))
        _, ax = plt.subplots(nrows=1, ncols=1)
        obs_mat = np.array(obs_mat)
        ax.imshow(obs_mat[10000:20000, :].T, aspect='auto')
        _, ax = plt.subplots(nrows=2, ncols=1, sharex=True)
        blk_int = [int(x.replace('-', '')) for x in blk]
        ax[0].plot(np.array(blk_int[:20000]) / (10**(num_ch - 1)),
                   '-+',
                   label='tr-blck')
        ax[0].plot(nch[:20000], '-+', label='num choices')
        ax[1].plot(gt[:20000], '-+', label='correct side')
        ax[1].set_xlabel('Trials')
        ax[0].legend()
        ax[1].legend()
        num_cols_rows = int(np.sqrt(num_blocks))
        _, ax1 = plt.subplots(ncols=num_cols_rows, nrows=num_cols_rows)
        ax1 = ax1.flatten()
        _, ax2 = plt.subplots(ncols=num_cols_rows, nrows=num_cols_rows)
        ax2 = ax2.flatten()
        for ind_blk in range(len(blk_id)):
            norm_counts = transitions[ind_blk, :, :]
            title = (str(blk_id[ind_blk]) + ' (N=' +
                     str(num_tr_blks[ind_blk]) + ')')
            # raw counts
            ax1[ind_blk].imshow(norm_counts)
            ax1[ind_blk].set_title(title, fontsize=6)
            # counts normalized by the total of each row
            nxt_tr_counts = np.sum(norm_counts, axis=1).reshape((-1, 1))
            norm_counts = norm_counts / nxt_tr_counts
            ax2[ind_blk].imshow(norm_counts)
            ax2[ind_blk].set_title(title, fontsize=6)
    data = {
        'transitions': transitions,
        'blk': blk,
        'blk_id': blk_id,
        'gt': gt,
        'nch': nch,
        's_ch': s_chs,
        'obs_mat': obs_mat,
        'blk_stp': blk_stp
    }
    return data

示例#5

0

显示文件

def test_concat_wrpprs_th_vch_pssr_pssa(
        env_name='NAltPerceptualDecisionMaking-v0',
        num_steps=10000,
        probs=0.8,
        num_blocks=16,
        verbose=True,
        num_ch=6,
        variable_nch=True,
        th=0.5,
        rand_pretr=False,
        env_args=None):
    """
    Test TrialHistoryEvolution + Variable_nch + PassReward + PassAction.

    Actions come from a simple evidence-accumulation policy: observations
    are summed within a trial and, as soon as one accumulated channel
    exceeds the mean of all channels by more than `th`, the corresponding
    choice is selected; otherwise action 0 is taken. For every new trial
    the transition between the previous and the current ground truth is
    counted separately for each trial-history block.

    NOTE: trial information is only recorded when verbose is True; with
    verbose=False only 'obs_mat' and 'blk_stp' are filled.

    Parameters
    ----------
    env_name : str, optional
        id of the environment handed to gym.make
        ('NAltPerceptualDecisionMaking-v0')
    num_steps : int, optional
        number of steps to run the environment (10000)
    probs : float, optional
        forwarded to TrialHistoryEvolution as `probs` (0.8)
    num_blocks : int, optional
        maximum number of blocks for which transitions are counted; also
        forwarded to TrialHistoryEvolution as `num_contexts` (16)
    verbose : boolean, optional
        whether to record trial information and plot the results (True)
    num_ch : int, optional
        number of choices, passed to the environment as 'n_ch' (6)
    variable_nch : boolean, optional
        not used by this function; Variable_nch is always applied (True)
    th : float, optional
        threshold on the mean-subtracted accumulated observation above
        which a choice is made (0.5)
    rand_pretr : boolean, optional
        forwarded to TrialHistoryEvolution as `rand_pretrain` (False)
    env_args : dict, optional
        extra keyword arguments for gym.make. The dict is copied, so the
        caller's object is never modified (None)

    Returns
    -------
    data : dict
        keys 'transitions', 'blk', 'blk_id', 'gt', 'nch', 's_ch', 'obs_mat'
        and 'blk_stp'. 'obs_mat' is a numpy array when verbose is True and
        a list of observations otherwise.

    """
    var_nch_block = 100
    var_nch_perf_th = 0.8
    tr_hist_block = 20
    tr_hist_perf_th = 0.5
    # work on a copy: a mutable default (or the caller's dict) must not be
    # modified in place, otherwise the keys set below leak across calls
    env_args = dict(env_args) if env_args is not None else {}
    env_args['n_ch'] = num_ch
    env_args['zero_irrelevant_stim'] = True
    env_args['ob_histblock'] = False
    env = gym.make(env_name, **env_args)
    env = TrialHistoryEvolution(env,
                                probs=probs,
                                ctx_ch_prob=0.05,
                                predef_tr_mats=True,
                                balanced_probs=True,
                                num_contexts=num_blocks,
                                rand_pretrain=rand_pretr)
    env = Variable_nch(env,
                       block_nch=var_nch_block,
                       prob_12=0.5,
                       sorted_ch=True)
    # env = PerfPhases(env, start_ph=3, step_ph=1, wait=100,
    #                  flag_key='above_perf_th_vnch')
    # env = ComputeMeanPerf(env, perf_th=[var_nch_perf_th, tr_hist_perf_th],
    #                       perf_w=[var_nch_block, tr_hist_block],
    #                       key=['vnch', 'trh'],
    #                       cond_on_coh=[False, True])
    transitions = np.zeros((num_blocks, num_ch, num_ch))
    env = PassReward(env)
    env = PassAction(env)
    env.reset()
    num_tr_blks = np.zeros((num_blocks, ))
    blk_id = []
    s_chs = []
    blk = []
    blk_stp = []
    gt = []
    nch = []
    obs_mat = []
    # perf_vnch, perf_trh and phase stay empty while the wrappers that
    # would provide them are commented out; they are still plotted below
    perf_vnch = []
    perf_trh = []
    phase = []
    prev_gt = 1
    obs_cum = np.zeros((num_ch, ))
    for stp in range(num_steps):
        # choose the channel with most evidence once it stands out from the
        # mean by more than th; otherwise take action 0
        evidence = obs_cum - np.mean(obs_cum)
        if (evidence > th).any():
            action = np.argmax(evidence) + 1
        else:
            action = 0
        obs, rew, done, info = env.step(action)
        obs_mat.append(obs)
        blk_stp.append(info['curr_block'])
        if done:
            env.reset()
        if info['new_trial']:
            # the accumulator is reset on every new trial, whether or not
            # verbose is set, so that the policy does not depend on a
            # logging flag
            obs_cum = np.zeros((num_ch, ))
            if verbose:
                # perf_vnch.append(info['mean_perf_'+str(var_nch_perf_th) +
                #                  '_'+str(var_nch_block)+'_vnch'])
                # perf_trh.append(info['mean_perf_'+str(tr_hist_perf_th) +
                #                 '_'+str(tr_hist_block)+'_trh'])
                # phase.append(info['phase'])
                blk.append(info['curr_block'])
                gt.append(info['gt'])
                # every character of info['sel_chs'] (dashes removed) is
                # turned into an int and shifted by -1
                sel_chs = list(info['sel_chs'].replace('-', ''))
                sel_chs = [int(x) - 1 for x in sel_chs]
                blk_id, indx = check_blk_id(blk_id, info['curr_block'],
                                            num_blocks, sel_chs)
                s_chs.append(info['sel_chs'])
                nch.append(info['nch'])
                # count only if block and number of choices were unchanged
                # over the last three trials
                if len(nch) > 2 and 2*[nch[-1]] == nch[-3:-1] and\
                   2*[blk[-1]] == blk[-3:-1] and\
                   indx != -1:
                    num_tr_blks[indx] += 1
                    transitions[indx, prev_gt, info['gt'] - 1] += 1
                # info['gt'] is shifted by -1 to index the matrix
                # (presumably ground truth is 1-based - TODO confirm)
                prev_gt = info['gt'] - 1
        else:
            # assumes obs[1:num_ch + 1] holds the stimulus channels
            # - TODO confirm against the environment's observation layout
            obs_cum += obs[1:num_ch + 1]
    if verbose:
        sel_choices, counts = np.unique(s_chs, return_counts=1)
        print('\nSelected choices and frequencies:')
        print(sel_choices)
        print(counts / np.sum(counts))
        blocks, counts = np.unique(blk, return_counts=1)
        print('\nTransition matrices and frequencies:')
        print(blocks)
        print(counts / np.sum(counts))
        tr_blks, counts = np.unique(np.array(blk)[np.array(s_chs) == '1-2'],
                                    return_counts=1)
        print('\n2AFC task transition matrices and frequencies:')
        print(tr_blks)
        print(counts / np.sum(counts))
        _, ax = plt.subplots(nrows=1, ncols=1)
        obs_mat = np.array(obs_mat)
        ax.imshow(obs_mat.T, aspect='auto')
        _, ax = plt.subplots(nrows=3, ncols=1, sharex=True)
        blk_int = [int(x.replace('-', '')) for x in blk]
        ax[0].plot(np.array(blk_int[:20000]) / (10**(num_ch - 1)),
                   '-+',
                   label='tr-blck')
        ax[0].plot(nch[:20000], '-+', label='num choices')
        ax[0].plot(phase[:20000], '-+', label='phase')
        ax[1].plot(gt[:20000], '-+', label='correct side')
        ax[2].set_xlabel('Trials')
        ax[2].plot(perf_vnch[:20000],
                   '-+',
                   label='performance vnch (w=' + str(var_nch_block) +
                   ', th=' + str(var_nch_perf_th) + ')')
        ax[2].plot(perf_trh[:20000],
                   '-+',
                   label='performance trh (w=' + str(tr_hist_block) + ', th=' +
                   str(tr_hist_perf_th) + ')')
        ax[0].legend()
        ax[1].legend()
        ax[2].legend()
        num_cols_rows = int(np.sqrt(num_blocks))
        _, ax1 = plt.subplots(ncols=num_cols_rows, nrows=num_cols_rows)
        ax1 = ax1.flatten()
        _, ax2 = plt.subplots(ncols=num_cols_rows, nrows=num_cols_rows)
        ax2 = ax2.flatten()
        for ind_blk in range(len(blk_id)):
            norm_counts = transitions[ind_blk, :, :]
            title = (str(blk_id[ind_blk]) + ' (N=' +
                     str(num_tr_blks[ind_blk]) + ')')
            # raw counts
            ax1[ind_blk].imshow(norm_counts)
            ax1[ind_blk].set_title(title, fontsize=6)
            # counts normalized by the total of each row
            nxt_tr_counts = np.sum(norm_counts, axis=1).reshape((-1, 1))
            norm_counts = norm_counts / nxt_tr_counts
            ax2[ind_blk].imshow(norm_counts)
            ax2[ind_blk].set_title(title, fontsize=6)
    data = {
        'transitions': transitions,
        'blk': blk,
        'blk_id': blk_id,
        'gt': gt,
        'nch': nch,
        's_ch': s_chs,
        'obs_mat': obs_mat,
        'blk_stp': blk_stp
    }
    return data

示例#6

0

显示文件

def test_stim_acc_signal(env='NAltPerceptualDecisionMaking-v0',
                         num_steps=100,
                         verbose=True):
    """
    Run StimAccSignal + PassAction + PassReward with random actions.

    The environment is created with fixed timing (fixation 100, stimulus
    300, decision 100) and 4 choices. When verbose is set, observations,
    actions, ground truth and rewards are plotted, with a dashed vertical
    line at every step flagged as a new trial.

    Parameters
    ----------
    env : str, optional
        id of the environment handed to gym.make
        ('NAltPerceptualDecisionMaking-v0')
    num_steps : int, optional
        number of steps to run the environment (100)
    verbose : boolean, optional
        whether to record and plot the run (True)

    Returns
    -------
    None.

    """
    timing = {'fixation': 100, 'stimulus': 300, 'decision': 100}
    env_args = {'timing': timing, 'n_ch': 4}
    env = PassReward(PassAction(StimAccSignal(gym.make(env, **env_args))))
    env.reset()
    if verbose:
        obs_log, rew_log, act_log, gt_log, trial_flags = [], [], [], [], []
    for _ in range(num_steps):
        action = env.action_space.sample()
        obs, rew, done, info = env.step(action)
        if verbose:
            obs_log.append(obs)
            act_log.append(action)
            rew_log.append(rew)
            trial_flags.append(info['new_trial'])
            gt_log.append(info['gt'])
        if done:
            env.reset()
    if not verbose:
        return
    obs_log = np.array(obs_log)
    _, panels = plt.subplots(nrows=3, ncols=1, sharex=True)
    panels = panels.flatten()
    obs_ax, act_ax, rew_ax = panels
    x_range = [-.5, len(act_log) - 0.5]
    # observations (one row per observation dimension)
    obs_ax.imshow(obs_log.T, aspect='auto')
    # actions against ground truth
    act_ax.plot(act_log, label='Actions')
    act_ax.plot(gt_log, '--', label='gt')
    act_ax.set_xlim(x_range)
    act_ax.legend()
    # rewards
    rew_ax.plot(rew_log)
    # mark the steps flagged as new trial on every panel
    trial_ends = np.where(trial_flags)[0]
    for panel in panels:
        y_range = panel.get_ylim()
        for step in trial_ends:
            panel.plot([step, step], y_range, '--c')
    rew_ax.set_xlim(x_range)

示例#7

0

显示文件

def test_learn_trans_matrix(env='NAltPerceptualDecisionMaking-v0',
                            num_steps=100,
                            verbose=True,
                            n_ch=2,
                            th=0.01):
    """
    Run TrialHistoryEvolution + LearnTransMatrix + PassAction + PassReward.

    Actions come from a simple evidence-accumulation policy: observations
    are summed within a trial and, as soon as one accumulated channel
    exceeds the mean of all channels by more than `th`, the corresponding
    choice is selected; otherwise action 0 is taken. When verbose is set,
    observations, actions, ground truth and rewards are plotted, with a
    dashed vertical line at every step flagged as a new trial.

    Parameters
    ----------
    env : str, optional
        id of the environment handed to gym.make
        ('NAltPerceptualDecisionMaking-v0')
    num_steps : int, optional
        number of steps to run the environment (100)
    verbose : boolean, optional
        whether to record and plot the run (True)
    n_ch : int, optional
        number of choices, passed to the environment as 'n_ch' (2)
    th : float, optional
        threshold on the mean-subtracted accumulated observation above
        which a choice is made (0.01)

    Returns
    -------
    None.

    """
    timing = {'fixation': 100, 'stimulus': 300, 'decision': 100}
    env_args = {'timing': timing, 'n_ch': n_ch}
    env = gym.make(env, **env_args)
    env = TrialHistoryEvolution(env,
                                probs=0.9,
                                predef_tr_mats=True,
                                num_contexts=1)
    env = PassReward(PassAction(LearnTransMatrix(env)))
    env.reset()
    if verbose:
        obs_log, rew_log, act_log, gt_log, trial_flags = [], [], [], [], []
    obs_cum = np.zeros((n_ch, ))
    for _ in range(num_steps):
        evidence = obs_cum - np.mean(obs_cum)
        action = np.argmax(evidence) + 1 if (evidence > th).any() else 0
        obs, rew, done, info = env.step(action)
        if info['new_trial']:
            # start accumulating from scratch for the next trial
            obs_cum = np.zeros((n_ch, ))
        else:
            # assumes obs[1:n_ch + 1] holds the stimulus channels
            # - TODO confirm against the environment's observation layout
            obs_cum += obs[1:n_ch + 1]
        if verbose:
            obs_log.append(obs)
            act_log.append(action)
            rew_log.append(rew)
            trial_flags.append(info['new_trial'])
            gt_log.append(info['gt'])
        if done:
            env.reset()
    if not verbose:
        return
    obs_log = np.array(obs_log)
    _, panels = plt.subplots(nrows=3, ncols=1, sharex=True)
    panels = panels.flatten()
    obs_ax, act_ax, rew_ax = panels
    x_range = [-.5, len(act_log) - 0.5]
    # observations (one row per observation dimension)
    obs_ax.imshow(obs_log.T, aspect='auto')
    # actions against ground truth
    act_ax.plot(act_log, label='Actions')
    act_ax.plot(gt_log, '--', label='gt')
    act_ax.set_xlim(x_range)
    act_ax.legend()
    # rewards
    rew_ax.plot(rew_log)
    # mark the steps flagged as new trial on every panel
    trial_ends = np.where(trial_flags)[0]
    for panel in panels:
        y_range = panel.get_ylim()
        for step in trial_ends:
            panel.plot([step, step], y_range, '--c')
    rew_ax.set_xlim(x_range)