Пример #1
0
def plot_every_shallow(df, env, rep):
    """Plot every run, and the across-run average, for one env/representation.

    The left panel shows the smoothed, normalised reward trace of each run
    whose ``env_name`` is ``env + '1'`` and whose ``representation`` is
    ``rep``.  The right panel shows the across-run mean with a shaded
    +/- standard-error band.

    Args:
        df: table of runs; the ``env_name``, ``representation``, ``save_id``
            and ``load_from`` columns are read.
        env: environment name without the trailing ``'1'``.
        rep: representation name; also used as the key into ``col_to_plot``.

    Raises:
        KeyError: if ``df`` holds no rows for ``(env + '1', rep)``.
    """
    upper_limit = 30000  # shorter traces are NaN-padded up to this length
    scaling_factor = analysis_specs['avg_max_rwd'][env + '1']
    fig, ax = plt.subplots(1, 2, sharey='col', sharex='col')
    ftsz = 8

    groups_to_split = ['env_name', 'representation']
    df_gb = df.groupby(groups_to_split)["save_id"]

    id_list = list(df_gb.get_group((env + '1', rep)))
    print(env, rep, len(id_list))
    total_avg_reward = []
    for id_num in id_list:
        with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
            dats = pickle.load(f)
        # Map raw reward onto a fraction of the best achievable reward.
        scaled_ = (np.asarray(dats['total_reward']) +
                   2.5) / (scaling_factor + 2.5)
        if len(scaled_) < upper_limit:
            print('hello', len(scaled_))
            nans = np.full(upper_limit - len(scaled_), np.nan)
            load_from = list(df.loc[df['save_id'] == id_num]['load_from'])[0]
            if load_from == ' ':
                # NOTE(review): a blank load_from presumably marks a run that
                # was not loaded from another agent; its data stays at the
                # start and the padding goes at the end.
                scaled_ = np.concatenate((scaled_, nans))
            else:
                # Otherwise the padding goes in front.  The 0.15 offset is
                # applied to these traces only -- TODO confirm its purpose.
                scaled_ = np.concatenate((nans, scaled_ + 0.15))
        else:
            print(len(scaled_), 'len scaled')
        total_avg_reward.append(scaled_)

    # NaN-aware statistics so the padding does not bias the average.
    mean = rm(np.nanmean(total_avg_reward, axis=0), 200)
    stand = rm(np.nanstd(total_avg_reward, axis=0), 200) / np.sqrt(
        len(total_avg_reward))
    print(len(mean), 'len mean')
    ax[0].set_ylim([0, 1.1])
    ax[0].set_yticks([0, 1])
    ax[0].set_yticklabels([0, 100], fontsize=ftsz)
    ax[0].set_ylabel('Performance \n(% Optimal)', fontsize=ftsz)
    for index, x in enumerate(total_avg_reward):
        # Label each trace with the first 8 characters of its save id.
        ax[0].plot(rm(x, 200), label=f'{id_list[index][0:8]}')
    ax[0].legend(loc=0)
    ax[1].plot(mean, color=col_to_plot[rep])
    ax[1].fill_between(np.arange(len(mean)),
                       mean - stand,
                       mean + stand,
                       color=col_to_plot[rep],
                       alpha=0.2)
    plt.show()
Пример #2
0
def plot_pref_dir(data):
    """Plot a reward trace above three preferred-direction policy maps.

    The top row is one wide axis with the smoothed, normalised reward; the
    bottom row holds one preferred-direction map per policy snapshot.  The
    figure is written to ``../figures/CH3/`` and then shown.

    Relies on names defined outside this function: ``pct``, ``env_name``,
    ``env``, ``rep``, ``fade``, ``LINCLAB_COLS``, ``analysis_specs``, ``rm``
    and ``convert_pol_array_to_pref_dir``.

    Args:
        data: mapping with ``'P_snap'`` and either ``'total_reward'`` (when
            ``pct == 'mf'``) or ``'bootstrap_reward'``.
    """
    snapshot_keys = [5000, 10000, 14999]
    palette = {
        100: LINCLAB_COLS['red'],
        75: LINCLAB_COLS['orange'],
        50: LINCLAB_COLS['green'],
        25: LINCLAB_COLS['purple'],
        'mf': 'black'
    }
    fig, ax = plt.subplots(2, len(snapshot_keys))

    # Replace the whole top row of small axes with a single wide axis.
    grid_spec = ax[0, 0].get_gridspec()
    for small_axis in ax[0, :]:
        small_axis.remove()
    big_ax = fig.add_subplot(grid_spec[0, :])

    model_free = pct == 'mf'
    if model_free:
        raw_score = data['total_reward'][5000:20000]
    else:
        raw_score = data['bootstrap_reward'][0:15000]
    normalization = analysis_specs['avg_max_rwd'][env_name[0:22]]
    fraction_of_best = (np.asarray(raw_score) + 2.5) / (normalization + 2.5)
    big_ax.plot(rm(fraction_of_best, 100), color=palette[pct])
    big_ax.set_ylim([-0.1, 1.1])

    # (anchor, linestyle) of the two white marker squares drawn on each map.
    marker_specs = (((4.49, 4.47), '--'), ((13.49, 13.47), '-'))

    for column, snap_key in enumerate(snapshot_keys):
        if model_free:
            # Model-free data uses a different set of snapshot indices.
            print(len(data['P_snap']))
            snap_key = [75, 100, 149][column]
            print(snap_key)
        pol_array = data['P_snap'][snap_key].flatten()
        for x in env.obstacle:
            pol_array[x] = tuple(np.zeros(4))
        dir_map = convert_pol_array_to_pref_dir(pol_array.flatten())

        panel = ax[1, column]
        image = panel.imshow(dir_map, cmap=fade, vmin=0, vmax=360)
        for anchor, style in marker_specs:
            panel.add_patch(
                plt.Rectangle(anchor,
                              width=1,
                              height=1,
                              edgecolor='w',
                              fill=False,
                              linestyle=style))
        panel.get_xaxis().set_visible(False)
        panel.get_yaxis().set_visible(False)

    # The colorbar uses the last image drawn and sits beside the third panel.
    plt.colorbar(image, ax=ax[1, 2])
    plt.savefig(f'../figures/CH3/pref_dir_{rep}_{pct}.svg')
    plt.show()
Пример #3
0
def plot_each(env_name, rep, cutoff=25000, smoothing=500):
    """Plot the smoothed reward trace of every run for ``env_name``/``rep``.

    Args:
        env_name: first-level key into ``master_dict``.
        rep: second-level key into ``master_dict``.
        cutoff: only the first ``cutoff`` reward entries of each run are used.
        smoothing: window passed to ``rm``.
    """
    plt.figure()
    for run_id in master_dict[env_name][rep]:
        with open(data_dir + f'{run_id}_data.p', 'rb') as handle:
            rewards = pickle.load(handle)['total_reward'][0:cutoff]
        # Legend entries show the first 8 characters of the run id.
        plt.plot(rm(rewards, smoothing), label=run_id[0:8])
    plt.legend(loc='upper center', bbox_to_anchor=(0.1, 1.1))
    plt.ylim([-4, 12])
    plt.show()


#plot_all(cutoff=25000)
#plot_each(envs[2],reps[1])
Пример #4
0
def get_train_test(env,rep):
    """Return mean and standard error of training followed by retraining reward.

    The training trace is read from the run named in the ``load_from`` field
    of the first id in the ``(env, rep)`` group.  It is joined in front of
    each retraining trace with ``+``, then normalised and smoothed with
    ``rm`` (window 200).

    Returns:
        ``(mean_performance, std_e_mean)`` computed across runs with
        NaN-aware statistics.
    """
    def _total_reward(run_id):
        with open(data_dir+f'{run_id}_data.p','rb') as handle:
            return pickle.load(handle)['total_reward']

    scale = analysis_specs['avg_max_rwd'][env]
    run_ids = list(gb.get_group((env, rep)))
    parent_id = list(df.loc[df['save_id']==run_ids[0]]['load_from'])[0]

    training_data = _total_reward(parent_id)
    print(len(training_data))

    smoothed_runs = []
    for run_id in run_ids:
        # assumes both traces are lists, so ``+`` concatenates -- TODO confirm
        joined = training_data + _total_reward(run_id)
        smoothed_runs.append(rm((np.asarray(joined)+2.5)/(scale+2.5), 200))

    mean_performance = np.nanmean(smoothed_runs, axis=0)
    std_e_mean = np.nanstd(smoothed_runs, axis=0)/np.sqrt(len(smoothed_runs))
    return mean_performance, std_e_mean
Пример #5
0
def plot_reward_loss():
    """Show reward and the two loss curves of the module-level ``run``.

    Row 0: smoothed ``total_reward`` (and ``bootstrap_reward`` if recorded).
    Rows 1 and 2: entries 0 and 1 of ``loss`` (and of ``mf_loss`` if
    recorded), labelled ``*_p`` and ``*_v`` respectively.
    """
    window = 10
    fig, ax = plt.subplots(3, 1, sharex=True)
    reward_ax = ax[0]
    loss_axes = (ax[1], ax[2])

    reward_ax.plot(rm(run.data['total_reward'], window), 'k', alpha=0.5)
    if "bootstrap_reward" in run.data.keys():
        reward_ax.plot(rm(run.data['bootstrap_reward'], window), 'r')

    for which, (axis, tag) in enumerate(zip(loss_axes, ('ec_p', 'ec_v'))):
        axis.plot(rm(run.data['loss'][which], window), label=tag)

    if "mf_loss" in run.data.keys():
        for which, (axis, tag) in enumerate(zip(loss_axes, ('mf_p', 'mf_v'))):
            axis.plot(rm(run.data['mf_loss'][which], window), label=tag)

    for axis in loss_axes:
        axis.legend(loc=0)

    plt.show()
    plt.close()
Пример #6
0
def plot_compare_conv_retraining(envs_to_plot, pcts_to_plot):
    """Compare retraining curves of the 'conv' representation per environment.

    One row of three panels is drawn per environment:

    * column 0 -- the grid from ``grids`` with one reward location marked;
    * column 1 -- mean ``total_reward`` of the runs for each cache percentage;
    * column 2 -- mean +/- standard error of the baseline runs from
      ``gb_base`` (black), plus the mean ``bootstrap_reward`` of the runs for
      each cache percentage.

    Only the first 5000 entries of every reward trace are used.

    Args:
        envs_to_plot: environment names; ``env[0:22]`` is the key into
            ``analysis_specs['avg_max_rwd']``.
        pcts_to_plot: cache sizes as percentages of ``cache_limits[env][100]``.

    Raises:
        KeyError: if the baseline group, the normalisation entry or
            ``cache_limits[env]`` is missing.  A failure while loading or
            plotting an individual cache size is reported and skipped.
    """
    def _smoothed_fraction(raw_score, normalization):
        # Normalise raw reward and smooth it with a window of 200.
        return rm((np.asarray(raw_score)+2.5)/(normalization +2.5) , 200)

    fig, ax = plt.subplots(len(envs_to_plot),3, figsize=(10,12))
    rep = 'conv'
    for e, env in enumerate(envs_to_plot):
        # Only this one reward location is drawn; environments whose name
        # ends in '5' use a different position.
        rwd_colrow1 = (16,9) if env[-1] == '5' else (14,14)
        rect1 = plt.Rectangle(rwd_colrow1, 1, 1, facecolor='g', edgecolor=None,alpha=0.3)
        ax[e,0].pcolor(grids[envs_to_plot.index(env)],cmap='bone_r',edgecolors='k', linewidths=0.1)
        ax[e,0].axis(xmin=0, xmax=20, ymin=0,ymax=20)
        ax[e,0].set_aspect('equal')
        ax[e,0].add_patch(rect1)
        ax[e,0].get_xaxis().set_visible(False)
        ax[e,0].get_yaxis().set_visible(False)
        ax[e,0].invert_yaxis()

        id_list = gb_base.get_group((env,'conv','x'))
        normalization = analysis_specs['avg_max_rwd'][env[0:22]]
        mf_retrain = []
        print('MF data')
        for id_num in id_list:
            with open(data_dir+f'{id_num}_data.p','rb') as f:
                dats = pickle.load(f)
            mf_retrain.append(
                _smoothed_fraction(dats['total_reward'][0:5000], normalization))
        means = np.nanmean(mf_retrain,axis=0)
        sem = np.nanstd(mf_retrain,axis=0)/np.sqrt(len(mf_retrain))
        ax[e,2].plot(means, 'k', alpha=0.7)
        ax[e,2].fill_between(np.arange(len(means)),means-sem,means+sem, color='k', alpha=0.2)

        print('EC bootstrapped data')
        for pct in pcts_to_plot:
            print(pct)
            # Computed outside the try so the handler below cannot fail on it.
            cache_size = int(cache_limits[env][100]*(pct/100))
            ec_performance = []
            mf_bootstrap = []
            try:
                id_list = gb.get_group((env,rep,cache_size))
                for id_num in id_list:
                    with open(data_dir+f'{id_num}_data.p','rb') as f:
                        dats = pickle.load(f)
                    mf_bootstrap.append(
                        _smoothed_fraction(dats['bootstrap_reward'][0:5000], normalization))
                    ec_performance.append(
                        _smoothed_fraction(dats['total_reward'][0:5000], normalization))

                ax[e,1].plot(np.nanmean(ec_performance,axis=0), label=f'{pct}')
                ax[e,2].plot(np.nanmean(mf_bootstrap,axis=0), label=f'{pct}')
            except Exception:
                # Best effort: a missing group or file skips this cache size,
                # but KeyboardInterrupt/SystemExit are no longer swallowed.
                print(f'no data for EC{env}{rep}{cache_size}')
        ax[e,1].legend(loc=0)
        ax[e,2].legend(loc=0)
        ax[e,1].set_ylim(0,1.1)
        ax[e,2].set_ylim(0,1.1)
    plt.show()
Пример #7
0
# Build the environment; close any figure opened so far.
env = gym.make(env_name)
plt.close()

# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    params.lr = 0.001
    params.temp = 1.1
    print(params.__dict__)
    network = nets.ActorCritic(params)
else:
    # Reuse a previously saved agent network.
    network = torch.load(f=f'./Data/agents/load_agents/{network_id}.pt')
memtemp = 1
memory = Memory.EpisodicMemory(cache_limit=400,
                               entry_size=env.action_space.n,
                               mem_temp=memtemp)

agent = Agent(network, memory=memory)

run = expt(agent, env)
ntrials = 1000
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=100)
#run.record_log(f'mf_ec_t{memtemp}', env_name, n_trials=ntrials)

# Plot the smoothed reward, plus the bootstrap reward when it was recorded.
smoothing = 10
plt.figure()
plt.plot(rm(run.data['total_reward'], smoothing), c='k', alpha=0.5)
if 'bootstrap_reward' in run.data.keys():
    plt.plot(rm(run.data['bootstrap_reward'], smoothing), c='r')
plt.show()
Пример #8
0
ec_results = []
mf_results = []
colors = [
    LINCLAB_COLS['red'], LINCLAB_COLS['blue'], LINCLAB_COLS['green'],
    LINCLAB_COLS['purple'], LINCLAB_COLS['orange'], LINCLAB_COLS['grey']
]
# One panel per (representation, cache percentage) pair.
fig, ax = plt.subplots(2, 4, sharey=True)
for r, rep in enumerate(reps_to_plot):
    for p, pct in enumerate(pcts_to_plot):
        print(rep, pct)
        try:
            id_list = gb.get_group(
                (env, rep, int(cache_limits[env][100] * (pct / 100)), 5000))
            for i, id_num in enumerate(id_list):
                # Only the first run of each group is drawn.
                if i == 0:
                    with open(data_dir + f'{id_num}_data.p', 'rb') as f:
                        dats = pickle.load(f)
                        ax[r, p].plot(rm(dats['total_reward'], 200),
                                      linestyle=':',
                                      color=colors[i])
                        ax[r, p].plot(rm(dats['bootstrap_reward'], 200),
                                      color='k')
            # Overlay the first baseline run for the same env/representation.
            baseline_id = list(gb_base.get_group((env, rep)))[0]
            with open(data_dir + f'{baseline_id}_data.p', 'rb') as f:
                dats = pickle.load(f)
                filt = rm(dats['total_reward'][0:5000], 200)
                ax[r, p].plot(filt, color='cyan')
        except Exception:
            # Best effort: skip panels whose data is missing, but let
            # KeyboardInterrupt/SystemExit propagate.
            print('no data')
ax[0, 0].set_ylim([-2.5, 10])
plt.show()
Пример #9
0
# Episodic memory with one entry slot per available action.
memtemp = 1
memory = Memory.EpisodicMemory(
    cache_limit=400,
    entry_size=env.action_space.n,
    mem_temp=memtemp,
)
agent = Agent(network, memory=memory)
run = expt(agent, env)

# Run the experiment and append a record of it to the training log.
ntrials = 1000
nevents = 250
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=nevents)
run.record_log(
    file='MFtraining.csv',
    expt_type=f'{type(run).__name__}',
    env_name=env_name,
    n_trials=ntrials,
    n_steps=nevents,
)

# Row 0: rewards.  Rows 1 and 2: entries 0 and 1 of each loss record.
smoothing = 10
fig, ax = plt.subplots(3, 1, sharex=True)
ax[0].plot(rm(run.data['total_reward'], smoothing), 'k', alpha=0.5)
ax[0].plot(rm(run.data['bootstrap_reward'], smoothing), 'r')

for row, ec_label, mf_label in ((1, 'ec_p', 'mf_p'), (2, 'ec_v', 'mf_v')):
    ax[row].plot(rm(run.data['loss'][row - 1], smoothing), label=ec_label)
    ax[row].plot(rm(run.data['mf_loss'][row - 1], smoothing), label=mf_label)
    ax[row].legend(loc=0)

plt.show()
Пример #10
0
    # For each representation, overlay every training run and every test run
    # in panel ax[e, r].
    # NOTE(review): `e`, `env`, `ax`, `train_gb` and `test_gb` come from the
    # enclosing scope, which is not visible in this snippet.
    for r, rep in enumerate(['unstructured', 'structured']):
        train_avg_reward = []
        test_avg_reward = []
        # Training runs are grouped under `env`, test runs under `env + '1'`.
        train_ids = list(train_gb.get_group((env, rep, 5000)))
        test_ids = list(test_gb.get_group((env + '1', rep, 10000)))
        print(env, rep, len(train_ids), len(test_ids))
        # NOTE(review): the title goes on row 0 while the data is plotted on
        # row `e` -- confirm this is intended.
        ax[0, r].set_title(f'{rep}')
        for i, id_num in enumerate(train_ids):
            with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
                raw_score = dats['total_reward']
                # Both phases are normalised with the `env + '1'` entry.
                normalization = analysis_specs['avg_max_rwd'][env + '1']
                transformed = (np.asarray(raw_score) + 2.5) / (normalization +
                                                               2.5)
                train_avg_reward.append(transformed)
                ax[e, r].plot(rm(transformed, 200))
        #train_mean  = np.mean(train_avg_reward,axis=0)
        #train_maxes = train_mean+np.std(train_avg_reward,axis=0)/np.sqrt(len(train_avg_reward))
        #train_mins  = train_mean-np.std(train_avg_reward,axis=0)/np.sqrt(len(train_avg_reward))

        for i, id_num in enumerate(test_ids):
            with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
                raw_score = dats['total_reward']
                normalization = analysis_specs['avg_max_rwd'][env + '1']
                transformed = (np.asarray(raw_score) + 2.5) / (normalization +
                                                               2.5)
                test_avg_reward.append(transformed)
                # Test traces are drawn starting at x = 5000.
                ax[e,
                   r].plot(np.arange(5000, 5000 + len(rm(transformed, 200))),
                           rm(transformed, 200))
Пример #11
0
env = 'gridworld:gridworld-v51'
e = gym.make(env)
plt.close()
rep = 'structured'

# Saved run to analyse for each environment.
id_list = {
    'gridworld:gridworld-v11': 'a886a36b-77af-4845-b950-71e64506190c',
    'gridworld:gridworld-v31': '22d9a5cc-13e4-4fe3-9e92-9f25c5ba9b18',
    'gridworld:gridworld-v41': '5f0f1b3f-db3e-4a19-9d19-cf817e9aeee3',
    'gridworld:gridworld-v51': 'ac9a6807-9ecb-405c-b31e-3ff4ccaa2bfd'
}

id_num = id_list[env]
with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
    dats = pickle.load(f)
# Summarise what was recorded: each key with the length of its value.
print(id_num, [(key, len(values)) for key, values in dats.items()])

# NOTE(review): with end_ind = -1 the slices below stop before the final
# entry -- confirm that dropping it is intended.
start_ind, end_ind = 0, -1
occupancy = np.asarray(dats['MF_occupancy'][start_ind:end_ind])
MF_map = np.nansum(occupancy, axis=0)

# Turn summed counts into a distribution over states.
MF_visits = np.nansum(np.asarray(MF_map))
MF = MF_map / MF_visits

# Log-ratio of each state's share to an equal share over e.useable.
uniform_prob = 1 / len(e.useable)
MF_occ = np.log(MF.reshape(20, 20) / uniform_prob)

fig, ax = plt.subplots(2, 1)
reward_trace = rm(dats['total_reward'][start_ind:end_ind], 200)
ax[0].plot(reward_trace)
ax[1].imshow(MF_occ, cmap='RdBu_r', vmin=-4, vmax=4)
plt.savefig(f'../figures/CH3/mf_only_{env[-2:]}_state_occ.svg')
plt.show()
Пример #12
0
                raw_score = dats['total_reward']
                normalization = analysis_specs['avg_max_rwd'][env[0:22]]
                scaled_ = (np.asarray(raw_score) + 2.5) / (normalization + 2.5)
                if len(scaled_) < upper_limit:
                    num_extras = upper_limit - len(scaled_)
                    last_200_mean = np.mean(scaled_[-5000:])
                    last_200_std = np.std(scaled_[-5000:])
                    filler = np.random.normal(last_200_mean, last_200_std,
                                              num_extras)
                    nans = np.zeros(num_extras)
                    nans[:] = np.nan
                    if last_200_mean > 0.9:
                        scaled_ = np.concatenate((scaled_, filler))
                    else:
                        scaled_ = np.concatenate((scaled_, nans))
                total_avg_reward.append(rm(scaled_, smoothing))
        mean = np.nanmean(total_avg_reward, axis=0)
        maxes = mean + np.nanstd(total_avg_reward, axis=0) / np.sqrt(
            len(total_avg_reward))
        mins = mean - np.nanstd(total_avg_reward, axis=0) / np.sqrt(
            len(total_avg_reward))

        mean = chop_(mean)
        maxes = chop_(maxes)
        mins = chop_(mins)

        ax[e, r].axvline(x=5000 - smoothing + 1, linestyle=":", color='gray')
        ax[e, r].plot(np.arange(len(mean)), mean, color='k', alpha=0.7)
        ax[e, r].fill_between(np.arange(len(mean)),
                              mins,
                              maxes,
Пример #13
0
def plot_single_retraining(env, pcts_to_plot, rep, index):
    """Plot one example retraining run per cache size against the baseline.

    The top-left panel shows the smoothed, normalised ``total_reward`` of the
    selected run for each cache percentage.  The top-right panel shows the
    matching ``bootstrap_reward`` traces over the baseline mean +/- standard
    error, which is computed from the first three runs in ``gb_base``.

    Args:
        env: environment name; ``env[0:22]`` keys the baseline group and the
            normalisation table.
        pcts_to_plot: cache sizes as percentages of ``cache_limits[env][100]``;
            each value is also a key into the module-level ``colors``.
        rep: representation name.
        index: position, within each cache-size group, of the run to plot.

    Raises:
        KeyError: if the baseline group, the normalisation entry or
            ``cache_limits[env]`` is missing.  A failure while loading or
            plotting an individual cache size is reported and skipped.
    """
    def _smoothed_fraction(raw_score, normalization):
        # Normalise raw reward and smooth it with a window of 200.
        return rm((np.asarray(raw_score) + 2.5) / (normalization + 2.5), 200)

    fig, ax = plt.subplots(2, 2, figsize=(10, 12))

    ## get MF only -- baseline
    id_list = gb_base.get_group((env[0:22], rep))
    normalization = analysis_specs['avg_max_rwd'][env[0:22]]
    filler = np.full(5000, np.nan)
    mf_retrain = []
    for id_num in id_list[0:3]:
        print(id_num)
        with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
            dats = pickle.load(f)
        transformed = _smoothed_fraction(dats['total_reward'][5000:20000],
                                         normalization)
        if len(transformed) == 9801:
            # NOTE(review): 9801 is presumably the smoothed length of a
            # shorter run; pad it with NaN so all baseline traces line up.
            transformed = np.concatenate((transformed, filler))
        mf_retrain.append(transformed)
    lens = [len(x) for x in mf_retrain]
    print(lens, len(mf_retrain), len(mf_retrain[0]))
    means = np.nanmean(np.asarray(mf_retrain), axis=0)
    sem = np.nanstd(mf_retrain, axis=0) / np.sqrt(len(mf_retrain))
    ax[0, 1].plot(means, 'k', alpha=0.7)
    ax[0, 1].fill_between(np.arange(len(means)),
                          means - sem,
                          means + sem,
                          color='k',
                          alpha=0.2)

    print('EC bootstrapped data')
    for pct in pcts_to_plot:
        print(pct)
        # Computed outside the try so the handler below cannot fail on it.
        cache_size = int(cache_limits[env][100] * (pct / 100))
        try:
            current_id_list = gb.get_group((env, rep, cache_size, 15000))
            print(env, pct, len(current_id_list), 'helloooooo')
            print(current_id_list)
            id_num = list(current_id_list)[index]
            print(id_num)
            with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
            mf_bootstrap = _smoothed_fraction(dats['bootstrap_reward'][0:15000],
                                              normalization)
            ec_performance = _smoothed_fraction(dats['total_reward'][0:15000],
                                                normalization)

            ax[0, 0].plot(ec_performance, label=f'{pct}', color=colors[pct])
            ax[0, 1].plot(mf_bootstrap, label=f'{pct}', color=colors[pct])

        except Exception:
            # Best effort: a missing group, index or file skips this cache
            # size, but KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f'no data for EC{env}{rep}{cache_size}')
    ax[0, 0].legend(loc=0)
    ax[0, 1].legend(loc=0)
    ax[0, 0].set_title('EC perf')
    ax[0, 1].set_title('Bootstrap Perf')
    ax[0, 0].set_ylim(0, 1.1)
    ax[0, 1].set_ylim(0, 1.1)
    #plt.savefig(f'../figures/CH3/example_bootstrap.svg')
    plt.show()
Пример #14
0
def plot_each_env_retraining(envs_to_plot, pcts_to_plot, rep):
    """Plot retraining performance per environment and save the figure.

    One row of two panels is drawn per environment.  The left column shows
    mean +/- standard error of ``total_reward`` for each cache percentage.
    The right column shows the baseline from ``gb_base`` (black) together
    with mean +/- standard error of ``bootstrap_reward`` for each cache
    percentage.  The figure is saved under ``../figures/CH3/`` and shown.

    Args:
        envs_to_plot: environment names; ``env[0:22]`` keys the baseline
            group and the normalisation table.
        pcts_to_plot: cache sizes as percentages of ``cache_limits[env][100]``;
            each value is also a key into the module-level ``colors``.
        rep: representation name.

    Raises:
        KeyError: if a baseline group, a normalisation entry or
            ``cache_limits[env]`` is missing.  A failure while loading or
            plotting an individual cache size is reported and skipped.
    """
    def _smoothed_fraction(raw_score, normalization):
        # Normalise raw reward and smooth it with a window of 200.
        return rm((np.asarray(raw_score) + 2.5) / (normalization + 2.5), 200)

    def _plot_band(axis, traces, **style):
        # Draw the NaN-aware mean of `traces` with a +/- standard-error band.
        means = np.nanmean(traces, axis=0)
        sem = np.nanstd(traces, axis=0) / np.sqrt(len(traces))
        line_style = {k: v for k, v in style.items() if k != 'fmt'}
        if 'fmt' in style:
            axis.plot(means, style['fmt'], **line_style)
        else:
            axis.plot(means, **line_style)
        axis.fill_between(np.arange(len(means)),
                          means - sem,
                          means + sem,
                          color=style['color'] if 'color' in style else 'k',
                          alpha=0.2)

    fig, ax = plt.subplots(len(envs_to_plot),
                           2,
                           figsize=(14, 10),
                           sharex=True,
                           sharey=True)
    for e, env in enumerate(envs_to_plot):
        ## get MF only -- baseline
        id_list = gb_base.get_group((env[0:22], rep, 30000))
        normalization = analysis_specs['avg_max_rwd'][env[0:22]]
        mf_retrain = []
        print(f'numMF retraining on pol = {len(id_list)}')
        for id_num in id_list:
            with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
            mf_retrain.append(
                _smoothed_fraction(dats['total_reward'][5000:20000],
                                   normalization))
        _plot_band(ax[e, 1], mf_retrain, fmt='k', alpha=0.7)

        print('EC bootstrapped data')
        for pct in pcts_to_plot:
            print(pct)
            # Computed outside the try so the handler below cannot fail on it.
            cache_size = int(cache_limits[env][100] * (pct / 100))
            ec_performance = []
            mf_bootstrap = []
            try:
                id_list = gb.get_group((env, rep, cache_size, 15000))
                print(env, pct, len(id_list))
                for id_num in id_list:
                    with open(parent_path + f'results/{id_num}_data.p',
                              'rb') as f:
                        dats = pickle.load(f)
                    mf_bootstrap.append(
                        _smoothed_fraction(dats['bootstrap_reward'][0:15000],
                                           normalization))
                    ec_performance.append(
                        _smoothed_fraction(dats['total_reward'][0:15000],
                                           normalization))

                _plot_band(ax[e, 0],
                           ec_performance,
                           label=f'{pct}',
                           color=colors[pct])
                _plot_band(ax[e, 1],
                           mf_bootstrap,
                           label=f'{pct}',
                           color=colors[pct])

            except Exception:
                # Best effort: a missing group or file skips this cache size,
                # but KeyboardInterrupt/SystemExit are no longer swallowed.
                print(f'no data for EC{env}{rep}{cache_size}')

    # Figure-wide decoration, applied once after all rows are drawn.
    ax[0, 0].set_title('Episodic Control')
    ax[0, 1].set_title('Model-Free Control')
    for row in range(len(envs_to_plot)):
        ax[row, 0].set_ylabel('Performance (% Optimal)')
        ax[row, 0].set_ylim(0, 1.1)
        ax[row, 1].set_ylim(0, 1.1)
    ax[-1, 0].set_xlabel('Episodes')
    ax[-1, 1].set_xlabel('Episodes')
    plt.savefig(f'../figures/CH3/example_bootstrap_{rep}_all_env.svg')
    plt.show()
Пример #15
0
def plot_perceptron(df, envs_to_plot, reps_to_plot):
    """Plot each environment's grid next to its train+retrain performance.

    Column 0 shows the grid with two marked reward locations.  Column 1
    shows, for every representation, the mean +/- standard error of the
    smoothed reward: the first 5000 training entries followed by the full
    retraining trace.  The figure is saved to
    ``../figures/CH1/perceptron_FC.svg`` and shown.

    Args:
        df: table of runs; ``save_id`` and ``load_from`` are read to find the
            training run behind each retraining run.
        envs_to_plot: environment names without the trailing ``'1'``.
        reps_to_plot: representation names; also keys into ``col_to_plot``.

    Note:
        The run groups come from ``df_gb``, which is not defined in this
        function and must exist in the enclosing scope.
    """
    def _scaled_rewards(run_id, scale, first_n=None):
        # Load a run's reward trace, optionally truncate it, and normalise it.
        with open(parent_path + f'results/{run_id}_data.p', 'rb') as handle:
            rewards = pickle.load(handle)['total_reward']
        if first_n is not None:
            rewards = rewards[0:first_n]
        return (np.asarray(rewards) + 2.5) / (scale + 2.5)

    def _reward_square(corner, colour):
        # Translucent unit square marking a reward location on the grid.
        return plt.Rectangle(corner,
                             1,
                             1,
                             facecolor=colour,
                             edgecolor=None,
                             alpha=0.3)

    grids = get_grids(envs_to_plot)
    fig, ax = plt.subplots(len(envs_to_plot), 2, sharey='col', sharex='col')
    for e, env in enumerate(envs_to_plot):
        scaling_factor = analysis_specs['avg_max_rwd'][env + '1']
        # Environments whose name ends in '5' use different reward positions.
        if env[-1] == '5':
            first_corner, second_corner = (3, 9), (16, 9)
        else:
            first_corner, second_corner = (5, 5), (14, 14)
        grey_square = _reward_square(first_corner, 'gray')
        green_square = _reward_square(second_corner, 'g')

        grid_ax = ax[e, 0]
        grid_ax.pcolor(grids[e],
                       cmap='bone_r',
                       edgecolors='k',
                       linewidths=0.1)
        grid_ax.axis(xmin=0, xmax=20, ymin=0, ymax=20)
        grid_ax.set_aspect('equal')
        grid_ax.add_patch(grey_square)
        grid_ax.add_patch(green_square)
        grid_ax.get_xaxis().set_visible(False)
        grid_ax.get_yaxis().set_visible(False)
        grid_ax.invert_yaxis()

        perf_ax = ax[e, 1]
        for rep in reps_to_plot:
            run_ids = list(df_gb.get_group((env + '1', rep, 10000)))
            print(env, rep, len(run_ids))
            smoothed_runs = []
            for run_id in run_ids:
                # get training data
                parent_id = list(
                    df.loc[df['save_id'] == run_id]['load_from'])[0]
                training = _scaled_rewards(parent_id,
                                           scaling_factor,
                                           first_n=5000)
                retraining = _scaled_rewards(run_id, scaling_factor)
                joined = np.concatenate((training, retraining))
                smoothed_runs.append(rm(joined, 200))
            mean = np.mean(smoothed_runs, axis=0)
            stand = np.std(smoothed_runs, axis=0) / np.sqrt(
                len(smoothed_runs))
            print(len(mean))
            perf_ax.set_ylim([0, 1.1])
            perf_ax.set_yticks([0, 1])
            perf_ax.set_yticklabels([0, 100])
            perf_ax.set_ylabel('Performance \n(% Optimal)')
            perf_ax.plot(mean, color=col_to_plot[rep])
            perf_ax.fill_between(np.arange(len(mean)),
                                 mean - stand,
                                 mean + stand,
                                 color=col_to_plot[rep],
                                 alpha=0.2)
    plt.savefig('../figures/CH1/perceptron_FC.svg')
    plt.show()
Пример #16
0
def plot_shallow(
    df,
    envs_to_plot,
    reps_to_plot,
):
    """Plot each environment's grid beside its mean learning curves.

    One figure row is drawn per environment. The left panel shows the grid
    returned by ``get_grids`` with two highlighted squares; the right panel
    shows, for every representation, the smoothed mean of the rescaled
    per-episode reward with a shaded +/- standard-error band.

    Args:
        df: DataFrame with (at least) the columns ``env_name``,
            ``representation``, ``extra_info`` and ``save_id``.
        envs_to_plot: Environment names. Each one is used as the ``env_name``
            group key and, with ``'1'`` appended, as a key of
            ``analysis_specs['avg_max_rwd']``.
        reps_to_plot: Representation names; each must be a key of
            ``col_to_plot``.

    Side effects:
        Reads ``results/<save_id>_data.p`` pickles below ``parent_path``,
        prints run lengths, writes ``../figures/CH1/shallow_FC.svg`` and
        shows the figure.
    """
    upper_limit = 30000  # runs shorter than this are padded to this length
    grids = get_grids(envs_to_plot)
    groups_to_split = ['env_name', 'representation', 'extra_info']
    df_gb = df.groupby(groups_to_split)["save_id"]
    # squeeze=False keeps `ax` two-dimensional, so ax[row, col] also works
    # when a single environment is requested (plt.subplots would otherwise
    # return a 1-D array and the indexing below would raise IndexError).
    fig, ax = plt.subplots(len(envs_to_plot),
                           2,
                           sharey='col',
                           sharex='col',
                           squeeze=False)
    ftsz = 8
    for e, env in enumerate(envs_to_plot):
        scaling_factor = analysis_specs['avg_max_rwd'][env + '1']
        # Positions of the two highlighted squares depend on the env variant.
        if env[-1] == '5':
            rwd_colrow0 = (3, 9)
            rwd_colrow1 = (16, 9)
        else:
            rwd_colrow0 = (5, 5)
            rwd_colrow1 = (14, 14)

        rect0 = plt.Rectangle(rwd_colrow0,
                              1,
                              1,
                              facecolor='gray',
                              edgecolor=None,
                              alpha=0.5)
        rect1 = plt.Rectangle(rwd_colrow1,
                              1,
                              1,
                              facecolor='g',
                              edgecolor=None,
                              alpha=0.3)
        grid_ax = ax[e, 0]
        grid_ax.pcolor(grids[e],
                       cmap='bone_r',
                       edgecolors='k',
                       linewidths=0.1)
        grid_ax.axis(xmin=0, xmax=20, ymin=0, ymax=20)
        grid_ax.set_aspect('equal')
        grid_ax.add_patch(rect0)
        grid_ax.add_patch(rect1)
        grid_ax.get_xaxis().set_visible(False)
        grid_ax.get_yaxis().set_visible(False)
        grid_ax.invert_yaxis()

        perf_ax = ax[e, 1]
        for rep in reps_to_plot:
            id_list = list(df_gb.get_group((env, rep, 'x')))
            print(env, rep, len(id_list))
            total_avg_reward = []
            for id_num in id_list:
                # NOTE: pickle.load can execute arbitrary code; only use
                # result files produced by trusted runs.
                with open(parent_path + f'results/{id_num}_data.p', 'rb') as f:
                    dats = pickle.load(f)
                # Rescale rewards; the 2.5 offset is applied to numerator and
                # denominator alike (presumably the minimum reward is -2.5 --
                # TODO confirm).
                scaled_ = (np.asarray(dats['total_reward']) +
                           2.5) / (scaling_factor + 2.5)
                if len(scaled_) < upper_limit:
                    print('hello', len(scaled_))
                    num_extras = upper_limit - len(scaled_)
                    last_200_mean = np.mean(scaled_[-200:])
                    last_200_std = np.std(scaled_[-200:])
                    # Drawn unconditionally so the global RNG stream is
                    # consumed exactly as before, even when the NaN padding
                    # is chosen below.
                    filler = np.random.normal(last_200_mean, last_200_std,
                                              num_extras)

                    nans = np.zeros(num_extras)
                    nans[:] = np.nan
                    # Runs that ended near optimal are extended with noise
                    # matching their last 200 episodes; all others are padded
                    # with NaN so they drop out of nanmean/nanstd.
                    if last_200_mean > 0.95:
                        scaled_ = np.concatenate((scaled_, filler))
                    else:
                        scaled_ = np.concatenate((scaled_, nans))
                else:
                    print(len(scaled_))
                total_avg_reward.append(scaled_)

            mean = rm(np.nanmean(total_avg_reward, axis=0), 200)
            stand = rm(
                np.nanstd(total_avg_reward, axis=0) /
                np.sqrt(len(total_avg_reward)), 200)
            print(len(mean))
            perf_ax.set_ylim([0, 1.1])
            perf_ax.set_yticks([0, 1])
            perf_ax.set_yticklabels([0, 100], fontsize=ftsz)
            perf_ax.set_ylabel('Performance \n(% Optimal)', fontsize=ftsz)
            perf_ax.plot(mean, color=col_to_plot[rep])
            perf_ax.fill_between(np.arange(len(mean)),
                                 mean - stand,
                                 mean + stand,
                                 color=col_to_plot[rep],
                                 alpha=0.2)
    # The x axis is shared per column, so only the bottom row is labelled.
    ax[-1, 1].set_xlabel('Episodes', fontsize=ftsz)
    ax[-1, 1].set_xticks([0, 10000, 20000, 30000])
    ax[-1, 1].set_xticklabels([0, 10000, 20000, 30000], fontsize=ftsz)
    plt.savefig('../figures/CH1/shallow_FC.svg')
    plt.show()
Пример #17
0
# Script fragment: plot the mean smoothed reward of one env/representation
# pair taken from the `bl_gb` groupby (defined elsewhere in the file).
envs_to_plot = ['gridworld:gridworld-v11','gridworld:gridworld-v41','gridworld:gridworld-v31','gridworld:gridworld-v51']
pcts_to_plot = [100,75,50,25]
reps_to_plot = ['unstructured','structured']
grids = get_grids(envs_to_plot)

# Selection used for the curve plotted below.
env = envs_to_plot[1]
pct = 100
rep = 'structured'

# save_ids of every run recorded for (env, rep)
id_list = list(bl_gb.get_group((env,rep)))
print(id_list)
total_avg_reward = []
for i, id_num in enumerate(id_list):
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(parent_path+ f'results/{id_num}_data.p', 'rb') as f:
        dats = pickle.load(f)
        # rm(..., 100): presumably a running mean with window 100 -- the
        # helper is defined elsewhere, TODO confirm.
        total_avg_reward.append(rm(dats['total_reward'],100))
# Average across runs (requires every run to have the same length).
mean = np.mean(total_avg_reward,axis=0)
print(len(mean))
plt.plot(mean)

for pct in pcts_to_plot:
    try:
        id_list = list(df_gb.get_group((env,rep,int(cache_limits[env][100]*(pct/100)),5000)))
        print(id_list)
        total_avg_reward = []
        for i, id_num in enumerate(id_list):
            with open(parent_path+ f'results/{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
                total_avg_reward.append(rm(dats['total_reward'],100))
        mean = np.mean(total_avg_reward,axis=0)
        print(len(mean))
Пример #18
0
       }


# Load the per-episode reward trace for every labelled run in `ids`.
total_reward = {}
for key, value in ids.items():
    # Entries with an empty id are skipped, so their key is absent from
    # `total_reward`; the plotting calls below raise KeyError if one of the
    # four plotted keys was skipped.
    if value == '':
        pass
    else:
        load_id = value
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(data_dir+ f'results/{load_id}_data.p', 'rb') as f:
            total_reward[key] = pickle.load(f)['total_reward']

# Second argument passed to rm() (presumably a running-mean window -- the
# helper is defined elsewhere, TODO confirm).
smoothing = 100

# Dotted lines: model-free control; solid lines: episodic control.
# Colour C0: onehot runs; colour C1: place-cell runs.
plt.figure()
plt.plot(rm(total_reward['oh_test'],smoothing),':',c='C0',label='model free control (onehot)')
plt.plot(rm(total_reward['oh_test_ec'],smoothing), c='C0',label='episodic control (onehot)')
plt.plot(rm(total_reward['pc_test'],smoothing),':',c='C1',label='model free control (place cell)')
plt.plot(rm(total_reward['pc_test_ec'],smoothing), c='C1',label='episodic control (place cell)')

plt.xlim([0,1900])
# Legend is anchored above the axes and laid out in two columns.
plt.legend(loc='upper center', bbox_to_anchor =(0.5, 1.1), ncol=2)
plt.show()







Пример #19
0
# Script fragment: one subplot per (environment, representation) showing the
# mean rescaled reward +/- standard error across runs. `empties`, `ax`,
# `df_gb`, `envs_to_plot` and `reps_to_plot` are created earlier in the script.
empties[:]=np.nan  # turn the padding array into all-NaN
for e, env in enumerate(envs_to_plot):
    for r, rep in enumerate(reps_to_plot):
        id_list = list(df_gb.get_group((env,rep)))
        print(env, rep, len(id_list))
        total_avg_reward = []
        # Column titles are written on the top row only.
        ax[0,r].set_title(f'{rep}')
        for i, id_num in enumerate(id_list):
            # NOTE: pickle.load can execute arbitrary code; only load trusted files.
            with open(parent_path+ f'results/{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
                raw_score = dats['total_reward']
                print(len(raw_score))
                # Runs with exactly 15000 episodes are extended with the NaN
                # padding (presumably to match the length of longer runs --
                # TODO confirm the size of `empties`).
                if len(raw_score)==15000:
                    raw_score = np.concatenate((raw_score, empties))
                normalization = analysis_specs['avg_max_rwd'][env+'1']
                # Rescale with a 2.5 offset on numerator and denominator,
                # then smooth with rm(..., 200).
                transformed = rm((np.asarray(raw_score)+2.5)/(normalization +2.5),200)
                total_avg_reward.append(transformed)
                print(len(transformed))
                #ax[e,r].plot(transformed,label=f'{id_num[0:8]}')

        # NaN-aware mean and standard-error band across runs.
        mean  = np.nanmean(total_avg_reward,axis=0)
        maxes = mean+np.nanstd(total_avg_reward,axis=0)/np.sqrt(len(total_avg_reward))
        mins  = mean-np.nanstd(total_avg_reward,axis=0)/np.sqrt(len(total_avg_reward))
        ax[e,r].axvline(x=4801, linestyle=":",color='gray')
        ax[e,r].plot(np.arange(len(mean)),mean,LINCLAB_COLS['green'])
        ax[e,r].fill_between(np.arange(len(mean)),mins,maxes,color=LINCLAB_COLS['green'], alpha=0.2)
    # NOTE(review): this runs after the inner loop, so only the last column
    # of each row gets the y-limits set explicitly -- confirm the axes share
    # their y axis or move this inside the loop.
    ax[e,r].set_ylim(0,1.1)

plt.savefig(f'../figures/CH1/MFshallow_traintest.svg')
plt.show()
Пример #20
0
def plot_train_test(df, df_gb, envs, reps, save=False):
    """Plot grids and concatenated train+test learning curves per environment.

    For each environment one figure row is drawn: the grid (left) and, for
    each representation, the mean over runs of the smoothed, rescaled reward
    of the first 5000 training episodes followed by all testing episodes,
    with a shaded +/- standard-error band (right).

    Args:
        df: DataFrame with ``save_id`` and ``load_from`` columns;
            ``load_from`` holds the id of the training run a test run was
            loaded from.
        df_gb: Grouped ``save_id`` values, indexable by ``(env, rep)`` via
            ``get_group``.
        envs: Environment names; ``env + '1'`` must be a key of
            ``analysis_specs['avg_max_rwd']``. The module-level ``grids`` is
            indexed in the same order.
        reps: Representation names; keys of ``rep_to_col`` and
            ``labels_for_plot``.
        save: Currently ignored -- the figure is always written to
            ``../figures/CH1/conv_net_retrain.svg``.

    Side effects:
        Reads ``<id>_data.p`` pickles below ``data_dir``, prints progress,
        saves and shows the figure.
    """
    # squeeze=False keeps `ax` two-dimensional so ax[row, col] also works for
    # a single environment (otherwise plt.subplots returns a 1-D array and
    # the indexing below raises IndexError).
    fig, ax = plt.subplots(len(envs), 2, sharex='col', squeeze=False)
    for e, env in enumerate(envs):
        # Positions of the two highlighted squares depend on the env variant.
        if env[-1] == '5':
            rwd_colrow0 = (3, 9)
            rwd_colrow1 = (16, 9)
        else:
            rwd_colrow0 = (5, 5)
            rwd_colrow1 = (14, 14)

        rect0 = plt.Rectangle(rwd_colrow0,
                              1,
                              1,
                              facecolor='b',
                              edgecolor=None,
                              alpha=0.3)
        rect1 = plt.Rectangle(rwd_colrow1,
                              1,
                              1,
                              facecolor='g',
                              edgecolor=None,
                              alpha=0.3)
        grid_ax = ax[e, 0]
        grid_ax.pcolor(grids[e],
                       cmap='bone_r',
                       edgecolors='k',
                       linewidths=0.1)
        grid_ax.axis(xmin=0, xmax=20, ymin=0, ymax=20)
        grid_ax.set_aspect('equal')
        grid_ax.add_patch(rect0)
        grid_ax.add_patch(rect1)
        grid_ax.get_xaxis().set_visible(False)
        grid_ax.get_yaxis().set_visible(False)
        grid_ax.invert_yaxis()

        perf_ax = ax[e, 1]
        for rep in reps:
            train_test_array = []
            id_list = list(df_gb.get_group((env, rep)))
            print(env, rep)
            for id_num in id_list:
                # The same normalisation applies to the training and the
                # testing trace of a run.
                normalization = analysis_specs['avg_max_rwd'][env + '1']
                # get training data
                train_dat_id = list(
                    df.loc[df['save_id'] == id_num]['load_from'])[0]
                # NOTE: pickle.load can execute arbitrary code; only use
                # result files produced by trusted runs.
                with open(data_dir + f'{train_dat_id}_data.p', 'rb') as f:
                    dats = pickle.load(f)
                raw_score = dats['total_reward'][0:5000]
                training_transformed = (np.asarray(raw_score) +
                                        2.5) / (normalization + 2.5)
                # get testing data
                with open(data_dir + f'{id_num}_data.p', 'rb') as f:
                    dats = pickle.load(f)
                raw_score = dats['total_reward']
                testing_transformed = (np.asarray(raw_score) +
                                       2.5) / (normalization + 2.5)

                # Smooth the concatenated trace with rm(..., 200).
                train_test_data = rm(
                    np.concatenate(
                        (training_transformed, testing_transformed)), 200)
                train_test_array.append(train_test_data)
                print('done', id_num)

            mean_perf = np.mean(train_test_array, axis=0)
            std_perf = np.std(train_test_array, axis=0) / np.sqrt(
                len(train_test_array))
            mins = mean_perf - std_perf
            maxes = mean_perf + std_perf

            perf_ax.plot(np.arange(len(mean_perf)),
                         mean_perf,
                         color=LINCLAB_COLS[rep_to_col[rep]],
                         label=labels_for_plot[rep])
            perf_ax.fill_between(np.arange(len(mean_perf)),
                                 mins,
                                 maxes,
                                 color=LINCLAB_COLS[rep_to_col[rep]],
                                 alpha=0.2)
            perf_ax.set_ylim(0, 1.1)
            # Vertical marker at x=4801 (presumably the train/test switch
            # after smoothing -- TODO confirm).
            perf_ax.axvline(x=4801, linestyle=":", color='gray')
        perf_ax.set_ylabel('Performance \n(% Optimal)')

    # The x axis is shared per column, so only the bottom row is labelled.
    ax[-1, 1].set_xlabel('Episodes')
    ax[0, 1].legend(loc='upper center', bbox_to_anchor=(0.5, 1.1))
    plt.savefig('../figures/CH1/conv_net_retrain.svg')
    plt.show()
Пример #21
0
def plot_train_test(df, envs, reps, save=False):
    """Plot grids and train/test learning curves from separate run tables.

    Training runs are read from ``../../Data/conv_mf_training.csv`` and
    testing runs from ``df``. Every training trace (first 5000 episodes) is
    padded with 25000 NaNs after it and every testing trace with 5000 NaNs
    before it, so both kinds share one episode axis; the NaN-aware mean and
    a +/- standard-error band are then plotted per representation.

    Args:
        df: DataFrame of testing runs with the columns ``env_name``,
            ``representation``, ``extra_info`` and ``save_id``.
        envs: Environment names; ``env + '1'`` must be a key of
            ``analysis_specs['avg_max_rwd']``. The module-level ``grids`` is
            indexed in the same order.
        reps: Representation names; keys of ``rep_to_col`` and
            ``labels_for_plot``.
        save: Currently ignored -- the figure is always written to
            ``../figures/CH1/conv_net_retrain.svg``.

    Side effects:
        Reads the training CSV and ``<id>_data.p`` pickles below
        ``data_dir``, prints progress, saves and shows the figure.
    """
    # One row per environment (the row count used to be hard-coded to 4);
    # squeeze=False keeps `ax` two-dimensional even for a single environment.
    fig, ax = plt.subplots(len(envs), 2, sharex='col', squeeze=False)
    ftsz = 8
    groups_to_split = ['env_name', 'representation', 'extra_info']
    training_df = pd.read_csv('../../Data/conv_mf_training.csv')
    tr_gb = training_df.groupby(groups_to_split)['save_id']
    df_gb = df.groupby(groups_to_split)["save_id"]

    # NaN placeholders for the half of the episode axis a run does not cover.
    # np.concatenate copies them, so they can be shared by all runs.
    test_dummy = np.zeros(25000)
    test_dummy[:] = np.nan
    train_dummy = np.zeros(5000)
    train_dummy[:] = np.nan

    for e, env in enumerate(envs):
        # Positions of the two highlighted squares depend on the env variant.
        if env[-1] == '5':
            rwd_colrow0 = (3, 9)
            rwd_colrow1 = (16, 9)
        else:
            rwd_colrow0 = (5, 5)
            rwd_colrow1 = (14, 14)

        rect0 = plt.Rectangle(rwd_colrow0,
                              1,
                              1,
                              facecolor='gray',
                              edgecolor=None,
                              alpha=0.5)
        rect1 = plt.Rectangle(rwd_colrow1,
                              1,
                              1,
                              facecolor='g',
                              edgecolor=None,
                              alpha=0.3)
        grid_ax = ax[e, 0]
        grid_ax.pcolor(grids[e],
                       cmap='bone_r',
                       edgecolors='k',
                       linewidths=0.1)
        grid_ax.axis(xmin=0, xmax=20, ymin=0, ymax=20)
        grid_ax.set_aspect('equal')
        grid_ax.add_patch(rect0)
        grid_ax.add_patch(rect1)
        grid_ax.get_xaxis().set_visible(False)
        grid_ax.get_yaxis().set_visible(False)
        grid_ax.invert_yaxis()

        perf_ax = ax[e, 1]
        for rep in reps:
            print(env, rep)
            train_test_array = []

            # get training
            train_ids = list(tr_gb.get_group((env, rep, 'x')))
            for id_num in train_ids:
                # NOTE: pickle.load can execute arbitrary code; only use
                # result files produced by trusted runs.
                with open(data_dir + f'{id_num}_data.p', 'rb') as f:
                    dats = pickle.load(f)
                raw_score = dats['total_reward'][0:5000]
                normalization = analysis_specs['avg_max_rwd'][env + '1']
                training_transformed = (np.asarray(raw_score) +
                                        2.5) / (normalization + 2.5)
                train_test_array.append(
                    np.concatenate((training_transformed, test_dummy)))
            print('training')

            # get testing
            test_ids = list(df_gb.get_group((env, rep, 'x')))
            for id_num in test_ids:
                with open(data_dir + f'{id_num}_data.p', 'rb') as f:
                    dats = pickle.load(f)
                raw_score = dats['total_reward']
                normalization = analysis_specs['avg_max_rwd'][env + '1']
                testing_transformed = (np.asarray(raw_score) +
                                       2.5) / (normalization + 2.5)
                train_test_array.append(
                    np.concatenate((train_dummy, testing_transformed)))
            print('testing')

            mean_perf = rm(np.nanmean(train_test_array, axis=0), 200)
            # NOTE(review): the divisor counts training and testing runs
            # together, although each episode column only holds values from
            # one of the two groups -- confirm this is the intended
            # standard error.
            std_perf = rm(np.nanstd(train_test_array, axis=0), 200) / np.sqrt(
                len(train_test_array))
            mins = mean_perf - std_perf
            maxes = mean_perf + std_perf

            perf_ax.plot(np.arange(len(mean_perf)),
                         mean_perf,
                         color=LINCLAB_COLS[rep_to_col[rep]],
                         label=labels_for_plot[rep])
            perf_ax.fill_between(np.arange(len(mean_perf)),
                                 mins,
                                 maxes,
                                 color=LINCLAB_COLS[rep_to_col[rep]],
                                 alpha=0.2)
            perf_ax.set_ylim(0, 1.1)
            perf_ax.set_yticks([0, 1])
            perf_ax.set_yticklabels([0, 100])
            # Vertical marker at x=4801 (presumably the train/test switch
            # after smoothing -- TODO confirm).
            perf_ax.axvline(x=4801, linestyle=":", color='gray')
            perf_ax.tick_params(axis='both', which='major', labelsize=8)
        perf_ax.set_ylabel('Performance \n(% Optimal)', fontsize=ftsz)

    # The x axis is shared per column, so only the bottom row is labelled.
    ax[-1, 1].set_xlabel('Episodes', fontsize=ftsz)

    ax[0, 1].legend(loc='upper center', bbox_to_anchor=(0.5, 1.1))
    plt.savefig('../figures/CH1/conv_net_retrain.svg')
    plt.show()
Пример #22
0
def compare_conv_fc(env, conv_gb, fc_gb):
    """Compare convolutional and fully-connected learning curves for one env.

    The left panel shows the environment grid with one highlighted square;
    the right panel shows the mean smoothed, rescaled reward (+/- standard
    error) of the ``'reward_conv'`` runs and of the ``'unstructured'`` and
    ``'structured'`` fully-connected runs.

    Args:
        env: Environment name. It must appear in the module-level ``envs``
            list (used to pick the grid) and ``env + '1'`` must be a key of
            ``analysis_specs['avg_max_rwd']``.
        conv_gb: Grouped ``save_id`` values with a ``(env, 'reward_conv')``
            group.
        fc_gb: Grouped ``save_id`` values with ``(env, 'unstructured')`` and
            ``(env, 'structured')`` groups.

    Side effects:
        Reads ``<id>_data.p`` pickles below ``data_dir``, prints progress,
        writes ``../figures/CH1/compare_conv_fc<last two chars of env>.svg``
        and shows the figure.
    """
    fig, ax = plt.subplots(1, 2, sharex='col')
    # Position of the highlighted square depends on the env variant.
    if env[-1] == '5':
        rwd_colrow0 = (3, 9)
    else:
        rwd_colrow0 = (5, 5)

    rect0 = plt.Rectangle(rwd_colrow0,
                          1,
                          1,
                          facecolor='b',
                          edgecolor=None,
                          alpha=0.3)
    ax[0].pcolor(grids[envs.index(env)],
                 cmap='bone_r',
                 edgecolors='k',
                 linewidths=0.1)
    ax[0].axis(xmin=0, xmax=20, ymin=0, ymax=20)
    ax[0].set_aspect('equal')
    ax[0].add_patch(rect0)
    ax[0].get_xaxis().set_visible(False)
    ax[0].get_yaxis().set_visible(False)
    ax[0].invert_yaxis()

    normalization = analysis_specs['avg_max_rwd'][env + '1']

    # show CONV Results
    id_list = list(conv_gb.get_group((env, 'reward_conv')))
    train_array = []
    for id_num in id_list:
        # NOTE: pickle.load can execute arbitrary code; only use result
        # files produced by trusted runs.
        with open(data_dir + f'{id_num}_data.p', 'rb') as f:
            dats = pickle.load(f)

        raw_score = dats['total_reward']
        print(len(raw_score))
        training_transformed = rm(
            (np.asarray(raw_score) + 2.5) / (normalization + 2.5), 200)
        train_array.append(training_transformed)
        print('done', id_num)

    mean_perf = np.mean(train_array, axis=0)
    std_perf = np.std(train_array, axis=0) / np.sqrt(len(train_array))
    mins = mean_perf - std_perf
    maxes = mean_perf + std_perf

    ax[1].plot(np.arange(len(mean_perf)),
               mean_perf,
               color="gray",
               label='Image (Conv)')
    ax[1].fill_between(np.arange(len(mean_perf)),
                       mins,
                       maxes,
                       color='gray',
                       alpha=0.2)

    # show FC Results
    # NaN padding appended to short FC runs; np.concatenate copies it, so a
    # single array can be shared by every run.
    filler = np.zeros(10000)
    filler[:] = np.nan
    for rep in ['unstructured', 'structured']:
        id_list = list(fc_gb.get_group((env, rep)))
        train_array = []
        for id_num in id_list:
            with open(data_dir + f'{id_num}_data.p', 'rb') as f:
                dats = pickle.load(f)
            # Skip the first 5000 episodes and drop the final one.
            raw_score = dats['total_reward'][5000:-1]
            if len(raw_score) == 9999:
                # np.concatenate works for lists and ndarrays alike; the
                # previous in-place `+= list(filler)` raised a broadcast
                # error when the rewards were stored as an ndarray.
                raw_score = np.concatenate((raw_score, filler))
            print(len(raw_score))
            training_transformed = rm(
                (np.asarray(raw_score) + 2.5) / (normalization + 2.5), 200)
            train_array.append(training_transformed)
            print('done', id_num)

        mean_perf = np.nanmean(train_array, axis=0)
        std_perf = np.nanstd(train_array, axis=0) / np.sqrt(len(train_array))
        mins = mean_perf - std_perf
        maxes = mean_perf + std_perf

        ax[1].plot(np.arange(len(mean_perf)),
                   mean_perf,
                   color=LINCLAB_COLS[rep_to_col[rep]],
                   label=f'{rep} (FC)')
        ax[1].fill_between(np.arange(len(mean_perf)),
                           mins,
                           maxes,
                           color=LINCLAB_COLS[rep_to_col[rep]],
                           alpha=0.2)

    ax[1].set_ylim(0, 1.1)
    ax[1].set_yticks([0, 1])
    ax[1].set_yticklabels([0, 100])
    ax[1].set_ylabel('Performance \n(% Optimal)')
    ax[1].set_xlabel('Episodes')
    ax[1].legend(loc='upper center', bbox_to_anchor=(0.5, 1.1))
    plt.savefig(f'../figures/CH1/compare_conv_fc{env[-2:]}.svg')
    plt.show()