Example #1
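# Setup sketch (not part of the original example): the snippet assumes the
# imports and variables below are already in scope. The commented-out project
# imports are placeholders for wherever nets, Memory, Agent and expt live in
# your codebase; the env id is borrowed from Example #2.
import gym
import torch
import matplotlib.pyplot as plt
# from <project>.Networks import nets        # provides fc_params / ActorCritic
# from <project> import Agent, Memory, expt  # agent wrapper, EpisodicMemory module, experiment runner
env_name   = 'gridworld:gridworld-v51'  # assumed; any registered env id works
network_id = None                       # None -> build a fresh ActorCritic from the env's observation shape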
actor      = 'EC'
ntrials    = 5000


# create environment
env = gym.make(env_name)
plt.close()

# generate network
if network_id is None:
    # generate parameters for network from environment observation shape
    params = nets.fc_params(env)
    network = nets.ActorCritic(params)
else:
    network = torch.load(f'./Data/agents/load_agents/{network_id}.pt')

memory = Memory.EpisodicMemory(cache_limit=400, entry_size=env.action_space.n)

agent = Agent(network, memory=memory)

if actor == 'MF':
    agent.get_action = agent.MF_action
elif actor == 'EC':
    agent.get_action = agent.EC_action
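# 'MF' presumably selects actions from the network's model-free policy, while
# 'EC' selects them from the episodic memory cache created above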

run = expt(agent, env)
run.run(NUM_TRIALS=ntrials, NUM_EVENTS=250)
run.record_log(f'{actor}', env_name, n_trials=ntrials)


Example #2
        25: 96
    },
    'gridworld:gridworld-v51': {
        100: 286,
        75: 214,
        50: 143,
        25: 71
    }
}
cache_size_for_env = int(cache_limits[test_env][100] * (cache_size / 100))
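# e.g. for test_env = 'gridworld:gridworld-v51' and cache_size = 75 (percent),
# this gives int(286 * 0.75) = 214, matching the 75% entry in cache_limits;
# cache_size = 50 -> 143 and cache_size = 25 -> 71 reproduce the table the same way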
memory = Memory(entry_size=env.action_space.n,
                cache_limit=cache_size_for_env,
                distance=distance_metric)

# reinitialize the agent with the new network
agent = Agent(network, memory, state_representations=latent_state_reps)

#verify_env = gym.make(env_name)
#ver_ex = expt(agent,verify_env)

# expt redefines the logging function to keep track of network details
ex = expt(agent, env)
ex.run(num_trials, num_events)
ex.record_log(env_name=test_env,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file,
              load_from=agent_id)
pc_state_reps = {}
oh_state_reps = {}
for state in env.useable:
    oh_state_reps[env.twoD2oneD(state)] = one_hot_state(env.twoD2oneD(state))
    pc_state_reps[env.twoD2oneD(state)] = place_cells.get_activities([state])[0]
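# oh_state_reps maps each usable state index (via env.twoD2oneD) to a one-hot
# vector, while pc_state_reps maps the same indices to place-cell activity
# vectors; either dict can be passed to Agent as state_representations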

place_cells.plot_placefields(env_states_to_map=env.useable)

#oh_network = Network(input_dims=[input_dims],fc1_dims=200,fc2_dims=200,output_dims=env.action_space.n, lr=0.0005)
#oh_network = torch.load(data_dir+f'agents/{load_id}.pt')
#oh_agent = Agent(oh_network, state_representations=oh_state_reps)

#pc_network = Network(input_dims=[input_dims],fc1_dims=200,fc2_dims=200,output_dims=env.action_space.n, lr=0.0005)
pc_network = torch.load(data_dir + f'agents/{load_id}.pt')
pc_agent = Agent(pc_network,
                 state_representations=pc_state_reps,
                 memory=memory)
pc_agent.get_action = pc_agent.EC_action

# retraining
env.set_reward({(15, 15): 10})

ex = expt(pc_agent, env)
ntrials = 2000
nsteps = 250
#ex.run(ntrials, nsteps, printfreq=1)
#ex.data['place_cells'] = place_cells
#ex.record_log('pc_episodic',env_name,ntrials,nsteps, dir=data_dir,file='ac_representation.csv')
# save place cells
Example #4
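# The lines below copy the policy/value output heads from an already-loaded
# state_dict into a small actor-critic head network (AC_head_agent), which is
# then driven by latent-state inputs read out of the full network's hidden layer.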
top_layer_dict = {}
top_layer_dict['pol.weight'] = state_dict['output.0.weight']
top_layer_dict['pol.bias'] = state_dict['output.0.bias']
top_layer_dict['val.weight'] = state_dict['output.1.weight']
top_layer_dict['val.bias'] = state_dict['output.1.bias']
AC_head_agent.load_state_dict(top_layer_dict)

# get state inputs
h0, h1 = get_net_activity(env_name, rep_type, net)
state_reps, representation_name = h1, f'h1_{rep_type}_latents'

memory = None  #Memory.EpisodicMemory(cache_limit=cache_size_for_env, entry_size=env.action_space.n)

agent = Agent(AC_head_agent, memory=memory, state_representations=state_reps)

run = expt(agent, env)
run.run(NUM_TRIALS=num_trials, NUM_EVENTS=num_events)

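# transfer test: make the companion environment registered as env_name + '1',
# recompute the latent state inputs for it, point the agent at them, and keep
# appending to the same data record (test_run.data = run.data)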
test_env_name = env_name + '1'
test_env = gym.make(test_env_name)
plt.close()
print(test_env.rewards)
# get test env state inputs
h0, h1 = get_net_activity(test_env_name, rep_type, net)
state_reps, representation_name = h1, f'h1_{rep_type}_latents'
# update agent with new state_reps
agent.state_reps = state_reps

test_run = expt(agent, test_env)
test_run.data = run.data
test_run.run(NUM_TRIALS=num_trials * 2, NUM_EVENTS=num_events)
Example #5
    memory = Memory.EpisodicMemory(cache_limit=400,
                                   entry_size=env.action_space.n,
                                   mem_temp=memtemp)

    agent = Agent(network, memory)
    #agent.get_action = agent.MF_action

    opt_values = np.zeros(env.nstates)
    reward_loc = env.twoD2oneD(list(env.rewards.keys())[0])
    opt_values[reward_loc] = list(env.rewards.values())[0]

    for ind in reversed(range(len(opt_values) - 1)):
        opt_values[ind] = env.step_penalization + agent.gamma * opt_values[ind + 1]
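    # Backward induction along the track: each state's optimal value is the step
    # penalty plus the discounted value of its successor; e.g. with a terminal
    # reward of 10, gamma = 0.98 and step_penalization = -0.01 (illustrative
    # values only), the state just before the reward gets -0.01 + 0.98 * 10 = 9.79.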

    ntrials = 1000
    nevents = 50
    run = expt(environment=env, agent=agent)

    run.run(ntrials, nevents, printfreq=10)
    run.data['opt_values'] = opt_values

    run.record_log(dir='../../../Data/',
                   file='linear_track.csv',
                   expt_type=f'{type(run).__name__}_lr{params.lr}',
                   env_name=env_name,
                   n_trials=ntrials,
                   n_steps=nevents,
                   extra=[params.lr])
#plt.plot(rm(run.data['total_reward'],10))
#plt.show()
Example #6
oh_state_reps = {}
for state in env.useable:
    oh_state_reps[env.twoD2oneD(state)] = one_hot_state(env.twoD2oneD(state))

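# NOTE: the freshly constructed Network below is immediately overwritten by the
# checkpoint loaded from disk; keep whichever of the two lines matches your use case.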
oh_network = Network(input_dims=[input_dims],
                     fc1_dims=200,
                     fc2_dims=200,
                     output_dims=env.action_space.n,
                     lr=0.0005)
oh_network = torch.load(data_dir + f'agents/{load_id}.pt')
oh_agent = Agent(oh_network,
                 state_representations=oh_state_reps,
                 memory=memory)
oh_agent.get_action = oh_agent.EC_action

# retraining
env.set_reward({(15, 15): 10})

ex = expt(oh_agent, env)
ntrials = 2000
nsteps = 250
ex.run(ntrials, nsteps, printfreq=1)
ex.record_log('oh_episodic',
              env_name,
              ntrials,
              nsteps,
              dir=data_dir,
              file='ac_representation.csv')
# save place cells