Example #1
import sys
sys.path.append('../../../')

import gym
import matplotlib.pyplot as plt
from basic.modules.Agents import Agent
from basic.modules.Experiments import flat_expt
# head_AC and rep_types are assumed to be imported elsewhere in the original script


write_to_file = 'flat_ac_training.csv'
version = 1
env_name = f'gridworld:gridworld-v{version}'
representation_type = 'latent'
num_trials = 25000
num_events = 250


# make gym environment
env = gym.make(env_name)
plt.close()

state_reps, representation_name, input_dims, _ = rep_types[representation_type](env)



for _ in range(1):
    empty_net = head_AC(input_dims, env.action_space.n, lr=0.0005)
    agent = Agent(empty_net, state_representations=state_reps)

    ex = flat_expt(agent, env)
    ex.run(num_trials, num_events, snapshot_logging=True)
    ex.record_log(env_name=env_name, representation_type=representation_name,
                  n_trials=num_trials, n_steps=num_events,
                  dir='../../Data/', file=write_to_file, mock_log=True)
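
record_log appends a summary row for this run to flat_ac_training.csv under ../../Data/. Below is a minimal sketch of inspecting that log afterwards with pandas; the exact columns depend on what record_log writes, so they are printed rather than assumed:

import pandas as pd

# load the CSV produced by record_log; path matches the dir/file arguments above
log = pd.read_csv('../../Data/flat_ac_training.csv')
print(log.columns.tolist())  # see which fields record_log actually wrote
print(log.tail())            # most recent runs, including this one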
Example #2
import gym
import matplotlib.pyplot as plt
import torch

import basic.modules.Utils.gridworld_plotting as gp
# Make Environment to Test Agent in
env_name = 'gridworld:gridworld-v1'
expt_type = 'MF'
env = gym.make(env_name)

# get parameters for the specific model-free network
params = basic_agent_params(env)
# build network & memory modules
network = nets.ActorCritic(params)
memory = Memory(entry_size=params.action_dims, cache_limit=400)

# construct agent instance with network and memory modules
agent = Agent(network, memory=memory)

# choose action controller
if expt_type == 'EC':
    agent.get_action = agent.EC_action
elif expt_type == 'MF':
    agent.get_action = agent.MF_action

# instantiate experiment
run = ex(agent, env)
print(env.obstacles_list)
#run.run(5000,250, printfreq=10, render=False)
#run.record_log(expt_type, env_name, n_trials=5000, dir='../../Data/', file='test_environments.csv')
# show examples from
# state_obs = []
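
The controller selection above could also be written as a dictionary dispatch; a small sketch using only the attributes already shown in this example:

# equivalent to the if/elif block: map experiment type to the action controller
action_controllers = {'EC': agent.EC_action, 'MF': agent.MF_action}
agent.get_action = action_controllers[expt_type]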
Example #3
num_trials = 25000
num_events = 250
relative_path_to_data = '../../Data'  # ../../Data if you are in Tests/CH2

# make gym environment
env = gym.make(env_name)
plt.close()

rep_types = {
    'onehot': onehot,
    'random': random,
    'place_cell': place_cell,
    'sr': sr,
    'latent': saved_latents
}
state_reps, representation_name, input_dims, _ = rep_types[
    representation_type](env)

# load weights to head_ac network from previously learned agent
AC_head_agent = head_AC(input_dims, env.action_space.n, lr=0.0005)

agent = Agent(AC_head_agent, state_representations=state_reps)

ex = flat_expt(agent, env)
ex.run(num_trials, num_events, snapshot_logging=False)
ex.record_log(env_name=env_name,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file)
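
The comment above mentions loading weights from a previously learned agent, but the loading step itself is not part of this excerpt. A minimal sketch, assuming head_AC is a torch.nn.Module and the earlier agent's weights were saved as a state dict to a hypothetical path:

import torch

# hypothetical path to weights saved from an earlier training run
saved_weights = '../../Data/agents/pretrained_head_ac.pt'
AC_head_agent.load_state_dict(torch.load(saved_weights))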
Example #4

for _ in range(1):
    ## generate the environment object
    env = gym.make(env_name)
    plt.close()

    ## get state representations to be used
    state_reps, representation_name, input_dims, _ = rep_types[representation_type](env)



    ## create an actor-critic network and associated agent
    network = Network(input_dims=[input_dims], fc1_dims=200, fc2_dims=200, output_dims=env.action_space.n, lr=0.0005)
    memory = Memory(entry_size=env.action_space.n, cache_limit=400, mem_temp=1)
    agent = Agent(network, state_representations=state_reps, memory=memory)

    # create an experiment class instance
    ex = expt(agent, env)

    ex.run(num_trials, num_events)

    ex.record_log(env_name=env_name, representation_type=representation_name,
                  n_trials=num_trials, n_steps=num_events,
                  dir='../../Data/', file=write_to_file)
# print results of training
fig, ax = plt.subplots(2,1, sharex=True)
ax[0].plot(ex.data['total_reward'])
ax[1].plot(ex.data['loss'][0], label='P_loss')
ax[1].plot(ex.data['loss'][1], label='V_loss')
ax[1].legend()
Example #5
num_events = 250

# valid representation types for this experiment
rep_types = {'conv': convs, 'rwd_conv': reward_convs}
param_set = {'conv': conv_PO_params, 'rwd_conv': conv_FO_params}

# instantiate the environment for the experiment
env = gym.make(env_name)
plt.close()

# get representation type and the associated parameters that specify the network dimensions
state_reps, representation_name, input_dims, _ = rep_types[
    representation_type](env)
params = param_set[representation_type]
network_parameters = params(env)

# make a new network instance
network = Network(network_parameters)
# reinitialize agent with new network
agent = Agent(network, state_representations=state_reps)

# expt - redefines logging function to keep track of network details
ex = conv_expt(agent, env)
ex.run(num_trials, num_events, printfreq=10)
ex.record_log(env_name=env_name,
              representation_type=representation_name,
              n_trials=num_trials,
              n_steps=num_events,
              dir=relative_path_to_data,
              file=write_to_file)
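
After a run, ex.data['total_reward'] (the field plotted in the other examples) holds per-trial reward. A minimal sketch, assuming that field is present here as well, of smoothing it with a running average before plotting:

import numpy as np
import matplotlib.pyplot as plt

rewards = np.asarray(ex.data['total_reward'], dtype=float)
window = 100  # trials per running-average window
smoothed = np.convolve(rewards, np.ones(window) / window, mode='valid')

plt.figure()
plt.plot(smoothed)
plt.xlabel('trial')
plt.ylabel(f'average reward over {window}-trial window')
plt.show()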
Example #6
env_name = 'gym_grid:gridworld-v1'

# create environment
env = gym.make(env_name)
plt.close()

network_params = nets.fc_params(env)
network_params.lr = 0.005
print(network_params.__dict__)
network = nets.ActorCritic(network_params)

# generate agent
ntrials = 20000
nevents = 250
agent = Agent(network, None)
run = expt(agent, env)

run.run(NUM_TRIALS=ntrials, NUM_EVENTS=nevents, printfreq=10, snap=False)
run.record_log(dir='../../../Data/',
               file='actor_critic_learning.csv',
               expt_type='MFonly',
               env_name=env_name,
               n_trials=ntrials,
               n_steps=nevents)
plt.figure()
plt.plot(run.data['total_reward'])
# commented out: plotting the policy-preference map needs policy snapshots,
# which the run above (snap=False) presumably did not log
# plt.figure()
# ac_pref = pref_ac_action(run.data['P_snap'][-1])
# plt.imshow(ac_pref)
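
To reuse the trained network later (for example as the previously learned agent referenced in Example #3), a minimal sketch assuming nets.ActorCritic is a torch.nn.Module:

import torch

# hypothetical output path; adjust to match your Data directory layout
torch.save(network.state_dict(), '../../../Data/agents/actor_critic_v1.pt')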