Example #1
def setup(config: str, load_file: str) -> DQNAgent:
    # Build the agent from a config file when one is given, otherwise fall back
    # to the default constructor. num_actions and num_inputs are presumably
    # defined at module level in the original script.
    if config:
        loader = AgentLoader(config, num_actions=num_actions, num_inputs=num_inputs)
        agent = loader.load()
    else:
        agent = DQNAgent(num_actions=num_actions, num_inputs=num_inputs)
    # Optionally restore previously saved weights.
    if load_file:
        print(f'Loading "{load_file}"...')
        agent.load(load_file)
    return agent
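
A minimal usage sketch for this helper, assuming num_actions and num_inputs are module-level globals; the config path, checkpoint name, and values below are illustrative assumptions, not part of the original example.

# Hypothetical caller for setup(); every name and path here is an assumption.
num_actions = 4      # assumed size of the discrete action space
num_inputs = 84      # assumed dimensionality of the observation vector

fresh_agent = setup(config='agent_config.yaml', load_file='')   # build from a config file
resumed_agent = setup(config='', load_file='saved_agent.p')     # or restore saved weights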
Example #2
        # Tail of the Linear network definition (the rest of the class is not
        # shown in this snippet): the final layer l5 maps the last hidden
        # activation h4 to the network output, i.e. the Q-values for a DQN.
        output = self.l5(h4)
        return output


net = Linear()

print('Initializing the learner...')
learner = Learner(settings)
learner.load_net(net)

print('Initializing the agent framework...')
agent = DQNAgent(settings)

print('Training...')
# memory (the replay buffer) and simulator (the environment) are assumed to
# have been constructed earlier in the original script.
agent.train(learner, memory, simulator)

print('Loading the net...')
learner = agent.load(settings['save_dir'] + '/learner_final.p')

# Pick the checkpoint with the highest validation reward: nets are saved every
# settings['eval_every'] steps once settings['initial_exploration'] steps have
# passed, so the step index of the best net follows from the position of that reward.
ind_max = learner.val_rewards.index(max(learner.val_rewards))
ind_net = settings['initial_exploration'] + ind_max * settings['eval_every']
agent.load_net(learner, settings['save_dir'] + '/net_%d.p' % int(ind_net))

# Re-seed NumPy so the evaluation rollouts are reproducible.
np.random.seed(settings["seed_general"])

print('Evaluating DQN agent...')
print('(reward, MSE loss, mean Q-value, episodes - NA, time)')
reward, MSE_loss, mean_Q_value, episodes, time, paths, actions, rewards = agent.evaluate(
    learner, simulator, 50000)
print(reward, MSE_loss, mean_Q_value, episodes, time)