Exemplo n.º 1
0
# Training hyperparameters; learning_rate and gamma are passed to the agent
# below, episodes bounds the training loop.
learning_rate = 2e-4
gamma = 0.99
episodes = 500

# Environment settings handed to the wrapper: frame size and an empty
# reward table.
resolution = (84, 84)
reward_table = OrderedDict()

print('Initialising Doom...')
doom = VizdoomWrapper(
    config_path=config_path,
    reward_table=reward_table,
    frame_resolution=resolution,
    show_mode=False,
    frame_stack=4,
)

# The agent is sized from what the wrapper reports for its state and
# action spaces.
doomguy = PolicyGradientAgent(
    doom.get_state_size(),
    doom.get_action_size(),
    learning_rate,
    gamma,
    save_path=model_path,
)

# Optionally call load_model() before training starts
# (presumably restores saved weights — confirm against the agent class).
if load_pretrained_network:
    doomguy.load_model()

# Training loop: entered only when `train_network` is truthy, and runs
# `episodes` iterations.
if train_network:
    for episode in range(episodes):
        print('Episode', episode)
        # doom.new_game() is called once per episode, then the per-episode
        # bookkeeping (`done` flag, `step` counter) is reset.
        doom.new_game()
        done = False
        step = 0

        # NOTE(review): the body of this loop is not visible in this excerpt;
        # presumably it steps the game until `done` becomes truthy — confirm.
        while not done:
Exemplo n.º 2
0
# Replay script: builds the wrapper with show_mode enabled, creates a
# BaseQDoom agent, calls its load_model(), then plays a fixed number of
# episodes and prints the total reward after each one.
episodes_to_watch = 2

# Scenario and model locations.
# NOTE(review): actor_path and critic_path are not referenced anywhere in the
# visible code — confirm whether anything else reads them.
config_path = 'scenarios/basic.cfg'
actor_path = 'models/defend_the_center_actor.hd5'
critic_path = 'models/defend_the_center_critic.hd5'

# Environment settings handed to the wrapper.
resolution = (90, 60)
reward_table = OrderedDict([('FRAGCOUNT', 1)])

print('Initialising VizDoom...')
doom = VizdoomWrapper(
    config_path=config_path,
    reward_table=reward_table,
    frame_resolution=resolution,
    show_mode=True,
    frame_stack=1,
)

print('Initialising Doomguy...')
doomguy = BaseQDoom(doom.get_state_size(), doom.get_action_size())
doomguy.load_model()

for _ in range(episodes_to_watch):
    doom.new_game()

    # Feed the agent's chosen action back to the game until set_action()
    # returns a truthy value.
    while True:
        state = doom.get_current_state()
        best_action = doomguy.act(state)
        done = doom.set_action(best_action)
        if done:
            break

    # Pause for a second between episodes, then report the episode's score.
    sleep(1.0)
    score = doom.get_total_reward()
    print('Total score: {}'.format(score))