Example #1
def autoplay(method, environment, resume, render):
    game = Game(name=environments_to_names[environment], render=render)

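    # query the environment for the observation shape and the number of discrete actions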
    init_state, state_shape = game.get_state(True)
    n_actions = game.env.action_space.n
    agent_cls = agent_factory[method]
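    # eps_start=0.0 keeps action selection greedy during evaluation (no random exploration)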
    agent = agent_cls(state_shape, n_actions, environment, 1, 1, eps_start=0.0)
    agent.load(resume)

    log.info(f'Evaluating agent, loaded from {resume}, starting ...')

    game.reset()
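    # roll out a single episode with the loaded policy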
    for t in count():
        state = game.get_state()
        action = agent.select_action(state)
        transition, done = game.step(int(action.cpu().numpy()))
        # agent.eval(transition, 1, 0.0)
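        # short pause so the rendered game plays at a watchable speed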
        time.sleep(0.1)
        if done:
            log.info(f'agent survived {t} steps')
            game.reset()
            break

    game.env.close()
Example #2
def train(method, environment, resume, episodes, lr, lr_episodes, min_lr,
          eval_only, replay_width, batch_size, gamma, update_rate,
          save_interval):

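    # per-run metric log; the last argument marks whether we are resuming an earlier run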
    history = History(method + '_' + environment,
                      ['steps', 'avg_reward', 'loss'], resume is not None)
    history.flush()
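    # replay buffer that the Game fills with transitions as the agent plays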
    memory = ReplayMemory(replay_width)
    game = Game(name=environments_to_names[environment],
                memory=memory,
                render=False)
    init_state, state_shape = game.get_state(True)
    n_actions = game.env.action_space.n
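    # look up the agent class for the chosen method and construct it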
    agent_cls = agent_factory[method]
    agent = agent_cls(state_shape,
                      n_actions,
                      environment,
                      episodes,
                      update_rate,
                      step_size=lr_episodes,
                      lr=lr,
                      save_interval=save_interval)

    # resume from a ckpt
    if resume is not None:
        agent.load(resume)

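    # running means over the last 100 values, used only for logging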
    avg_reward = MovingAverage(100)
    avg_loss = MovingAverage(100)

    log.info(f'Training for {episodes} episodes, starting ...')

    # main training loop
    for i in range(episodes):
        game.reset()
        done = False
        loss = None
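        # play one episode; each environment step lands a transition in the replay memory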
        while not done:
            state = game.state
            action = agent.select_action(state)

            transition, done = game.step(int(action.to('cpu').numpy()))

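            # optimize only once the buffer can supply a full batch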
            if len(memory) > batch_size:
                batched = memory.sample(batch_size)
                loss = agent.train(batched, batch_size, gamma, i)
                avg_loss.add(loss)
        reward = game.rewards
        # agent.save_best(reward)
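        # checkpoint the agent and advance the learning-rate scheduler once per episode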
        agent.save()
        agent.scheduler.step()
        avg_reward.add(reward)

        # moving averages
        text = [
            f'steps: {agent.step_cnt}',
            f'game epochs: {i}/{episodes}',
            f'train loss: {float(avg_loss):.5}',
            f'avg reward: {float(avg_reward):.5}',
            # f'best reward: {float(agent.best_reward):.5}',
            f'reward: {float(reward):.5}',
            f'epsilon: {agent.epsilon:.3}',
        ]
        log.info(', '.join(text), update=True)
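        # periodically persist the smoothed metrics to the history log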
        if agent.step_cnt % save_interval == 0:
            history.record({
                'steps': agent.step_cnt,
                'avg_reward': float(avg_reward),
                'loss': float(avg_loss),
            })

    game.env.close()
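Both examples rely on a project-internal ReplayMemory that is created with a maximum size and consumed through len(memory) and memory.sample(batch_size). The minimal sketch below shows one plausible shape for such a buffer, assuming the common deque-backed DQN pattern; the Transition tuple and the push method are hypothetical and may not match the project's actual fields.

import random
from collections import deque, namedtuple

# Hypothetical transition layout; the real project may store different fields.
Transition = namedtuple('Transition',
                        ('state', 'action', 'reward', 'next_state', 'done'))


class ReplayMemory:
    """Fixed-size FIFO buffer of transitions (illustrative sketch only)."""

    def __init__(self, capacity):
        # old transitions are dropped automatically once capacity is reached
        self.buffer = deque(maxlen=capacity)

    def push(self, *args):
        # called once per environment step to record a transition
        self.buffer.append(Transition(*args))

    def sample(self, batch_size):
        # uniform random minibatch handed to the agent's train() step
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)

Under this assumption, the len(memory) > batch_size check in the training loop simply guards against sampling more transitions than the buffer currently holds.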