Example #1
# Standard-library and third-party imports assumed by this snippet; make_env,
# ActorCritic, play_game and save_progress come from the surrounding project.
import logging
import time

import numpy as np


def test_worker(args, shared_model, total_steps, optimizer):
    # Evaluation worker: runs in its own process alongside the trainers,
    # periodically syncing with the shared model and playing full,
    # reward-unclipped episodes.
    args.environment.clip_rewards = False
    env = make_env(args)

    log_path = '{}/{}'.format(args.train.experiment_path, 'log.txt')
    logging.basicConfig(filename=log_path, level=logging.INFO)
    logging.info("STARTED TRAINING PROCESS {}".format(
        time.strftime("%Y.%m.%d_%H:%M", time.localtime())))

    model = ActorCritic(env.observation_space.shape, env.action_space.n)
    model.eval()

    start_time = time.time()

    reward_history = []
    while True:
        # Pull the latest weights from the shared (training) model.
        model.load_state_dict(shared_model.state_dict())
        if (len(reward_history) + 1) % args.train.save_frequency == 0:
            save_progress(args, model, optimizer, total_steps.value)
        total_reward, _ = play_game(model, env)
        reward_history.append(total_reward)

        log_message = "Time {}, num steps {}, FPS {:.0f}, curr episode reward {}, mean episode reward: {}".format(
            time.strftime("%Hh %Mm %Ss",
                          time.gmtime(time.time() - start_time)),
            total_steps.value,
            total_steps.value / (time.time() - start_time),
            total_reward,
            np.mean(reward_history[-60:]),
        )
        print(log_message)
        logging.info(log_message)
        time.sleep(60)  # evaluate roughly once a minute
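
This worker is meant to run in its own process next to the training workers: shared_model lives in shared memory and total_steps is a multiprocessing counter incremented by the trainers. A minimal launch sketch under those assumptions (load_config, SharedAdam, train_worker and num_processes are placeholders, not part of the example above):

import torch.multiprocessing as mp

if __name__ == '__main__':
    args = load_config()                       # hypothetical config loader
    env = make_env(args)
    shared_model = ActorCritic(env.observation_space.shape, env.action_space.n)
    shared_model.share_memory()                # expose parameters to all processes
    optimizer = SharedAdam(shared_model.parameters(), lr=args.train.lr)  # assumed shared optimizer
    total_steps = mp.Value('i', 0)             # global step counter updated by the trainers

    processes = [mp.Process(target=test_worker,
                            args=(args, shared_model, total_steps, optimizer))]
    for rank in range(args.train.num_processes):
        processes.append(mp.Process(target=train_worker,  # hypothetical training loop
                                    args=(rank, args, shared_model, total_steps, optimizer)))
    for p in processes:
        p.start()
    for p in processes:
        p.join()
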
Example #2
def test_worker(args, shared_model, total_steps, optimizer):
    # Variant of the worker above that adds auxiliary-task wrappers
    # (pixel control, reward prediction, value replay); it relies on the
    # same logging/time/numpy imports as Example #1.
    args.environment.clip_rewards = False
    env = make_env(args.environment)

    log_path = '{}/{}'.format(args.train.experiment_folder, 'log.txt')
    logging.basicConfig(filename=log_path, level=logging.INFO)
    logging.info("STARTED TRAINING PROCESS {}".format(time.strftime("%Y.%m.%d_%H:%M", time.localtime())))

    model = ActorCritic(env.observation_space.shape, env.action_space.n)
    model = BaseWrapper(model)
    if (args.train.use_pixel_control or
            args.train.use_reward_prediction):
        model = ExperienceWrapper(model)
    if args.train.use_pixel_control:
        model = PixelControlWrapper(model, args.train.gamma, args.train.pc_coef)
    if args.train.use_reward_prediction:
        model = RewardPredictionWrapper(model, args.train.rp_coef)
    if args.train.use_value_replay:
        model = ValueReplayWrapper(model)
    model.config = args
    model.eval()

    start_time = time.time()

    reward_history = []
    while True:
        model.load_state_dict(shared_model.state_dict())
        if (len(reward_history) + 1) % args.train.save_frequency == 0:
            save_progress(args, model, optimizer, total_steps.value)
        stats = play_game(model, env)
        reward_history.append(stats['total_reward'])

        log_message = (
            'Time {}, num steps {}, FPS {:.0f}, '
            'curr episode reward {:.2f}, mean episode reward: {:.2f}, '
            'mean policy loss {:.2f}, mean value loss {:.2f}, '
            'mean entropy percentage {:.2f}'
        ).format(
            time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time)),
            total_steps.value,
            total_steps.value / (time.time() - start_time),
            stats['total_reward'],
            np.mean(reward_history[-60:]),
            stats['policy_loss'],
            stats['value_loss'],
            stats['entropy'],
        )
        if args.train.use_pixel_control:
            log_message += ', pixel control loss {:.2f}'.format(stats['pc_loss'])
        if args.train.use_reward_prediction:
            log_message += ', reward prediction loss {:.2f}'.format(stats['rp_loss'])
        if args.train.use_value_replay:
            log_message += ', value replay loss {:.2f}'.format(stats['vr_loss'])
        print(log_message)
        logging.info(log_message)
        time.sleep(60)
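
The loop above only works if args exposes the nested fields it reads. A rough sketch of that configuration layout with illustrative values (the real config is presumably loaded from a file, which the snippet does not show):

from types import SimpleNamespace

args = SimpleNamespace(
    environment=SimpleNamespace(
        clip_rewards=True,         # the test worker switches this off itself
        # ... plus whatever make_env(args.environment) expects (env id, preprocessing flags, ...)
    ),
    train=SimpleNamespace(
        experiment_folder='./experiments/example',
        save_frequency=10,         # save a checkpoint every N evaluation episodes
        gamma=0.99,                # discount factor, also passed to the pixel-control wrapper
        pc_coef=0.05,              # auxiliary-loss weights
        rp_coef=1.0,
        use_pixel_control=True,
        use_reward_prediction=True,
        use_value_replay=True,
    ),
)
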
Example #3
# Standalone evaluation script; make_atari, ActorCritic and record_video are
# provided by the surrounding project.
import argparse
import logging

parser = argparse.ArgumentParser(description='A3C')
parser.add_argument('--env-name',
                    default='SpaceInvaders-v0',
                    help='environment to train on (default: SpaceInvaders-v0)')
parser.add_argument('--logs-path',
                    default="./log.txt",
                    help='path to logs (default: `./log.txt`)')
parser.add_argument(
    '--pretrained-weights',
    default=None,
    help='path to pretrained weights (default: None – evaluate random model)')

if __name__ == '__main__':
    cmd_args = parser.parse_args()
    logging.basicConfig(filename=cmd_args.logs_path, level=logging.INFO)

    env = make_atari(cmd_args.env_name, clip=False)
    model = ActorCritic(env.observation_space.shape[0], env.action_space.n)
    if cmd_args.pretrained_weights is not None:
        model.load_weights(cmd_args.pretrained_weights)
    else:
        print(
            "No path to model weights was specified; a randomly initialized "
            "model will be evaluated"
        )
    model.eval()
    results = record_video(model, env)
    log_message = "evaluated on pretrained weights: {}, results: {}".format(
        cmd_args.pretrained_weights, results)
    print(log_message)
    logging.info(log_message)
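
The example calls model.load_weights, whose implementation is not shown. Assuming it behaves like a thin wrapper around torch.load plus load_state_dict, a free-function sketch would look roughly like this:

import torch

def load_weights(model, path):
    # Hypothetical helper: expects a state dict saved with torch.save(model.state_dict(), path).
    state = torch.load(path, map_location='cpu')
    model.load_state_dict(state)
    return model

A typical invocation of the script itself would then be something like python evaluate.py --env-name SpaceInvaders-v0 --pretrained-weights ./weights.pth (the script name is assumed).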