Example #1
def main():

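    # DDPG hyperparameters: learning rates for the actor/critic networks,
    # replay-buffer sizing, and the HalfCheetah-v2 MuJoCo task.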
    params = {
        'actor_learning_rate': 1e-4,
        'critic_learning_rate': 1e-3,
        'gamma': 0.99,
        'tau': 0.001,
        'sigma': 0.2,
        'num_epochs': 500,
        'num_episodes': 20,
        'replay_size': 1000000,
        'num_train_steps': 50,
        'replay_init_size': 1000,
        'batch_size': 64,
        'render_train': False,
        'restore': False,
        'env': 'HalfCheetah-v2'
    }

    agent = DDPG(params)
    # agent.train()  # uncomment to train before testing
    agent.test()
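
Here DDPG is the project's agent class, imported elsewhere in the source module. The 'tau' entry is the soft target-update coefficient and 'sigma' the exploration-noise scale; below is a minimal, self-contained sketch of the Polyak update that tau controls (a hypothetical helper, not the source project's implementation):

import numpy as np

def soft_update(target_weights, source_weights, tau=0.001):
    # target <- tau * source + (1 - tau) * target, per weight array
    return [tau * s + (1.0 - tau) * t
            for s, t in zip(source_weights, target_weights)]

# Usage: a target network slowly tracking the online network.
target = [np.zeros(3)]
online = [np.ones(3)]
target = soft_update(target, online)  # each entry is now 0.001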
Example #2
import os
import pickle

import numpy as np

# prepare_for_logging, NIPS, scale_action, the observation processors,
# create_rand_process, create_memory, DDPG, TRPO, util and submit are
# imported from elsewhere in the source project.
def test(agent, trial_dir, test_episode, visual_flag, submit_flag):
    pid = os.getpid()
    logger, _ = prepare_for_logging("pid_{}".format(pid), False)

    logger.info("trial_dir={}".format(trial_dir))
    if not os.path.exists(trial_dir):
        logger.info("trial_dir does not exist")
        return

    # create environment
    env = NIPS(visualize=visual_flag)

    # load config
    with open(os.path.join(trial_dir, "config.pk"), "rb") as f:
        config = pickle.load(f)

    if agent == 'DDPG':
        config["scale_action"] = scale_action

        # observation processor
        if "ob_processor" not in config or config["ob_processor"] == "dummy":
            ob_processor = ObservationProcessor()
        elif config["ob_processor"] == "2ndorder":
            ob_processor = SecondOrderAugmentor()
        else:
            ob_processor = BodySpeedAugmentor()
        config["ob_aug_dim"] = ob_processor.get_aug_dim()
        util.print_settings(logger, config, env)

        # create random process
        oup = create_rand_process(env, config)

        # create replay buffer
        memory = create_memory(env, config)

        # create ddpg agent
        agent = DDPG(env, memory, oup, ob_processor, config)
        agent.build_nets(actor_hiddens=config["actor_hiddens"],
                         scale_action=config["scale_action"],
                         critic_hiddens=config["critic_hiddens"])

        # load weights
        paths = {}
        if test_episode > 0:
            paths["actor"] = "actor_{}.h5".format(test_episode)
            paths["critic"] = "critic_{}.h5".format(test_episode)
            paths["target"] = "target_{}.h5".format(test_episode)
        else:
            paths["actor"] = "actor.h5"
            paths["critic"] = "critic.h5"
            paths["target"] = "target.h5"
        paths = {k: os.path.join(trial_dir, v) for k, v in paths.items()}
        logger.info("Paths to models: {}".format(paths))
        agent.load_models(paths)

    elif agent == 'TRPO':

        def ob_processor_maker():
            if config["ob_processor"] == "normal":
                return ObservationProcessor()
            elif config["ob_processor"] == "2ndorder":
                return SecondOrderAugmentor()
            elif config['ob_processor'] == 'bodyspeed':
                return BodySpeedAugmentor()
            else:
                raise ValueError('invalid ob processor type')

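        # NOTE: the config loaded from disk is replaced below; ob_processor_maker
        # is a closure over `config`, so it reads the overridden values at call time.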
        config = {
            "agent": 'TRPO',
            "batch_size": 5000,
            "n_envs": 16,
            "n_iters": 5000,
            "ob_processor": "bodyspeed",
            # "hidden_nonlinearity": "relu",
            # "action_nonlinearity": "tanh",
            # "policy_hiddens": [128, 128, 64, 64],
            # "baseline_hiddens": [128, 128, 64, 64],
            "policy_hiddens": [256, 128, 64],
            "baseline_hiddens": [256, 128, 64],
            "hidden_nonlinearity": "tanh",
            "action_nonlinearity": None,
        }

        agent = TRPO(
            env,
            env_maker=None,
            logger=logger,
            log_dir=None,
            ob_processor_maker=ob_processor_maker,
            policy_hiddens=config['policy_hiddens'],
            baseline_hiddens=config['baseline_hiddens'],
            hidden_nonlinearity=config['hidden_nonlinearity'],
            action_nonlinearity=config['action_nonlinearity'],
            n_envs=config['n_envs'],
            batch_size=config['batch_size'],
            n_iters=config['n_iters'],
        )
        agent.load_models(trial_dir)
    else:
        raise ValueError('invalid agent type')

    if submit_flag:
        submit(agent, logger)
    else:
        rewards = []
        for i in range(10):
            steps, reward = agent.test(max_steps=1000)
            logger.info("episode={}, steps={}, reward={}".format(
                i, steps, reward))
            rewards.append(reward)
        logger.info("avg_reward={}".format(np.mean(rewards)))