# --- Example No. 1 ---
# NOTE: the run_task examples in this listing come from the AST Toolbox's
# garage/TensorFlow 1.x experiment runners. The imports below are a
# best-effort reconstruction (module paths vary between garage and toolbox
# versions), and names such as sim_args, reward_args, spaces_args, env_args,
# policy_args, baseline_args, algo_args, runner_args, sampler_args, bpq_args,
# n_parallel, ga_type, mcts_type, run_experiment_args, save_expert_trajectory,
# and max_path_length are assumed to be supplied by the enclosing experiment
# script. The load_convert_and_save_*_expert_trajectory helpers ship with the
# toolbox's example scripts; their import path is omitted here because it
# varies.
import os
import pickle

import numpy as np
import tensorflow as tf

from garage.envs import normalize
from garage.experiment import LocalTFRunner
from garage.np.baselines import LinearFeatureBaseline, ZeroBaseline
from garage.tf.algos import PPO, TRPO
from garage.tf.envs import TfEnv
from garage.tf.optimizers import ConjugateGradientOptimizer, FiniteDifferenceHvp
from garage.tf.policies import ContinuousMLPPolicy, GaussianLSTMPolicy

import ast_toolbox.mcts.BoundedPriorityQueues as BPQ
from ast_toolbox.algos import GA, GASM, MCTS, MCTSBV, MCTSRS
from ast_toolbox.envs import ASTEnv
from ast_toolbox.rewards import ExampleAVReward
from ast_toolbox.samplers import ASTVectorizedSampler
from ast_toolbox.simulators import ExampleAVSimulator
from ast_toolbox.spaces import ExampleAVSpaces


def run_task(snapshot_config, *_):

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(
                        snapshot_config=snapshot_config, max_cpus=4, sess=sess) as local_runner:
                    # Instantiate the example classes
                    sim = ExampleAVSimulator(**sim_args)
                    reward_function = ExampleAVReward(**reward_args)
                    spaces = ExampleAVSpaces(**spaces_args)

                    # Create the environment ('id' is not an ASTEnv constructor
                    # argument, so it is dropped if present)
                    if 'id' in env_args:
                        env_args.pop('id')
                    env = TfEnv(normalize(ASTEnv(simulator=sim,
                                                 reward_function=reward_function,
                                                 spaces=spaces,
                                                 **env_args
                                                 )))

                    # Instantiate the garage objects
                    policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)

                    baseline = LinearFeatureBaseline(env_spec=env.spec, **baseline_args)

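                    # The PPO constructor below is given a natural-gradient
                    # optimizer (conjugate gradient with a finite-difference
                    # Hessian-vector product) in place of garage's first-order
                    # default.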
                    optimizer = ConjugateGradientOptimizer
                    optimizer_args = {'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)}

                    algo = PPO(env_spec=env.spec,
                               policy=policy,
                               baseline=baseline,
                               optimizer=optimizer,
                               optimizer_args=optimizer_args,
                               **algo_args)

                    sampler_cls = ASTVectorizedSampler

                    local_runner.setup(
                        algo=algo,
                        env=env,
                        sampler_cls=sampler_cls,
                        sampler_args={"open_loop": False,
                                      "sim": sim,
                                      "reward_function": reward_function,
                                      'n_envs': n_parallel})

                    # Run the experiment
                    local_runner.train(**runner_args)
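

# A minimal sketch of how a run_task like the one above is typically launched.
# This assumes the garage (TF 1.x era) run_experiment helper; the keyword
# arguments and values are illustrative placeholders, not the toolbox's exact
# driver code.
from garage.experiment import run_experiment

run_experiment(
    run_task,
    snapshot_mode='last',  # keep only the latest snapshot
    seed=1,
    n_parallel=4,          # matches max_cpus in the LocalTFRunner above
    log_dir='./data/example_1',
)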

# --- Example No. 2 ---

def run_task(snapshot_config, *_):

    with LocalTFRunner(snapshot_config=snapshot_config, max_cpus=1) as runner:

        # Instantiate the example classes
        sim = ExampleAVSimulator()
        reward_function = ExampleAVReward()
        spaces = ExampleAVSpaces()

        # Create the environment. fixed_init_state pins every rollout to the
        # initial conditions in s_0, and blackbox_sim_state tells the solver to
        # treat the simulator state as a black box.
        env = TfEnv(
            normalize(
                ASTEnv(blackbox_sim_state=True,
                       fixed_init_state=True,
                       s_0=[-0.5, -4.0, 1.0, 11.17, -35.0],
                       simulator=sim,
                       reward_function=reward_function,
                       spaces=spaces)))

        # Instantiate the garage objects
        policy = GaussianLSTMPolicy(name='lstm_policy',
                                    env_spec=env.spec,
                                    hidden_dim=64)

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=max_path_length,
                    discount=0.99,
                    kl_constraint='soft',
                    max_kl_step=0.01)

        sampler_cls = ASTVectorizedSampler

        runner.setup(algo=algo,
                     env=env,
                     sampler_cls=sampler_cls,
                     sampler_args={
                         "sim": sim,
                         "reward_function": reward_function
                     })

        runner.train(n_epochs=1, batch_size=4000, plot=False)

        print("Installation successfully validated")

# --- Example No. 3 ---

def run_task(snapshot_config, *_):

        seed = 0
        # top_k = 10
        np.random.seed(seed)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(snapshot_config=snapshot_config,
                                   max_cpus=4,
                                   sess=sess) as local_runner:

                    # Instantiate the example classes
                    sim = ExampleAVSimulator(**sim_args)
                    reward_function = ExampleAVReward(**reward_args)
                    spaces = ExampleAVSpaces(**spaces_args)

                    # Create the environment
                    if 'id' in env_args:
                        env_args.pop('id')
                    env = ASTEnv(simulator=sim,
                                 reward_function=reward_function,
                                 spaces=spaces,
                                 **env_args)
                    env = TfEnv(env)

                    policy = ContinuousMLPPolicy(name='ast_agent',
                                                 env_spec=env.spec,
                                                 **policy_args)

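                    # Initialize the policy variables explicitly so the GA can
                    # read and perturb parameter values before the runner's own
                    # setup runs.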
                    params = policy.get_params()
                    sess.run(tf.variables_initializer(params))

                    # Instantiate the garage objects
                    baseline = ZeroBaseline(env_spec=env.spec)

                    top_paths = BPQ.BoundedPriorityQueue(**bpq_args)

                    sampler_cls = ASTVectorizedSampler
                    sampler_args = {
                        "open_loop": False,
                        "sim": sim,
                        "reward_function": reward_function,
                        "n_envs": n_parallel
                    }

                    if ga_type == 'ga':
                        print('ga')
                        algo = GA(env_spec=env.spec,
                                  policy=policy,
                                  baseline=baseline,
                                  top_paths=top_paths,
                                  **algo_args)
                    elif ga_type == 'gasm':
                        print('gasm')
                        algo = GASM(env_spec=env.spec,
                                    policy=policy,
                                    baseline=baseline,
                                    top_paths=top_paths,
                                    **algo_args)
                    else:
                        raise NotImplementedError

                    local_runner.setup(algo=algo,
                                       env=env,
                                       sampler_cls=sampler_cls,
                                       sampler_args=sampler_args)

                    # Run the experiment
                    local_runner.train(**runner_args)
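

# The GA/GASM example above expects its configuration from the enclosing
# scope. A sketch of plausible values (the parameter name for the bounded
# priority queue is an assumption about the toolbox's API):
ga_type = 'ga'          # or 'gasm' for the safe-mutation variant
bpq_args = {'N': 10}    # keep the 10 highest-reward paths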

# --- Example No. 4 ---

def run_task(snapshot_config, *_):

        seed = 0
        # top_k = 10
        np.random.seed(seed)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(snapshot_config=snapshot_config,
                                   max_cpus=4,
                                   sess=sess) as local_runner:

                    # Instantiate the example classes
                    sim = ExampleAVSimulator(**sim_args)
                    reward_function = ExampleAVReward(**reward_args)
                    spaces = ExampleAVSpaces(**spaces_args)

                    # Create the environment
                    if 'id' in env_args:
                        env_args.pop('id')
                    env = ASTEnv(simulator=sim,
                                 reward_function=reward_function,
                                 spaces=spaces,
                                 **env_args)

                    top_paths = BPQ.BoundedPriorityQueue(**bpq_args)

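                    # Dispatch on the MCTS variant requested via 'mcts_type'
                    # (supplied by the enclosing scope).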
                    if mcts_type == 'mcts':
                        print('mcts')
                        algo = MCTS(env=env, top_paths=top_paths, **algo_args)
                    elif mcts_type == 'mctsbv':
                        print('mctsbv')
                        algo = MCTSBV(env=env,
                                      top_paths=top_paths,
                                      **algo_args)
                    elif mcts_type == 'mctsrs':
                        print('mctsrs')
                        algo = MCTSRS(env=env,
                                      top_paths=top_paths,
                                      **algo_args)
                    else:
                        raise NotImplementedError

                    sampler_cls = ASTVectorizedSampler

                    local_runner.setup(algo=algo,
                                       env=env,
                                       sampler_cls=sampler_cls,
                                       sampler_args={
                                           "open_loop": False,
                                           "sim": sim,
                                           "reward_function": reward_function,
                                           "n_envs": n_parallel
                                       })

                    # Run the experiment
                    local_runner.train(**runner_args)

                    log_dir = run_experiment_args['log_dir']
                    with open(log_dir + '/best_actions.p', 'rb') as f:
                        best_actions = pickle.load(f)
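                    # Replay each saved action sequence through the simulator,
                    # recording state, reward, action, and observation at every
                    # step, to rebuild full expert trajectories.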
                    expert_trajectories = []
                    for actions in best_actions:
                        sim.reset(s_0=env_args['s_0'])
                        path = []
                        for action in actions:
                            obs = sim.step(action)
                            state = sim.clone_state()
                            reward = reward_function.give_reward(
                                action=action, info=sim.get_reward_info())
                            path.append({
                                'state': state,
                                'reward': reward,
                                'action': action,
                                'observation': obs
                            })
                        expert_trajectories.append(path)
                    with open(log_dir + '/expert_trajectory.p', 'wb') as f:
                        pickle.dump(expert_trajectories, f)

# --- Example No. 5 ---

def run_task(snapshot_config, *_):

        seed = 0
        # top_k = 10
        np.random.seed(seed)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(
                        snapshot_config=snapshot_config, max_cpus=4, sess=sess) as local_runner:

                    # Instantiate the example classes
                    sim = ExampleAVSimulator(**sim_args)
                    reward_function = ExampleAVReward(**reward_args)
                    spaces = ExampleAVSpaces(**spaces_args)

                    # Create the environment
                    if 'id' in env_args:
                        env_args.pop('id')
                    env = ASTEnv(simulator=sim,
                                 reward_function=reward_function,
                                 spaces=spaces,
                                 **env_args
                                 )

                    top_paths = BPQ.BoundedPriorityQueue(**bpq_args)

                    if mcts_type == 'mcts':
                        print('mcts')
                        algo = MCTS(env=env,
                                    top_paths=top_paths,
                                    **algo_args)
                    elif mcts_type == 'mctsbv':
                        print('mctsbv')
                        algo = MCTSBV(env=env,
                                      top_paths=top_paths,
                                      **algo_args)
                    elif mcts_type == 'mctsrs':
                        print('mctsrs')
                        algo = MCTSRS(env=env,
                                      top_paths=top_paths,
                                      **algo_args)
                    else:
                        raise NotImplementedError

                    sampler_cls = ASTVectorizedSampler
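                    # 'sampler_args' is assumed to be a partially-filled dict
                    # from the enclosing scope; attach the simulator and reward
                    # function created above before handing it to the runner.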
                    sampler_args['sim'] = sim
                    sampler_args['reward_function'] = reward_function

                    local_runner.setup(algo=algo,
                                       env=env,
                                       sampler_cls=sampler_cls,
                                       sampler_args=sampler_args)

                    # Run the experiment
                    local_runner.train(**runner_args)

                    log_dir = run_experiment_args['log_dir']

                    if save_expert_trajectory:
                        load_convert_and_save_mcts_expert_trajectory(
                            best_actions_filename=log_dir + '/best_actions.p',
                            expert_trajectory_filename=log_dir + '/expert_trajectory.p',
                            sim=sim,
                            s_0=env_args['s_0'],
                            reward_function=reward_function)

# --- Example No. 6 ---

def run_task(snapshot_config, *_):

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(snapshot_config=snapshot_config,
                                   max_cpus=4,
                                   sess=sess) as local_runner:
                    # Instantiate the example classes
                    sim = ExampleAVSimulator(**sim_args)
                    reward_function = ExampleAVReward(**reward_args)
                    spaces = ExampleAVSpaces(**spaces_args)

                    # Create the environment
                    if 'id' in env_args:
                        env_args.pop('id')
                    env = TfEnv(
                        normalize(
                            ASTEnv(simulator=sim,
                                   reward_function=reward_function,
                                   spaces=spaces,
                                   **env_args)))

                    # Instantiate the garage objects
                    policy = GaussianLSTMPolicy(env_spec=env.spec,
                                                **policy_args)

                    baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                     **baseline_args)

                    optimizer = ConjugateGradientOptimizer
                    optimizer_args = {
                        'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                    }

                    algo = PPO(env_spec=env.spec,
                               policy=policy,
                               baseline=baseline,
                               optimizer=optimizer,
                               optimizer_args=optimizer_args,
                               **algo_args)

                    sampler_cls = ASTVectorizedSampler
                    sampler_args['sim'] = sim
                    sampler_args['reward_function'] = reward_function

                    local_runner.setup(algo=algo,
                                       env=env,
                                       sampler_cls=sampler_cls,
                                       sampler_args=sampler_args)

                    # Run the experiment
                    local_runner.train(**runner_args)

                    if save_expert_trajectory:
                        load_convert_and_save_drl_expert_trajectory(
                            last_iter_filename=os.path.join(
                                run_experiment_args['log_dir'], 'itr_' +
                                str(runner_args['n_epochs'] - 1) + '.pkl'),
                            expert_trajectory_filename=os.path.join(
                                run_experiment_args['log_dir'],
                                'expert_trajectory.pkl'))

                    print('done!')
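

# Sketch: loading the expert trajectory written by the example above. The
# filename comes from the code; the log directory value is a placeholder
# standing in for run_experiment_args['log_dir'].
import os
import pickle

log_dir = './data/example_6'
with open(os.path.join(log_dir, 'expert_trajectory.pkl'), 'rb') as f:
    expert_trajectory = pickle.load(f)
print(type(expert_trajectory))  # structure depends on the toolbox version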