Example No. 1
def train(args, extra_args):
    env_type, env_id = run.get_env_type(args.env)

    if args.alg == 'gail':
        env_type += '_gail'
        args.alg = 'bgail'
    elif args.alg not in ['bgail', 'gail']:
        raise NotImplementedError

    learn = run.get_learn_function(args.alg)
    alg_kwargs = run.get_learn_function_defaults(args.alg, env_type)
    alg_kwargs.update(extra_args)

    env = build_env(args)
    logger.configure(os.path.join("log", "GAIL", args.env, "subsample_{}".format(extra_args["data_subsample_freq"]),
                                  "traj_{}".format(extra_args["num_expert_trajs"]), "batch_size_{}".format(extra_args["timesteps_per_batch"]),
                                  "seed_{}".format(args.seed)))

    print('Training {} on {}:{} with arguments \n{}'.format(args.alg, env_type, env_id, alg_kwargs))

    model = learn(env=env,
                  seed=args.seed,
                  save_path=args.save_path,
                  load_path=args.load_path,
                  render=args.render,
                  **alg_kwargs)
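The excerpt above reads several attributes from args and keys from extra_args; a minimal, hypothetical driver (attribute and key names mirror what the function accesses, the values are placeholders and not from the original project) could look like:

from types import SimpleNamespace

# Hypothetical invocation of train() above; names mirror what the function
# reads, concrete values are illustrative placeholders only.
args = SimpleNamespace(env='Hopper-v2', alg='gail', seed=0,
                       save_path=None, load_path=None, render=False)
extra_args = {'data_subsample_freq': 20,
              'num_expert_trajs': 4,
              'timesteps_per_batch': 1024}
train(args, extra_args)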
Example No. 2
def train(params_dict: dict):
    ncpu = multiprocessing.cpu_count()
    # ncpu = 1
    env_id = params_dict['env_params']['env']

    total_timesteps = float(params_dict['training_params']['num_timesteps'])

    learn = get_learn_function(params_dict['model_params']['alg'])
    alg_kwargs = get_learn_function_defaults(
        params_dict['model_params']['alg'], 'atari')
    alg_kwargs['network'] = params_dict['model_params']['network']
    alg_kwargs['lr'] = 0.0001

    if 'frame_stack' in params_dict['env_params'] and params_dict[
            'env_params']['frame_stack']:
        wrapper_kwargs = {'frame_stack': True}
    else:
        wrapper_kwargs = {}

    env = make_vec_env(env_id,
                       'atari',
                       ncpu,
                       seed=None,
                       wrapper_kwargs=wrapper_kwargs)

    # env = VecFrameStack(env, 4)

    model = learn(env=env,
                  seed=None,
                  total_timesteps=total_timesteps,
                  **alg_kwargs)

    return model, env
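The params_dict argument is only ever indexed with a handful of keys; a minimal dict satisfying the excerpt above (the concrete values are assumptions, not the original configuration) would be:

# Minimal params_dict for train() above; the key layout follows the lookups in
# the function body, the values are illustrative assumptions.
params_dict = {
    'env_params': {'env': 'PongNoFrameskip-v4', 'frame_stack': True},
    'training_params': {'num_timesteps': 1e6},
    'model_params': {'alg': 'ppo2', 'network': 'cnn'},
}
model, env = train(params_dict)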
Example No. 3
def test_coexistence(learn_fn, network_fn):
    '''
    Test if more than one model can exist at a time
    '''

    if learn_fn == 'deepq':
        # TODO enable multiple DQN models to be usable at the same time
        # github issue https://github.com/openai/baselines/issues/656
        return

    if network_fn.endswith('lstm') and learn_fn in ['acktr', 'trpo_mpi', 'deepq']:
        # TODO make acktr work with recurrent policies and test
        # github issue: https://github.com/openai/baselines/issues/660
        return

    env = DummyVecEnv([lambda: gym.make('CartPole-v0')])
    learn = get_learn_function(learn_fn)

    kwargs = {}
    kwargs.update(network_kwargs[network_fn])
    kwargs.update(learn_kwargs[learn_fn])

    learn = partial(learn, env=env, network=network_fn, total_timesteps=0, **kwargs)
    make_session(make_default=True, graph=tf.Graph())
    model1 = learn(seed=1)
    make_session(make_default=True, graph=tf.Graph())
    model2 = learn(seed=2)

    model1.step(env.observation_space.sample())
    model2.step(env.observation_space.sample())
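The learn_fn and network_fn arguments are normally injected by pytest; below is a sketch of the parametrization presumably sitting above this test, where the decorator arguments are assumed to mirror the kwargs dicts the body indexes into:

import pytest

# Presumed pytest parametrization for test_coexistence; the iterables are
# assumed to be the keys of the learn_kwargs / network_kwargs dicts used above.
@pytest.mark.parametrize('learn_fn', learn_kwargs.keys())
@pytest.mark.parametrize('network_fn', network_kwargs.keys())
def test_coexistence(learn_fn, network_fn):
    ...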
Example No. 4
def test_coexistence(learn_fn, network_fn):
    '''
    Test if more than one model can exist at a time
    '''

    if learn_fn == 'deepq':
        # TODO enable multiple DQN models to be usable at the same time
        # github issue https://github.com/openai/baselines/issues/656
        return

    if network_fn.endswith('lstm') and learn_fn in ['acktr', 'trpo_mpi', 'deepq']:
        # TODO make acktr work with recurrent policies and test
        # github issue: https://github.com/openai/baselines/issues/660
        return

    env = DummyVecEnv([lambda: gym.make('CartPole-v0')])
    learn = get_learn_function(learn_fn)

    kwargs = {}
    kwargs.update(network_kwargs[network_fn])
    kwargs.update(learn_kwargs[learn_fn])

    learn = partial(learn, env=env, network=network_fn, total_timesteps=0, **kwargs)
    make_session(make_default=True, graph=tf.Graph())
    model1 = learn(seed=1)
    make_session(make_default=True, graph=tf.Graph())
    model2 = learn(seed=2)

    model1.step(env.observation_space.sample())
    model2.step(env.observation_space.sample())
Example No. 5
def test_multidiscrete_identity(alg):
    '''
    Test if the algorithm (with an mlp policy)
    can learn an identity transformation (i.e. return observation as an action)
    '''

    kwargs = learn_kwargs[alg]
    kwargs.update(common_kwargs)

    learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
    env_fn = lambda: MultiDiscreteIdentityEnv((3, 3), episode_len=100)
    simple_test(env_fn, learn_fn, 0.9)
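The module-level common_kwargs and learn_kwargs dicts are not part of the excerpt; a plausible shape for them, with every entry and value being an assumption rather than the original configuration, is:

# Assumed shape of the kwargs dicts the identity tests rely on; the exact
# entries and values are illustrative, not taken from the original module.
common_kwargs = dict(
    total_timesteps=30000,
    network='mlp',
    gamma=0.9,
    seed=0,
)

learn_kwargs = {
    'a2c': {},
    'acktr': {},
    'deepq': {},
    'ddpg': dict(layer_norm=True),
    'ppo2': dict(lr=1e-3, nminibatches=1),
    'trpo_mpi': {},
}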
Example No. 6
def test_multidiscrete_identity(alg):
    '''
    Test if the algorithm (with an mlp policy)
    can learn an identity transformation (i.e. return observation as an action)
    '''

    kwargs = learn_kwargs[alg]
    kwargs.update(common_kwargs)

    learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
    env_fn = lambda: MultiDiscreteIdentityEnv((3, 3), episode_len=100)
    simple_test(env_fn, learn_fn, 0.9)
Example No. 7
def test_serialization(learn_fn, network_fn):
    '''
    Test if the trained model can be serialized
    '''

    if network_fn.endswith('lstm') and learn_fn in [
            'acer', 'acktr', 'trpo_mpi', 'deepq'
    ]:
        # TODO make acktr work with recurrent policies
        # and test
        # github issue: https://github.com/openai/baselines/issues/660
        return

    def make_env():
        env = MnistEnv(episode_len=100)
        env.seed(10)
        return env

    env = DummyVecEnv([make_env])
    ob = env.reset().copy()
    learn = get_learn_function(learn_fn)

    kwargs = {}
    kwargs.update(network_kwargs[network_fn])
    kwargs.update(learn_kwargs[learn_fn])

    learn = partial(learn, env=env, network=network_fn, seed=0, **kwargs)

    with tempfile.TemporaryDirectory() as td:
        model_path = os.path.join(td, 'serialization_test_model')

        with tf.Graph().as_default(), make_session().as_default():
            model = learn(total_timesteps=100)
            model.save(model_path)
            mean1, std1 = _get_action_stats(model, ob)
            variables_dict1 = _serialize_variables()

        with tf.Graph().as_default(), make_session().as_default():
            model = learn(total_timesteps=0, load_path=model_path)
            mean2, std2 = _get_action_stats(model, ob)
            variables_dict2 = _serialize_variables()

        for k, v in variables_dict1.items():
            np.testing.assert_allclose(
                v,
                variables_dict2[k],
                atol=0.01,
                err_msg='saved and loaded variable {} value mismatch'.format(
                    k))

        np.testing.assert_allclose(mean1, mean2, atol=0.5)
        np.testing.assert_allclose(std1, std2, atol=0.5)
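The helpers _get_action_stats and _serialize_variables are referenced but not shown; the sketch below reconstructs what they plausibly do from how the test uses them (the real implementations may differ in detail):

import numpy as np
import tensorflow as tf
from baselines.common.tf_util import get_session

def _serialize_variables():
    # Snapshot every trainable variable in the current session, keyed by name.
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)}

def _get_action_stats(model, ob):
    # Sample the policy many times on one observation and summarize the actions,
    # so a loaded model can be compared statistically against the saved one.
    ntrials = 1000
    if model.initial_state is None or model.initial_state == []:
        actions = np.array([model.step(ob)[0] for _ in range(ntrials)])
    else:
        actions = np.array([model.step(ob, S=model.initial_state, M=[False])[0]
                            for _ in range(ntrials)])
    return np.mean(actions, axis=0), np.std(actions, axis=0)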
Example No. 8
def test_continuous_identity(alg):
    '''
    Test if the algorithm (with an mlp policy)
    can learn an identity transformation (i.e. return observation as an action)
    to a required precision
    '''

    kwargs = learn_kwargs[alg]
    kwargs.update(common_kwargs)
    learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)

    env_fn = lambda: BoxIdentityEnv((1,), episode_len=100)
    simple_test(env_fn, learn_fn, -0.1)
Example No. 9
def test_continuous_identity(alg):
    '''
    Test if the algorithm (with an mlp policy)
    can learn an identity transformation (i.e. return observation as an action)
    to a required precision
    '''

    kwargs = learn_kwargs[alg]
    kwargs.update(common_kwargs)
    learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)

    env_fn = lambda: BoxIdentityEnv((1,), episode_len=100)
    simple_test(env_fn, learn_fn, -0.1)
Example No. 10
def test_fixed_sequence(alg, rnn):
    '''
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)
    '''

    kwargs = learn_kwargs[alg]
    kwargs.update(common_kwargs)

    env_fn = lambda: FixedSequenceEnv(n_actions=10, episode_len=5)
    learn = lambda e: get_learn_function(alg)(env=e, network=rnn, **kwargs)

    simple_test(env_fn, learn, 0.7)
Example No. 11
def test_mnist(alg):
    '''
    Test if the algorithm can learn to classify MNIST digits.
    Uses CNN policy.
    '''

    learn_kwargs = learn_args[alg]
    learn_kwargs.update(common_kwargs)

    learn = get_learn_function(alg)
    learn_fn = lambda e: learn(env=e, **learn_kwargs)
    env_fn = lambda: MnistEnv(episode_len=100)

    simple_test(env_fn, learn_fn, 0.6)
Example No. 12
def test_mnist(alg):
    '''
    Test if the algorithm can learn to classify MNIST digits.
    Uses CNN policy.
    '''

    learn_kwargs = learn_args[alg]
    learn_kwargs.update(common_kwargs)

    learn = get_learn_function(alg)
    learn_fn = lambda e: learn(env=e, **learn_kwargs)
    env_fn = lambda: MnistEnv(seed=0, episode_len=100)

    simple_test(env_fn, learn_fn, 0.6)
Example No. 13
def test_serialization(learn_fn, network_fn):
    '''
    Test if the trained model can be serialized
    '''


    if network_fn.endswith('lstm') and learn_fn in ['acer', 'acktr', 'trpo_mpi', 'deepq']:
        # TODO make acktr work with recurrent policies and test
        # github issue: https://github.com/openai/baselines/issues/660
        return

    def make_env():
        env = MnistEnv(episode_len=100)
        env.seed(10)
        return env

    env = DummyVecEnv([make_env])
    ob = env.reset().copy()
    learn = get_learn_function(learn_fn)

    kwargs = {}
    kwargs.update(network_kwargs[network_fn])
    kwargs.update(learn_kwargs[learn_fn])


    learn = partial(learn, env=env, network=network_fn, seed=0, **kwargs)

    with tempfile.TemporaryDirectory() as td:
        model_path = os.path.join(td, 'serialization_test_model')

        with tf.Graph().as_default(), make_session().as_default():
            model = learn(total_timesteps=100)
            model.save(model_path)
            mean1, std1 = _get_action_stats(model, ob)
            variables_dict1 = _serialize_variables()

        with tf.Graph().as_default(), make_session().as_default():
            model = learn(total_timesteps=0, load_path=model_path)
            mean2, std2 = _get_action_stats(model, ob)
            variables_dict2 = _serialize_variables()

        for k, v in variables_dict1.items():
            np.testing.assert_allclose(v, variables_dict2[k], atol=0.01,
                err_msg='saved and loaded variable {} value mismatch'.format(k))

        np.testing.assert_allclose(mean1, mean2, atol=0.5)
        np.testing.assert_allclose(std1, std2, atol=0.5)
Example No. 14
def test_env_after_learn(algo):
    def make_env():
        # acktr requires too much RAM, fails on travis
        env = gym.make('CartPole-v1' if algo == 'acktr' else 'PongNoFrameskip-v4')
        return env

    make_session(make_default=True, graph=tf.Graph())
    env = SubprocVecEnv([make_env])

    learn = get_learn_function(algo)

    # Commenting out the following line resolves the issue, though crash happens at env.reset().
    learn(network='mlp', env=env, total_timesteps=0, load_path=None, seed=None)

    env.reset()
    env.close()
Example No. 15
def test_env_after_learn(algo):
    def make_env():
        # acktr requires too much RAM, fails on travis
        env = gym.make('CartPole-v1' if algo == 'acktr' else 'PongNoFrameskip-v4')
        return env

    make_session(make_default=True, graph=tf.Graph())
    env = SubprocVecEnv([make_env])

    learn = get_learn_function(algo)

    # Commenting out the following line resolves the issue, though crash happens at env.reset().
    learn(network='mlp', env=env, total_timesteps=0, load_path=None, seed=None)

    env.reset()
    env.close()
Example No. 16
def test_fixed_sequence(alg, rnn):
    '''
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)
    '''

    kwargs = learn_kwargs[alg]
    kwargs.update(common_kwargs)

    env_fn = lambda: FixedSequenceEnv(n_actions=10, episode_len=5)
    learn = lambda e: get_learn_function(alg)(
        env=e,
        network=rnn,
        **kwargs
    )

    simple_test(env_fn, learn, 0.7)
Example No. 17
def test_fetchreach(alg):
    '''
    Test if the algorithm (with an mlp policy)
    can learn the FetchReach task
    '''

    kwargs = common_kwargs.copy()
    kwargs.update(learn_kwargs[alg])

    learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
    def env_fn():

        env = gym.make('FetchReach-v1')
        env.seed(0)
        return env

    reward_per_episode_test(env_fn, learn_fn, -15)
Example No. 18
def test_cartpole(alg):
    '''
    Test if the algorithm (with an mlp policy)
    can learn to balance the cartpole
    '''

    kwargs = common_kwargs.copy()
    kwargs.update(learn_kwargs[alg])

    learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
    def env_fn():

        env = gym.make('CartPole-v0')
        env.seed(0)
        return env

    reward_per_episode_test(env_fn, learn_fn, 100)
Example No. 19
def test_fetchreach(alg):
    '''
    Test if the algorithm (with an mlp policy)
    can learn the FetchReach task
    '''

    kwargs = common_kwargs.copy()
    kwargs.update(learn_kwargs[alg])

    learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
    def env_fn():

        env = gym.make('FetchReach-v1')
        env.seed(0)
        return env

    reward_per_episode_test(env_fn, learn_fn, -15)
Example No. 20
def test_cartpole(alg):
    '''
    Test if the algorithm (with an mlp policy)
    can learn to balance the cartpole
    '''

    kwargs = common_kwargs.copy()
    kwargs.update(learn_kwargs[alg])

    learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
    def env_fn():

        env = gym.make('CartPole-v0')
        env.seed(0)
        return env

    reward_per_episode_test(env_fn, learn_fn, 100)
Example No. 21
def test_env_after_learn(algo):
    def make_env():
        env = gym.make('PongNoFrameskip-v4')
        return env

    make_session(make_default=True, graph=tf.Graph())
    env = SubprocVecEnv([make_env])

    learn = get_learn_function(algo)
    network = cnn(one_dim_bias=True)

    # Commenting out the following line resolves the issue, though crash happens at env.reset().
    learn(network=network,
          env=env,
          total_timesteps=0,
          load_path=None,
          seed=None)

    env.reset()
    env.close()
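Unlike the earlier variants that pass the network by name ('mlp'), this example builds the network function explicitly; cnn here is presumably the convolutional builder from baselines.common.models, i.e. roughly:

from baselines.common.models import cnn  # assumed origin of cnn() used above

network = cnn(one_dim_bias=True)  # returns a network-building function accepted by learn()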
Example No. 22
File: run.py Project: zwc662/BGAIL
def train(args, extra_args):
    env_type, env_id = run.get_env_type(args.env)

    if args.alg == 'gail':
        env_type += '_gail'
        args.alg = 'bgail'
    elif args.alg not in ['bgail', 'gail']:
        raise NotImplementedError

    learn = run.get_learn_function(args.alg)
    alg_kwargs = run.get_learn_function_defaults(args.alg, env_type)
    alg_kwargs.update(extra_args)

    env = build_env(args)

    print('Training {} on {}:{} with arguments \n{}'.format(
        args.alg, env_type, env_id, alg_kwargs))

    model = learn(env=env,
                  seed=args.seed,
                  save_path=args.save_path,
                  load_path=args.load_path,
                  render=args.render,
                  **alg_kwargs)
Example No. 23
#!/usr/bin/python3

# https://stackoverflow.com/questions/45068568/is-it-possible-to-create-a-new-gym-environment-in-openai

import gym
import gym_banana
import baselines.run as r

time_cycles = 2464

env = gym.make('Banana-v0')
env.num_envs = 1  # raw gym env; baselines' ppo2 runner expects a vectorized env exposing num_envs
learn = r.get_learn_function("ppo2")
model = learn(
    network='mlp',
    env=env,
    total_timesteps=time_cycles
)
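An alternative to patching num_envs onto the raw environment is to wrap it in a DummyVecEnv, which exposes the vectorized interface baselines' learn functions expect; a sketch under that assumption (it reuses the Banana-v0 id registered by gym_banana):

from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

# Same training run without the num_envs hack: give learn() a vectorized env.
vec_env = DummyVecEnv([lambda: gym.make('Banana-v0')])
model = learn(network='mlp', env=vec_env, total_timesteps=time_cycles)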

Example No. 24
    'nsteps': 2048,
    'noptepochs': 10,
    'save_interval': 20,
    'log_interval': 1,
    'save_path': save_path,
    'model_load_path': model_load_path,
    'seed': 0,
    'reward_scale': 1,
    'flatten_dict_observations': True,
    'transfer_weights': False
}
args = SimpleNamespace(**args_dict)

# Prepare the environment and learning algorithm
env_type, env_id = get_env_type(args.env)
learn = get_learn_function(args.alg)
alg_kwargs = get_learn_function_defaults(args.alg, env_type)
env = build_env(args)
alg_kwargs['network'] = args.network

# The path we will store the results of this experiment
full_path = args.save_path + '/' + args.env + '-' + args.alg

# Make folders that we will store the checkpoints, models and epoch results
if not os.path.exists(full_path):
    os.makedirs(full_path)
    os.makedirs(full_path + '/checkpoints')

print("About to start learning model")

model = learn(env=env,