Example #1
import numpy as np
from stable_baselines import DDPG
from stable_baselines.ddpg.policies import MlpPolicy
from stable_baselines.ddpg.noise import OrnsteinUhlenbeckActionNoise
from stable_baselines.gail import ExpertDataset


def main(env):
    # Ornstein-Uhlenbeck noise gives temporally correlated exploration,
    # which suits DDPG's continuous action space
    n_actions = env.action_space.shape[0]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=0.5 * np.ones(n_actions))

    # Use only one expert trajectory;
    # specify `traj_limitation=-1` to use the whole dataset
    file_dir = "/home/vignesh/Thesis_Suture_data/trial2/ambf_data/"
    dataset = ExpertDataset(expert_path=file_dir + 'expert_psm_data.npz',
                            traj_limitation=1,
                            batch_size=32)

    model = DDPG(MlpPolicy,
                 env,
                 gamma=0.95,
                 verbose=1,
                 nb_train_steps=300,
                 nb_rollout_steps=150,
                 param_noise=param_noise,
                 batch_size=128,
                 action_noise=action_noise,
                 random_exploration=0.05,
                 normalize_observations=True,
                 tensorboard_log="./ddpg_dvrk_tensorboard/",
                 observation_range=(-1.5, 1.5))

    # Pretrain the policy with behavior cloning on the expert demonstrations
    model.pretrain(dataset, n_epochs=1000)
    model.save("./gail_robot_env")
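The `expert_psm_data.npz` demonstrations file is assumed to already exist. As a rough sketch, such a dataset can be recorded with stable-baselines' `generate_expert_traj` helper; the environment id and the scripted expert below are hypothetical placeholders, not part of the original example:

import gym
from stable_baselines.gail import generate_expert_traj

# Hypothetical stand-in environment; substitute the actual AMBF/dVRK env
env = gym.make('Pendulum-v0')

def scripted_expert(obs):
    # Placeholder demonstration policy; replace with the real expert controller
    return env.action_space.sample()

# Record 10 expert episodes; generate_expert_traj appends the .npz extension
generate_expert_traj(scripted_expert, save_path='expert_psm_data',
                     env=env, n_episodes=10)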
Example #2
def train_agent_with_ddpg(load):
    import gym
    import numpy as np
    from stable_baselines.ddpg.policies import FeedForwardPolicy
    from stable_baselines.common.vec_env import DummyVecEnv
    from stable_baselines.ddpg.noise import OrnsteinUhlenbeckActionNoise
    from stable_baselines.gail import ExpertDataset
    from stable_baselines import DDPG

    # Create and wrap the environment
    env = gym.make('F16GCAS-v0')
    env = DummyVecEnv([lambda: env])

    # The noise objects for DDPG exploration
    n_actions = env.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=0.01 * np.ones(n_actions))

    # Custom MLP policy with two hidden layers of 128 units each
    class CustomPolicy(FeedForwardPolicy):
        def __init__(self, *args, **kwargs):
            super(CustomPolicy, self).__init__(*args, **kwargs,
                                               layers=[128, 128],
                                               layer_norm=False,
                                               feature_extraction="mlp")

    model = DDPG(CustomPolicy, env, verbose=1, action_noise=action_noise)

    if not load:
        # Pretrain with behavior cloning on the LQR expert demonstrations,
        # then save the model (ROOT is assumed to be defined at module level)
        exp_data = ExpertDataset("./lqr_export.npz")
        model.pretrain(exp_data, n_epochs=100)
        model.save(ROOT + "/trained_models/TDRL/f16/ddpg/128_128")
    else:
        model = DDPG.load(ROOT + "/trained_models/TDRL/f16/ddpg/128_128",
                          policy=CustomPolicy, env=env)

    return model
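A minimal usage sketch, assuming it runs in the same module as `train_agent_with_ddpg` (so the function can see `ROOT`) and that the `F16GCAS-v0` environment is registered:

import gym

ROOT = "."  # assumed project-root constant; adjust to your layout

# Behavior-clone a fresh model from the LQR demonstrations, then run one episode
model = train_agent_with_ddpg(load=False)

env = gym.make('F16GCAS-v0')
obs = env.reset()
done = False
while not done:
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)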