def run_m2s_model(env_name, config, trial):
    """Run a DDPG M2S-model experiment on an Aeris environment.

    Args:
        env_name: name of the environment passed to ``create_env``.
        config: experiment/agent configuration object.
        trial: trial index forwarded to the experiment runner.
    """
    env = create_env(env_name)
    # NOTE(review): Aeris runners pass the full observation-shape tuple
    # (not shape[0] as the Bullet runners do) — presumably intentional; confirm.
    state_dim = env.observation_space.shape
    action_dim = env.action_space.shape[0]
    experiment = ExperimentDDPG(env_name, env, config)
    agent = DDPGAerisM2SModelAgent(state_dim, action_dim, config)
    try:
        experiment.run_forward_model(agent, trial)
    finally:
        # Release the environment even if the run raises.
        env.close()
def run_metalearner_model(env_name, config, trial):
    """Run a DDPG gated-metacritic (metalearner) experiment on an Aeris environment.

    Args:
        env_name: name of the environment passed to ``create_env``.
        config: experiment/agent configuration object.
        trial: trial index forwarded to the experiment runner.
    """
    env = create_env(env_name)
    # NOTE(review): full shape tuple here (Aeris convention), not shape[0] — confirm.
    state_dim = env.observation_space.shape
    action_dim = env.action_space.shape[0]
    experiment = ExperimentDDPG(env_name, env, config)
    agent = DDPGAerisGatedMetacriticModelAgent(state_dim, action_dim, config)
    try:
        experiment.run_metalearner_model(agent, trial)
    finally:
        # Release the environment even if the run raises.
        env.close()
def run_dop_model(env_name, config, i):
    """Run a DDPG DOP-model experiment on an Aeris environment.

    Args:
        env_name: name of the environment passed to ``create_env``.
        config: experiment/agent configuration object.
        i: trial index forwarded to the experiment runner.
    """
    env = create_env(env_name)
    # NOTE(review): full shape tuple here (Aeris convention), not shape[0] — confirm.
    state_dim = env.observation_space.shape
    action_dim = env.action_space.shape[0]
    experiment = ExperimentDDPG(env_name, env, config)
    agent = DDPGAerisDOPAgent(state_dim, action_dim, config, TYPE.continuous)
    try:
        experiment.run_dop_model(agent, i)
    finally:
        # Release the environment even if the run raises.
        env.close()
def run_baseline(config, i):
    """Run a baseline DDPG experiment on HopperBulletEnv-v0.

    Args:
        config: experiment/agent configuration object.
        i: trial index forwarded to the experiment runner.
    """
    env = gym.make('HopperBulletEnv-v0')
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    experiment = ExperimentDDPG('HopperBulletEnv-v0', env, config)
    agent = DDPGBulletAgent(state_dim, action_dim, config)
    try:
        experiment.run_baseline(agent, i)
    finally:
        # Release the simulator even if the run raises.
        env.close()
def run_metalearner_model(config, i):
    """Run a DDPG gated-metacritic (metalearner) experiment on HopperBulletEnv-v0.

    NOTE(review): a function with this name but a different signature also
    appears earlier in this file — if both live in the same module, the
    later definition shadows the earlier one; verify these belong to
    separate modules.

    Args:
        config: experiment/agent configuration object.
        i: trial index forwarded to the experiment runner.
    """
    env = gym.make('HopperBulletEnv-v0')
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    experiment = ExperimentDDPG('HopperBulletEnv-v0', env, config)
    agent = DDPGBulletGatedMetacriticModelAgent(state_dim, action_dim, config)
    try:
        experiment.run_metalearner_model(agent, i)
    finally:
        # Release the simulator even if the run raises.
        env.close()
def run_forward_model(config, i):
    """Run a DDPG forward-model experiment on AntBulletEnv-v0.

    Args:
        config: experiment/agent configuration object.
        i: trial index forwarded to the experiment runner.
    """
    env = gym.make('AntBulletEnv-v0')
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    experiment = ExperimentDDPG('AntBulletEnv-v0', env, config)
    agent = DDPGBulletForwardModelAgent(state_dim, action_dim, config)
    try:
        experiment.run_forward_model(agent, i)
    finally:
        # Release the simulator even if the run raises.
        env.close()
def run_baseline(config, i):
    """Run a baseline DDPG experiment on LunarLanderContinuous-v2.

    NOTE(review): a `run_baseline` with the same signature also appears
    earlier in this file — if both live in the same module, the later
    definition shadows the earlier one; verify these belong to separate
    modules.

    Args:
        config: experiment/agent configuration object.
        i: trial index forwarded to the experiment runner.
    """
    env = gym.make('LunarLanderContinuous-v2')
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    experiment = ExperimentDDPG('LunarLanderContinuous-v2', env, config)
    agent = DDPGAgent(state_dim, action_dim, config)
    try:
        experiment.run_baseline(agent, i)
    finally:
        # Release the simulator even if the run raises.
        env.close()