Пример #1
0
def make_env(config, writer, prefix, datadir, store):
  """Build the wrapped environment selected by ``config.task``.

  ``config.task`` is split at its first underscore into a suite name and a
  task name; the suite decides which base wrapper is constructed.

  Args:
    config: Settings object. Reads ``task``, ``action_repeat``,
      ``time_limit`` and ``precision``.
    writer: Forwarded to ``summarize_episode``.
    prefix: Run label. The summary callback is registered only when this
      equals ``'test'``.
    datadir: Forwarded to ``tools.save_episodes`` and ``summarize_episode``.
    store: If truthy, a callback forwarding to ``tools.save_episodes`` is
      registered.

  Returns:
    The environment after the TimeLimit, Collect and RewardObs wrappers.

  Raises:
    ValueError: If ``config.task`` contains no underscore.
    NotImplementedError: If the suite is not ``kitchen``, ``dmc`` or
      ``atari``.
  """
  suite, task = config.task.split('_', 1)
  if suite not in ('kitchen', 'dmc', 'atari'):
    raise NotImplementedError(suite)

  if suite == 'atari':
    # config.action_repeat is passed to the Atari wrapper directly; no
    # separate ActionRepeat wrapper is applied in this branch.
    env = wrappers.Atari(
        task, config.action_repeat, (64, 64),
        grayscale=False, life_done=True, sticky_actions=True)
    env = wrappers.OneHotAction(env)
  else:
    # 'kitchen' and 'dmc' differ only in the base environment class.
    make_base = (
        wrappers.Kitchen2D if suite == 'kitchen'
        else wrappers.DeepMindControl)
    env = make_base(task)
    env = wrappers.ActionRepeat(env, config.action_repeat)
    env = wrappers.NormalizeActions(env)

  # True division: the duration handed to TimeLimit may be a float.
  # Presumably converts a limit in raw frames to agent steps -- TODO confirm.
  env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)

  def save_episode(ep):
    return tools.save_episodes(datadir, [ep])

  def summarize(ep):
    return summarize_episode(ep, config, datadir, writer, prefix)

  callbacks = []
  if store:
    callbacks.append(save_episode)
  if prefix == 'test':
    callbacks.append(summarize)

  env = wrappers.Collect(env, callbacks, config.precision)
  return wrappers.RewardObs(env)
Пример #2
0
def make_env(config, writer, prefix, datadir, store):
    """Build the wrapped environment selected by ``config.task``.

    ``config.task`` is split at its first underscore into ``suite`` and
    ``task``. The ``dmc`` and ``gym`` environments are chosen by suite;
    the door and drawer environments are chosen by the task part being
    exactly ``'door'`` or ``'drawer'`` (whatever the suite part is).

    Args:
        config: Settings object. Reads ``task``, ``action_repeat``,
            ``time_limit`` and ``precision``; also handed whole to the
            Gym, DoorOpen and DrawerOpen wrappers.
        writer: Forwarded to ``summarize_episode``.
        prefix: Run label. The summary callback is registered only when
            this equals ``'test'``.
        datadir: Forwarded to ``tools.save_episodes`` and
            ``summarize_episode``.
        store: If truthy, a callback forwarding to
            ``tools.save_episodes`` is registered.

    Returns:
        The environment after the TimeLimit, Collect and RewardObs
        wrappers.

    Raises:
        ValueError: If ``config.task`` contains no underscore.
        NotImplementedError: If neither the suite nor the task matches a
            supported environment; the message carries the suite.
    """
    suite, task = config.task.split('_', 1)
    image_size = (128, 128)

    # Pick the base environment; suite checks take priority over task checks.
    if suite == 'dmc':
        env = wrappers.DeepMindControl(task)
    elif suite == 'gym':
        env = wrappers.Gym(task, config, size=image_size)
    elif task == 'door':
        env = wrappers.DoorOpen(config, size=image_size)
    elif task == 'drawer':
        env = wrappers.DrawerOpen(config, size=image_size)
    else:
        raise NotImplementedError(suite)

    # Every supported environment gets the same two action wrappers.
    env = wrappers.ActionRepeat(env, config.action_repeat)
    env = wrappers.NormalizeActions(env)

    # True division: the duration handed to TimeLimit may be a float.
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)

    def save_episode(ep):
        return tools.save_episodes(datadir, [ep])

    def summarize(ep):
        return summarize_episode(ep, config, datadir, writer, prefix)

    callbacks = [save_episode] if store else []
    if prefix == 'test':
        callbacks.append(summarize)

    env = wrappers.Collect(env, callbacks, config.precision)
    return wrappers.RewardObs(env)
Пример #3
0
def make_env(config, logger, mode, train_eps, eval_eps):
    """Build the wrapped environment selected by ``config.task``.

    ``config.task`` is split at its first underscore into a suite name
    (``dmc`` or ``atari``) and a task name.

    Args:
        config: Settings object. Reads ``task``, ``action_repeat``,
            ``size``, ``time_limit`` and, for Atari, ``grayscale``.
        logger: Bound into the ``process_episode`` callback.
        mode: Bound into the ``process_episode`` callback.
        train_eps: Bound into the ``process_episode`` callback.
        eval_eps: Bound into the ``process_episode`` callback.

    Returns:
        The environment after the TimeLimit, SelectAction,
        CollectDataset and RewardObs wrappers.

    Raises:
        ValueError: If ``config.task`` contains no underscore.
        NotImplementedError: If the suite is neither ``dmc`` nor
            ``atari``.
    """
    suite, task = config.task.split('_', 1)
    if suite not in ('dmc', 'atari'):
        raise NotImplementedError(suite)

    if suite == 'dmc':
        env = wrappers.NormalizeActions(
            wrappers.DeepMindControl(task, config.action_repeat, config.size))
    else:
        env = wrappers.OneHotAction(
            wrappers.Atari(
                task, config.action_repeat, config.size,
                grayscale=config.grayscale,
                # NOTE: short-circuits to False whatever ``mode`` is.
                life_done=False and (mode == 'train'),
                sticky_actions=True,
                all_actions=True))

    env = wrappers.TimeLimit(env, config.time_limit)
    env = wrappers.SelectAction(env, key='action')

    # The callback receives whatever CollectDataset passes, after these
    # five pre-bound arguments.
    on_episode = functools.partial(
        process_episode, config, logger, mode, train_eps, eval_eps)
    env = wrappers.CollectDataset(env, [on_episode])
    return wrappers.RewardObs(env)
Пример #4
0
def make_env(config,
             writer,
             prefix,
             datadir,
             store,
             index=None,
             real_world=False):
    """Build the wrapped environment selected by ``config.task``.

    ``config.task`` is split at its first underscore into a suite name
    (``dmc``, ``atari`` or ``gym``) and a task name.

    Args:
        config: Settings object. Reads ``task``, ``action_repeat``,
            ``time_limit``, ``precision`` and, depending on the suite,
            ``dr`` and ``use_state``.
        writer: Forwarded to ``summarize_episode``.
        prefix: Forwarded to ``summarize_episode``.
        datadir: Forwarded to ``tools.save_episodes`` and
            ``summarize_episode``.
        store: If truthy, a callback forwarding to
            ``tools.save_episodes`` is registered.
        index: Only consulted for the ``gym`` suite; ``None`` or ``0``
            builds the environment without the ``dr`` argument.
        real_world: Forwarded to the DeepMindControl wrapper; when truthy
            the ``dr`` argument is omitted for the ``dmc`` suite.

    Returns:
        The environment after the TimeLimit, Collect and RewardObs
        wrappers.

    Raises:
        ValueError: If ``config.task`` contains no underscore.
        NotImplementedError: If the suite is not supported.
    """
    suite, task = config.task.split('_', 1)

    if suite == 'dmc':
        # ``dr`` is only passed when it is configured and this is not the
        # real-world environment.
        with_dr = config.dr is not None and not real_world
        if with_dr:
            env = wrappers.DeepMindControl(
                task,
                dr=config.dr,
                use_state=config.use_state,
                real_world=real_world)
        else:
            env = wrappers.DeepMindControl(
                task,
                use_state=config.use_state,
                real_world=real_world)
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    elif suite == 'atari':
        env = wrappers.Atari(
            task, config.action_repeat, (64, 64),
            grayscale=False, life_done=True, sticky_actions=True)
        env = wrappers.OneHotAction(env)
    elif suite == 'gym':
        # The first index (or no index at all) is always the real world.
        if index is None or index == 0:
            env = wrappers.GymControl(task)
        else:
            env = wrappers.GymControl(task, dr=config.dr)
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    else:
        raise NotImplementedError(suite)

    # True division: the duration handed to TimeLimit may be a float.
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)

    def save_episode(ep):
        return tools.save_episodes(datadir, [ep])

    def summarize(ep):
        return summarize_episode(ep, config, datadir, writer, prefix)

    # Unlike saving, summarizing is registered unconditionally.
    callbacks = [save_episode, summarize] if store else [summarize]

    env = wrappers.Collect(env, callbacks, config.precision)
    return wrappers.RewardObs(env)
Пример #5
0
def make_env(config, writer, prefix, datadir, store):
    """Build the wrapped environment selected by ``config.task``.

    ``config.task`` is split at its first underscore into a suite name
    (``dmc``, ``atari`` or ``football``) and a task name. For the
    ``football`` suite the task name is not used: the scenario and dump
    directory are fixed in the code below.

    Args:
        config: Settings object. Reads ``task``, ``action_repeat``,
            ``time_limit`` and ``precision``.
        writer: Forwarded to ``summarize_episode``.
        prefix: Forwarded to ``summarize_episode``.
        datadir: Forwarded to ``tools.save_episodes`` and
            ``summarize_episode``.
        store: If truthy, a callback forwarding to
            ``tools.save_episodes`` is registered.

    Returns:
        The environment after the TimeLimit, Collect and RewardObs
        wrappers.

    Raises:
        ValueError: If ``config.task`` contains no underscore.
        NotImplementedError: If the suite is not supported.
    """
    suite, task = config.task.split('_', 1)
    if suite not in ('dmc', 'atari', 'football'):
        raise NotImplementedError(suite)

    if suite == 'dmc':
        env = wrappers.DeepMindControl(task)
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    elif suite == 'atari':
        env = wrappers.Atari(
            task, config.action_repeat, (64, 64),
            grayscale=False, life_done=True, sticky_actions=True)
        env = wrappers.OneHotAction(env)
    else:
        # Fixed football setup; ``task`` plays no part in this branch.
        football_options = dict(
            representation='pixels',
            env_name='academy_empty_goal_close',
            stacked=False,
            logdir='./football/empty_goal_close2',
            write_goal_dumps=True,
            write_full_episode_dumps=True,
            render=True,
            write_video=True)
        env = football_env.create_environment(**football_options)
        env = wrappers.Football(env)
        env = wrappers.OneHotAction(env)

    # True division: the duration handed to TimeLimit may be a float.
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)

    def save_episode(ep):
        return tools.save_episodes(datadir, [ep])

    def summarize(ep):
        return summarize_episode(ep, config, datadir, writer, prefix)

    # Summarizing is always registered; saving only when ``store`` is set.
    callbacks = []
    if store:
        callbacks.append(save_episode)
    callbacks.append(summarize)

    env = wrappers.Collect(env, callbacks, config.precision)
    return wrappers.RewardObs(env)
from environments.pendulum import UnderactuatedPendulum

# INITIALIZE COLORS FOR PROMPT ON WIN10 MACHINES
colorama.init()

# DATASET PARAMETERS
max_num_episodes = 120
# Passed as the duration to both MinimumDuration and MaximumDuration below.
len_time = 100
# Forwarded to gym.make as the environment's render_shape.
render_shape = (500, 500)
# Forwarded to PixelObservations as the image size.
image_res = (28, 28)

# INITIALIZE PENDULUM ENVIRONMENT
env = gym.make('pendulum-underactuated-v0',
               render_shape=render_shape,
               model=None)
# Wrappers are applied in sequence; each one wraps the result of the previous.
env = wrappers.NormalizeActions(env)
# Same value for minimum and maximum duration -- presumably pins every
# episode to exactly len_time steps (TODO confirm against the wrappers).
env = wrappers.MinimumDuration(env, len_time)
env = wrappers.MaximumDuration(env, len_time)
env = wrappers.ObservationDict(env, key='observation')
env = wrappers.PixelObservations(env, image_res, np.uint8, 'image')
env = wrappers.ConvertRewardToCost(env)
env = wrappers.ConvertTo32Bit(env)

# SEED EXPERIMENT TO CREATE REPRODUCIBLE RESULTS
# The same seed value is given to seed_experiment and to the environment.
seed_value = 0
seed_experiment(seed=seed_value)
env.seed(seed=seed_value)

# GET ENVIRONMENT DATA SHAPES
# 'image' is the same key that was passed to PixelObservations above.
observation_shape = env.observation_space['image'].shape
action_shape = env.action_space.shape