Example #1
0
def make_env(config, writer, prefix, datadir, store):
  """Build the wrapped environment for `config.task` ("<suite>_<task>").

  The continuous suites (kitchen, dmc) get action repeat plus action
  normalization; atari receives the action repeat in its constructor and
  gets one-hot actions instead.  All suites share the time-limit and
  episode-collection wrappers.

  Args:
    config: experiment config; uses task, action_repeat, time_limit,
      precision.
    writer: summary writer handed to episode summaries.
    prefix: run phase name; 'test' enables episode summaries.
    datadir: directory where collected episodes are saved.
    store: if True, save every collected episode to `datadir`.

  Returns:
    The fully wrapped environment.

  Raises:
    NotImplementedError: for an unknown suite prefix.
  """
  suite, task = config.task.split('_', 1)
  if suite == 'kitchen':
    env = wrappers.Kitchen2D(task)
  elif suite == 'dmc':
    env = wrappers.DeepMindControl(task)
  elif suite == 'atari':
    env = wrappers.Atari(
        task, config.action_repeat, (64, 64), grayscale=False,
        life_done=True, sticky_actions=True)
  else:
    raise NotImplementedError(suite)
  if suite == 'atari':
    env = wrappers.OneHotAction(env)
  else:
    env = wrappers.ActionRepeat(env, config.action_repeat)
    env = wrappers.NormalizeActions(env)
  env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
  hooks = []
  if store:
    hooks.append(lambda ep: tools.save_episodes(datadir, [ep]))
  if prefix == 'test':
    hooks.append(
        lambda ep: summarize_episode(ep, config, datadir, writer, prefix))
  env = wrappers.Collect(env, hooks, config.precision)
  env = wrappers.RewardObs(env)
  return env
Example #2
0
File: lompo.py  Project: rmrafailov/LOMPO
def make_env(config, writer, prefix, datadir, store):
    """Build the wrapped environment for `config.task`.

    Dispatches on the suite prefix of the task string ("<suite>_<task>")
    for dmc/gym, and on the task suffix for the manipulation tasks
    ('door', 'drawer').

    Args:
        config: experiment config; uses task, action_repeat, time_limit,
            precision.
        writer: summary writer handed to episode summaries.
        prefix: run phase name; 'test' enables episode summaries.
        datadir: directory where collected episodes are saved.
        store: if True, save every collected episode to `datadir`.

    Returns:
        The fully wrapped environment.

    Raises:
        NotImplementedError: if neither suite nor task is recognized.
    """
    suite, task = config.task.split('_', 1)
    if suite == 'dmc':
        env = wrappers.DeepMindControl(task)
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    elif suite == 'gym':
        env = wrappers.Gym(task, config, size=(128, 128))
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    elif task == 'door':
        env = wrappers.DoorOpen(config, size=(128, 128))
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    elif task == 'drawer':
        env = wrappers.DrawerOpen(config, size=(128, 128))
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    else:
        # Report the full task string: the dispatch above checks both the
        # suite prefix and the task suffix, so reporting `suite` alone
        # would hide which task actually failed to match.
        raise NotImplementedError(config.task)
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
    callbacks = []
    if store:
        callbacks.append(lambda ep: tools.save_episodes(datadir, [ep]))
    if prefix == 'test':
        callbacks.append(
            lambda ep: summarize_episode(ep, config, datadir, writer, prefix))
    env = wrappers.Collect(env, callbacks, config.precision)
    env = wrappers.RewardObs(env)
    return env
Example #3
0
def make_env(config,
             writer,
             prefix,
             datadir,
             store,
             index=None,
             real_world=False):
    """Build a wrapped environment, optionally with domain randomization.

    For the dmc suite, domain-randomization parameters from `config.dr`
    are applied unless they are absent or `real_world` is set.  For the
    gym suite, `index` 0 or None selects the un-randomized env (first
    index is always real world).

    Args:
        config: experiment config; uses task, dr, use_state,
            action_repeat, time_limit, precision.
        writer: summary writer handed to episode summaries.
        prefix: run phase name passed through to the summaries.
        datadir: directory where collected episodes are saved.
        store: if True, save every collected episode to `datadir`.
        index: env index; 0/None means the real-world (un-randomized) env.
        real_world: if True, build the un-randomized dmc env.

    Returns:
        The fully wrapped environment.

    Raises:
        NotImplementedError: for an unknown suite prefix.
    """
    suite, task = config.task.split('_', 1)
    if suite == 'dmc':
        dmc_kwargs = dict(use_state=config.use_state, real_world=real_world)
        # Randomize only when parameters exist and this is not the real
        # world instance.
        if config.dr is not None and not real_world:
            dmc_kwargs['dr'] = config.dr
        env = wrappers.DeepMindControl(task, **dmc_kwargs)
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    elif suite == 'atari':
        env = wrappers.Atari(task,
                             config.action_repeat, (64, 64),
                             grayscale=False,
                             life_done=True,
                             sticky_actions=True)
        env = wrappers.OneHotAction(env)
    elif suite == 'gym':
        # First index is always real world.
        if index == 0 or index is None:
            env = wrappers.GymControl(task)
        else:
            env = wrappers.GymControl(task, dr=config.dr)
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    else:
        raise NotImplementedError(suite)
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
    hooks = []
    if store:
        hooks.append(lambda ep: tools.save_episodes(datadir, [ep]))
    # Unlike the store hook, episode summaries are always produced.
    hooks.append(
        lambda ep: summarize_episode(ep, config, datadir, writer, prefix))
    env = wrappers.Collect(env, hooks, config.precision)
    env = wrappers.RewardObs(env)
    return env
Example #4
0
File: dreamer.py  Project: axelbr/dreamer
def make_base_env(config, gui=False):
    """Build the single-agent race-car env for `config.track`/`config.task`.

    Applies action repeat, a reduced continuous action space, and the
    occupancy-map observation wrapper.  `gui` toggles rendering.
    """
    base = wrappers.RaceCarBaseEnv(track=config.track,
                                   task=config.task,
                                   rendering=gui)
    base = wrappers.RaceCarWrapper(base, agent_id='A')
    base = wrappers.ActionRepeat(base, config.action_repeat)
    base = wrappers.ReduceActionSpace(base, low=[0.005, -1.0], high=[1.0, 1.0])
    return wrappers.OccupancyMapObs(base)
Example #5
0
def make_single_track_env(track, action_repeat, rendering=True):
    """Build an evaluation race env for one named track.

    Loads the scenario spec from `scenarios/eval/<track>.yml`, fixes the
    reset mode to 'grid', and applies action repeat, a reduced action
    space, and the occupancy-map observation wrapper.
    """
    spec_path = f'scenarios/eval/{track}.yml'
    scenario = MultiAgentScenario.from_spec(spec_path, rendering=rendering)
    race_env = MultiAgentRaceEnv(scenario=scenario)
    race_env = wrappers.RaceCarWrapper(race_env)
    race_env = wrappers.FixedResetMode(race_env, mode='grid')
    race_env = wrappers.ActionRepeat(race_env, action_repeat)
    race_env = wrappers.ReduceActionSpace(race_env, low=[0.005, -1.0],
                                          high=[1.0, 1.0])
    return wrappers.OccupancyMapObs(race_env)
Example #6
0
def make_multi_track_env(tracks,
                         action_repeat,
                         rendering=True,
                         is_dreamer=True):
    """Build a race env that switches between several evaluation tracks.

    NOTE: unlike the single-track builder, `OccupancyMapObs` is NOT
    applied here — it initializes its map only once, which is a problem
    when the track changes.  A possible fix: force the wrapper to refresh
    the occupancy map whenever the env switches tracks.
    """
    specs = []
    for track in tracks:
        specs.append(MultiAgentScenario.from_spec(
            f'scenarios/eval/{track}.yml', rendering=rendering))
    env = ChangingTrackMultiAgentRaceEnv(scenarios=specs, order='manual')
    env = wrappers.RaceCarWrapper(env)
    env = wrappers.FixedResetMode(env, mode='grid')
    env = wrappers.ActionRepeat(env, action_repeat)
    # Dreamer expects the reduced continuous action space.
    if is_dreamer:
        env = wrappers.ReduceActionSpace(env,
                                         low=[0.005, -1.0],
                                         high=[1.0, 1.0])
    return env
Example #7
0
def make_env(config, writer, prefix, datadir, store):
    """Build the wrapped environment for `config.task` ("<suite>_<task>").

    Supports the dmc, atari, and football suites; all share the
    time-limit and episode-collection wrappers.  Episode summaries are
    always produced; saving episodes to disk is controlled by `store`.
    """
    suite, task = config.task.split('_', 1)
    if suite == 'dmc':
        env = wrappers.DeepMindControl(task)
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    elif suite == 'atari':
        env = wrappers.Atari(task,
                             config.action_repeat, (64, 64),
                             grayscale=False,
                             life_done=True,
                             sticky_actions=True)
        env = wrappers.OneHotAction(env)
    elif suite == 'football':
        env = football_env.create_environment(
            representation='pixels',
            env_name='academy_empty_goal_close',
            stacked=False,
            logdir='./football/empty_goal_close2',
            write_goal_dumps=True,
            write_full_episode_dumps=True,
            render=True,
            write_video=True)
        env = wrappers.Football(env)
        env = wrappers.OneHotAction(env)
    else:
        raise NotImplementedError(suite)
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
    hooks = [lambda ep: summarize_episode(ep, config, datadir, writer, prefix)]
    if store:
        # Saving must run before summarizing, matching the original order.
        hooks.insert(0, lambda ep: tools.save_episodes(datadir, [ep]))
    env = wrappers.Collect(env, hooks, config.precision)
    env = wrappers.RewardObs(env)
    return env
Example #8
0
import gym
import wrappers
import numpy as np

# Gym task id used by the helpers below.
task = 'SingleAgentTreitlstrasse_v2-v0'
# Episode step budget (60 * 100 = 6000 steps).
time_limit = 60 * 100
action_repeat = 8
# Module-level env shared by the functions in this file.
# NOTE(review): TimeLimit is applied BEFORE ActionRepeat here, the reverse
# of the other examples — presumably the limit counts raw (inner) env
# steps; confirm against the wrapper semantics.
env = gym.make(task)
env = wrappers.TimeLimit(env, time_limit)
env = wrappers.ActionRepeat(env, action_repeat)


def test_on_track(model, outdir):
    """Roll out one episode with `model` and save it as an mp4.

    The video is written to `outdir/videos/` with the episode return
    embedded in the file name.
    """
    video, returns = simulate_episode(model)
    videodir = outdir / 'videos'
    videodir.mkdir(parents=True, exist_ok=True)
    import imageio
    recorder = imageio.get_writer(videodir / f'test_return{returns}.mp4')
    for frame in video:
        recorder.append_data(frame)
    recorder.close()


def simulate_episode(model, prediction_window=5, terminate_on_collision=True):
    # to do: make it uniform to f1_tenth directory
    done = False
    obs = env.reset(mode='grid')
    state = None
    video = []
    returns = 0.0
    while not done: