def make_env(config, writer, prefix, datadir, store):
    """Build the wrapped environment named by ``config.task``.

    ``config.task`` has the form ``'<suite>_<task>'``. Episodes are saved to
    ``datadir`` when ``store`` is set, and summarized to ``writer`` when the
    prefix is ``'test'``.
    """
    suite, task = config.task.split('_', 1)
    if suite in ('kitchen', 'dmc'):
        # Both continuous-control suites share the repeat/normalize stack.
        base = (wrappers.Kitchen2D(task) if suite == 'kitchen'
                else wrappers.DeepMindControl(task))
        env = wrappers.NormalizeActions(
            wrappers.ActionRepeat(base, config.action_repeat))
    elif suite == 'atari':
        env = wrappers.OneHotAction(wrappers.Atari(
            task, config.action_repeat, (64, 64), grayscale=False,
            life_done=True, sticky_actions=True))
    else:
        raise NotImplementedError(suite)
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
    callbacks = []
    if store:
        callbacks.append(lambda ep: tools.save_episodes(datadir, [ep]))
    if prefix == 'test':
        callbacks.append(
            lambda ep: summarize_episode(ep, config, datadir, writer, prefix))
    env = wrappers.Collect(env, callbacks, config.precision)
    return wrappers.RewardObs(env)
def make_env(config, writer, prefix, datadir, store):
    """Build the wrapped environment named by ``config.task``.

    ``config.task`` has the form ``'<suite>_<task>'``. Supported suites are
    ``'dmc'`` and ``'gym'``; additionally the task names ``'door'`` and
    ``'drawer'`` select dedicated manipulation environments regardless of
    suite. Episodes are saved to ``datadir`` when ``store`` is set, and
    summarized to ``writer`` when the prefix is ``'test'``.
    """
    suite, task = config.task.split('_', 1)
    if suite == 'dmc':
        env = wrappers.DeepMindControl(task)
    elif suite == 'gym':
        env = wrappers.Gym(task, config, size=(128, 128))
    elif task == 'door':
        env = wrappers.DoorOpen(config, size=(128, 128))
    elif task == 'drawer':
        env = wrappers.DrawerOpen(config, size=(128, 128))
    else:
        # The dispatch above keys on both the suite AND the task name, so
        # report the full task string; the suite alone is misleading when a
        # task like 'door'/'drawer' was misspelled.
        raise NotImplementedError(config.task)
    # Every supported environment gets the same action wrappers; hoisting
    # them here removes the identical per-branch duplication.
    env = wrappers.ActionRepeat(env, config.action_repeat)
    env = wrappers.NormalizeActions(env)
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
    callbacks = []
    if store:
        callbacks.append(lambda ep: tools.save_episodes(datadir, [ep]))
    if prefix == 'test':
        callbacks.append(
            lambda ep: summarize_episode(ep, config, datadir, writer, prefix))
    env = wrappers.Collect(env, callbacks, config.precision)
    env = wrappers.RewardObs(env)
    return env
def make_env(config, writer, prefix, datadir, store, index=None, real_world=False):
    """Build the wrapped environment named by ``config.task``.

    ``index``/``real_world`` select whether this instance represents the real
    world (index 0 or ``real_world=True``) or a domain-randomized replica
    configured via ``config.dr``. Episodes are saved to ``datadir`` when
    ``store`` is set and are always summarized to ``writer``.
    """
    suite, task = config.task.split('_', 1)
    if suite == 'dmc':
        dmc_kwargs = dict(use_state=config.use_state, real_world=real_world)
        # Randomization is only applied to non-real-world replicas.
        if config.dr is not None and not real_world:
            dmc_kwargs['dr'] = config.dr
        env = wrappers.DeepMindControl(task, **dmc_kwargs)
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    elif suite == 'atari':
        env = wrappers.Atari(
            task, config.action_repeat, (64, 64), grayscale=False,
            life_done=True, sticky_actions=True)
        env = wrappers.OneHotAction(env)
    elif suite == 'gym':
        # Index 0 (or no index) is always the real-world instance.
        if index is None or index == 0:
            env = wrappers.GymControl(task)
        else:
            env = wrappers.GymControl(task, dr=config.dr)
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    else:
        raise NotImplementedError(suite)
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
    callbacks = []
    if store:
        callbacks.append(lambda ep: tools.save_episodes(datadir, [ep]))
    callbacks.append(
        lambda ep: summarize_episode(ep, config, datadir, writer, prefix))
    env = wrappers.Collect(env, callbacks, config.precision)
    return wrappers.RewardObs(env)
def make_base_env(config, gui=False):
    """Assemble the single-agent race-car environment for agent 'A'.

    Applies action repeat, clamps the action space to
    [0.005, 1.0] x [-1.0, 1.0], and adds the occupancy-map observation.
    """
    base = wrappers.RaceCarBaseEnv(
        track=config.track, task=config.task, rendering=gui)
    env = wrappers.RaceCarWrapper(base, agent_id='A')
    env = wrappers.ActionRepeat(env, config.action_repeat)
    env = wrappers.ReduceActionSpace(env, low=[0.005, -1.0], high=[1.0, 1.0])
    return wrappers.OccupancyMapObs(env)
def make_single_track_env(track, action_repeat, rendering=True):
    """Build a multi-agent race env for one evaluation track.

    Loads ``scenarios/eval/<track>.yml``, pins the reset mode to 'grid',
    applies action repeat, reduces the action space, and adds the
    occupancy-map observation.
    """
    spec_path = f'scenarios/eval/{track}.yml'
    scenario = MultiAgentScenario.from_spec(spec_path, rendering=rendering)
    env = wrappers.RaceCarWrapper(MultiAgentRaceEnv(scenario=scenario))
    env = wrappers.FixedResetMode(env, mode='grid')
    env = wrappers.ActionRepeat(env, action_repeat)
    env = wrappers.ReduceActionSpace(env, low=[0.005, -1.0], high=[1.0, 1.0])
    return wrappers.OccupancyMapObs(env)
def make_multi_track_env(tracks, action_repeat, rendering=True, is_dreamer=True):
    """Build a race env that cycles through several evaluation tracks.

    NOTE(known issue): `OccupancyMapObs` initializes its map once, which
    breaks multi-track racing; that wrapper is deliberately omitted here.
    A possible fix is to force the wrapper to rebuild the map whenever the
    active track changes.
    """
    scenarios = [
        MultiAgentScenario.from_spec(f'scenarios/eval/{track}.yml',
                                     rendering=rendering)
        for track in tracks
    ]
    env = ChangingTrackMultiAgentRaceEnv(scenarios=scenarios, order='manual')
    env = wrappers.RaceCarWrapper(env)
    env = wrappers.FixedResetMode(env, mode='grid')
    env = wrappers.ActionRepeat(env, action_repeat)
    if is_dreamer:
        # Dreamer expects the clamped action space; other baselines do not.
        env = wrappers.ReduceActionSpace(env, low=[0.005, -1.0],
                                         high=[1.0, 1.0])
    return env
def make_env(config, writer, prefix, datadir, store):
    """Build the wrapped environment named by ``config.task``.

    ``config.task`` has the form ``'<suite>_<task>'``; supported suites are
    'dmc', 'atari', and 'football'. Episodes are saved to ``datadir`` when
    ``store`` is set and are always summarized to ``writer``.
    """
    suite, task = config.task.split('_', 1)
    if suite == 'dmc':
        env = wrappers.DeepMindControl(task)
        env = wrappers.ActionRepeat(env, config.action_repeat)
        env = wrappers.NormalizeActions(env)
    elif suite == 'atari':
        env = wrappers.OneHotAction(wrappers.Atari(
            task, config.action_repeat, (64, 64), grayscale=False,
            life_done=True, sticky_actions=True))
    elif suite == 'football':
        # Google Research Football with pixel observations; dumps goals,
        # full episodes, and videos for inspection.
        raw = football_env.create_environment(
            representation='pixels',
            env_name='academy_empty_goal_close',
            stacked=False,
            logdir='./football/empty_goal_close2',
            write_goal_dumps=True,
            write_full_episode_dumps=True,
            render=True,
            write_video=True)
        env = wrappers.OneHotAction(wrappers.Football(raw))
    else:
        raise NotImplementedError(suite)
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
    callbacks = []
    if store:
        callbacks.append(lambda ep: tools.save_episodes(datadir, [ep]))
    callbacks.append(
        lambda ep: summarize_episode(ep, config, datadir, writer, prefix))
    env = wrappers.Collect(env, callbacks, config.precision)
    return wrappers.RewardObs(env)
import gym import wrappers import numpy as np task = 'SingleAgentTreitlstrasse_v2-v0' time_limit = 60 * 100 action_repeat = 8 env = gym.make(task) env = wrappers.TimeLimit(env, time_limit) env = wrappers.ActionRepeat(env, action_repeat) def test_on_track(model, outdir): video, returns = simulate_episode(model) videodir = outdir / 'videos' videodir.mkdir(parents=True, exist_ok=True) import imageio writer = imageio.get_writer(videodir / f'test_return{returns}.mp4') for image in video: writer.append_data(image) writer.close() def simulate_episode(model, prediction_window=5, terminate_on_collision=True): # to do: make it uniform to f1_tenth directory done = False obs = env.reset(mode='grid') state = None video = [] returns = 0.0 while not done: