def __init__(self, level, config, num_action_repeats, seed,
             runfiles_path=None, level_cache=None):
  self._num_action_repeats = num_action_repeats
  self._random_state = np.random.RandomState(seed=seed)
  if runfiles_path:
    deepmind_lab.set_runfiles_path(runfiles_path)
  # Read the flag before stringifying the config: after the str() conversion
  # below, a False value would become the truthy string 'False'.
  self.benchmark_mode = bool(config['benchmark_mode'])
  config = {k: str(v) for k, v in config.items()}
  self._observation_spec = ['RGB_INTERLEAVED', 'INSTR']
  renderer = config['renderer']
  if self.benchmark_mode:
    print('BENCHMARK MODE IS ON! '
          'USE THIS ONLY FOR TESTING AND THROUGHPUT MEASUREMENT!')
  self._env = deepmind_lab.Lab(
      level=level,
      observations=self._observation_spec,
      config=config,
      level_cache=level_cache,
      renderer=renderer,
  )
def __init__(self, level, mode, render_size=(64, 64), action_repeat=4,
             action_set=ACTION_SET_DEFAULT, level_cache=None, seed=None,
             runfiles_path=None):
  assert mode in ('train', 'test')
  import deepmind_lab
  if runfiles_path:
    print('Setting DMLab runfiles path:', runfiles_path)
    deepmind_lab.set_runfiles_path(runfiles_path)
  self._config = {}
  self._config['width'] = render_size[0]
  self._config['height'] = render_size[1]
  self._config['logLevel'] = 'WARN'
  if mode == 'test':
    self._config['allowHoldOutLevels'] = 'true'
    self._config['mixerSeed'] = 0x600D5EED
  self._action_repeat = action_repeat
  self._random = np.random.RandomState(seed)
  self._env = deepmind_lab.Lab(
      level='contributed/dmlab30/' + level,
      observations=['RGB_INTERLEAVED'],
      config={k: str(v) for k, v in self._config.items()},
      level_cache=level_cache)
  self._action_set = action_set
  self._last_image = None
  self._done = True
def __init__(self, game, num_action_repeats, seed, is_test, config,
             action_set=DEFAULT_ACTION_SET, level_cache_dir=None):
  if is_test:
    config['allowHoldOutLevels'] = 'true'
    # Mixer seed for evaluation, see
    # https://github.com/deepmind/lab/blob/master/docs/users/python_api.md
    config['mixerSeed'] = 0x600D5EED
  if game in games.ALL_GAMES:
    game = 'contributed/dmlab30/' + game
  config['datasetPath'] = FLAGS.dataset_path
  self._num_action_repeats = num_action_repeats
  self._random_state = np.random.RandomState(seed=seed)
  if FLAGS.homepath:
    deepmind_lab.set_runfiles_path(FLAGS.homepath)
  self._env = deepmind_lab.Lab(
      level=game,
      observations=['RGB_INTERLEAVED'],
      level_cache=LevelCache(level_cache_dir) if level_cache_dir else None,
      config={k: str(v) for k, v in config.items()},
  )
  self._action_set = action_set
  self.action_space = gym.spaces.Discrete(len(self._action_set))
  self.observation_space = gym.spaces.Box(
      low=0, high=255, shape=(config['height'], config['width'], 3),
      dtype=np.uint8)
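# The LevelCache class used above is not defined in this excerpt. A minimal
# sketch, assuming the fetch(key, pn)/write(key, pn) level-cache protocol
# described in DeepMind Lab's python_api.md; the flat directory layout keyed
# by level hash is an assumption:
import os
import shutil


class LevelCache(object):
  """Caches compiled levels in a local directory, keyed by level hash."""

  def __init__(self, cache_dir):
    self._cache_dir = cache_dir

  def fetch(self, key, pn):
    """Copies a cached level to `pn`; returns True on a cache hit."""
    path = os.path.join(self._cache_dir, key)
    if os.path.isfile(path):
      shutil.copyfile(path, pn)
      return True
    return False

  def write(self, key, pn):
    """Stores the level file at `pn` under `key` if not already cached."""
    path = os.path.join(self._cache_dir, key)
    if not os.path.isfile(path):
      os.makedirs(self._cache_dir, exist_ok=True)
      shutil.copyfile(pn, path)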
def __init__(self, level, config, num_action_repeats, seed,
             runfiles_path=None, level_cache=None):
  # Define initial attributes.
  self._num_action_repeats = num_action_repeats
  self._random_state = np.random.RandomState(seed=seed)
  self._width = config.get('width', None)
  self._height = config.get('height', None)
  # Determine the observation spec.
  if FLAGS.depth:
    self._observation_spec = ['RGBD_INTERLEAVED']
  else:
    self._observation_spec = ['RGB_INTERLEAVED']
  if False:  # Disabled debug observations; flip to enable.
    self._observation_spec.extend(['DEBUG.POS.TRANS', 'DEBUG.POS.ROT'])
  # Configure the DeepMind Lab environment.
  if runfiles_path:
    deepmind_lab.set_runfiles_path(runfiles_path)
  config = {k: str(v) for k, v in config.items()}
  self._env = deepmind_lab.Lab(
      level=level,
      observations=self._observation_spec,
      config=config,
      level_cache=level_cache,
  )
def __init__(self, screen_width=300, screen_height=300, runfiles_path='',
             state_processor=None, level_script='tests/empty_room_test',
             frame_skip=1, seed=None, level_directory=''):
  self.width = screen_width
  self.height = screen_height
  self.runfiles_path = runfiles_path
  self.level_script = level_script
  self.frame_skip = frame_skip
  self.seed = seed
  self.level_directory = level_directory
  self.processor = state_processor
  self.current_state = None
  self.mode = 'RGB_INTERLEAVED'
  config = {
      'width': str(screen_width),
      'height': str(screen_height),
      'levelDirectory': str(level_directory)
  }
  if self.runfiles_path:
    deepmind_lab.set_runfiles_path(self.runfiles_path)
  self.env = deepmind_lab.Lab(level_script, [self.mode], config=config)
  self.number_of_actions = len(self.env.action_spec())
# Note: T, T_max, t_max, gamma, valNetwork and env_step are pseudocode
# globals in the original and remain undefined here.
def is_terminal(state):
  # Currently, an episode is terminal if and only if you receive a reward.
  # Left as a stub in the original; a real implementation must fill this in.
  raise NotImplementedError


def actor_learner_thread(child_conn, level_script, config):
  env = deepmind_lab.Lab(level_script, ['RGB_INTERLEAVED'], config=config)
  t = 1
  while T < T_max:
    d_theta, d_theta_v = 0, 0
    theta, theta_v = child_conn.recv()  # Get the most current version.
    t_start = t
    rewards = []
    state = env.observations()['RGB_INTERLEAVED']
    # Roll out until a terminal state, for at most t_max steps.
    while not is_terminal(state) and t - t_start != t_max:
      reward, state = env_step()
      rewards.append(reward)
      t += 1
      T += 1
    # Bootstrap from the value network unless the state is terminal.
    R = 0 if is_terminal(state) else valNetwork(state)
    for i in range(t - 1, t_start - 1, -1):
      R = rewards[i - t_start] + gamma * R  # Accumulate n-step returns.
      # Accumulate gradients (left unfinished in the original):
      # d_theta += grad(log pi(a_i | s_i; theta) * (R - V(s_i; theta_v)))
      # d_theta_v += grad((R - V(s_i; theta_v)) ** 2)
    child_conn.send((d_theta, d_theta_v))  # Pipe.send takes a single object.


def run(width, height, level_script, frame_count):
  """Spins up an environment and runs the random agent."""
  config = {'width': str(width), 'height': str(height)}
  # The original body stops here; the environment setup and agent loop
  # presumably follow the pattern used elsewhere in this section.


if __name__ == '__main__':
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('--frame_count', type=int, default=10000,
                      help='Number of steps to run the agent')
  parser.add_argument('--width', type=int, default=80,
                      help='Horizontal size of the observations')
  parser.add_argument('--height', type=int, default=80,
                      help='Vertical size of the observations')
  parser.add_argument('--runfiles_path', type=str, default=None,
                      help='Set the runfiles path to find DeepMind Lab data')
  parser.add_argument('--level_script', type=str,
                      default='tests/empty_room_test',
                      help='The environment level script to load')
  args = parser.parse_args()
  if args.runfiles_path:
    deepmind_lab.set_runfiles_path(args.runfiles_path)
  run(args.width, args.height, args.level_script, args.frame_count)
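# The backward accumulation R = r_i + gamma * R above is the core of the
# actor-learner update. A self-contained sketch of that computation (the
# function name and the use of NumPy are illustrative, not from the source):
import numpy as np


def n_step_returns(rewards, bootstrap_value, gamma=0.99):
  """Computes R_i = r_i + gamma * R_{i+1}, iterating backwards in time."""
  R = bootstrap_value
  returns = np.empty(len(rewards))
  for i in range(len(rewards) - 1, -1, -1):
    R = rewards[i] + gamma * R
    returns[i] = R
  return returns


# Example: three rewards ending in a terminal state (bootstrap value 0):
# n_step_returns([0.0, 0.0, 1.0], 0.0, gamma=0.9) -> [0.81, 0.9, 1.0]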
def __init__(self, level, config, num_action_repeats, seed,
             runfiles_path=None, level_cache=None):
  self._num_action_repeats = num_action_repeats
  self._random_state = np.random.RandomState(seed=seed)
  if runfiles_path:
    deepmind_lab.set_runfiles_path(runfiles_path)
  config = {k: str(v) for k, v in config.items()}
  self._observation_spec = ['RGB_INTERLEAVED', 'INSTR']
  self._env = deepmind_lab.Lab(
      level=level,
      observations=self._observation_spec,
      config=config,
      level_cache=level_cache,
  )
def __init__(self, level, config, seed, runfiles_path=None, level_cache=None):
  self._random_state = np.random.RandomState(seed=seed)
  if runfiles_path:
    deepmind_lab.set_runfiles_path(runfiles_path)
  config = {k: str(v) for k, v in config.items()}
  self._observation_spec = ['RGBD']
  self._env = deepmind_lab.Lab(
      level=level,
      observations=self._observation_spec,
      config=config,
      level_cache=level_cache,
  )
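# A usage sketch for the wrapper above. The enclosing class name DmLabEnv is
# hypothetical (these snippets show only the __init__ bodies); the
# reset-with-random-seed pattern mirrors the wrappers later in this section:
env = DmLabEnv(
    level='contributed/dmlab30/explore_goal_locations_small',
    config={'width': 84, 'height': 84, 'logLevel': 'WARN'},
    seed=42)
env._env.reset(seed=env._random_state.randint(0, 2**31 - 1))
obs = env._env.observations()['RGBD']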
def main():
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('--runfiles_path', type=str, default=None,
                      help='Set the runfiles path to find DeepMind Lab data')
  args = parser.parse_args()
  if args.runfiles_path:
    deepmind_lab.set_runfiles_path(args.runfiles_path)

  # Test for 1 minute of simulation time at fixed in-game frame rate.
  length = 3600
  fps = 60

  # Benchmark each of the following levels at the specified resolutions and
  # observation specs.
  for level_script in ['nav_maze_static_01', 'lt_space_bounce_hard']:
    for width, height in [(84, 84), (160, 120), (320, 240)]:
      for observation_spec in ['RGB', 'RGBD']:
        run(length, width, height, fps, level_script, observation_spec)
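# run() is called above but not defined in this snippet; its tail (the
# stepping and fps-print loop) appears later in this section. A minimal head,
# assuming the DiscretizedRandomAgent from DeepMind Lab's random_agent module
# and an imported time module:
def run(length, width, height, fps, level_script, observation_spec):
  """Benchmarks a level for `length` steps and prints the achieved fps."""
  env = deepmind_lab.Lab(
      level_script, [observation_spec],
      config={'fps': str(fps), 'width': str(width), 'height': str(height)})
  env.reset()
  agent = random_agent.DiscretizedRandomAgent()
  reward = 0
  t0 = time.time()
  for _ in range(length):
    if not env.is_running():
      env.reset()
    # ...the stepping and timing code continues as in the snippet below.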
    layouts.add(env.observations()[MAZE_LAYOUT_OBSERVATION])
  num_layouts = len(layouts)
  self.assertTrue(np.isclose(num_layouts, MAZE_LAYOUT_TRIALS))

  for i in six.moves.range(MAZE_LAYOUT_TRIALS):
    print('phase 2: trial {} out of {}'.format(i + 1, MAZE_LAYOUT_TRIALS))
    env = deepmind_lab.Lab(
        'tests/maze_generation_test', [MAZE_LAYOUT_OBSERVATION],
        config={
            'mixerSeed': '0',
        })
    env.reset(seed=i + 1)
    layouts.add(env.observations()[MAZE_LAYOUT_OBSERVATION])
  self.assertEqual(len(layouts), num_layouts)

  for i in six.moves.range(MAZE_LAYOUT_TRIALS):
    print('phase 3: trial {} out of {}'.format(i + 1, MAZE_LAYOUT_TRIALS))
    env = deepmind_lab.Lab(
        'tests/maze_generation_test', [MAZE_LAYOUT_OBSERVATION],
        config={
            'mixerSeed': '1',
        })
    env.reset(seed=i + 1)
    layouts.add(env.observations()[MAZE_LAYOUT_OBSERVATION])
  self.assertTrue(np.isclose(len(layouts) - num_layouts, MAZE_LAYOUT_TRIALS))


if __name__ == '__main__':
  if 'TEST_SRCDIR' in os.environ:
    deepmind_lab.set_runfiles_path(
        os.path.join(os.environ['TEST_SRCDIR'], 'org_deepmind_lab'))
  unittest.main()
def __init__(self, platform, args, action_set=DEFAULT_ACTION_SET,
             main_observation='RGB_INTERLEAVED', action_repeat=4,
             noise_type='', tv_num_images=30):
  """Creates a DMLabWrapper.

  Args:
    platform: Typically 'dmlab'.
    args: The environment settings.
    action_set: The set of discrete actions.
    main_observation: The observation returned at every time step.
    action_repeat: Maximum number of times to repeat an action.
      This can be less at the end of an episode.
    noise_type: If not empty, defines what type of noise to add to the
      observation. Possible values: image_action, image, noise_action, noise.
    tv_num_images: Number of distinct images to be used for TV purposes.
  """
  homepath = args.pop('homepath')
  level_name = args.pop('levelName')
  observation_format = args.pop('observationFormat')
  renderer = args.pop('renderer')
  seed = args.pop('seed')
  string_args = {key: str(value) for key, value in args.items()}
  if homepath:
    deepmind_lab.set_runfiles_path(os.path.join(homepath))
  self._env = deepmind_lab.Lab(level_name, observation_format, string_args,
                               renderer)
  self._random_state = np.random.RandomState(seed=seed)
  self._env.reset(seed=self._random_state.randint(0, 2**31 - 1))
  self._action_set = action_set
  self._action_repeat = action_repeat
  self.width = args['width']
  self.height = args['height']
  self._main_observation = main_observation
  self._transform_observation = lambda x: x
  if main_observation == 'DEBUG.CAMERA.PLAYER_VIEW_NO_RETICLE':
    # This observation format is (RGB, height, width).
    # Convert it to (height, width, RGB).
    self._transform_observation = lambda x: np.moveaxis(x, 0, -1)

  # Build a list of all the possible actions.
  self._action_list = []
  for action in action_set:
    self._action_list.append(np.array(action, dtype=np.intc))

  self._noise_type = noise_type
  self._images_for_noise = []
  if self._noise_type:
    if 'action' in self._noise_type:
      assert action_set in [DEFAULT_ACTION_SET_WITH_IDLE, DEFAULT_ACTION_SET]
    for image in range(1, tv_num_images + 1):
      image_path = '/cns/vz-d/home/raveman/images/%d.jpeg' % image
      tmp_path = os.path.join(tempfile.gettempdir(),
                              os.path.basename(image_path))
      # Assumes the image was copied to the local temp dir beforehand.
      image = cv2.imread(tmp_path, flags=cv2.IMREAD_COLOR)
      image = cv2.resize(image,
                         (int(self.width / 2), int(self.height / 2)),
                         interpolation=cv2.INTER_AREA)
      # imread returns BGR, not RGB.
      image = image[Ellipsis, ::-1]
      self._images_for_noise.append(image)
def __init__(self, platform, args, action_set=DEFAULT_ACTION_SET,
             main_observation='RGB_INTERLEAVED', action_repeat=4,
             noise_type='', tv_num_images=30, level_cache_dir=None,
             level_cache_tar=None, level_cache_mode=False,
             optimize_env_reset=False, debug_name=''):
  """Creates a DMLabWrapper.

  Args:
    platform: Typically 'dmlab'.
    args: The environment settings.
    action_set: The set of discrete actions.
    main_observation: The observation returned at every time step.
    action_repeat: Maximum number of times to repeat an action.
      This can be less at the end of an episode.
    noise_type: If not empty, defines what type of noise to add to the
      observation. Possible values: image_action, image, noise_action, noise.
    tv_num_images: Number of distinct images to be used for TV purposes.
    level_cache_dir: If set, caches levels in this directory.
    level_cache_tar: If set, serves cached levels from this tar archive.
    level_cache_mode: If True, only performs cache-warming resets, then
      exits the process.
    optimize_env_reset: If True, fewer cache-warming resets are performed.
    debug_name: Data split name ('train', 'valid' or 'test'), used to pick
      the number of cache-warming resets.
  """
  self._optimize_env_reset = optimize_env_reset
  homepath = args.pop('homepath')
  level_name = args.pop('levelName')
  observation_format = args.pop('observationFormat')
  renderer = args.pop('renderer')
  seed = args.pop('seed')
  string_args = {key: str(value) for key, value in args.items()}
  if homepath:
    deepmind_lab.set_runfiles_path(os.path.join(homepath))
  deepmind_lab_kwargs = {}
  if level_cache_tar is not None:
    deepmind_lab_kwargs['level_cache'] = LevelCacheTar(level_cache_tar)
  elif level_cache_dir is not None:
    deepmind_lab_kwargs['level_cache'] = LevelCache(level_cache_dir)
  self._env = deepmind_lab.Lab(level_name, observation_format, string_args,
                               renderer, **deepmind_lab_kwargs)
  self._random_state = np.random.RandomState(seed=seed)
  self._env.reset(seed=self._random_state.randint(0, 2**31 - 1))
  if level_cache_mode:
    # Pre-populate the level cache, then exit.
    if debug_name in ['train']:
      # 20M * 1.1 / 12 / 1800 * (1 or 2)
      num_resets = 1025 if optimize_env_reset else 2050
    elif debug_name in ['valid', 'test']:
      # 20M * 1.1 / 12 / 256 / 25 * (2 or 3)
      num_resets = 580 if optimize_env_reset else 865
    else:
      raise ValueError('Unexpected debug_name: %r' % debug_name)
    print('Level caching starts ({}). Process {}'.format(
        num_resets, os.getpid()))
    for _ in range(num_resets):
      self._env.reset(seed=self._random_state.randint(0, 2**31 - 1))
    print('Level caching done ({}). Exiting process {}'.format(
        num_resets, os.getpid()))
    import sys
    sys.exit(0)
  self._noise_random_state = np.random.RandomState(seed=seed + 345)
  self._action_set = action_set
  self._action_repeat = action_repeat
  self.width = args['width']
  self.height = args['height']
  self._main_observation = main_observation
  self._transform_observation = lambda x: x
  if main_observation == 'DEBUG.CAMERA.PLAYER_VIEW_NO_RETICLE':
    # This observation format is (RGB, height, width).
    # Convert it to (height, width, RGB).
    self._transform_observation = lambda x: np.moveaxis(x, 0, -1)

  # Build a list of all the possible actions.
  self._action_list = []
  for action in action_set:
    self._action_list.append(np.array(action, dtype=np.intc))

  self._noise_type = noise_type
  self._images_for_noise = []
  if self._noise_type:
    if 'action' in self._noise_type:
      assert action_set in [DEFAULT_ACTION_SET_WITH_IDLE, DEFAULT_ACTION_SET]
    if 'image' in self._noise_type:
      for image in range(1, tv_num_images + 1):
        image_path = 'tv_images/%d.jpeg' % image
        image = cv2.imread(image_path, flags=cv2.IMREAD_COLOR)
        image = cv2.resize(
            image, (int(self.width / 2), int(self.height / 2)),
            interpolation=cv2.INTER_AREA)
        # imread returns BGR, not RGB.
        image = image[Ellipsis, ::-1]
        self._images_for_noise.append(image)

  self._agent_position_history = None
  self._reward_history = None
  self._last_maze_layout = None
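# LevelCacheTar, used by the wrapper above, is not defined in this excerpt.
# A minimal read-only sketch, assuming DeepMind Lab's fetch(key, pn) cache
# protocol and a tar archive whose member names are the cache keys:
import tarfile


class LevelCacheTar(object):
  """Serves pre-built levels out of a tar archive (read-only)."""

  def __init__(self, tar_path):
    self._tar = tarfile.open(tar_path, 'r')
    self._names = set(self._tar.getnames())

  def fetch(self, key, pn):
    """Extracts the level stored under `key` to `pn`; True on a hit."""
    if key not in self._names:
      return False
    with self._tar.extractfile(key) as src, open(pn, 'wb') as dst:
      dst.write(src.read())
    return True

  def write(self, key, pn):
    # The archive is read-only; newly compiled levels are not stored.
    pass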
import os
import inspect

import deepmind_lab as dl
import deepmind_lab_gym as dlg
import multiprocdmlab as mpdmlab
import numpy as np

DEEPMIND_RUNFILES_PATH = os.path.dirname(inspect.getfile(dl))
DEEPMIND_SOURCE_PATH = os.path.abspath(DEEPMIND_RUNFILES_PATH + '/..' * 5)
dl.set_runfiles_path(DEEPMIND_RUNFILES_PATH)


def get_entity_layer_path(entity_layer_name):
  mode, size, num = entity_layer_name.split('-')
  path_format = '{}/assets/entityLayers/{}/{}/entityLayers/{}.entityLayer'
  return path_format.format(DEEPMIND_SOURCE_PATH, size, mode, num)


def get_game_environment(mapname='training-09x09-0127', mode='training',
                         multiproc=False, random_spawn=True, random_goal=True,
                         apple_prob=0.9, episode_length=5):
  mapstrings = ','.join(
      open(get_entity_layer_path(m)).read() for m in mapname.split(','))
def set_runfiles_path(path):
  """Module-level function to set the path of the DeepMind Lab DSOs."""
  deepmind_lab.set_runfiles_path(path)
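# Usage sketch: set the runfiles path before constructing any environment,
# as every snippet in this section does (the path below is hypothetical):
set_runfiles_path('/path/to/bazel-bin/package.runfiles/org_deepmind_lab')
env = deepmind_lab.Lab('tests/empty_room_test', ['RGB_INTERLEAVED'],
                       config={'width': '80', 'height': '80'})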
      'alpha': prior_alpha,
      'beta': prior_beta,
      'theta': prior_theta,
      'Sigma': prior_Sigma,
      'gibbs_max_iter': gibbs_max_iter,
      'gibbs_loglik_eps': gibbs_loglik_eps
  }
  bandit = MCMCBanditSampling(A, reward_function, reward_prior,
                              thompsonSampling)
  return bandit


if __name__ == "__main__":
  path = os.path.dirname(inspect.getfile(deepmind_lab))
  deepmind_lab.set_runfiles_path(path)
  coords, A = DRRN.action_segments()  # Rotation axes, number of arms.
  rewards = 0
  R = 100      # Number of realizations to run.
  t_max = 300  # Time-instants to run the bandit.
  width = 8
  height = 8
  d_context = width * height * 3  # Context dimension.
  K = 5        # Number of mixtures per arm of the bandit.
  prior_K = 5  # Assumed prior number of mixtures (per arm).
  pi = np.random.rand(A, K)
  pi = pi / pi.sum(axis=1, keepdims=True)  # Mixture proportions per arm.
  theta = np.random.randn(A, K, d_context)  # Thetas per arm and mixture.
  sigma = np.ones((A, K))  # Variances per arm and mixture.
    obs = env.observations()
    action = agent.step(reward, obs[observation_spec])
    reward = env.step(action, num_steps=1)

  t1 = time.time()
  duration = t1 - t0
  print('resolution: %i x %i, spec: %s, steps: %i, duration: %.1f, fps: %.1f'
        % (width, height, observation_spec, length, duration,
           length / duration))


if __name__ == '__main__':
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('--runfiles_path', type=str, default=None,
                      help='Set the runfiles path to find DeepMind Lab data')
  args = parser.parse_args()
  if args.runfiles_path:
    deepmind_lab.set_runfiles_path(args.runfiles_path)

  # Test for 1 minute of simulation time at fixed in-game frame rate.
  length = 3600
  fps = 60

  # Benchmark each of the following levels at the specified resolutions and
  # observation specs.
  for level_script in ['nav_maze_static_01', 'lt_space_bounce_hard']:
    for width, height in [(84, 84), (160, 120), (320, 240)]:
      for observation_spec in ['RGB', 'RGBD']:
        run(length, width, height, fps, level_script, observation_spec)
parser.add_argument('--level_script', type=str,
                    default='tests/empty_room_test',
                    help='The environment level script to load')
parser.add_argument('--base_path', type=str,
                    default='/om/user/prinster/lab/my_data/',
                    help='base_path')
parser.add_argument('--num_envs', type=int, default=16,
                    help='num environments to run in parallel')
parser.add_argument('--learning_rate', type=float, default=1e-3,
                    help='learning_rate')
parser.add_argument('--exp_name', type=str, default='test',
                    help='exp_name')
parser.add_argument('--slurm_array_index', type=int, default=0,
                    help='id provided by slurm for which experiment to run')
args = parser.parse_args()

if args.runfiles_path:
  deepmind_lab.set_runfiles_path(args.runfiles_path)
run(args.slurm_array_index, args.base_path)
def test_discretized_random_agent_run(self, length=100):
  env = deepmind_lab.Lab(
      'tests/demo_map', ['RGB_INTERLACED'],
      config={
          'fps': '60',
          'width': '80',
          'height': '80'
      })
  env.reset()
  agent = random_agent.DiscretizedRandomAgent()
  reward = 0
  for _ in range(length):
    if not env.is_running():
      print('Environment stopped early')
      env.reset()
    obs = env.observations()
    action = agent.step(reward, obs['RGB_INTERLACED'])
    reward = env.step(action, 1)
    self.assertIsInstance(reward, float)


if __name__ == '__main__':
  if os.environ.get('TEST_SRCDIR'):
    deepmind_lab.set_runfiles_path(
        os.path.join(os.environ['TEST_SRCDIR'], 'org_deepmind_lab'))
  unittest.main()