示例#1
0
    def __init__(self, level, config, num_action_repeats, seed,
                 runfiles_path=None, level_cache=None):
        """Create the wrapped DeepMind Lab environment.

        Args:
            level: Level name handed to ``deepmind_lab.Lab``.
            config: Mapping of Lab settings. Every value is converted with
                ``str`` before use; the keys ``'renderer'`` and
                ``'benchmark_mode'`` must be present.
            num_action_repeats: Stored as ``_num_action_repeats``.
            seed: Seed for the private ``np.random.RandomState``.
            runfiles_path: If truthy, forwarded to
                ``deepmind_lab.set_runfiles_path`` before the env is built.
            level_cache: Forwarded unchanged to ``deepmind_lab.Lab``.

        Raises:
            KeyError: If ``config`` lacks ``'renderer'`` or
                ``'benchmark_mode'``.
        """
        self._num_action_repeats = num_action_repeats
        self._random_state = np.random.RandomState(seed=seed)
        if runfiles_path:
            deepmind_lab.set_runfiles_path(runfiles_path)

        # Stringify every setting before it is handed to Lab.
        lab_settings = dict(
            (name, str(setting)) for name, setting in config.items())
        self._observation_spec = ['RGB_INTERLEAVED', 'INSTR']

        chosen_renderer = lab_settings['renderer']

        # NOTE(review): the flag is read *after* stringification, so any
        # non-empty value (including 'False') is truthy below -- confirm
        # that this is intended.
        self.benchmark_mode = lab_settings['benchmark_mode']
        if self.benchmark_mode:
            print('BENCHMARK MODE IS ON! USE THIS ONLY FOR TESTING AND THROUGHPUT MEASUREMENT!')

        self._env = deepmind_lab.Lab(
            level=level,
            observations=self._observation_spec,
            config=lab_settings,
            level_cache=level_cache,
            renderer=chosen_renderer,
        )
示例#2
0
 def __init__(
     self, level, mode, render_size=(64, 64), action_repeat=4,
     action_set=ACTION_SET_DEFAULT, level_cache=None, seed=None,
     runfiles_path=None):
   """Set up a DMLab-30 level and the wrapper's bookkeeping state.

   Args:
     level: Level name; 'contributed/dmlab30/' is prepended to it.
     mode: Either 'train' or 'test'. In 'test' mode the config additionally
       sets 'allowHoldOutLevels' and a fixed 'mixerSeed'.
     render_size: Indexable pair; element 0 becomes 'width', element 1
       becomes 'height'.
     action_repeat: Stored as `_action_repeat`.
     action_set: Stored as `_action_set`.
     level_cache: Forwarded to `deepmind_lab.Lab`.
     seed: Seed for the private `np.random.RandomState`.
     runfiles_path: If truthy, announced on stdout and passed to
       `deepmind_lab.set_runfiles_path`.
   """
   assert mode in ('train', 'test')
   import deepmind_lab
   if runfiles_path:
     print('Setting DMLab runfiles path:', runfiles_path)
     deepmind_lab.set_runfiles_path(runfiles_path)

   # Settings are kept with their native types; they are stringified only
   # for the Lab constructor call below.
   self._config = {
       'width': render_size[0],
       'height': render_size[1],
       'logLevel': 'WARN',
   }
   if mode == 'test':
     self._config.update(allowHoldOutLevels='true', mixerSeed=0x600D5EED)

   self._action_repeat = action_repeat
   self._random = np.random.RandomState(seed)

   level_path = 'contributed/dmlab30/'+level
   string_config = dict(
       (name, str(setting)) for name, setting in self._config.items())
   self._env = deepmind_lab.Lab(
       level=level_path,
       observations=['RGB_INTERLEAVED'],
       config=string_config,
       level_cache=level_cache)

   self._action_set = action_set
   # No frame has been observed yet and no episode is in progress.
   self._last_image = None
   self._done = True
示例#3
0
  def __init__(self, game, num_action_repeats, seed, is_test, config,
               action_set=DEFAULT_ACTION_SET, level_cache_dir=None):
    """Create the Lab environment and the gym action/observation spaces.

    Args:
      game: Level name. If it is listed in `games.ALL_GAMES` it is prefixed
        with 'contributed/dmlab30/'.
      num_action_repeats: Stored as `_num_action_repeats`.
      seed: Seed for the private `np.random.RandomState`.
      is_test: If truthy, hold-out levels are allowed and a fixed mixer seed
        is written into `config`.
      config: Settings mapping. It is modified in place ('datasetPath' is
        always set; 'allowHoldOutLevels' and 'mixerSeed' when `is_test`).
        Must contain 'height' and 'width', which define the observation shape.
      action_set: Discrete action set; its length sizes `action_space`.
      level_cache_dir: If truthy, wrapped in `LevelCache` and given to Lab.
    """
    if is_test:
      # Mixer seed for evaluation, see
      # https://github.com/deepmind/lab/blob/master/docs/users/python_api.md
      config['allowHoldOutLevels'] = 'true'
      config['mixerSeed'] = 0x600D5EED

    level_name = (
        'contributed/dmlab30/' + game if game in games.ALL_GAMES else game)

    config['datasetPath'] = FLAGS.dataset_path

    self._num_action_repeats = num_action_repeats
    self._random_state = np.random.RandomState(seed=seed)

    homepath = FLAGS.homepath
    if homepath:
      deepmind_lab.set_runfiles_path(homepath)

    cache = LevelCache(level_cache_dir) if level_cache_dir else None
    string_config = dict((key, str(value)) for key, value in config.items())
    self._env = deepmind_lab.Lab(
        level=level_name,
        observations=['RGB_INTERLEAVED'],
        level_cache=cache,
        config=string_config,
    )

    self._action_set = action_set
    self.action_space = gym.spaces.Discrete(len(self._action_set))
    # The shape uses the un-stringified height/width from `config`.
    frame_shape = (config['height'], config['width'], 3)
    self.observation_space = gym.spaces.Box(
        low=0, high=255, shape=frame_shape, dtype=np.uint8)
    def __init__(self,
                 level,
                 config,
                 num_action_repeats,
                 seed,
                 runfiles_path=None,
                 level_cache=None):
        """Create the wrapped DeepMind Lab environment.

        Args:
            level: Level name handed to ``deepmind_lab.Lab``.
            config: Mapping of Lab settings. ``'width'`` and ``'height'`` are
                remembered (``None`` when absent); every value is converted
                with ``str`` before being passed to Lab.
            num_action_repeats: Stored as ``_num_action_repeats``.
            seed: Seed for the private ``np.random.RandomState``.
            runfiles_path: If truthy, forwarded to
                ``deepmind_lab.set_runfiles_path`` before the env is built.
            level_cache: Forwarded unchanged to ``deepmind_lab.Lab``.
        """
        # Define initial attributes
        self._num_action_repeats = num_action_repeats
        self._random_state = np.random.RandomState(seed=seed)
        self._width = config.get('width')
        self._height = config.get('height')

        # Determine observation space: the depth flag selects RGBD over RGB.
        if FLAGS.depth:
            self._observation_spec = ['RGBD_INTERLEAVED']
        else:
            self._observation_spec = ['RGB_INTERLEAVED']

        # Configure deepmind environment
        if runfiles_path:
            deepmind_lab.set_runfiles_path(runfiles_path)
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        config = {k: str(v) for k, v in config.items()}
        self._env = deepmind_lab.Lab(
            level=level,
            observations=self._observation_spec,
            config=config,
            level_cache=level_cache,
        )
示例#5
0
    def __init__(self,
                 screen_width=300,
                 screen_height=300,
                 runfiles_path='',
                 state_processor=None,
                 level_script='tests/empty_room_test',
                 frame_skip=1,
                 seed=None,
                 level_directory=''):
        """Record the settings and open a DeepMind Lab environment.

        Args:
            screen_width: Observation width; stored as ``width``.
            screen_height: Observation height; stored as ``height``.
            runfiles_path: If truthy, passed to
                ``deepmind_lab.set_runfiles_path`` before the env is built.
            state_processor: Stored as ``processor``.
            level_script: Level script name passed to ``deepmind_lab.Lab``.
            frame_skip: Stored as ``frame_skip``.
            seed: Stored as ``seed``; not otherwise used by this constructor.
            level_directory: Passed to Lab as the 'levelDirectory' setting.
        """
        # Rendering and level settings.
        self.width, self.height = screen_width, screen_height
        self.runfiles_path = runfiles_path
        self.level_script = level_script
        # Stepping and bookkeeping state.
        self.frame_skip = frame_skip
        self.seed = seed
        self.level_directory = level_directory
        self.processor = state_processor
        self.current_state = None
        self.mode = 'RGB_INTERLEAVED'

        # All settings are handed to Lab as strings.
        raw_settings = (
            ('width', screen_width),
            ('height', screen_height),
            ('levelDirectory', level_directory),
        )
        lab_config = {name: str(setting) for name, setting in raw_settings}

        if self.runfiles_path:
            deepmind_lab.set_runfiles_path(self.runfiles_path)
        self.env = deepmind_lab.Lab(level_script, [self.mode],
                                    config=lab_config)
        action_spec = self.env.action_spec()
        self.number_of_actions = len(action_spec)
示例#6
0
def is_terminal(state):
    # Currently, is terminal if and only if you receive a reward
    # NOTE(review): no statement follows this comment in the file, so the
    # definition has no body as written -- the implementation is still TODO.


def actor_learner_thread(child_conn, level_script, config):
    """Sketch of an actor-learner worker loop (unfinished pseudocode).

    Builds a Lab environment, then repeatedly receives the current
    parameters over `child_conn`, steps the environment, accumulates
    gradients and sends them back.

    NOTE(review): this is not runnable as written -- `T`, `T_max`, `t_max`,
    `gamma`, `env_step` and `valNetwork` are not defined in this function,
    and the two gradient-accumulation statements have no right-hand side.
    """
    env = deepmind_lab.Lab(level_script, ['RGB_INTERLEAVED'], config=config)
    t = 1  # Local step counter.
    while T < T_max:
        d_theta, d_theta_v  = 0, 0  # Reset the accumulated gradients.
        theta, theta_v = child_conn.recv() # Get most current version
        t_start = t
        state = env.observations()['RGB_INTERLEAVED']
        # NOTE(review): `state` is never updated inside this loop, and the
        # `or t-t_start == t_max` term keeps looping rather than stopping
        # after t_max steps -- confirm the intended condition.
        while not is_terminal(state) or t-t_start == t_max:
            reward, next_state = env_step()
            t += 1
            T += 1
        R = 0 if is_terminal(state) else valNetwork(state) #Bootstrap?
        # Walk the collected steps backwards, discounting the return.
        for i in range(t-1, t_start, -1):
            R = reward[i] + gamma*R #accumulate these
            # Accumulate gradients
            # NOTE(review): the right-hand sides below are missing.
            d_theta += 
            d_theta_v += 
        child_conn.send(d_theta, d_theta_v)



def run(width, height, level_script, frame_count):
  """Build the string-valued size config for a random-agent run.

  Only the config construction is present in this definition;
  `level_script` and `frame_count` are accepted but not read here, and
  nothing is returned.
  """
  dimensions = (('width', width), ('height', height))
  config = {name: str(value) for name, value in dimensions}




if __name__ == '__main__':
  # Command-line entry point: parse the run options, optionally point
  # DeepMind Lab at its runfiles, then hand everything to run().
  parser = argparse.ArgumentParser(description=__doc__)
  for flag, kind, default, text in (
      ('--frame_count', int, 10000, 'Number of steps to run the agent'),
      ('--width', int, 80, 'Horizontal size of the observations'),
      ('--height', int, 80, 'Vertical size of the observations'),
      ('--runfiles_path', str, None,
       'Set the runfiles path to find DeepMind Lab data'),
      ('--level_script', str, 'tests/empty_room_test',
       'The environment level script to load'),
  ):
    parser.add_argument(flag, type=kind, default=default, help=text)

  args = parser.parse_args()
  if args.runfiles_path:
    deepmind_lab.set_runfiles_path(args.runfiles_path)
  run(args.width, args.height, args.level_script, args.frame_count)
示例#7
0
 def __init__(self, level, config, num_action_repeats, seed,
              runfiles_path=None, level_cache=None):
   """Create the wrapped DeepMind Lab environment.

   Args:
     level: Level name handed to `deepmind_lab.Lab`.
     config: Mapping of Lab settings; every value is converted with `str`
       before being passed on.
     num_action_repeats: Stored as `_num_action_repeats`.
     seed: Seed for the private `np.random.RandomState`.
     runfiles_path: If truthy, forwarded to
       `deepmind_lab.set_runfiles_path` before the env is built.
     level_cache: Forwarded unchanged to `deepmind_lab.Lab`.
   """
   self._num_action_repeats = num_action_repeats
   self._random_state = np.random.RandomState(seed=seed)
   if runfiles_path:
     deepmind_lab.set_runfiles_path(runfiles_path)
   # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
   config = {k: str(v) for k, v in config.items()}
   self._observation_spec = ['RGB_INTERLEAVED', 'INSTR']
   self._env = deepmind_lab.Lab(
       level=level,
       observations=self._observation_spec,
       config=config,
       level_cache=level_cache,
   )
示例#8
0
    def __init__(self, level, config, seed,
                 runfiles_path=None, level_cache=None):
        """Create the wrapped DeepMind Lab environment with 'RGBD' frames.

        Args:
            level: Level name handed to ``deepmind_lab.Lab``.
            config: Mapping of Lab settings; every value is converted with
                ``str`` before being passed on.
            seed: Seed for the private ``np.random.RandomState``.
            runfiles_path: If truthy, forwarded to
                ``deepmind_lab.set_runfiles_path`` before the env is built.
            level_cache: Forwarded unchanged to ``deepmind_lab.Lab``.
        """
        self._random_state = np.random.RandomState(seed=seed)
        if runfiles_path:
            deepmind_lab.set_runfiles_path(runfiles_path)

        # Stringify every setting before it is handed to Lab.
        lab_settings = {}
        for name, setting in config.items():
            lab_settings[name] = str(setting)

        self._observation_spec = ['RGBD']
        self._env = deepmind_lab.Lab(
            level=level,
            observations=self._observation_spec,
            config=lab_settings,
            level_cache=level_cache,
        )
示例#9
0
def main():
    """Parse the command line and run the benchmark over a fixed grid.

    The grid is every combination of two levels, three resolutions and two
    observation specs; each combination is passed to ``run``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--runfiles_path', type=str, default=None,
        help='Set the runfiles path to find DeepMind Lab data')
    options = parser.parse_args()

    runfiles = options.runfiles_path
    if runfiles:
        deepmind_lab.set_runfiles_path(runfiles)

    # Test for 1 minute of simulation time at fixed in-game frame rate.
    fps = 60
    length = 3600

    # Benchmark at each of the following levels at the specified resolutions
    # and observation specs.
    levels = ['nav_maze_static_01', 'lt_space_bounce_hard']
    resolutions = [(84, 84), (160, 120), (320, 240)]
    specs = ['RGB', 'RGBD']
    for level_script in levels:
        for width, height in resolutions:
            for observation_spec in specs:
                run(length, width, height, fps, level_script,
                    observation_spec)
示例#10
0
      layouts.add(env.observations()[MAZE_LAYOUT_OBSERVATION])
    num_layouts = len(layouts)
    self.assertTrue(np.isclose(num_layouts, MAZE_LAYOUT_TRIALS))
    for i in six.moves.range(MAZE_LAYOUT_TRIALS):
      print('phase 2: trial {} out of {}'.format(i+1, MAZE_LAYOUT_TRIALS))
      env = deepmind_lab.Lab(
          'tests/maze_generation_test', [MAZE_LAYOUT_OBSERVATION],
          config={
              'mixerSeed': '0',
          })
      env.reset(seed=i+1)
      layouts.add(env.observations()[MAZE_LAYOUT_OBSERVATION])
    self.assertEqual(len(layouts), num_layouts)
    for i in six.moves.range(MAZE_LAYOUT_TRIALS):
      print('phase 3: trial {} out of {}'.format(i+1, MAZE_LAYOUT_TRIALS))
      env = deepmind_lab.Lab(
          'tests/maze_generation_test', [MAZE_LAYOUT_OBSERVATION],
          config={
              'mixerSeed': '1',
          })
      env.reset(seed=i+1)
      layouts.add(env.observations()[MAZE_LAYOUT_OBSERVATION])
    self.assertTrue(np.isclose(len(layouts) - num_layouts, MAZE_LAYOUT_TRIALS))

if __name__ == '__main__':
  # When TEST_SRCDIR is set, use its 'org_deepmind_lab' subdirectory as the
  # Lab runfiles path before running the tests.
  test_srcdir = os.environ.get('TEST_SRCDIR')
  if test_srcdir is not None:
    runfiles = os.path.join(test_srcdir, 'org_deepmind_lab')
    deepmind_lab.set_runfiles_path(runfiles)
  unittest.main()
示例#11
0
    def __init__(self,
                 platform,
                 args,
                 action_set=DEFAULT_ACTION_SET,
                 main_observation='RGB_INTERLEAVED',
                 action_repeat=4,
                 noise_type='',
                 tv_num_images=30):
        """Build a DMLabWrapper around a freshly reset Lab environment.

        Args:
          platform: Typically 'dmlab'; not read by this constructor.
          args: The environment settings. The keys 'homepath', 'levelName',
            'observationFormat', 'renderer' and 'seed' are popped from it
            (the caller's dict is modified); the remaining entries are
            stringified and passed to Lab. 'width' and 'height' must be
            among the remaining entries.
          action_set: The set of discrete actions.
          main_observation: The observation returned at every time step.
          action_repeat: Maximum number of times to repeat an action.
            This can be less at the end of an episode.
          noise_type: If not empty, defines what type of noise to add to the
            observation. Possible values: image_action, image, noise_action,
            noise.
          tv_num_images: Number of distinct images to be used for TV purposes.
        """
        homepath, level_name, observation_format, renderer, seed = [
            args.pop(key)
            for key in ('homepath', 'levelName', 'observationFormat',
                        'renderer', 'seed')
        ]
        lab_settings = dict(
            (name, str(setting)) for name, setting in args.items())
        if homepath:
            deepmind_lab.set_runfiles_path(os.path.join(homepath))
        self._env = deepmind_lab.Lab(level_name, observation_format,
                                     lab_settings, renderer)

        # Seed the first episode from the wrapper's own random stream.
        self._random_state = np.random.RandomState(seed=seed)
        first_seed = self._random_state.randint(0, 2**31 - 1)
        self._env.reset(seed=first_seed)

        self._action_set = action_set
        self._action_repeat = action_repeat
        self.width = args['width']
        self.height = args['height']

        self._main_observation = main_observation
        if main_observation == 'DEBUG.CAMERA.PLAYER_VIEW_NO_RETICLE':
            # This observation format is (RGB, height, width).
            # Convert it to (height, width, RGB).
            self._transform_observation = lambda x: np.moveaxis(x, 0, -1)
        else:
            self._transform_observation = lambda x: x

        # Build a list of all the possible actions.
        self._action_list = [
            np.array(action, dtype=np.intc) for action in action_set
        ]

        self._noise_type = noise_type
        self._images_for_noise = []
        if not self._noise_type:
            return

        if 'action' in self._noise_type:
            assert action_set in [
                DEFAULT_ACTION_SET_WITH_IDLE, DEFAULT_ACTION_SET
            ]
        # Each image is looked up by base name in the system temp directory.
        for index in range(1, tv_num_images + 1):
            image_path = '/cns/vz-d/home/raveman/images/%d.jpeg' % index
            tmp_path = os.path.join(tempfile.gettempdir(),
                                    os.path.basename(image_path))
            bgr = cv2.imread(tmp_path, flags=cv2.IMREAD_COLOR)
            half_size = (int(self.width / 2), int(self.height / 2))
            shrunk = cv2.resize(bgr, half_size, interpolation=cv2.INTER_AREA)
            # imread returns BGR not RGB
            self._images_for_noise.append(shrunk[Ellipsis, ::-1])
示例#12
0
    def __init__(self,
                 platform,
                 args,
                 action_set=DEFAULT_ACTION_SET,
                 main_observation='RGB_INTERLEAVED',
                 action_repeat=4,
                 noise_type='',
                 tv_num_images=30,
                 level_cache_dir=None,
                 level_cache_tar=None,
                 level_cache_mode=False,
                 optimize_env_reset=False,
                 debug_name=''):
        """Creates a DMLabWrapper.

        Args:
          platform: Typically 'dmlab'. Not read by this constructor.
          args: The environment settings. The keys 'homepath', 'levelName',
            'observationFormat', 'renderer' and 'seed' are popped from it
            (the caller's dict is modified); the remaining entries are
            stringified and passed to Lab. 'width' and 'height' must be
            among the remaining entries.
          action_set: The set of discrete actions.
          main_observation: The observation returned at every time step.
          action_repeat: Maximum number of times to repeat an action.
            This can be less at the end of an episode.
          noise_type: if not empty defines what type of noise to add to the
            observation. Possible values: image_action, image, noise_action,
            noise.
          tv_num_images: number of distinct images to be used for TV purposes.
          level_cache_dir: If not None (and level_cache_tar is None), wrapped
            in LevelCache and passed to Lab as its level cache.
          level_cache_tar: If not None, wrapped in LevelCacheTar and passed to
            Lab as its level cache; takes precedence over level_cache_dir.
          level_cache_mode: If truthy, the constructor only resets the
            environment a fixed number of times and then exits the process
            with status 0.
          optimize_env_reset: Stored as _optimize_env_reset; also selects the
            smaller reset count in level_cache_mode.
          debug_name: In level_cache_mode must be 'train', 'valid' or 'test';
            any other value fails an assertion.
        """
        self._optimize_env_reset = optimize_env_reset
        homepath = args.pop('homepath')
        level_name = args.pop('levelName')
        observation_format = args.pop('observationFormat')
        renderer = args.pop('renderer')
        seed = args.pop('seed')
        string_args = {key: str(value) for key, value in args.items()}
        if homepath:
            deepmind_lab.set_runfiles_path(os.path.join(homepath, ))

        # Optional level cache; the tar-backed cache wins when both are given.
        deepmind_lab_kwargs = {}
        if level_cache_tar is not None:
            deepmind_lab_kwargs['level_cache'] = LevelCacheTar(level_cache_tar)
        elif level_cache_dir is not None:
            deepmind_lab_kwargs['level_cache'] = LevelCache(level_cache_dir)
        self._env = deepmind_lab.Lab(level_name, observation_format,
                                     string_args, renderer,
                                     **deepmind_lab_kwargs)

        self._random_state = np.random.RandomState(seed=seed)
        self._env.reset(seed=self._random_state.randint(0, 2**31 - 1))
        if level_cache_mode:
            # Reset repeatedly with successive seeds from the random stream,
            # then terminate the process.
            # NOTE(review): presumably this pre-populates the level cache for
            # the seeds a later run will draw -- confirm.
            if debug_name in ['train']:
                # 20M * 1.1 / 12 / 1800 * (1 or 2)
                num_resets = (1025 if optimize_env_reset else 2050)
            elif debug_name in ['valid', 'test']:
                # 20M * 1.1 / 12 / 256 / 25 * (2 or 3)
                num_resets = (580 if optimize_env_reset else 865)
            else:
                assert False
            print('Level caching starts ({}). Process {}'.format(
                num_resets, os.getpid()))
            for _ in range(num_resets):
                self._env.reset(seed=self._random_state.randint(0, 2**31 - 1))
            print('Level caching done ({}). Exiting process {}'.format(
                num_resets, os.getpid()))
            import sys
            sys.exit(0)

        # Separate random stream for observation noise, seeded at a fixed
        # offset from the main seed.
        # NOTE(review): `seed + 345` requires a numeric seed; a None seed
        # would raise TypeError here.
        self._noise_random_state = np.random.RandomState(seed=seed + 345)

        self._action_set = action_set
        self._action_repeat = action_repeat
        self.width = args['width']
        self.height = args['height']

        self._main_observation = main_observation
        self._transform_observation = lambda x: x
        if main_observation == 'DEBUG.CAMERA.PLAYER_VIEW_NO_RETICLE':
            # This observation format is (RGB, height, width).
            # Convert it to (height, width, RGB).
            self._transform_observation = lambda x: np.moveaxis(x, 0, -1)

        # Build a list of all the possible actions.
        self._action_list = []
        for action in action_set:
            self._action_list.append(np.array(action, dtype=np.intc))

        self._noise_type = noise_type
        self._images_for_noise = []
        if self._noise_type:
            if 'action' in self._noise_type:
                assert action_set in [
                    DEFAULT_ACTION_SET_WITH_IDLE, DEFAULT_ACTION_SET
                ]
            if 'image' in self._noise_type:
                # Images are read from the relative 'tv_images/' directory and
                # resized to half the configured width and height.
                for image in range(1, tv_num_images + 1):
                    image_path = 'tv_images/%d.jpeg' % image
                    image = cv2.imread(image_path, flags=cv2.IMREAD_COLOR)
                    image = cv2.resize(
                        image, (int(self.width / 2), int(self.height / 2)),
                        interpolation=cv2.INTER_AREA)
                    # imread returns BGR not RGB
                    image = image[Ellipsis, ::-1]
                    self._images_for_noise.append(image)

        # Per-episode tracking state; nothing has been recorded yet.
        self._agent_position_history = None
        self._reward_history = None
        self._last_maze_layout = None
示例#13
0
import os
import inspect
import deepmind_lab as dl
import deepmind_lab_gym as dlg
import multiprocdmlab as mpdmlab
import numpy as np

# Directory containing the imported deepmind_lab module; used as the Lab
# runfiles path.
DEEPMIND_RUNFILES_PATH = os.path.dirname(inspect.getfile(dl))
# Five directory levels above the runfiles path.
# NOTE(review): presumably the root of the DeepMind Lab source checkout,
# where the 'assets/entityLayers' tree lives -- confirm.
DEEPMIND_SOURCE_PATH = os.path.abspath(DEEPMIND_RUNFILES_PATH + '/..' * 5)
# Import-time side effect: registers the runfiles path with deepmind_lab.
dl.set_runfiles_path(DEEPMIND_RUNFILES_PATH)


def get_entity_layer_path(entity_layer_name):
    """Return the ``.entityLayer`` file path for a ``mode-size-num`` name.

    The name is split on '-' into exactly three parts, which are placed
    under ``DEEPMIND_SOURCE_PATH`` as
    ``assets/entityLayers/<size>/<mode>/entityLayers/<num>.entityLayer``.

    Raises:
        ValueError: If the name does not split into exactly three parts.
    """
    mode, size, num = entity_layer_name.split('-')
    return '{}/assets/entityLayers/{}/{}/entityLayers/{}.entityLayer'.format(
        DEEPMIND_SOURCE_PATH, size, mode, num)


def get_game_environment(mapname='training-09x09-0127',
                         mode='training',
                         multiproc=False,
                         random_spawn=True,
                         random_goal=True,
                         apple_prob=0.9,
                         episode_length=5):
    """Load the entity-layer text for each comma-separated map name.

    Only the map-loading step is present in this definition: the other
    parameters are accepted but not read here, and nothing is returned.

    Args:
        mapname: One or more entity-layer names separated by ','; each is
            resolved to a file with ``get_entity_layer_path``.
    """
    layer_texts = []
    for m in mapname.split(','):
        # Close each file as soon as it has been read instead of leaving
        # the handle for the garbage collector.
        with open(get_entity_layer_path(m)) as layer_file:
            layer_texts.append(layer_file.read())
    mapstrings = ','.join(layer_texts)
示例#14
0
def set_runfiles_path(path):
    """Module-level function to set the path of the DeepMind Lab DSOs.

    Thin wrapper: forwards `path` unchanged to
    `deepmind_lab.set_runfiles_path` and returns nothing.
    """
    deepmind_lab.set_runfiles_path(path)
示例#15
0
        'alpha': prior_alpha,
        'beta': prior_beta,
        'theta': prior_theta,
        'Sigma': prior_Sigma,
        'gibbs_max_iter': gibbs_max_iter,
        'gibbs_loglik_eps': gibbs_loglik_eps
    }

    bandit = MCMCBanditSampling(A, reward_function, reward_prior,
                                thompsonSampling)
    return bandit


if __name__ == "__main__":
    # Use the directory containing the imported deepmind_lab module as the
    # Lab runfiles path.
    path = os.path.dirname(inspect.getfile(deepmind_lab))
    deepmind_lab.set_runfiles_path(path)

    coords, A = DRRN.action_segments()  # Rotation Axes, Number of Arms
    rewards = 0
    R = 100  # Number of realizations to run
    t_max = 300  # Time-instants to run the bandit
    width = 8
    height = 8
    # NOTE(review): the factor 3 is presumably the number of colour
    # channels -- confirm.
    d_context = width * height * 3  # Context dimension

    K = 5  # Number of mixtures per arm of the bandit
    prior_K = 5  # Assumed prior number of mixtures (per arm)
    # Random (A, K) weights, then normalised so each arm's row sums to 1.
    pi = np.random.rand(A, K)
    pi = pi / pi.sum(axis=1, keepdims=True)  # Mixture proportions per arm
    theta = np.random.randn(A, K, d_context)  # Thetas per arm and mixtures
    sigma = np.ones((A, K))  # Variances per arm and mixtures
示例#16
0
    obs = env.observations()
    action = agent.step(reward, obs[observation_spec])
    reward = env.step(action, num_steps=1)

  t1 = time.time()
  duration = t1 - t0

  print('resolution: %i x %i, spec: %s, steps: %i, duration: %.1f, fps: %.1f' %
        (width, height, observation_spec, length, duration, length / duration))


if __name__ == '__main__':
  # Command-line entry point: optionally point DeepMind Lab at its runfiles,
  # then benchmark every level / resolution / observation-spec combination.
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument(
      '--runfiles_path',
      type=str,
      default=None,
      help='Set the runfiles path to find DeepMind Lab data')
  args = parser.parse_args()

  if args.runfiles_path:
    deepmind_lab.set_runfiles_path(args.runfiles_path)

  # Test for 1 minute of simulation time at fixed in-game frame rate.
  fps = 60
  length = 3600

  # Benchmark at each of the following levels at the specified resolutions and
  # observation specs.
  benchmark_levels = ['nav_maze_static_01', 'lt_space_bounce_hard']
  benchmark_resolutions = [(84, 84), (160, 120), (320, 240)]
  benchmark_specs = ['RGB', 'RGBD']
  for level_script in benchmark_levels:
    for width, height in benchmark_resolutions:
      for observation_spec in benchmark_specs:
        run(length, width, height, fps, level_script, observation_spec)
示例#17
0
    parser.add_argument('--level_script',
                        type=str,
                        default='tests/empty_room_test',
                        help='The environment level script to load')
    parser.add_argument('--base_path',
                        type=str,
                        default='/om/user/prinster/lab/my_data/',
                        help='base_path')
    parser.add_argument('--num_envs',
                        type=str,
                        default=16,
                        help='num environments to run in parallel')
    parser.add_argument('--learning_rate',
                        type=str,
                        default=1e-3,
                        help='learning_rate')
    parser.add_argument('--exp_name',
                        type=str,
                        default='test',
                        help='exp_name')
    parser.add_argument(
        '--slurm_array_index',
        type=int,
        default=0,
        help='id provided by slurm for which experiment to run')

    args = parser.parse_args()
    if args.runfiles_path:
        deepmind_lab.set_runfiles_path(args.runfiles_path)
    run(args.slurm_array_index, args.base_path)
示例#18
0
  def test_discretized_random_agent_run(self, length=100):
    """Step a DiscretizedRandomAgent through 'tests/demo_map'.

    Runs `length` single-frame steps, resetting the environment whenever it
    has stopped, and checks that every step returns a float reward.

    Args:
      length: Number of environment steps to take.
    """
    env = deepmind_lab.Lab(
        'tests/demo_map', ['RGB_INTERLACED'],
        config={
            'fps': '60',
            'width': '80',
            'height': '80'
        })

    env.reset()
    agent = random_agent.DiscretizedRandomAgent()

    reward = 0

    # Built-in range() works on Python 2 and 3; xrange() is Python 2 only.
    for _ in range(length):
      if not env.is_running():
        print('Environment stopped early')
        env.reset()
      obs = env.observations()
      action = agent.step(reward, obs['RGB_INTERLACED'])
      reward = env.step(action, 1)
      self.assertIsInstance(reward, float)


if __name__ == '__main__':
  # When TEST_SRCDIR is set to a non-empty value, use its 'org_deepmind_lab'
  # subdirectory as the Lab runfiles path before running the tests.
  srcdir = os.environ.get('TEST_SRCDIR')
  if srcdir:
    runfiles = os.path.join(srcdir, 'org_deepmind_lab')
    deepmind_lab.set_runfiles_path(runfiles)
  unittest.main()