def add_new_level(self, level, seed, key, pk3_path):
    with self.locks[level]:
        num_used_seeds = self.num_seeds_used_in_current_run[level].value
        if num_used_seeds < len(self.available_seeds.get(level, [])):
            log.warning('We should only add new levels to cache if we ran out of pre-generated levels (seeds)')
            log.warning(
                'Num used seeds: %d, available seeds: %d, level: %s, seed %r, key %r',
                num_used_seeds, len(self.available_seeds.get(level, [])), level, seed, key,
            )
            # some DMLab-30 environments, e.g. language_select_located_object, may require different levels even
            # for the same seed. This is most likely a bug in DeepMind Lab, because the same seed should generate
            # identical environments

        path = os.path.join(self.cache_dir, key)
        if not os.path.isfile(path):
            # copy the cached file DeepMind Lab has written to the cache directory
            shutil.copyfile(pk3_path, path)

        # add the new map to the list of available seeds for this level,
        # so it can be used next time we run the experiment
        lvl_seeds_filename = join(self.cache_dir, level_to_filename(level))
        safe_ensure_dir_exists(os.path.dirname(lvl_seeds_filename))
        with open(lvl_seeds_filename, 'a') as fobj:
            fobj.write(f'{seed} {key}\n')

def _game_init(self, with_locking=True, max_parallel=10):
    lock_file = lock = None
    if with_locking:
        lock_file = doom_lock_file(max_parallel)
        lock = FileLock(lock_file)

    init_attempt = 0
    while True:
        init_attempt += 1
        try:
            if with_locking:
                with lock.acquire(timeout=20):
                    self.game.init()
            else:
                self.game.init()

            break
        except Timeout:
            if with_locking:
                log.debug(
                    'Another process currently holds the lock %s, attempt: %d',
                    lock_file, init_attempt,
                )
        except Exception as exc:
            log.warning('VizDoom game.init() threw an exception %r. Terminate process...', exc)
            from seed_rl.envs.env_utils import EnvCriticalError
            raise EnvCriticalError()

def __init__(self, level, action_repeat, res_w, res_h, benchmark_mode, renderer, extra_cfg=None):
    self._width = res_w
    self._height = res_h
    self._main_observation = 'DEBUG.CAMERA_INTERLEAVED.PLAYER_VIEW_NO_RETICLE'
    self._action_repeat = action_repeat
    self._random_state = None

    observation_format = [self._main_observation, 'DEBUG.POS.TRANS']
    config = {'width': self._width, 'height': self._height}
    if extra_cfg is not None:
        config.update(extra_cfg)
    config = {k: str(v) for k, v in config.items()}

    self._dmlab = deepmind_lab.Lab(
        level,
        observation_format,
        config=config,
        renderer=renderer,
        level_cache=level_cache,
    )

    self._action_set = ACTION_SET
    self._action_list = np.array(self._action_set, dtype=np.intc)  # DMLAB requires intc type for actions

    self._last_observation = None

    self._render_scale = 5
    self._render_fps = 30
    self._last_frame = time.time()

    self.action_space = gym.spaces.Discrete(len(self._action_set))
    self.observation_space = gym.spaces.Box(
        low=0, high=255, shape=(self._height, self._width, 3), dtype=np.uint8,
    )

    self.benchmark_mode = benchmark_mode
    if self.benchmark_mode:
        log.warning('DmLab benchmark mode is true! Use this only for testing, not for actual training runs!')

    self.seed()

def load_from_checkpoint(cfg):
    filename = cfg_file(cfg)
    if not os.path.isfile(filename):
        raise Exception(f'Could not load saved parameters for experiment {cfg.experiment}')

    with open(filename, 'r') as json_file:
        json_params = json.load(json_file)
        log.warning('Loading existing experiment configuration from %s', filename)
        loaded_cfg = AttrDict(json_params)

    # override the parameters in config file with values passed from command line
    for key, value in vars(cfg.cli_args).items():
        if loaded_cfg[key] != value:
            log.debug('Overriding arg %r with value %r passed from command line', key, value)
            loaded_cfg[key] = value

    # incorporate extra CLI parameters that were not present in JSON file
    for key, value in vars(cfg).items():
        if key not in loaded_cfg:
            log.debug('Adding new argument %r=%r that is not in the saved config file!', key, value)
            loaded_cfg[key] = value

    return loaded_cfg

def safe_get(q, timeout=1e6, msg='Queue timeout'):
    """Using queue.get() with timeout is necessary, otherwise KeyboardInterrupt is not handled."""
    while True:
        try:
            return q.get(timeout=timeout)
        except Empty:
            log.warning(msg)

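# Hedged usage sketch for safe_get() above, not part of the original module: drain a
# multiprocessing queue from a main loop while keeping Ctrl+C responsive. The queue
# contents and the 0.1s timeout are illustrative assumptions.
def _example_drain_reports(report_queue):
    report = safe_get(report_queue, timeout=0.1, msg='Waiting for a report from workers...')
    log.debug('Received report: %r', report)
    return report
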
def close(self):
    try:
        if self.game is not None:
            self.game.close()
    except RuntimeError as exc:
        log.warning('Runtime error in VizDoom game close(): %r', exc)

    if self.viewer is not None:
        self.viewer.close()

def is_udp_port_available(port):
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock.bind(('', port))
        sock.close()
    except OSError as exc:
        log.warning(f'UDP port {port} cannot be used {str(exc)}')
        return False
    else:
        return True

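# Hedged helper sketch built on is_udp_port_available(), not part of the original module:
# scan a range for the first free UDP port (e.g. before starting a multiplayer VizDoom
# server). The port range is an illustrative assumption.
def find_available_udp_port(start_port=20000, num_ports=100):
    for port in range(start_port, start_port + num_ports):
        if is_udp_port_available(port):
            return port
    return None
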
def maybe_load_from_checkpoint(cfg):
    filename = cfg_file(cfg)
    if not os.path.isfile(filename):
        log.warning('Saved parameter configuration for experiment %s not found!', cfg.experiment)
        log.warning('Starting experiment from scratch!')
        return AttrDict(vars(cfg))

    return load_from_checkpoint(cfg)

def dmlab_env_by_name(name):
    for spec in DMLAB_ENVS:
        if spec.name == name:
            return spec

    # not a known "named" environment with a predefined spec
    log.warning('Level %s not found. Interpreting the level name as an unmodified DMLab-30 env name!', name)
    level = name.split('dmlab_')[1]
    spec = DmLabSpec(name, level)
    return spec

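# Hedged illustration of the fallback path in dmlab_env_by_name(), not part of the original
# module: a name missing from DMLAB_ENVS, e.g. 'dmlab_explore_goal_locations_small', is
# interpreted as the raw DMLab-30 level name after the 'dmlab_' prefix. Assumes DmLabSpec
# exposes a 'level' attribute; the level name used here is only an example.
def _example_unnamed_env_spec():
    spec = dmlab_env_by_name('dmlab_explore_goal_locations_small')
    assert spec.level == 'explore_goal_locations_small'
    return spec
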
def get_algo_class(algo):
    algo_class = Agent

    if algo == 'PPO':
        from seed_rl.algorithms.ppo.agent_ppo import AgentPPO
        algo_class = AgentPPO
    elif algo == 'APPO':
        from seed_rl.algorithms.appo.appo import APPO
        algo_class = APPO
    else:
        log.warning('Algorithm %s is not supported', algo)

    return algo_class

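# Hedged usage sketch for get_algo_class(), not part of the original module: resolve the
# agent class from a command-line string such as cfg.algo. Only the lookup is shown; how
# the class is then constructed depends on the Agent/AgentPPO/APPO signatures and is not
# assumed here.
def _example_resolve_algo(cfg):
    algo_class = get_algo_class(cfg.algo)
    log.info('Using algorithm class %r for %r', algo_class, cfg.algo)
    return algo_class
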
def fetch(self, key, pk3_path):
    """Environment object itself acts as a proxy to the global level cache."""
    if not self.env_uses_level_cache:
        self.env_uses_level_cache = True
        # log.debug('Env %s uses level cache!', self.level_name)

    path = join(LEVEL_CACHE_DIR, key)

    if os.path.isfile(path):
        # copy the cached file to the path expected by DeepMind Lab
        shutil.copyfile(path, pk3_path)
        return True
    else:
        log.warning('Cache miss in environment %s key: %s!', self.level_name, key)
        return False

def reset(self):
    self._ensure_initialized()

    if self.record_to is not None and not self.is_multiplayer:
        # does not work in multiplayer (uses different mechanism)
        if not os.path.exists(self.record_to):
            os.makedirs(self.record_to)

        demo_path = self.demo_path(self._num_episodes)
        log.warning('Recording episode demo to %s', demo_path)
        self.game.new_episode(demo_path)
    else:
        if self._num_episodes > 0:
            # no demo recording (default)
            self.game.new_episode()

    self.state = self.game.get_state()
    img = None
    try:
        img = self.state.screen_buffer
    except AttributeError:
        # sometimes Doom does not return screen buffer at all??? Rare bug
        pass

    if img is None:
        log.error('Game returned None screen buffer! This is not supposed to happen!')
        img = self._black_screen()

    # Swap current and previous histogram
    if self.current_histogram is not None and self.previous_histogram is not None:
        swap = self.current_histogram
        self.current_histogram = self.previous_histogram
        self.previous_histogram = swap
        self.current_histogram.fill(0)

    self._actions_flattened = None
    self._last_episode_info = copy.deepcopy(self._prev_info)
    self._prev_info = None

    self._num_episodes += 1

    return np.transpose(img, (1, 2, 0))

def _load_checkpoint(self, checkpoints_dir):
    checkpoints = self._get_checkpoints(checkpoints_dir)
    if len(checkpoints) <= 0:
        log.warning('No checkpoints found in %s', experiment_dir(cfg=self.cfg))
        return None
    else:
        latest_checkpoint = checkpoints[-1]
        log.warning('Loading state from checkpoint %s...', latest_checkpoint)

        if str(self.device) == 'cuda':
            # the checkpoint will try to load onto the GPU storage unless specified
            checkpoint_dict = torch.load(latest_checkpoint)
        else:
            checkpoint_dict = torch.load(latest_checkpoint, map_location=lambda storage, loc: storage)

        return checkpoint_dict

def sample(self, proc_idx):
    # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    timing = Timing()

    psutil.Process().nice(10)

    num_envs = len(DMLAB30_LEVELS_THAT_USE_LEVEL_CACHE)
    assert self.cfg.num_workers % num_envs == 0, \
        f'should have an integer number of workers per env, e.g. {1 * num_envs}, {2 * num_envs}, etc...'
    assert self.cfg.num_envs_per_worker == 1, 'use populate_cache with 1 env per worker'

    with timing.timeit('env_init'):
        env_key = 'env'
        env_desired_num_levels = 0

        global_env_id = proc_idx * self.cfg.num_envs_per_worker
        env_config = AttrDict(worker_index=proc_idx, vector_index=0, env_id=global_env_id)
        env = create_env(self.cfg.env, cfg=self.cfg, env_config=env_config)
        env.seed(global_env_id)

        # this is to track the performance for individual DMLab levels
        if hasattr(env.unwrapped, 'level_name'):
            env_key = env.unwrapped.level_name
            env_level = env.unwrapped.level

            approx_num_episodes_per_1b_frames = DMLAB30_APPROX_NUM_EPISODES_PER_BILLION_FRAMES[env_key]
            num_billions = DESIRED_TRAINING_LENGTH / int(1e9)
            num_workers_for_env = self.cfg.num_workers // num_envs
            env_desired_num_levels = int((approx_num_episodes_per_1b_frames * num_billions) / num_workers_for_env)

            env_num_levels_generated = len(
                dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[0].all_seeds[env_level]
            ) // num_workers_for_env

            log.warning('Worker %d (env %s) generated %d/%d levels!',
                        proc_idx, env_key, env_num_levels_generated, env_desired_num_levels)
            time.sleep(4)

        env.reset()
        env_uses_level_cache = env.unwrapped.env_uses_level_cache

        self.report_queue.put(dict(proc_idx=proc_idx, finished_reset=True))

    self.start_event.wait()

    try:
        with timing.timeit('work'):
            last_report = last_report_frames = total_env_frames = 0
            while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                action = env.action_space.sample()
                with timing.add_time(f'{env_key}.step'):
                    env.step(action)

                total_env_frames += 1

                with timing.add_time(f'{env_key}.reset'):
                    env.reset()

                env_num_levels_generated += 1
                log.debug('Env %s done %d/%d resets', env_key, env_num_levels_generated, env_desired_num_levels)

                if env_num_levels_generated >= env_desired_num_levels:
                    log.debug('%s finished %d/%d resets, sleeping...',
                              env_key, env_num_levels_generated, env_desired_num_levels)
                    time.sleep(30)  # free up CPU time for other envs

                # if env does not use level cache, there is no need to run it
                # let other workers proceed
                if not env_uses_level_cache:
                    log.debug('Env %s does not require cache, sleeping...', env_key)
                    time.sleep(200)

                with timing.add_time('report'):
                    now = time.time()
                    if now - last_report > self.report_every_sec:
                        last_report = now
                        frames_since_last_report = total_env_frames - last_report_frames
                        last_report_frames = total_env_frames
                        self.report_queue.put(dict(proc_idx=proc_idx, env_frames=frames_since_last_report))

                        if get_free_disk_space_mb() < 3 * 1024:
                            log.error('Not enough disk space! %d', get_free_disk_space_mb())
                            time.sleep(200)
    except:
        log.exception('Unknown exception')
        log.error('Unknown exception in worker %d, terminating...', proc_idx)
        self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

    time.sleep(proc_idx * 0.1 + 0.1)
    log.info('Process %d finished sampling. Timing: %s', proc_idx, timing)

    env.close()

def __init__(
    self, task_id, level, action_repeat, res_w, res_h, benchmark_mode, renderer, dataset_path,
    with_instructions, extended_action_set, use_level_cache, gpu_index, extra_cfg=None,
):
    self.width = res_w
    self.height = res_h

    # self._main_observation = 'DEBUG.CAMERA_INTERLEAVED.PLAYER_VIEW_NO_RETICLE'
    self.main_observation = 'RGB_INTERLEAVED'
    self.instructions_observation = DMLAB_INSTRUCTIONS
    self.with_instructions = with_instructions and not benchmark_mode

    self.action_repeat = action_repeat

    self.random_state = None

    self.task_id = task_id
    self.level = level
    self.level_name = dmlab_level_to_level_name(self.level)

    # the policy index which currently acts in the environment
    self.curr_policy_idx = 0
    self.curr_cache = dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[self.curr_policy_idx]

    self.instructions = np.zeros([DMLAB_MAX_INSTRUCTION_LEN], dtype=np.int32)

    observation_format = [self.main_observation]
    if self.with_instructions:
        observation_format += [self.instructions_observation]

    config = {
        'width': self.width,
        'height': self.height,
        'gpuDeviceIndex': str(gpu_index),
        'datasetPath': dataset_path,
    }
    if extra_cfg is not None:
        config.update(extra_cfg)
    config = {k: str(v) for k, v in config.items()}

    self.use_level_cache = use_level_cache
    env_level_cache = self if use_level_cache else None
    self.env_uses_level_cache = False  # will be set to True when this env instance queries the cache
    self.last_reset_seed = None

    if env_level_cache is not None:
        if not isinstance(self.curr_cache, dmlab_level_cache.DmlabLevelCacheGlobal):
            raise Exception(
                'DMLab global level cache object is not initialized! Make sure to call '
                'dmlab_ensure_global_cache_initialized() in the main thread before you fork any child processes '
                'or create any DMLab envs'
            )

    self.dmlab = deepmind_lab.Lab(
        level,
        observation_format,
        config=config,
        renderer=renderer,
        level_cache=env_level_cache,
    )

    self.action_set = EXTENDED_ACTION_SET if extended_action_set else ACTION_SET
    self.action_list = np.array(self.action_set, dtype=np.intc)  # DMLAB requires intc type for actions

    self.last_observation = None

    self.render_scale = 5
    self.render_fps = 30
    self.last_frame = time.time()

    self.action_space = gym.spaces.Discrete(len(self.action_set))

    self.observation_space = gym.spaces.Dict(
        obs=gym.spaces.Box(low=0, high=255, shape=(self.height, self.width, 3), dtype=np.uint8)
    )
    if self.with_instructions:
        self.observation_space.spaces[self.instructions_observation] = gym.spaces.Box(
            low=0,
            high=DMLAB_VOCABULARY_SIZE,
            shape=[DMLAB_MAX_INSTRUCTION_LEN],
            dtype=np.int32,
        )

    self.benchmark_mode = benchmark_mode
    if self.benchmark_mode:
        log.warning('DmLab benchmark mode is true! Use this only for testing, not for actual training runs!')

    self.seed()