Example #1
    def reset(self):
        if self._episode_recording_dir is not None and self._record_id > 0:
            # save recorded actions to a JSON file
            with open(join(self._episode_recording_dir, 'actions.json'),
                      'w') as actions_file:
                json.dump(self._recorded_actions, actions_file)

            # rename previous episode dir
            reward = self._recorded_episode_reward + self._recorded_episode_shaping_reward
            new_dir_name = self._episode_recording_dir + f'_r{reward:.2f}'
            os.rename(self._episode_recording_dir, new_dir_name)
            log.info(
                'Finished recording %s (rew %.3f, shaping %.3f)',
                new_dir_name,
                reward,
                self._recorded_episode_shaping_reward,
            )

        dir_name = f'ep_{self._record_id:03d}_p{self._player_id}'
        self._episode_recording_dir = join(self._record_to, dir_name)
        ensure_dir_exists(self._episode_recording_dir)

        self._record_id += 1
        self._frame_id = 0
        self._recorded_episode_reward = 0
        self._recorded_episode_shaping_reward = 0

        self._recorded_actions = []

        return self.env.reset()
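
Every example in this section relies on the same `ensure_dir_exists` helper, whose implementation is not shown. Judging from the call sites (it creates the directory and returns the path, e.g. `train_dir = ensure_dir_exists(...)` in Example #2), a minimal sketch could look like this:

import os

def ensure_dir_exists(path):
    # create the directory (and any missing parents); no-op if it already exists
    os.makedirs(path, exist_ok=True)
    return path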
Example #2
    def test_simple_cmd(self):
        logging.disable(logging.INFO)

        echo_params = ParamGrid([
            ('p1', [3.14, 2.71]),
            ('p2', ['a', 'b', 'c']),
            ('p3', list(np.arange(3))),
        ])
        experiments = [
            Experiment('test_echo1', 'echo', echo_params.generate_params(randomize=True)),
            Experiment('test_echo2', 'echo', echo_params.generate_params(randomize=False)),
        ]
        train_dir = ensure_dir_exists(join(project_tmp_dir(), 'tests'))
        root_dir_name = '__test_run__'
        rd = RunDescription(root_dir_name, experiments)

        args = runner_argparser().parse_args([])
        args.max_parallel = 8
        args.pause_between = 0
        args.train_dir = train_dir

        run(rd, args)

        rd2 = RunDescription(root_dir_name, experiments,
                             experiment_dirs_sf_format=False,
                             experiment_arg_name='--experiment_tst',
                             experiment_dir_arg_name='--dir')
        run(rd2, args)

        logging.disable(logging.NOTSET)

        shutil.rmtree(join(train_dir, root_dir_name))
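
`ParamGrid` presumably expands its parameter lists into their Cartesian product (2 × 3 × 3 = 18 combinations in this test), with `randomize` controlling whether the resulting order is shuffled. A minimal sketch of that expansion with plain `itertools`, assuming grid-search semantics and plain ints in place of `np.arange`:

import itertools

params = [('p1', [3.14, 2.71]), ('p2', ['a', 'b', 'c']), ('p3', [0, 1, 2])]
names = [name for name, _ in params]
combos = [dict(zip(names, values))
          for values in itertools.product(*(values for _, values in params))]
print(len(combos))  # 18, e.g. {'p1': 3.14, 'p2': 'a', 'p3': 0}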
Example #3
    def __init__(self, run_name, experiments, train_dir=None):
        if train_dir is None:
            train_dir = ensure_dir_exists(join(os.getcwd(), 'train_dir'))

        self.train_dir = train_dir
        self.run_name = run_name
        self.experiments = experiments
        self.experiment_suffix = ''
Example #4
def aggregate(path, subpath, experiments, ax, legend_name, group_id):
    print("Started aggregation {}".format(path / subpath))

    curr_dir = os.path.dirname(os.path.abspath(__file__))
    cache_dir = join(curr_dir, 'cache')
    cache_env = join(cache_dir, subpath)

    if os.path.isdir(cache_env):
        with open(join(cache_env, f'{subpath}.pickle'), 'rb') as fobj:
            interpolated_keys = pickle.load(fobj)
    else:
        cache_env = ensure_dir_exists(cache_env)
        interpolated_keys = extract(experiments=experiments)
        with open(join(cache_env, f'{subpath}.pickle'), 'wb') as fobj:
            pickle.dump(interpolated_keys, fobj)

    for i, key in enumerate(interpolated_keys.keys()):
        plot(i, interpolated_keys[key], ax[i], legend_name, group_id)
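
This function is an instance of a simple pickle-based caching pattern: load the cached result when it exists on disk, otherwise compute and persist it. The same pattern in generic form (the `cached` helper and its `compute` callable are hypothetical stand-ins for the directory check and `extract` above):

import os
import pickle

def cached(cache_file, compute):
    # return a previously pickled result, or compute and persist it
    if os.path.isfile(cache_file):
        with open(cache_file, 'rb') as fobj:
            return pickle.load(fobj)
    result = compute()
    with open(cache_file, 'wb') as fobj:
        pickle.dump(result, fobj)
    return result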
Example #5
def run(run_description, args):
    experiments = run_description.experiments
    max_parallel = args.max_parallel

    log.info('Starting processes with base cmds: %r',
             [e.cmd for e in experiments])
    log.info('Max parallel processes is %d', max_parallel)
    log.info(
        'Monitor log files using\n\n\ttail -f train_dir/%s/**/**/sf_log.txt\n\n',
        run_description.run_name)

    processes = []
    processes_per_gpu = {g: [] for g in range(args.num_gpus)}

    experiments = run_description.generate_experiments(args.train_dir)
    next_experiment = next(experiments, None)

    def find_least_busy_gpu():
        least_busy_gpu = None
        gpu_available_processes = 0

        for gpu_id in range(args.num_gpus):
            available_processes = args.experiments_per_gpu - len(
                processes_per_gpu[gpu_id])
            if available_processes > gpu_available_processes:
                gpu_available_processes = available_processes
                least_busy_gpu = gpu_id

        return least_busy_gpu, gpu_available_processes

    def can_squeeze_another_process():
        if len(processes) >= max_parallel:
            return False

        if args.experiments_per_gpu > 0:
            least_busy_gpu, gpu_available_processes = find_least_busy_gpu()
            if gpu_available_processes <= 0:
                return False

        return True

    failed_processes = []
    last_log_time = 0
    log_interval = 3  # seconds

    while len(processes) > 0 or next_experiment is not None:
        while can_squeeze_another_process() and next_experiment is not None:
            cmd, name, root_dir, exp_env_vars = next_experiment

            cmd_tokens = cmd.split(' ')

            # workaround to make sure we're running the correct python executable from our virtual env
            if cmd_tokens[0].startswith('python'):
                cmd_tokens[0] = sys.executable
                log.debug('Using Python executable %s', cmd_tokens[0])

            ensure_dir_exists(join(args.train_dir, root_dir))

            envvars = os.environ.copy()

            best_gpu = None
            if args.experiments_per_gpu > 0:
                best_gpu, best_gpu_available_processes = find_least_busy_gpu()
                log.info(
                    'The least busy gpu is %d where we can run %d more processes',
                    best_gpu,
                    best_gpu_available_processes,
                )
                envvars['CUDA_VISIBLE_DEVICES'] = f'{best_gpu}'

            log.info('Starting process %r', cmd_tokens)

            if exp_env_vars is not None:
                for key, value in exp_env_vars.items():
                    log.info('Adding env variable %r %r', key, value)
                    envvars[str(key)] = str(value)

            process = subprocess.Popen(cmd_tokens,
                                       stdout=None,
                                       stderr=None,
                                       env=envvars)
            process.gpu_id = best_gpu
            process.proc_cmd = cmd

            processes.append(process)

            if process.gpu_id is not None:
                processes_per_gpu[process.gpu_id].append(process.proc_cmd)

            log.info('Started process %s on GPU %r', process.proc_cmd,
                     process.gpu_id)
            log.info('Waiting for %d seconds before starting next process',
                     args.pause_between)
            time.sleep(args.pause_between)

            next_experiment = next(experiments, None)

        remaining_processes = []
        for process in processes:
            if process.poll() is None:
                remaining_processes.append(process)
                continue
            else:
                if process.gpu_id is not None:
                    processes_per_gpu[process.gpu_id].remove(process.proc_cmd)
                log.info('Process %r finished with code %r', process.proc_cmd,
                         process.returncode)
                if process.returncode != 0:
                    failed_processes.append(
                        (process.proc_cmd, process.pid, process.returncode))
                    log.error('WARNING: RETURN CODE IS %r', process.returncode)

        processes = remaining_processes

        if time.time() - last_log_time > log_interval:
            if failed_processes:
                log.error(
                    'Failed processes: %s', ', '.join([
                        f'PID: {p[1]} code: {p[2]}' for p in failed_processes
                    ]))
            last_log_time = time.time()

        time.sleep(0.1)

    log.info('Done!')

    return 0
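
The GPU-balancing logic inside `run` is easy to check in isolation. Below is a standalone reproduction of `find_least_busy_gpu` (a hypothetical free-function version, with the values the original closes over passed explicitly):

def find_least_busy_gpu(processes_per_gpu, num_gpus, experiments_per_gpu):
    # pick the GPU with the most free process slots; None if every GPU is full
    least_busy_gpu, gpu_available_processes = None, 0
    for gpu_id in range(num_gpus):
        available = experiments_per_gpu - len(processes_per_gpu[gpu_id])
        if available > gpu_available_processes:
            gpu_available_processes = available
            least_busy_gpu = gpu_id
    return least_busy_gpu, gpu_available_processes

# GPU 0 is full (2 of 2 slots taken), GPU 1 has one free slot
print(find_least_busy_gpu({0: ['cmd_a', 'cmd_b'], 1: ['cmd_c']}, 2, 2))  # (1, 1)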
Example #6
    def __init__(self, cache_dir, experiment_dir, all_levels_for_experiment,
                 policy_idx):
        self.cache_dir = cache_dir
        self.experiment_dir = experiment_dir
        self.policy_idx = policy_idx

        self.all_seeds = dict()
        self.available_seeds = dict()
        self.used_seeds = dict()
        self.num_seeds_used_in_current_run = dict()
        self.locks = dict()

        for lvl in all_levels_for_experiment:
            self.all_seeds[lvl] = []
            self.available_seeds[lvl] = []
            self.num_seeds_used_in_current_run[lvl] = multiprocessing.RawValue(
                ctypes.c_int32, 0)
            self.locks[lvl] = multiprocessing.Lock()

        log.debug('Reading the DMLab level cache...')
        cache_dir = ensure_dir_exists(cache_dir)

        lvl_seed_files = Path(cache_dir).rglob(f'*.{LEVEL_SEEDS_FILE_EXT}')
        for lvl_seed_file in lvl_seed_files:
            lvl_seed_file = str(lvl_seed_file)
            level = filename_to_level(os.path.relpath(lvl_seed_file,
                                                      cache_dir))
            self.all_seeds[level] = read_seeds_file(lvl_seed_file,
                                                    has_keys=True)
            self.all_seeds[level] = list(set(self.all_seeds[level]))  # leave only unique seeds
            log.debug('Level %s has %d total seeds available', level,
                      len(self.all_seeds[level]))

        log.debug('Updating level cache for the current experiment...')
        used_lvl_seeds_dir = self.get_used_seeds_dir()
        used_seeds_files = Path(used_lvl_seeds_dir).rglob(
            f'*.{LEVEL_SEEDS_FILE_EXT}')
        self.used_seeds = dict()
        for used_seeds_file in used_seeds_files:
            used_seeds_file = str(used_seeds_file)
            level = filename_to_level(
                os.path.relpath(used_seeds_file, used_lvl_seeds_dir))
            self.used_seeds[level] = read_seeds_file(used_seeds_file,
                                                     has_keys=False)
            log.debug('%d seeds already used in this experiment for level %s',
                      len(self.used_seeds[level]), level)

            self.used_seeds[level] = set(self.used_seeds[level])

        for lvl in all_levels_for_experiment:
            lvl_seeds = self.all_seeds.get(lvl, [])
            lvl_used_seeds = self.used_seeds.get(lvl, [])

            lvl_remaining_seeds = set(lvl_seeds) - set(lvl_used_seeds)
            self.available_seeds[lvl] = list(lvl_remaining_seeds)

            same_levels_for_population = False
            if same_levels_for_population:
                # shuffle with fixed seed so agents in population get the same levels
                random.Random(42).shuffle(self.available_seeds[lvl])
            else:
                random.shuffle(self.available_seeds[lvl])

            log.debug('Env %s has %d remaining unused seeds', lvl,
                      len(self.available_seeds[lvl]))

        log.debug('Done initializing global DMLab level cache!')
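
The bookkeeping at the end of this constructor is a per-level set difference: seeds present in the cache minus seeds already consumed by this experiment, shuffled to randomize the order in which they are handed out. For example:

all_seeds = [11, 22, 33, 22, 44]   # duplicates may occur across seed files
used_seeds = {22, 44}
available = list(set(all_seeds) - used_seeds)
print(sorted(available))  # [11, 33]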
Example #7
    def get_used_seeds_dir(self):
        return ensure_dir_exists(
            join(self.experiment_dir,
                 f'dmlab_used_lvl_seeds_p{self.policy_idx:02d}'))
Example #8
    def __init__(self, cfg):
        super().__init__(cfg)

        # we should not use CUDA in the main thread, only on the workers
        set_global_cuda_envvars(cfg)

        tmp_env = make_env_func(self.cfg, env_config=None)
        self.obs_space = tmp_env.observation_space
        self.action_space = tmp_env.action_space
        self.num_agents = tmp_env.num_agents

        self.reward_shaping_scheme = None
        if self.cfg.with_pbt:
            self.reward_shaping_scheme = get_default_reward_shaping(tmp_env)

        tmp_env.close()

        # shared memory allocation
        self.traj_buffers = SharedBuffers(self.cfg, self.num_agents,
                                          self.obs_space, self.action_space)

        self.actor_workers = None

        self.report_queue = MpQueue(40 * 1000 * 1000)
        self.policy_workers = dict()
        self.policy_queues = dict()

        self.learner_workers = dict()

        self.workers_by_handle = None

        self.policy_inputs = [[] for _ in range(self.cfg.num_policies)]
        self.policy_outputs = dict()
        for worker_idx in range(self.cfg.num_workers):
            for split_idx in range(self.cfg.worker_num_splits):
                self.policy_outputs[(worker_idx, split_idx)] = dict()

        self.policy_avg_stats = dict()
        self.policy_lag = [dict() for _ in range(self.cfg.num_policies)]

        self.last_timing = dict()
        self.env_steps = dict()
        self.samples_collected = [0 for _ in range(self.cfg.num_policies)]
        self.total_env_steps_since_resume = 0

        # currently this applies only to the current run, not experiment as a whole
        # to change this behavior we'd need to save the state of the main loop to a filesystem
        self.total_train_seconds = 0

        self.last_report = time.time()
        self.last_experiment_summaries = 0

        self.report_interval = 5.0  # sec
        self.experiment_summaries_interval = self.cfg.experiment_summaries_interval  # sec

        self.avg_stats_intervals = (2, 12, 60)  # 10 seconds, 1 minute, 5 minutes at the 5-second report interval

        self.fps_stats = deque([], maxlen=max(self.avg_stats_intervals))
        self.throughput_stats = [
            deque([], maxlen=5) for _ in range(self.cfg.num_policies)
        ]
        self.avg_stats = dict()
        self.stats = dict()  # regular (non-averaged) stats

        init_wandb(self.cfg)

        self.writers = dict()
        writer_keys = list(range(self.cfg.num_policies))
        for key in writer_keys:
            summary_dir = join(summaries_dir(experiment_dir(cfg=self.cfg)),
                               str(key))
            summary_dir = ensure_dir_exists(summary_dir)
            self.writers[key] = SummaryWriter(summary_dir, flush_secs=20)

        self.pbt = PopulationBasedTraining(self.cfg,
                                           self.reward_shaping_scheme,
                                           self.writers)
Example #9
    def __init__(
        self,
        task_id,
        level,
        action_repeat,
        res_w,
        res_h,
        benchmark_mode,
        renderer,
        dataset_path,
        with_instructions,
        extended_action_set,
        use_level_cache,
        level_cache_path,
        gpu_index,
        extra_cfg=None,
    ):
        self.width = res_w
        self.height = res_h

        # self._main_observation = 'DEBUG.CAMERA_INTERLEAVED.PLAYER_VIEW_NO_RETICLE'
        self.main_observation = 'RGB_INTERLEAVED'
        self.instructions_observation = DMLAB_INSTRUCTIONS
        self.with_instructions = with_instructions and not benchmark_mode

        self.action_repeat = action_repeat

        self.random_state = None

        self.task_id = task_id
        self.level = level
        self.level_name = dmlab_level_to_level_name(self.level)

        # the policy index which currently acts in the environment
        self.curr_policy_idx = 0
        self.curr_cache = dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[
            self.curr_policy_idx]

        self.instructions = np.zeros([DMLAB_MAX_INSTRUCTION_LEN],
                                     dtype=np.int32)

        observation_format = [self.main_observation]
        if self.with_instructions:
            observation_format += [self.instructions_observation]

        config = {
            'width': self.width,
            'height': self.height,
            'gpuDeviceIndex': str(gpu_index),
            'datasetPath': dataset_path,
        }

        if extra_cfg is not None:
            config.update(extra_cfg)
        config = {k: str(v) for k, v in config.items()}

        self.use_level_cache = use_level_cache
        self.level_cache_path = ensure_dir_exists(level_cache_path)

        env_level_cache = self if use_level_cache else None
        self.env_uses_level_cache = False  # will be set to True when this env instance queries the cache
        self.last_reset_seed = None

        if env_level_cache is not None:
            if not isinstance(self.curr_cache,
                              dmlab_level_cache.DmlabLevelCacheGlobal):
                raise Exception(
                    'DMLab global level cache object is not initialized! Make sure to call '
                    'dmlab_ensure_global_cache_initialized() in the main thread before you fork any child processes '
                    'or create any DMLab envs')

        self.dmlab = deepmind_lab.Lab(
            level,
            observation_format,
            config=config,
            renderer=renderer,
            level_cache=env_level_cache,
        )

        self.action_set = EXTENDED_ACTION_SET if extended_action_set else ACTION_SET
        self.action_list = np.array(
            self.action_set,
            dtype=np.intc)  # DMLAB requires intc type for actions

        self.last_observation = None

        self.render_scale = 5
        self.render_fps = 30
        self.last_frame = time.time()

        self.action_space = gym.spaces.Discrete(len(self.action_set))

        self.observation_space = gym.spaces.Dict(
            obs=gym.spaces.Box(low=0,
                               high=255,
                               shape=(self.height, self.width, 3),
                               dtype=np.uint8))
        if self.with_instructions:
            self.observation_space.spaces[
                self.instructions_observation] = gym.spaces.Box(
                    low=0,
                    high=DMLAB_VOCABULARY_SIZE,
                    shape=[DMLAB_MAX_INSTRUCTION_LEN],
                    dtype=np.int32,
                )

        self.benchmark_mode = benchmark_mode
        if self.benchmark_mode:
            log.warning(
                'DmLab benchmark mode is true! Use this only for testing, not for actual training runs!'
            )

        self.seed()
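
For reference, the observation space built here is a gym `Dict` with an 'obs' image box and, when instructions are enabled, an integer instruction vector. A quick sanity check of the image part (with a hypothetical 96×72 resolution standing in for `res_w`/`res_h`):

import gym
import numpy as np

space = gym.spaces.Dict(
    obs=gym.spaces.Box(low=0, high=255, shape=(72, 96, 3), dtype=np.uint8))
sample = space.sample()
print(sample['obs'].shape, sample['obs'].dtype)  # (72, 96, 3) uint8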