    def reset(self):
        if self._episode_recording_dir is not None and self._record_id > 0:
            # save recorded actions as JSON
            with open(join(self._episode_recording_dir, 'actions.json'),
                      'w') as actions_file:
                json.dump(self._recorded_actions, actions_file)

            # rename previous episode dir
            reward = self._recorded_episode_reward + self._recorded_episode_shaping_reward
            new_dir_name = self._episode_recording_dir + f'_r{reward:.2f}'
            os.rename(self._episode_recording_dir, new_dir_name)
            log.info(
                'Finished recording %s (rew %.3f, shaping %.3f)',
                new_dir_name,
                reward,
                self._recorded_episode_shaping_reward,
            )

        dir_name = f'ep_{self._record_id:03d}_p{self._player_id}'
        self._episode_recording_dir = join(self._record_to, dir_name)
        ensure_dir_exists(self._episode_recording_dir)

        self._record_id += 1
        self._frame_id = 0
        self._recorded_episode_reward = 0
        self._recorded_episode_shaping_reward = 0

        self._recorded_actions = []

        return self.env.reset()
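Every snippet in this collection leans on the same small helper, `ensure_dir_exists`, whose implementation is not shown here. A minimal sketch of what it presumably looks like (an assumption, not the verbatim source): it wraps `os.makedirs` and returns the path, which is why the examples below can chain it inside `join(...)` expressions.

import os


def ensure_dir_exists(path):
    # Sketch only: create the directory (and any missing parents) and
    # return the path unchanged so calls can be chained.
    os.makedirs(path, exist_ok=True)
    return path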
Example #2
    def tmax_train_run(self, env_name=None):
        test_dir_name = self.__class__.__name__

        args, params = parse_args_tmax(AgentTMAX.Params, argv=[])
        if env_name is not None:
            args.env = env_name

        params.experiments_root = test_dir_name
        params.num_envs = 16
        params.train_for_steps = 60
        params.initial_save_rate = 20
        params.batch_size = 32
        params.ppo_epochs = 2
        params.distance_bootstrap = 10
        params.stage_duration = 100
        params.distance_encoder = 'resnet'
        params.locomotion_encoder = 'resnet'

        tmax_train_dir = join(experiments_dir(), params.experiments_root)
        ensure_dir_exists(tmax_train_dir)
        shutil.rmtree(tmax_train_dir)

        status = train(params, args.env)
        self.assertEqual(status, TrainStatus.SUCCESS)

        root_dir = params.experiment_dir()
        self.assertTrue(os.path.isdir(root_dir))

        enjoy(params, args.env, max_num_episodes=1, max_num_frames=50)
        shutil.rmtree(tmax_train_dir)

        self.assertFalse(os.path.isdir(root_dir))
Example #3
    def save(self, mdl_name=None, stats_name=None):
        if mdl_name is None:
            mdl_name = self.mdl_name

        if stats_name is None:
            stats_name = self.stats_name

        logger.info("Saving the model to %s.", mdl_name)
        ensure_dir_exists(SAVE_DUMP_FOLDER)
        mdl_path = os.path.join(SAVE_DUMP_FOLDER, mdl_name)
        stats_path = os.path.join(SAVE_DUMP_FOLDER, stats_name)
        self.model.params.save(mdl_path)
        with open(stats_path, 'wb') as stats_file:
            pkl.dump(self.stats, stats_file, 2)
Example #5
    def save(self, experiment_dir):
        trajectories_dir = ensure_dir_exists(
            join(experiment_dir, '.trajectories'))

        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f")
        trajectory_dir = ensure_dir_exists(
            join(trajectories_dir, f'traj_{timestamp}'))
        log.info('Saving trajectory to %s...', trajectory_dir)

        with open(join(trajectory_dir, 'trajectory.pickle'),
                  'wb') as traj_file:
            pickle.dump(self.__dict__, traj_file)

        return trajectory_dir
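For completeness, the mirror image of the save above: example #25 below restores a trajectory exactly this way, via `traj.__dict__.update(pickle.load(traj_file))`. A minimal sketch under that assumption:

import pickle
from os.path import join


def load_trajectory_state(trajectory_dir):
    # Read back the pickled __dict__ written by save() above; callers can
    # apply it with obj.__dict__.update(state), as example #25 does.
    with open(join(trajectory_dir, 'trajectory.pickle'), 'rb') as traj_file:
        return pickle.load(traj_file)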
Example #6
    def __init__(self, run_name, experiments, train_dir=None):
        if train_dir is None:
            train_dir = ensure_dir_exists(join(os.getcwd(), 'train_dir'))

        self.train_dir = train_dir
        self.run_name = run_name
        self.experiments = experiments
Example #7
    def _save(self):
        checkpoint = self._get_checkpoint_dict()
        assert checkpoint is not None

        checkpoint_dir = self.checkpoint_dir(self.cfg, self.policy_id)
        tmp_filepath = join(checkpoint_dir, '.temp_checkpoint')
        checkpoint_name = f'checkpoint_{self.train_step:09d}_{self.env_steps}.pth'
        filepath = join(checkpoint_dir, checkpoint_name)
        log.info('Saving %s...', tmp_filepath)
        torch.save(checkpoint, tmp_filepath)
        log.info('Renaming %s to %s', tmp_filepath, filepath)
        os.rename(tmp_filepath, filepath)

        while len(self.get_checkpoints(checkpoint_dir)) > self.cfg.keep_checkpoints:
            oldest_checkpoint = self.get_checkpoints(checkpoint_dir)[0]
            if os.path.isfile(oldest_checkpoint):
                log.debug('Removing %s', oldest_checkpoint)
                os.remove(oldest_checkpoint)

        if self.cfg.save_milestones_sec > 0:
            # milestones enabled
            if time.time() - self.last_milestone_time >= self.cfg.save_milestones_sec:
                milestones_dir = ensure_dir_exists(
                    join(checkpoint_dir, 'milestones'))
                milestone_path = join(milestones_dir,
                                      f'{checkpoint_name}.milestone')
                log.debug('Saving a milestone %s', milestone_path)
                shutil.copy(filepath, milestone_path)
                self.last_milestone_time = time.time()
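The temp-file-plus-rename dance in `_save` is the standard trick for crash-safe checkpoints: `os.rename` is atomic on POSIX when source and destination are on the same filesystem, so a concurrent reader sees either the previous checkpoint or the complete new one, never a torn write. A stripped-down sketch of the same pattern (standard library only, not the code above):

import os
from os.path import join


def atomic_write(checkpoint_dir, name, data: bytes):
    # Write to a hidden temp file in the same directory (same filesystem!),
    # then atomically rename it into place.
    tmp_path = join(checkpoint_dir, '.temp_checkpoint')
    with open(tmp_path, 'wb') as f:
        f.write(data)
    os.rename(tmp_path, join(checkpoint_dir, name))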
Example #8
    def test_simple_cmd(self):
        logging.disable(logging.INFO)

        echo_params = ParamGrid([
            ('p1', [3.14, 2.71]),
            ('p2', ['a', 'b', 'c']),
            ('p3', list(np.arange(3))),
        ])
        experiments = [
            Experiment('test_echo1', 'echo',
                       echo_params.generate_params(randomize=True)),
            Experiment('test_echo2', 'echo',
                       echo_params.generate_params(randomize=False)),
        ]
        train_dir = ensure_dir_exists(join(project_tmp_dir(), 'tests'))
        root_dir_name = '__test_run__'
        rd = RunDescription(root_dir_name, experiments, train_dir)

        args = runner_argparser().parse_args([])
        args.max_parallel = 8
        args.pause_between = 0

        run(rd, args)
        logging.disable(logging.NOTSET)

        shutil.rmtree(join(train_dir, root_dir_name))
Example #9
    def _visualize_data(self):
        min_vis = 10
        if len(self.buffer) < min_vis:
            return

        close_examples, far_examples = [], []
        labels = self.buffer.labels
        obs_first, obs_second = self.buffer.obs_first, self.buffer.obs_second

        for i in range(len(labels)):
            if labels[i] == 0 and len(close_examples) < min_vis:
                close_examples.append((obs_first[i], obs_second[i]))
            elif labels[i] == 1 and len(far_examples) < min_vis:
                far_examples.append((obs_first[i], obs_second[i]))

        if len(close_examples) < min_vis or len(far_examples) < min_vis:
            return

        img_folder = vis_dir(self.params.experiment_dir())
        img_folder = ensure_dir_exists(join(img_folder, 'dist'))
        img_folder = ensure_dir_exists(join(img_folder, f'dist_{time.time()}'))

        def save_images(examples, close_or_far):
            for visualize_i in range(len(examples)):
                img_first_name = join(
                    img_folder, f'{close_or_far}_{visualize_i}_first.png')
                img_second_name = join(
                    img_folder, f'{close_or_far}_{visualize_i}_second.png')
                cv2.imwrite(img_first_name, examples[visualize_i][0])
                cv2.imwrite(img_second_name, examples[visualize_i][1])

        save_images(close_examples, 'close')
        save_images(far_examples, 'far')

        self._vis_dirs.append(img_folder)
        while len(self._vis_dirs) > 20:
            dir_name = self._vis_dirs.popleft()
            if os.path.isdir(dir_name):
                shutil.rmtree(dir_name)
Example #10
def aggregate(path, subpath, experiments, ax):
    print("Started aggregation {}".format(path))
    curr_dir = os.path.dirname(os.path.abspath(__file__))
    cache_dir = join(curr_dir, 'cache')
    cache_env = join(cache_dir, subpath)
    if os.path.isdir(cache_env):
        with open(join(cache_env, f'{subpath}.pickle'), 'rb') as fobj:
            interpolated_keys = pickle.load(fobj)
    else:
        cache_env = ensure_dir_exists(cache_env)
        interpolated_keys = extract(experiments=experiments)
        with open(join(cache_env, f'{subpath}.pickle'), 'wb') as fobj:
            pickle.dump(interpolated_keys, fobj)
    for i, key in enumerate(interpolated_keys.keys()):
        plot(i, interpolated_keys[key], ax[i])
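Examples #10, #12, #13, and #14 all repeat the same load-from-pickle-cache-or-compute idiom. A hedged, factored-out version (`compute` stands in for the examples' own `extract(...)` calls; this helper is not part of the original code):

import os
import pickle
from os.path import join


def pickle_cached(cache_dir, key, compute):
    # Return the pickled result if it exists, otherwise compute and cache it.
    # Relies on the ensure_dir_exists helper sketched near the top.
    cache_path = join(ensure_dir_exists(cache_dir), f'{key}.pickle')
    if os.path.isfile(cache_path):
        with open(cache_path, 'rb') as fobj:
            return pickle.load(fobj)
    result = compute()
    with open(cache_path, 'wb') as fobj:
        pickle.dump(result, fobj)
    return result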
Example #11
def trajectories_to_sparse_map(init_map, trajectories, trajectories_dir, agent,
                               map_img, coord_limits):
    """This is just a test."""
    m = init_map()
    map_builder = MapBuilder(agent)
    for t in trajectories:
        m.new_episode()
        is_frame_a_landmark = map_builder.add_trajectory_to_sparse_map_fixed_landmarks(
            m, t)
        landmark_frames = np.nonzero(is_frame_a_landmark)
        log.info('Landmark frames %r', landmark_frames)
    sparse_map_dir = ensure_dir_exists(join(trajectories_dir, 'sparse_map'))
    m.save_checkpoint(sparse_map_dir,
                      map_img=map_img,
                      coord_limits=coord_limits,
                      verbose=True,
                      is_sparse=True)
    return m
Example #12
def aggregate(env, experiments, count, ax):
    print("Started aggregation {}".format(env))

    curr_dir = os.path.dirname(os.path.abspath(__file__))
    cache_dir = join(curr_dir, 'cache')
    cache_env = join(cache_dir, env)

    if os.path.isdir(cache_env):
        with open(join(cache_env, f'{env}.pickle'), 'rb') as fobj:
            interpolated_keys = pickle.load(fobj)
    else:
        cache_env = ensure_dir_exists(cache_env)
        interpolated_keys = extract(experiments)
        with open(join(cache_env, f'{env}.pickle'), 'wb') as fobj:
            pickle.dump(interpolated_keys, fobj)

    for key in interpolated_keys.keys():
        plot(env, key, interpolated_keys[key], ax, count)
Example #13
def aggregate(env, experiments, framework):
    print('Started aggregation {}'.format(env))

    curr_dir = os.path.dirname(os.path.abspath(__file__))
    cache_dir = join(curr_dir, 'cache')
    cache_env = join(cache_dir, f'{env}_{framework}')

    with_cache = False
    if with_cache:
        if os.path.isdir(cache_env):
            with open(join(cache_env, f'{env}.pickle'), 'rb') as fobj:
                interpolated_keys = pickle.load(fobj)
        else:
            cache_env = ensure_dir_exists(cache_env)
            interpolated_keys = extract(experiments, framework)
            with open(join(cache_env, f'{env}.pickle'), 'wb') as fobj:
                pickle.dump(interpolated_keys, fobj)
    else:
        interpolated_keys = extract(experiments, framework)

    return interpolated_keys
Example #14
def aggregate(env, experiments, output):
    aggregation_ops = [np.mean, np.min, np.max, np.median, np.std, np.var]

    ops = {'summary': aggregate_to_summary, 'csv': aggregate_to_csv}

    print("Started aggregation {}".format(env))

    curr_dir = os.path.dirname(os.path.abspath(__file__))
    cache_dir = join(curr_dir, 'cache')
    cache_env = join(cache_dir, env)

    if os.path.isdir(cache_env):
        with open(join(cache_env, f'{env}.pickle'), 'rb') as fobj:
            interpolated_keys = pickle.load(fobj)
    else:
        cache_env = ensure_dir_exists(cache_env)
        interpolated_keys = extract(experiments)
        with open(join(cache_env, f'{env}.pickle'), 'wb') as fobj:
            pickle.dump(interpolated_keys, fobj)

    for key in interpolated_keys.keys():
        plot(env, key, interpolated_keys[key])
Example #15
def main():
    # requirements
    # 1) dark background
    # 2) both axes should start at 0
    # 3) Legend should be on background
    # 4) Legend should not obstruct data
    # 5) Export in eps
    # 6) Markers. Little circles for every data point
    # 7) Dashed lines for missing data
    fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2, 3)
    count = 0

    ax = (ax1, ax2, ax3, ax4, ax5, ax6)
    for name, measurement in measurements.items():
        build_plot(name, measurement, ax[count], count)
        count += 1

    handles, labels = ax[-1].get_legend_handles_labels()
    # fig.legend(handles, labels, loc='upper center')
    # lgd = fig.legend(handles, labels, bbox_to_anchor=(0., 1.02, 1., .102), loc='lower left', ncol=4, mode="expand")
    lgd = fig.legend(handles, labels, bbox_to_anchor=(0.05, 0.88, 0.9, 0.5), loc='lower left', ncol=5, mode="expand")
    lgd.set_in_layout(True)

    # plt.show()
    plot_name = 'throughput'
    # plt.subplots_adjust(wspace=0.05, hspace=0.15)
    # plt.margins(0, 0)
    # plt.tight_layout(rect=(0, 0, 1, 1.2))
    # plt.subplots_adjust(bottom=0.2)

    plt.tight_layout(rect=(0, 0, 1.0, 0.9))
    # plt.show()

    plot_dir = ensure_dir_exists(os.path.join(os.getcwd(), '../final_plots'))

    plt.savefig(os.path.join(plot_dir, f'{plot_name}.pdf'), format='pdf', bbox_extra_artists=(lgd,))
Example #16
    def checkpoint_dir(cfg, policy_id):
        checkpoint_dir = join(experiment_dir(cfg=cfg),
                              f'checkpoint_p{policy_id}')
        return ensure_dir_exists(checkpoint_dir)
Example #17
    def _checkpoint_dir(self):
        checkpoint_dir = join(experiment_dir(cfg=self.cfg), 'checkpoint')
        return ensure_dir_exists(checkpoint_dir)
Example #18
    def get_used_seeds_dir(self):
        return ensure_dir_exists(
            join(self.experiment_dir,
                 f'dmlab_used_lvl_seeds_p{self.policy_idx:02d}'))
Example #19
    def __init__(self, cache_dir, experiment_dir, all_levels_for_experiment,
                 policy_idx):
        self.cache_dir = cache_dir
        self.experiment_dir = experiment_dir
        self.policy_idx = policy_idx

        self.all_seeds = dict()
        self.available_seeds = dict()
        self.used_seeds = dict()
        self.num_seeds_used_in_current_run = dict()
        self.locks = dict()

        for lvl in all_levels_for_experiment:
            self.all_seeds[lvl] = []
            self.available_seeds[lvl] = []
            self.num_seeds_used_in_current_run[lvl] = multiprocessing.RawValue(
                ctypes.c_int32, 0)
            self.locks[lvl] = multiprocessing.Lock()

        log.debug('Reading the DMLab level cache...')
        cache_dir = ensure_dir_exists(cache_dir)

        lvl_seed_files = Path(cache_dir).rglob(f'*.{LEVEL_SEEDS_FILE_EXT}')
        for lvl_seed_file in lvl_seed_files:
            lvl_seed_file = str(lvl_seed_file)
            level = filename_to_level(os.path.relpath(lvl_seed_file,
                                                      cache_dir))
            self.all_seeds[level] = read_seeds_file(lvl_seed_file,
                                                    has_keys=True)
            self.all_seeds[level] = list(set(
                self.all_seeds[level]))  # leave only unique seeds
            log.debug('Level %s has %d total seeds available', level,
                      len(self.all_seeds[level]))

        log.debug('Updating level cache for the current experiment...')
        used_lvl_seeds_dir = self.get_used_seeds_dir()
        used_seeds_files = Path(used_lvl_seeds_dir).rglob(
            f'*.{LEVEL_SEEDS_FILE_EXT}')
        self.used_seeds = dict()
        for used_seeds_file in used_seeds_files:
            used_seeds_file = str(used_seeds_file)
            level = filename_to_level(
                os.path.relpath(used_seeds_file, used_lvl_seeds_dir))
            self.used_seeds[level] = read_seeds_file(used_seeds_file,
                                                     has_keys=False)
            log.debug('%d seeds already used in this experiment for level %s',
                      len(self.used_seeds[level]), level)

            self.used_seeds[level] = set(self.used_seeds[level])

        for lvl in all_levels_for_experiment:
            lvl_seeds = self.all_seeds.get(lvl, [])
            lvl_used_seeds = self.used_seeds.get(lvl, [])

            lvl_remaining_seeds = set(lvl_seeds) - set(lvl_used_seeds)
            self.available_seeds[lvl] = list(lvl_remaining_seeds)

            same_levels_for_population = False
            if same_levels_for_population:
                # shuffle with fixed seed so agents in population get the same levels
                random.Random(42).shuffle(self.available_seeds[lvl])
            else:
                random.shuffle(self.available_seeds[lvl])

            log.debug('Env %s has %d remaining unused seeds', lvl,
                      len(self.available_seeds[lvl]))

        log.debug('Done initializing global DMLab level cache!')
Example #20
# NOTE: truncated at the source -- the lines below are the trailing arguments
# of a bar-chart call (likely ax.barh(...)) whose opening lines are missing.
        width,
        align='center',
        color='#1f77b4',
        label='DeepMind IMPALA')
# ax.legend(bbox_to_anchor=(0., 1.02, 1., 0.102), ncol=2, loc='lower left', mode='expend')
handles, labels = ax.get_legend_handles_labels()

ax.legend(handles,
          labels,
          bbox_to_anchor=(0., 1.01, 0.7, 0.3),
          loc='lower left',
          ncol=2,
          mode='expand',
          frameon=False,
          fontsize=12)

#
# lgd.set_in_layout(True)

ax.set_yticks(y_pos)
ax.set_yticklabels(yticklabels)
ax.invert_yaxis()  # labels read top-to-bottom
ax.axvline(100, color='#b3b3b3')
ax.set_xlabel('Human Normalised Score, %')

# plt.show()
plt.tight_layout()
plot_name = 'dmlab_30_score'
plot_dir = ensure_dir_exists(os.path.join(os.getcwd(), 'final_plots'))
plt.savefig(os.path.join(plot_dir, f'{plot_name}.pdf'), format='pdf')
Example #21
    def __init__(
        self,
        task_id,
        level,
        action_repeat,
        res_w,
        res_h,
        benchmark_mode,
        renderer,
        dataset_path,
        with_instructions,
        extended_action_set,
        use_level_cache,
        level_cache_path,
        gpu_index,
        extra_cfg=None,
    ):
        self.width = res_w
        self.height = res_h

        # self._main_observation = 'DEBUG.CAMERA_INTERLEAVED.PLAYER_VIEW_NO_RETICLE'
        self.main_observation = 'RGB_INTERLEAVED'
        self.instructions_observation = DMLAB_INSTRUCTIONS
        self.with_instructions = with_instructions and not benchmark_mode

        self.action_repeat = action_repeat

        self.random_state = None

        self.task_id = task_id
        self.level = level
        self.level_name = dmlab_level_to_level_name(self.level)

        # the policy index which currently acts in the environment
        self.curr_policy_idx = 0
        self.curr_cache = dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[
            self.curr_policy_idx]

        self.instructions = np.zeros([DMLAB_MAX_INSTRUCTION_LEN],
                                     dtype=np.int32)

        observation_format = [self.main_observation]
        if self.with_instructions:
            observation_format += [self.instructions_observation]

        config = {
            'width': self.width,
            'height': self.height,
            'gpuDeviceIndex': str(gpu_index),
            'datasetPath': dataset_path,
        }

        if extra_cfg is not None:
            config.update(extra_cfg)
        config = {k: str(v) for k, v in config.items()}

        self.use_level_cache = use_level_cache
        self.level_cache_path = ensure_dir_exists(level_cache_path)

        env_level_cache = self if use_level_cache else None
        self.env_uses_level_cache = False  # will be set to True when this env instance queries the cache
        self.last_reset_seed = None

        if env_level_cache is not None:
            if not isinstance(self.curr_cache,
                              dmlab_level_cache.DmlabLevelCacheGlobal):
                raise Exception(
                    'DMLab global level cache object is not initialized! Make sure to call '
                    'dmlab_ensure_global_cache_initialized() in the main thread before you fork any child processes '
                    'or create any DMLab envs')

        self.dmlab = deepmind_lab.Lab(
            level,
            observation_format,
            config=config,
            renderer=renderer,
            level_cache=env_level_cache,
        )

        self.action_set = EXTENDED_ACTION_SET if extended_action_set else ACTION_SET
        self.action_list = np.array(
            self.action_set,
            dtype=np.intc)  # DMLAB requires intc type for actions

        self.last_observation = None

        self.render_scale = 5
        self.render_fps = 30
        self.last_frame = time.time()

        self.action_space = gym.spaces.Discrete(len(self.action_set))

        self.observation_space = gym.spaces.Dict(
            obs=gym.spaces.Box(low=0,
                               high=255,
                               shape=(self.height, self.width, 3),
                               dtype=np.uint8))
        if self.with_instructions:
            self.observation_space.spaces[
                self.instructions_observation] = gym.spaces.Box(
                    low=0,
                    high=DMLAB_VOCABULARY_SIZE,
                    shape=[DMLAB_MAX_INSTRUCTION_LEN],
                    dtype=np.int32,
                )

        self.benchmark_mode = benchmark_mode
        if self.benchmark_mode:
            log.warning(
                'DmLab benchmark mode is true! Use this only for testing, not for actual training runs!'
            )

        self.seed()
Example #22
def main():
    """Script entry point."""
    stop_at = 80 * 1000 * 1000
    prefix = 'simple'

    # noinspection PyUnusedLocal
    experiments_very_sparse = [
        Experiment('doom_curious_vs_vanilla/doom_maze_very_sparse/doom_maze_very_sparse_pre_0.0', 'A2C (no curiosity)'),
        Experiment('doom_sweep_very_sparse/doom_sweep_i_0.5_p_0.05', 'A2C+ICM (curious)'),
    ]

    # noinspection PyUnusedLocal
    experiments_sparse = [
        Experiment('doom_curious_vs_vanilla/doom_maze_sparse/doom_maze_sparse_pre_0.0', 'A2C (no curiosity)'),
        Experiment('doom_curious_vs_vanilla/doom_maze_sparse/doom_maze_sparse_pre_0.05', 'A2C+ICM (curious)'),
    ]

    # noinspection PyUnusedLocal
    experiments_basic = [
        Experiment('doom_curious_vs_vanilla/doom_maze/doom_maze_pre_0.0', 'A2C (no curiosity)'),
        Experiment('doom_curious_vs_vanilla/doom_maze/doom_maze_pre_0.05', 'A2C+ICM (curious)'),
    ]

    experiments = [
        Experiment('doom_curious_vs_vanilla/doom_basic/doom_basic_pre_0.0', 'A2C (no curiosity)'),
        Experiment('doom_curious_vs_vanilla/doom_basic/doom_basic_pre_0.05', 'A2C+ICM (curious)'),
    ]

    plots = [
        Plot('a2c_aux_summary/avg_reward', 'average reward', 'Avg. reward for the last 1000 episodes'),
        Plot(
            'a2c_agent_summary/policy_entropy',
            'policy entropy, nats',
            'Stochastic policy entropy',
        ),
    ]

    for plot in plots:
        fig = plt.figure(figsize=(5, 4))
        fig.add_subplot()

        for ex_i, experiment in enumerate(experiments):
            experiment_name = experiment.name.split(os.sep)[-1]
            experiments_root = join(*(experiment.name.split(os.sep)[:-1]))
            exp_dir = experiment_dir(experiment_name, experiments_root)

            path_to_events_dir = summaries_dir(exp_dir)
            events_files = []
            for f in os.listdir(path_to_events_dir):
                if f.startswith('events'):
                    events_files.append(join(path_to_events_dir, f))

            if len(events_files) == 0:
                log.error('No events file for %s', experiment)
                continue

            events_files = sorted(events_files)
            steps, values = [], []

            for events_file in events_files:
                iterator = tf.train.summary_iterator(events_file)
                while True:
                    try:
                        e = next(iterator, None)
                    except Exception as exc:
                        log.warning(str(exc))
                        break

                    if e is None:
                        break

                    for v in e.summary.value:
                        if e.step >= stop_at:
                            break

                        if v.tag == plot.name:
                            steps.append(e.step)
                            values.append(v.simple_value)

            # just in case
            values = np.nan_to_num(values)

            smooth = 10
            values_smooth = running_mean(values, smooth)
            steps = steps[smooth:]
            values = values[smooth:]

            plt.plot(steps, values, color=COLORS[ex_i], alpha=0.2, label='__nolegend__')
            plt.plot(steps, values_smooth, color=COLORS[ex_i], label=experiment.descr, linewidth=2)

        plt.xlabel('environment steps')
        plt.ylabel(plot.axis)
        plt.title(plot.descr)
        plt.grid(True)
        plt.legend()
        plt.tight_layout()

        plots_dir = ensure_dir_exists(join(experiments_dir(), 'plots'))
        plot_name = plot.name.replace('/', '_')
        plt.savefig(join(plots_dir, f'{prefix}_{plot_name}.png'))
        plt.close()

    return 0
Example #23
    def save_checkpoint(
        self,
        checkpoint_dir,
        map_img=None,
        coord_limits=None,
        num_to_keep=2,
        is_sparse=False,
        verbose=False,
    ):
        """Verbose mode also dumps all the landmark observations and the graph structure into the directory."""
        t = Timing()
        with t.timeit('map_checkpoint'):
            results = AttrDict()

            prefix = '.map_'
            timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f")
            dir_name = f'{prefix}{timestamp}'
            map_dir = join(checkpoint_dir, dir_name)

            if os.path.isdir(map_dir):
                log.warning(
                    'Map checkpoint %s already exists! Overwriting...',
                    map_dir)
                shutil.rmtree(map_dir)

            map_dir = ensure_dir_exists(map_dir)

            with open(join(map_dir, 'topo_map.pkl'), 'wb') as fobj:
                pkl.dump(self.__dict__, fobj, 2)

            if verbose:
                map_extra = ensure_dir_exists(join(map_dir, '.map_verbose'))
                for node in self.graph.nodes:
                    obs = self.get_observation(node)
                    obs_bgr = cv2.cvtColor(obs, cv2.COLOR_RGB2BGR)
                    obs_bgr_bigger = cv2.resize(
                        obs_bgr, (420, 420), interpolation=cv2.INTER_NEAREST)
                    cv2.imwrite(join(map_extra, f'{node:03d}.jpg'),
                                obs_bgr_bigger)

                figure = plot_graph(
                    self.graph,
                    layout='pos',
                    map_img=map_img,
                    limits=coord_limits,
                    topological_map=True,
                    is_sparse=is_sparse,
                )
                graph_filename = join(map_extra, 'graph.png')
                with open(graph_filename, 'wb') as graph_fobj:
                    plt.savefig(graph_fobj, format='png')
                figure.clear()

                results.graph_filename = graph_filename

            assert num_to_keep > 0
            previous_checkpoints = glob.glob(f'{checkpoint_dir}/{prefix}*')
            previous_checkpoints.sort()
            previous_checkpoints = deque(previous_checkpoints)

            while len(previous_checkpoints) > num_to_keep:
                checkpoint_to_delete = previous_checkpoints[0]
                log.info('Deleting old map checkpoint %s',
                         checkpoint_to_delete)
                shutil.rmtree(checkpoint_to_delete)
                previous_checkpoints.popleft()

        log.info('Map save checkpoint took %s', t)
        return results
Example #24
# NOTE: truncated at the source -- the lines below are the trailing keyword
# arguments of a Keras fit_generator(...)-style training call whose opening
# lines are missing.
                                  verbose=1,
                                  use_multiprocessing=multiprocessing_data_load,
                                  workers=num_workers_data_load,
                                  max_q_size=30,
                                  callbacks=[tensorboard, model_checkpointer])

    K.clear_session()


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('-j', '--job-dir', help='train job directory with samples and config file', required=True)
    parser.add_argument('-i', '--image-dir', help='directory with image files', required=True)

    args = parser.parse_args()

    image_dir = args.__dict__['image_dir']
    job_dir = args.__dict__['job_dir']

    ensure_dir_exists(os.path.join(job_dir, 'weights'))
    ensure_dir_exists(os.path.join(job_dir, 'logs'))

    config_file = os.path.join(job_dir, 'config.json')
    config = load_config(config_file)

    samples_file = os.path.join(job_dir, 'samples.json')
    samples = load_samples(samples_file)

    train(samples=samples, job_dir=job_dir, image_dir=image_dir, **config)
Example #25
def trajectory_to_map(params, env_id):
    def make_env_func():
        e = create_env(env_id)
        e.seed(0)
        return e

    params.num_envs = 1
    params.with_timer = False
    agent = AgentTMAX(make_env_func, params)
    agent.initialize()

    map_img, coord_limits = generate_env_map(make_env_func)

    experiment_dir = params.experiment_dir()
    trajectories_dir = ensure_dir_exists(join(experiment_dir, '.trajectories'))

    if params.persistent_map_checkpoint is None:
        prefix = 'traj_'
        all_trajectories = glob.glob(f'{trajectories_dir}/{prefix}*')
        all_trajectories.sort()

        trajectories = []
        for i, trajectory_dir in enumerate(all_trajectories):
            with open(join(trajectory_dir, 'trajectory.pickle'),
                      'rb') as traj_file:
                traj = Trajectory(i)
                traj.__dict__.update(pickle.load(traj_file))
                trajectories.append(traj)
    else:
        loaded_persistent_map = TopologicalMap.create_empty()
        loaded_persistent_map.maybe_load_checkpoint(
            params.persistent_map_checkpoint)

        num_trajectories = loaded_persistent_map.num_trajectories
        trajectories = [Trajectory(i) for i in range(num_trajectories)]

        zero_frame = loaded_persistent_map.graph.nodes[0]
        for i in range(1, num_trajectories):
            trajectories[i].add(zero_frame['obs'], -1, zero_frame['info'])

        for node in loaded_persistent_map.graph.nodes(data=True):
            node_idx, d = node
            trajectories[d['traj_idx']].add(d['obs'], -1, d['info'])

        log.info('Loaded %d trajectories from the map', num_trajectories)
        log.info('Trajectory lengths %r', [len(t) for t in trajectories])

    def init_map():
        return TopologicalMap(
            trajectories[0].obs[0],
            directed_graph=False,
            initial_info=trajectories[0].infos[0],
        )

    map_builder = MapBuilder(agent)
    # trajectories = [map_builder.sparsify_trajectory(t) for t in trajectories]  # TODO

    sparse_map = trajectories_to_sparse_map(
        init_map,
        trajectories,
        trajectories_dir,
        agent,
        map_img,
        coord_limits,
    )

    test_pick_best_trajectory = True
    if test_pick_best_trajectory:
        pick_best_trajectory(init_map, agent, copy.deepcopy(trajectories))

    m = init_map()

    for i, t in enumerate(trajectories):
        m = map_builder.add_trajectory_to_dense_map(m, t)

    map_builder.calc_distances_to_landmarks(sparse_map, m)
    map_builder.sieve_landmarks_by_distance(sparse_map)

    dense_map_dir = ensure_dir_exists(join(trajectories_dir, 'dense_map'))
    m.save_checkpoint(dense_map_dir,
                      map_img=map_img,
                      coord_limits=coord_limits,
                      verbose=True)

    # check if landmark correspondence between dense and sparse map is correct
    for node, data in sparse_map.graph.nodes.data():
        traj_idx = data['traj_idx']
        frame_idx = data['frame_idx']

        dense_map_landmark = m.frame_to_node_idx[traj_idx][frame_idx]
        log.info('Sparse map node %d corresponds to dense map node %d', node,
                 dense_map_landmark)
        log.info('Sparse map node %d distance %d', node, data['distance'])

        obs_sparse = sparse_map.get_observation(node)
        obs_dense = m.get_observation(dense_map_landmark)

        assert np.array_equal(obs_sparse, obs_dense)

        show_landmarks = False
        if show_landmarks:
            import cv2
            cv2.imshow('sparse', obs_sparse)
            cv2.imshow('dense', obs_dense)
            cv2.waitKey()

    agent.finalize()
    return 0
Example #26
def visualize_graph_html(nx_graph,
                         output_dir=None,
                         title_text='',
                         layout='kamada_kawai',
                         should_show=False):
    """
    This method visualizes a NetworkX graph using Bokeh.

    :param nx_graph: NetworkX graph with node attributes containing image filenames.
    :param output_dir: Optional output directory for saving html.
    :param title_text: String to be displayed above the visualization.
    :param layout: Which layout function to use.
    :param should_show: Open the browser to look at the graph.
    """
    from bokeh import palettes
    from bokeh.io import output_file, show
    from bokeh.models import Circle, HoverTool, MultiLine, Plot, Range1d, TapTool
    # noinspection PyProtectedMember
    from bokeh.models.graphs import from_networkx, NodesAndLinkedEdges, NodesOnly

    pos = parse_layout(nx_graph, layout)

    hover_tool = HoverTool(
        tooltips='<img src="@imgs" height="200" alt="@imgs" width="200"></img>',
        show_arrow=False)

    plot = Plot(plot_width=800,
                plot_height=800,
                x_range=Range1d(-1.1, 1.1),
                y_range=Range1d(-1.1, 1.1))
    if title_text != '':
        plot.title.text = title_text
    plot.title.align = 'center'
    plot.min_border = 0
    plot.outline_line_color = None

    plot.add_tools(hover_tool, TapTool())
    plot.toolbar.logo = None
    plot.toolbar_location = None

    graph_renderer = from_networkx(nx_graph, pos)

    graph_renderer.node_renderer.data_source.data['imgs'] = [
        n[1]['img'] for n in nx_graph.nodes(data=True)
    ]

    graph_renderer.node_renderer.glyph = Circle(
        size=10, fill_color=palettes.Spectral4[0], line_color=None)
    graph_renderer.node_renderer.selection_glyph = Circle(
        size=10, fill_color=palettes.Spectral4[2], line_color=None)
    graph_renderer.node_renderer.hover_glyph = Circle(
        size=10, fill_color=palettes.Spectral4[1], line_color=None)

    graph_renderer.edge_renderer.glyph = MultiLine(line_color='#CCCCCC',
                                                   line_alpha=0.8,
                                                   line_width=1.5)
    graph_renderer.edge_renderer.selection_glyph = MultiLine(
        line_color=palettes.Spectral4[2], line_width=2)

    graph_renderer.selection_policy = NodesAndLinkedEdges()
    graph_renderer.inspection_policy = NodesOnly()

    plot.renderers.append(graph_renderer)

    if output_dir:
        ensure_dir_exists(output_dir)
        output_file(join(output_dir, 'visualize_graph.html'))

    if should_show:
        show(plot)
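A hypothetical invocation of the function above, assuming each node carries an 'img' attribute with a thumbnail path (which the hover tooltip expects); the graph and paths are made up for illustration:

import networkx as nx

nx_graph = nx.path_graph(5)
for n in nx_graph.nodes:
    nx_graph.nodes[n]['img'] = f'/tmp/landmarks/{n:03d}.jpg'  # hypothetical paths

visualize_graph_html(nx_graph,
                     output_dir='/tmp/graph_vis',
                     title_text='demo graph',
                     should_show=False)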
Example #27
    def __init__(self, cache_dir):
        ensure_dir_exists(cache_dir)
        self._cache_dir = cache_dir
Example #28
    def __init__(self, cfg):
        super().__init__(cfg)

        # we should not use CUDA in the main thread, only on the workers
        set_global_cuda_envvars(cfg)

        tmp_env = make_env_func(self.cfg, env_config=None)
        self.obs_space = tmp_env.observation_space
        self.action_space = tmp_env.action_space
        self.num_agents = tmp_env.num_agents

        self.reward_shaping_scheme = None
        if self.cfg.with_pbt:
            if hasattr(tmp_env.unwrapped, '_reward_shaping_wrapper'):
                # noinspection PyProtectedMember
                self.reward_shaping_scheme = tmp_env.unwrapped._reward_shaping_wrapper.reward_shaping_scheme
            else:
                try:
                    from envs.doom.multiplayer.doom_multiagent_wrapper import MultiAgentEnv
                    if isinstance(tmp_env.unwrapped, MultiAgentEnv):
                        self.reward_shaping_scheme = tmp_env.unwrapped.default_reward_shaping
                except ImportError:
                    pass

        tmp_env.close()

        # shared memory allocation
        self.traj_buffers = SharedBuffers(self.cfg, self.num_agents,
                                          self.obs_space, self.action_space)

        self.actor_workers = None

        self.report_queue = MpQueue(20 * 1000 * 1000)
        self.policy_workers = dict()
        self.policy_queues = dict()

        self.learner_workers = dict()

        self.workers_by_handle = None

        self.policy_inputs = [[] for _ in range(self.cfg.num_policies)]
        self.policy_outputs = dict()
        for worker_idx in range(self.cfg.num_workers):
            for split_idx in range(self.cfg.worker_num_splits):
                self.policy_outputs[(worker_idx, split_idx)] = dict()

        self.policy_avg_stats = dict()
        self.policy_lag = [dict() for _ in range(self.cfg.num_policies)]

        self.last_timing = dict()
        self.env_steps = dict()
        self.samples_collected = [0 for _ in range(self.cfg.num_policies)]
        self.total_env_steps_since_resume = 0

        # currently this applies only to the current run, not experiment as a whole
        # to change this behavior we'd need to save the state of the main loop to a filesystem
        self.total_train_seconds = 0

        self.last_report = time.time()
        self.last_experiment_summaries = 0

        self.report_interval = 5.0  # sec
        self.experiment_summaries_interval = self.cfg.experiment_summaries_interval  # sec

        self.avg_stats_intervals = (2, 12, 60)  # 10 seconds, 1 minute, 5 minutes

        self.fps_stats = deque([], maxlen=max(self.avg_stats_intervals))
        self.throughput_stats = [
            deque([], maxlen=5) for _ in range(self.cfg.num_policies)
        ]
        self.avg_stats = dict()
        self.stats = dict()  # regular (non-averaged) stats

        self.writers = dict()
        writer_keys = list(range(self.cfg.num_policies))
        for key in writer_keys:
            summary_dir = join(summaries_dir(experiment_dir(cfg=self.cfg)),
                               str(key))
            summary_dir = ensure_dir_exists(summary_dir)
            self.writers[key] = SummaryWriter(summary_dir, flush_secs=20)

        self.pbt = PopulationBasedTraining(self.cfg,
                                           self.reward_shaping_scheme,
                                           self.writers)
Example #29
if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-j',
        '--job-dir',
        help='train job directory with samples and config file',
        required=True)
    parser.add_argument('-i',
                        '--image-dir',
                        help='directory with image files',
                        required=True)

    args = parser.parse_args()

    image_dir = args.__dict__['image_dir']
    job_dir = args.__dict__['job_dir']

    ensure_dir_exists(os.path.join(job_dir, 'weights'))
    ensure_dir_exists(os.path.join(job_dir, 'logs'))

    # config_file = os.path.join(job_dir, 'config.json')
    config_file = os.path.join(job_dir, 'config_vgg16_technical.json')
    config = load_config(config_file)

    # samples_file = os.path.join(job_dir, 'samples.json')
    samples_file = os.path.join(job_dir, 'tid_labels_train.json')
    samples = load_samples(samples_file)

    train(samples=samples, job_dir=job_dir, image_dir=image_dir, **config)
Example #30
def run(run_description, args):
    experiments = run_description.experiments
    train_dir = run_description.train_dir
    max_parallel = args.max_parallel

    log.info('Starting processes with base cmds: %r',
             [e.cmd for e in experiments])
    log.info('Max parallel processes is %d', max_parallel)
    log.info(
        'Monitor log files using\n\n\ttail -f train_dir/%s/**/**/log.txt\n\n',
        run_description.run_name)

    processes = []
    processes_per_gpu = {g: [] for g in range(args.num_gpus)}

    experiments = run_description.generate_experiments()
    next_experiment = next(experiments, None)

    def find_least_busy_gpu():
        least_busy_gpu = None
        gpu_available_processes = 0

        for gpu_id in range(args.num_gpus):
            available_processes = args.experiments_per_gpu - len(
                processes_per_gpu[gpu_id])
            if available_processes > gpu_available_processes:
                gpu_available_processes = available_processes
                least_busy_gpu = gpu_id

        return least_busy_gpu, gpu_available_processes

    def can_squeeze_another_process():
        if len(processes) >= max_parallel:
            return False

        if args.experiments_per_gpu > 0:
            least_busy_gpu, gpu_available_processes = find_least_busy_gpu()
            if gpu_available_processes <= 0:
                return False

        return True

    while len(processes) > 0 or next_experiment is not None:
        while can_squeeze_another_process() and next_experiment is not None:
            cmd, name, root_dir, exp_env_vars = next_experiment

            cmd_tokens = cmd.split(' ')

            # workaround to make sure we're running the correct python executable from our virtual env
            if cmd_tokens[0] == 'python':
                cmd_tokens[0] = sys.executable
                log.debug('Using Python executable %s', cmd_tokens[0])

            experiment_dir = ensure_dir_exists(join(train_dir, root_dir, name))
            logfile = open(join(experiment_dir, 'log.txt'), 'wb')
            envvars = os.environ.copy()

            best_gpu = None
            if args.experiments_per_gpu > 0:
                best_gpu, best_gpu_available_processes = find_least_busy_gpu()
                log.info(
                    'The least busy gpu is %d where we can run %d more processes',
                    best_gpu,
                    best_gpu_available_processes,
                )
                envvars['CUDA_VISIBLE_DEVICES'] = f'{best_gpu}'

            log.info('Starting process %r', cmd_tokens)

            if exp_env_vars is not None:
                for key, value in exp_env_vars.items():
                    log.info('Adding env variable %r %r', key, value)
                    envvars[str(key)] = str(value)

            process = subprocess.Popen(cmd_tokens,
                                       stdout=logfile,
                                       stderr=logfile,
                                       env=envvars)
            process.process_logfile = logfile
            process.gpu_id = best_gpu
            process.proc_cmd = cmd

            processes.append(process)

            if process.gpu_id is not None:
                processes_per_gpu[process.gpu_id].append(process.proc_cmd)

            log.info('Started process %s on GPU %r', process.proc_cmd,
                     process.gpu_id)
            log.info('Waiting for %d seconds before starting next process',
                     args.pause_between)
            time.sleep(args.pause_between)

            next_experiment = next(experiments, None)

        remaining_processes = []
        for process in processes:
            if process.poll() is None:
                remaining_processes.append(process)
                continue
            else:
                if process.gpu_id is not None:
                    processes_per_gpu[process.gpu_id].remove(process.proc_cmd)
                process.process_logfile.close()
                log.info('Process %r finished with code %r', process.proc_cmd,
                         process.returncode)
                if process.returncode != 0:
                    log.error('WARNING: RETURN CODE IS %r', process.returncode)

        processes = remaining_processes
        time.sleep(0.1)

    log.info('Done!')

    return 0