Example #1
    def __init__(self, cfg):
        self.cfg = cfg

        if self.cfg.seed is not None:
            log.info('Setting fixed seed %d', self.cfg.seed)
            torch.manual_seed(self.cfg.seed)
            np.random.seed(self.cfg.seed)

        self.device = torch.device('cuda')

        self.train_step = self.env_steps = 0

        self.total_train_seconds = 0
        self.last_training_step = time.time()

        self.best_avg_reward = math.nan

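        # schedules controlling how often summaries are written and checkpoints are saved
        # (values are interpolated between the (step, rate) points by LinearDecay)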
        self.summary_rate_decay = LinearDecay([(0, 100), (1000000, 2000),
                                               (10000000, 10000)])
        self.last_summary_written = -1e9
        self.save_rate_decay = LinearDecay([(0, self.cfg.initial_save_rate),
                                            (1000000, 5000)],
                                           staircase=100)

        summary_dir = summaries_dir(experiment_dir(cfg=self.cfg))
        self.writer = SummaryWriter(summary_dir, flush_secs=10)
Example #2
    def test_example(self):
        experiment_name = 'test_example'

        register_custom_components()

        # test training for a short time (100k env steps)
        cfg = custom_parse_args(argv=['--algo=APPO', '--env=my_custom_env_v1', f'--experiment={experiment_name}'])
        cfg.num_workers = 2
        cfg.train_for_env_steps = 100000
        cfg.save_every_sec = 1
        cfg.decorrelate_experience_max_seconds = 0
        cfg.seed = 0
        cfg.device = 'cpu'

        status = run_algorithm(cfg)
        self.assertEqual(status, ExperimentStatus.SUCCESS)

        # then test the evaluation of the saved model
        cfg = custom_parse_args(
            argv=['--algo=APPO', '--env=my_custom_env_v1', f'--experiment={experiment_name}'],
            evaluation=True,
        )
        cfg.device = 'cpu'
        status, avg_reward = enjoy(cfg, max_num_frames=1000)

        directory = experiment_dir(cfg=cfg)
        self.assertTrue(isdir(directory))
        shutil.rmtree(directory, ignore_errors=True)
        # self.assertFalse(isdir(directory))

        self.assertEqual(status, ExperimentStatus.SUCCESS)

        # not sure if we should check it here, it's optional
        # maybe a longer test where it actually has a chance to converge
        self.assertGreater(avg_reward, 60)
Example #3
def ensure_initialized(cfg, env_name):
    global DMLAB_INITIALIZED
    if DMLAB_INITIALIZED:
        return

    dmlab_register_models()

    if env_name == 'dmlab_30':
        # extra functions to calculate human-normalized score etc.
        EXTRA_EPISODIC_STATS_PROCESSING.append(dmlab_extra_episodic_stats_processing)
        EXTRA_PER_POLICY_SUMMARIES.append(dmlab_extra_summaries)

    num_policies = cfg.num_policies if hasattr(cfg, 'num_policies') else 1
    all_levels = list_all_levels_for_experiment(env_name)
    level_cache_dir = cfg.dmlab_level_cache_path
    dmlab_ensure_global_cache_initialized(experiment_dir(cfg=cfg), all_levels, num_policies, level_cache_dir)

    DMLAB_INITIALIZED = True
Example #4
def run_many(run_description):
    experiments = run_description.experiments
    max_parallel = run_description.max_parallel

    log.info('Starting processes with base cmds: %r', [e.cmd for e in experiments])
    log.info('Max parallel processes is %d', max_parallel)
    log.info('Monitor log files using tail -f train_dir/%s/**/**/log.txt', run_description.run_name)

    processes = []

    experiments = run_description.generate_experiments()
    next_experiment = next(experiments, None)

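    # keep launching experiments while slots are free; loop until the generator is exhausted and all processes finish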
    while len(processes) > 0 or next_experiment is not None:
        while len(processes) < max_parallel and next_experiment is not None:
            cmd, name, root_dir = next_experiment
            log.info('Starting experiment "%s"', cmd)
            cmd_tokens = cmd.split(' ')

            logfile = open(join(experiment_dir(name, root_dir), 'log.txt'), 'wb')
            process = subprocess.Popen(cmd_tokens, stdout=logfile, stderr=logfile)
            process.process_logfile = logfile

            processes.append(process)

            next_experiment = next(experiments, None)

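        # poll child processes: keep the ones still running, close the logfiles of the ones that finished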
        remaining_processes = []
        for process in processes:
            if process.poll() is None:
                remaining_processes.append(process)
                continue
            else:
                process.process_logfile.close()

            log.info('Process %r finished with code %r', process, process.returncode)

        processes = remaining_processes
        time.sleep(0.1)

    log.info('Done!')

    return 0
Example #5
    def _load_checkpoint(self, checkpoints_dir):
        checkpoints = self._get_checkpoints(checkpoints_dir)
        if len(checkpoints) <= 0:
            log.warning('No checkpoints found in %s', experiment_dir(cfg=self.cfg))
            return None
        else:
            latest_checkpoint = checkpoints[-1]
            log.warning('Loading state from checkpoint %s...', latest_checkpoint)

            if str(self.device) == 'cuda':
                # the checkpoint will try to load onto the GPU storage unless specified otherwise
                checkpoint_dict = torch.load(latest_checkpoint)
            else:
                checkpoint_dict = torch.load(latest_checkpoint, map_location=lambda storage, loc: storage)

            return checkpoint_dict
Example #6
def policy_reward_shaping_file(cfg, policy_id):
    return join(experiment_dir(cfg=cfg), f'policy_{policy_id:02d}_reward_shaping.json')
Example #7
def policy_cfg_file(cfg, policy_id):
    return join(experiment_dir(cfg=cfg), f'policy_{policy_id:02d}_cfg.json')
Example #8
    def initialize(self):
        self._save_cfg()
        save_git_diff(experiment_dir(cfg=self.cfg))
Example #9
    def __init__(self, cfg):
        super().__init__(cfg)

        # we should not use CUDA in the main thread, only on the workers
        set_global_cuda_envvars(cfg)

        tmp_env = make_env_func(self.cfg, env_config=None)
        self.obs_space = tmp_env.observation_space
        self.action_space = tmp_env.action_space
        self.num_agents = tmp_env.num_agents

        self.reward_shaping_scheme = None
        if self.cfg.with_pbt:
            if hasattr(tmp_env.unwrapped, '_reward_shaping_wrapper'):
                # noinspection PyProtectedMember
                self.reward_shaping_scheme = tmp_env.unwrapped._reward_shaping_wrapper.reward_shaping_scheme
            else:
                try:
                    from envs.doom.multiplayer.doom_multiagent_wrapper import MultiAgentEnv
                    if isinstance(tmp_env.unwrapped, MultiAgentEnv):
                        self.reward_shaping_scheme = tmp_env.unwrapped.default_reward_shaping
                except ImportError:
                    pass

        tmp_env.close()

        # shared memory allocation
        self.traj_buffers = SharedBuffers(self.cfg, self.num_agents,
                                          self.obs_space, self.action_space)

        self.actor_workers = None

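        # queues and registries used to communicate with actor, policy and learner worker processes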
        self.report_queue = MpQueue(20 * 1000 * 1000)
        self.policy_workers = dict()
        self.policy_queues = dict()

        self.learner_workers = dict()

        self.workers_by_handle = None

        self.policy_inputs = [[] for _ in range(self.cfg.num_policies)]
        self.policy_outputs = dict()
        for worker_idx in range(self.cfg.num_workers):
            for split_idx in range(self.cfg.worker_num_splits):
                self.policy_outputs[(worker_idx, split_idx)] = dict()

        self.policy_avg_stats = dict()
        self.policy_lag = [dict() for _ in range(self.cfg.num_policies)]

        self.last_timing = dict()
        self.env_steps = dict()
        self.samples_collected = [0 for _ in range(self.cfg.num_policies)]
        self.total_env_steps_since_resume = 0

        # currently this applies only to the current run, not experiment as a whole
        # to change this behavior we'd need to save the state of the main loop to a filesystem
        self.total_train_seconds = 0

        self.last_report = time.time()
        self.last_experiment_summaries = 0

        self.report_interval = 5.0  # sec
        self.experiment_summaries_interval = self.cfg.experiment_summaries_interval  # sec

        self.avg_stats_intervals = (2, 12, 60)  # 10 seconds, 1 minute, 5 minutes

        self.fps_stats = deque([], maxlen=max(self.avg_stats_intervals))
        self.throughput_stats = [
            deque([], maxlen=5) for _ in range(self.cfg.num_policies)
        ]
        self.avg_stats = dict()
        self.stats = dict()  # regular (non-averaged) stats

        self.writers = dict()
        writer_keys = list(range(self.cfg.num_policies))
        for key in writer_keys:
            summary_dir = join(summaries_dir(experiment_dir(cfg=self.cfg)),
                               str(key))
            summary_dir = ensure_dir_exists(summary_dir)
            self.writers[key] = SummaryWriter(summary_dir, flush_secs=20)

        self.pbt = PopulationBasedTraining(self.cfg,
                                           self.reward_shaping_scheme,
                                           self.writers)
Example #10
    def checkpoint_dir(cfg, policy_id):
        checkpoint_dir = join(experiment_dir(cfg=cfg), f'checkpoint_p{policy_id}')
        return ensure_dir_exists(checkpoint_dir)
Example #11
def main():
    """Script entry point."""
    stop_at = 80 * 1000 * 1000
    prefix = 'simple'

    # noinspection PyUnusedLocal
    experiments_very_sparse = [
        Experiment('doom_curious_vs_vanilla/doom_maze_very_sparse/doom_maze_very_sparse_pre_0.0', 'A2C (no curiosity)'),
        Experiment('doom_sweep_very_sparse/doom_sweep_i_0.5_p_0.05', 'A2C+ICM (curious)'),
    ]

    # noinspection PyUnusedLocal
    experiments_sparse = [
        Experiment('doom_curious_vs_vanilla/doom_maze_sparse/doom_maze_sparse_pre_0.0', 'A2C (no curiosity)'),
        Experiment('doom_curious_vs_vanilla/doom_maze_sparse/doom_maze_sparse_pre_0.05', 'A2C+ICM (curious)'),
    ]

    # noinspection PyUnusedLocal
    experiments_basic = [
        Experiment('doom_curious_vs_vanilla/doom_maze/doom_maze_pre_0.0', 'A2C (no curiosity)'),
        Experiment('doom_curious_vs_vanilla/doom_maze/doom_maze_pre_0.05', 'A2C+ICM (curious)'),
    ]

    experiments = [
        Experiment('doom_curious_vs_vanilla/doom_basic/doom_basic_pre_0.0', 'A2C (no curiosity)'),
        Experiment('doom_curious_vs_vanilla/doom_basic/doom_basic_pre_0.05', 'A2C+ICM (curious)'),
    ]

    plots = [
        Plot('a2c_aux_summary/avg_reward', 'average reward', 'Avg. reward for the last 1000 episodes'),
        Plot(
            'a2c_agent_summary/policy_entropy',
            'policy entropy, nats',
            'Stochastic policy entropy',
        ),
    ]

    for plot in plots:
        fig = plt.figure(figsize=(5, 4))
        fig.add_subplot()

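        # overlay the curves of all experiments for this metric on the same figure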
        for ex_i, experiment in enumerate(experiments):
            experiment_name = experiment.name.split(os.sep)[-1]
            experiments_root = join(*(experiment.name.split(os.sep)[:-1]))
            exp_dir = experiment_dir(experiment_name, experiments_root)

            path_to_events_dir = summaries_dir(exp_dir)
            events_files = []
            for f in os.listdir(path_to_events_dir):
                if f.startswith('events'):
                    events_files.append(join(path_to_events_dir, f))

            if len(events_files) == 0:
                log.error('No events file for %s', experiment)
                continue

            events_files = sorted(events_files)
            steps, values = [], []

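            # collect scalar values for this plot's tag from all TensorBoard event files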
            for events_file in events_files:
                iterator = tf.train.summary_iterator(events_file)
                while True:
                    try:
                        e = next(iterator, None)
                    except Exception as exc:
                        log.warning(str(exc))
                        break

                    if e is None:
                        break

                    for v in e.summary.value:
                        if e.step >= stop_at:
                            break

                        if v.tag == plot.name:
                            steps.append(e.step)
                            values.append(v.simple_value)

            # just in case
            values = np.nan_to_num(values)

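            # plot raw values with low opacity and a running-mean smoothed curve on top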
            smooth = 10
            values_smooth = running_mean(values, smooth)
            steps = steps[smooth:]
            values = values[smooth:]

            plt.plot(steps, values, color=COLORS[ex_i], alpha=0.2, label='__nolegend__')
            plt.plot(steps, values_smooth, color=COLORS[ex_i], label=experiment.descr, linewidth=2)

        plt.xlabel('environment steps')
        plt.ylabel(plot.axis)
        plt.title(plot.descr)
        plt.grid(True)
        plt.legend()
        plt.tight_layout()

        plots_dir = ensure_dir_exists(join(experiments_dir(), 'plots'))
        plot_name = plot.name.replace('/', '_')
        plt.savefig(join(plots_dir, f'{prefix}_{plot_name}.png'))
        plt.close()

    return 0
Example #12
    def experiment_dir(self):
        return experiment_dir(self._experiment_name, self.experiments_root)
Example #13
    def _checkpoint_dir(self):
        checkpoint_dir = join(experiment_dir(cfg=self.cfg), 'checkpoint')
        return ensure_dir_exists(checkpoint_dir)