예제 #1
0
def memory_stats(process, device):
    memory_mb = memory_consumption_mb()
    stats = {f'memory_{process}': memory_mb}
    if device.type != 'cpu':
        gpu_mem_mb = torch.cuda.memory_allocated(device) / 1e6
        gpu_cache_mb = torch.cuda.memory_reserved(device) / 1e6
        stats.update({
            f'gpu_mem_{process}': gpu_mem_mb,
            f'gpu_cache_{process}': gpu_cache_mb
        })

    return stats
예제 #2
0
    def report_experiment_summaries(self, fps, sample_throughput):
        memory_mb = memory_consumption_mb()

        default_policy = 0
        for policy_id, env_steps in self.env_steps.items():
            if policy_id == default_policy:
                self.writers[policy_id].add_scalar('0_aux/_fps', fps,
                                                   env_steps)
                self.writers[policy_id].add_scalar(
                    '0_aux/master_process_memory_mb', float(memory_mb),
                    env_steps)
                for key, value in self.avg_stats.items():
                    if len(value) >= value.maxlen or (
                            len(value) > 10
                            and self.total_train_seconds > 300):
                        self.writers[policy_id].add_scalar(
                            f'stats/{key}', np.mean(value), env_steps)

                for key, value in self.stats.items():
                    self.writers[policy_id].add_scalar(f'stats/{key}', value,
                                                       env_steps)

            if not math.isnan(sample_throughput[policy_id]):
                self.writers[policy_id].add_scalar(
                    '0_aux/_sample_throughput', sample_throughput[policy_id],
                    env_steps)

            for key, stat in self.policy_avg_stats.items():
                if len(stat[policy_id]) >= stat[policy_id].maxlen or (
                        len(stat[policy_id]) > 10
                        and self.total_train_seconds > 300):
                    stat_value = np.mean(stat[policy_id])
                    writer = self.writers[policy_id]
                    writer.add_scalar(f'0_aux/avg_{key}', float(stat_value),
                                      env_steps)

                    # for key stats report min/max as well
                    if key in ('reward', 'true_reward', 'len'):
                        writer.add_scalar(f'0_aux/avg_{key}_min',
                                          float(min(stat[policy_id])),
                                          env_steps)
                        writer.add_scalar(f'0_aux/avg_{key}_max',
                                          float(max(stat[policy_id])),
                                          env_steps)

            for extra_summaries_func in EXTRA_PER_POLICY_SUMMARIES:
                extra_summaries_func(policy_id, self.policy_avg_stats,
                                     env_steps, self.writers[policy_id],
                                     self.cfg)
예제 #3
0
    def _report_basic_summaries(self, fps, avg_reward, avg_length):
        self.writer.add_scalar('0_aux/fps', fps, self.env_steps)

        memory_mb = memory_consumption_mb()
        self.writer.add_scalar('0_aux/master_process_memory_mb',
                               float(memory_mb), self.env_steps)

        if math.isnan(avg_reward) or math.isnan(avg_length):
            # not enough data to report yet
            return

        self.writer.add_scalar('0_aux/avg_reward', float(avg_reward),
                               self.env_steps)
        self.writer.add_scalar('0_aux/avg_length', float(avg_length),
                               self.env_steps)

        self.writer.add_scalar('0_aux/best_reward_ever',
                               float(self.best_avg_reward), self.env_steps)
예제 #4
0
    def _run(self):
        """
        Main loop of the actor worker (rollout worker).
        Process tasks (mainly ROLLOUT_STEP) until we get the termination signal, which usually means end of training.
        Currently there is no mechanism to restart dead workers if something bad happens during training. We can only
        retry on the initial reset(). This is definitely something to work on.
        """
        log.info('Initializing vector env runner %d...', self.worker_idx)

        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        torch.multiprocessing.set_sharing_strategy('file_system')

        timing = Timing()

        last_report = time.time()
        with torch.no_grad():
            while not self.terminate:
                try:
                    try:
                        with timing.add_time('waiting'), timing.timeit('wait_actor'):
                            tasks = self.task_queue.get_many(timeout=0.1)
                    except Empty:
                        tasks = []

                    for task in tasks:
                        task_type, data = task

                        if task_type == TaskType.INIT:
                            self._init()
                            continue

                        if task_type == TaskType.TERMINATE:
                            self._terminate()
                            break

                        # handling actual workload
                        if task_type == TaskType.ROLLOUT_STEP:
                            if 'work' not in timing:
                                timing.waiting = 0  # measure waiting only after real work has started

                            with timing.add_time('work'), timing.timeit('one_step'):
                                self._advance_rollouts(data, timing)
                        elif task_type == TaskType.RESET:
                            with timing.add_time('reset'):
                                self._handle_reset()
                        elif task_type == TaskType.PBT:
                            self._process_pbt_task(data)

                    if time.time() - last_report > 5.0 and 'one_step' in timing:
                        timing_stats = dict(wait_actor=timing.wait_actor, step_actor=timing.one_step)
                        memory_mb = memory_consumption_mb()
                        stats = dict(memory_actor=memory_mb)
                        self.report_queue.put(dict(timing=timing_stats, stats=stats))
                        last_report = time.time()

                except RuntimeError as exc:
                    log.warning('Error while processing data w: %d, exception: %s', self.worker_idx, exc)
                    log.warning('Terminate process...')
                    self.terminate = True
                    self.report_queue.put(dict(critical_error=self.worker_idx))
                except KeyboardInterrupt:
                    self.terminate = True
                except:
                    log.exception('Unknown exception in rollout worker')
                    self.terminate = True

        if self.worker_idx <= 1:
            time.sleep(0.1)
            log.info(
                'Env runner %d, CPU aff. %r, rollouts %d: timing %s',
                self.worker_idx, psutil.Process().cpu_affinity(), self.num_complete_rollouts, timing,
            )
예제 #5
0
    def sample(self, proc_idx):
        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        if self.cfg.sampler_worker_gpus:
            set_gpus_for_process(
                proc_idx,
                num_gpus_per_process=1,
                process_type='sampler_proc',
                gpu_mask=self.cfg.sampler_worker_gpus,
            )

        timing = Timing()

        from threadpoolctl import threadpool_limits
        with threadpool_limits(limits=1, user_api=None):
            if self.cfg.set_workers_cpu_affinity:
                set_process_cpu_affinity(proc_idx, self.cfg.num_workers)

            initial_cpu_affinity = psutil.Process().cpu_affinity(
            ) if platform != 'darwin' else None
            psutil.Process().nice(10)

            with timing.timeit('env_init'):
                envs = []
                env_key = ['env' for _ in range(self.cfg.num_envs_per_worker)]

                for env_idx in range(self.cfg.num_envs_per_worker):
                    global_env_id = proc_idx * self.cfg.num_envs_per_worker + env_idx
                    env_config = AttrDict(worker_index=proc_idx,
                                          vector_index=env_idx,
                                          env_id=global_env_id)

                    env = make_env_func(cfg=self.cfg, env_config=env_config)
                    log.debug(
                        'CPU affinity after create_env: %r',
                        psutil.Process().cpu_affinity()
                        if platform != 'darwin' else 'MacOS - None')
                    env.seed(global_env_id)
                    envs.append(env)

                    # this is to track the performance for individual DMLab levels
                    if hasattr(env.unwrapped, 'level_name'):
                        env_key[env_idx] = env.unwrapped.level_name

                episode_length = [0 for _ in envs]
                episode_lengths = [deque([], maxlen=20) for _ in envs]

            # sample a lot of random actions once, otherwise it is pretty slow in Python
            total_random_actions = 500
            actions = [[
                env.action_space.sample() for _ in range(env.num_agents)
            ] for _ in range(total_random_actions)]
            action_i = 0

            try:
                with timing.timeit('first_reset'):
                    for env_idx, env in enumerate(envs):
                        env.reset()
                        log.info('Process %d finished resetting %d/%d envs',
                                 proc_idx, env_idx + 1, len(envs))

                    self.report_queue.put(
                        dict(proc_idx=proc_idx, finished_reset=True))

                self.start_event.wait()

                with timing.timeit('work'):
                    last_report = last_report_frames = total_env_frames = 0
                    while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                        for env_idx, env in enumerate(envs):
                            with timing.add_time(f'{env_key[env_idx]}.step'):
                                obs, rewards, dones, infos = env.step(
                                    actions[action_i])
                                action_i = (action_i +
                                            1) % total_random_actions

                            num_frames = sum(
                                [info.get('num_frames', 1) for info in infos])
                            total_env_frames += num_frames
                            episode_length[env_idx] += num_frames

                            if all(dones):
                                episode_lengths[env_idx].append(
                                    episode_length[env_idx] / env.num_agents)
                                episode_length[env_idx] = 0

                        with timing.add_time('report'):
                            now = time.time()
                            if now - last_report > self.report_every_sec:
                                last_report = now
                                frames_since_last_report = total_env_frames - last_report_frames
                                last_report_frames = total_env_frames
                                self.report_queue.put(
                                    dict(proc_idx=proc_idx,
                                         env_frames=frames_since_last_report))

                                if proc_idx == 0:
                                    log.debug('Memory usage: %.4f Mb',
                                              memory_consumption_mb())

                # Extra check to make sure cpu affinity is preserved throughout the execution.
                # I observed weird effect when some environments tried to alter affinity of the current process, leading
                # to decreased performance.
                # This can be caused by some interactions between deep learning libs, OpenCV, MKL, OpenMP, etc.
                # At least user should know about it if this is happening.
                cpu_affinity = psutil.Process().cpu_affinity(
                ) if platform != 'darwin' else None
                assert initial_cpu_affinity == cpu_affinity, \
                    f'Worker CPU affinity was changed from {initial_cpu_affinity} to {cpu_affinity}!' \
                    f'This can significantly affect performance!'

            except:
                log.exception('Unknown exception')
                log.error('Unknown exception in worker %d, terminating...',
                          proc_idx)
                self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

            time.sleep(proc_idx * 0.01 + 0.01)
            log.info('Process %d finished sampling. Timing: %s', proc_idx,
                     timing)

            for env_idx, env in enumerate(envs):
                if len(episode_lengths[env_idx]) > 0:
                    log.warning('Level %s avg episode len %d',
                                env_key[env_idx],
                                np.mean(episode_lengths[env_idx]))

            for env in envs:
                env.close()