Пример #1
0
    def pop(self, warn=True, peek_d=None):
        """Pop every reward buffer and collect the results keyed by name.

        Args:
            warn: When true, log a warning if any returned info dict
                reports a 'stats.reward.count' greater than 60.
            peek_d: Optional mapping from buffer name to the argument
                forwarded to that buffer's ``pop``. Names absent from the
                mapping (or ``peek_d=None``) forward ``None``.
                NOTE(review): presumably a per-environment "peek" flag --
                confirm against the reward buffer implementation.

        Returns:
            A ``(reward_d, done_d, info_d, err_d)`` tuple. The first three
            are dicts keyed by buffer name; ``err_d`` is whatever
            ``self.pop_errors()`` returned.
        """
        # The default is None, so normalise before calling .get() on it;
        # otherwise a plain pop() raises AttributeError.
        if peek_d is None:
            peek_d = {}

        reward_d = {}
        done_d = {}
        info_d = {}
        err_d = self.pop_errors()

        for i, reward_buffer in self.reward_buffers.items():
            name = self.names_by_id[i]

            reward, done, info = reward_buffer.pop(peek_d.get(name))
            reward_d[name] = reward
            done_d[name] = done
            info_d[name] = info

        # TODO: use FPS here rather than 60
        if warn and any(
                info.get('stats.reward.count', 0) > 60
                for info in info_d.values()):
            logger.warn(
                'WARNING: returning more than 60 aggregated rewards: %s. Either your agent is not keeping up with the framerate, or you should have called ".reset()" to clear pending rewards and reset the environments to a known state.',
                {
                    # Use .get() like the guard above: an info dict that
                    # lacks the counter must not raise while we are only
                    # trying to report a different, overloaded buffer.
                    name: '{} (episode_id={})'.format(
                        info.get('stats.reward.count', 0),
                        info.get('env_status.episode_id'))
                    for name, info in info_d.items()
                })

        return reward_d, done_d, info_d, err_d
Пример #2
0
    def pop(self, warn=True, peek_d=None):
        """Pop all reward buffers and return the per-name results.

        Args:
            warn: If true, emit a log warning when any popped info dict
                has 'stats.reward.count' above 60.
            peek_d: Optional dict mapping a buffer name to the value passed
                to that buffer's ``pop``. Missing names, or ``None`` for
                the whole argument, result in ``None`` being passed.
                NOTE(review): looks like a per-environment "peek" switch --
                verify against the reward buffer class.

        Returns:
            Tuple ``(reward_d, done_d, info_d, err_d)``: three dicts keyed
            by buffer name, plus the result of ``self.pop_errors()``.
        """
        # peek_d defaults to None; substitute an empty mapping so the
        # per-name lookup below does not raise AttributeError.
        if peek_d is None:
            peek_d = {}

        reward_d = {}
        done_d = {}
        info_d = {}
        err_d = self.pop_errors()

        for i, reward_buffer in self.reward_buffers.items():
            name = self.names_by_id[i]

            reward, done, info = reward_buffer.pop(peek_d.get(name))
            reward_d[name] = reward
            done_d[name] = done
            info_d[name] = info

        # TODO: use FPS here rather than 60
        if warn and any(info.get('stats.reward.count', 0) > 60 for info in info_d.values()):
            # Read the counter with .get(), matching the check above, so an
            # info dict without it cannot raise KeyError during reporting.
            logger.warn('WARNING: returning more than 60 aggregated rewards: %s. Either your agent is not keeping up with the framerate, or you should have called ".reset()" to clear pending rewards and reset the environments to a known state.',
                        {name: '{} (episode_id={})'.format(info.get('stats.reward.count', 0), info.get('env_status.episode_id')) for name, info in info_d.items()})

        return reward_d, done_d, info_d, err_d