def pop(self, warn=True, peek_d=None):
    """Pop the pending result from every reward buffer, keyed by name.

    Args:
        warn: When true, log a warning if any popped ``info`` reports a
            ``'stats.reward.count'`` greater than 60.
        peek_d: Optional mapping from buffer name to the argument passed to
            that buffer's ``pop``. Names missing from the mapping (or every
            name, when ``peek_d`` is ``None``) are passed ``None``.

    Returns:
        A ``(reward_d, done_d, info_d, err_d)`` tuple. The first three are
        dicts keyed by buffer name; ``err_d`` is the value returned by
        ``self.pop_errors()``.
    """
    # The default used to be dereferenced directly, so calling pop() without
    # peek_d raised AttributeError; treat "not given" as an empty mapping.
    if peek_d is None:
        peek_d = {}

    reward_d = {}
    done_d = {}
    info_d = {}
    # Errors are collected before the buffers are drained (original order).
    err_d = self.pop_errors()

    for i, reward_buffer in self.reward_buffers.items():
        name = self.names_by_id[i]
        reward, done, info = reward_buffer.pop(peek_d.get(name))
        reward_d[name] = reward
        done_d[name] = done
        info_d[name] = info

    # TODO: use FPS here rather than 60
    if warn and any(
            info.get('stats.reward.count', 0) > 60
            for info in info_d.values()):
        # Use .get() here as well: an info dict without a count must not
        # turn the warning itself into a KeyError.
        # NOTE(review): if `logger` is a stdlib logging.Logger, `.warn` is a
        # deprecated alias of `.warning` -- confirm and switch if so.
        logger.warn(
            'WARNING: returning more than 60 aggregated rewards: %s. Either your agent is not keeping up with the framerate, or you should have called ".reset()" to clear pending rewards and reset the environments to a known state.',
            {
                name: '{} (episode_id={})'.format(
                    info.get('stats.reward.count', 0),
                    info.get('env_status.episode_id'))
                for name, info in info_d.items()
            })

    return reward_d, done_d, info_d, err_d
def pop(self, warn=True, peek_d=None):
    """Drain each reward buffer and return the results grouped by name.

    Args:
        warn: If true, emit a log warning when any popped ``info`` carries a
            ``'stats.reward.count'`` above 60.
        peek_d: Optional dict mapping a buffer name to the value forwarded to
            that buffer's ``pop``. ``None`` (the default) behaves like an
            empty dict, so every buffer is popped with ``None``.

    Returns:
        ``(reward_d, done_d, info_d, err_d)``: three dicts keyed by buffer
        name, plus the result of ``self.pop_errors()``.
    """
    # Guard the None default; previously `peek_d.get` raised AttributeError
    # whenever the caller omitted peek_d.
    peeks = {} if peek_d is None else peek_d

    reward_d, done_d, info_d = {}, {}, {}
    # pop_errors() runs before the buffers are popped, as in the original.
    err_d = self.pop_errors()

    for buffer_id, reward_buffer in self.reward_buffers.items():
        name = self.names_by_id[buffer_id]
        reward_d[name], done_d[name], info_d[name] = reward_buffer.pop(
            peeks.get(name))

    # TODO: use FPS here rather than 60
    too_many = warn and any(
        info.get('stats.reward.count', 0) > 60 for info in info_d.values())
    if too_many:
        # Build the summary with .get() so an info dict lacking a count does
        # not raise KeyError while the warning is being formatted.
        summary = {
            name: '{} (episode_id={})'.format(
                info.get('stats.reward.count', 0),
                info.get('env_status.episode_id'))
            for name, info in info_d.items()
        }
        # NOTE(review): `.warn` is a deprecated alias on stdlib loggers --
        # confirm what `logger` is before renaming to `.warning`.
        logger.warn('WARNING: returning more than 60 aggregated rewards: %s. Either your agent is not keeping up with the framerate, or you should have called ".reset()" to clear pending rewards and reset the environments to a known state.', summary)

    return reward_d, done_d, info_d, err_d