def __init__(self, env, info_key='score', name='AverageScore', buffer_size=10, batch_size=None):
  """Creates an AverageGymInfoMetric.

  Args:
    env: Instance of gym.Env that implements get_score(), which updates the metric.
    info_key: Key of the info dict entry that is being averaged.
    name: Metric name.
    buffer_size: Number of episodes to compute the average over.
    batch_size: Number of environments; defaults to len(env).
  """
  self._wrapped_gym_envs = env
  self._info_key = info_key
  batch_size = batch_size or len(env)
  self._np_state = numpy_storage.NumpyState()
  super(AverageGymInfoMetric, self).__init__(name, buffer_size=buffer_size, batch_size=batch_size)

def __init__(self, data_spec, capacity, alpha=0.6):
  """Creates a PyPrioritizedReplayBuffer.

  Args:
    data_spec: An ArraySpec or a list/tuple/nest of ArraySpecs describing a
      single item that can be stored in this buffer.
    capacity: The maximum number of items that can be stored in the buffer.
    alpha: α determines how much prioritization is used, with α = 0
      corresponding to the uniform case.
  """
  super(PyPrioritizedReplayBuffer, self).__init__(data_spec, capacity)
  logger.info('Creating an instance of %s. Params: data_spec: %s, capacity: %s, alpha: %s'
              % (type(self).__name__, str(data_spec), str(capacity), str(alpha)))

  # State variables needed to maintain the replay buffer. These were copied
  # from the uniform replay buffer.
  self._storage = numpy_storage.NumpyStorage(self._encoded_data_spec(), capacity)
  self._lock = threading.Lock()
  self._np_state = numpy_storage.NumpyState()

  # Adding elements to the replay buffer is done in a circular way.
  # Keeps track of the actual size of the replay buffer and the location
  # where to add new elements.
  self._np_state.size = np.int64(0)
  self._np_state.cur_id = np.int64(0)
  # Total number of items that went through the replay buffer.
  self._np_state.item_count = np.int64(0)

  self._prioritized_buffer_alpha = alpha
  self._prioritized_buffer_capacity = capacity
  # Array that tracks priorities. Its size equals the capacity of the replay
  # buffer: the priority stored at a given index maps to the experience at the
  # same index in the buffer, and holds the loss of that experience the last
  # time it was used for training.
  self._prioritized_buffer_priorities = np.zeros((capacity,), dtype=np.float32)

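# A minimal sketch (hypothetical helper, not an API of the buffer above) of how
# the priorities array could be turned into sampling probabilities under
# proportional prioritization, i.e. P(i) proportional to priority_i ** alpha,
# as the alpha docstring describes. The name `sample_prioritized_index` is an
# assumption for illustration only.
import numpy as np


def sample_prioritized_index(priorities, size, alpha=0.6, rng=np.random):
  """Samples one buffer index with probability proportional to priority**alpha."""
  scaled = priorities[:size].astype(np.float64) ** alpha
  total = scaled.sum()
  if total <= 0:
    # No priorities recorded yet: fall back to uniform sampling.
    return rng.randint(size)
  return rng.choice(size, p=scaled / total)
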
def __init__(self, env, name='AverageScore', buffer_size=10, batch_size=None):
  """Creates a CubeAverageScoreMetric.

  Args:
    env: Instance (or list of instances) of gym.Env that implements
      get_score(), which updates the metric.
    name: Metric name.
    buffer_size: Number of episodes to compute the average over.
    batch_size: Number of environments; defaults to the number of envs passed in.
  """
  if isinstance(env, list):
    self._env = env
  else:
    self._env = [env]
  batch_size = batch_size or len(self._env)
  self._np_state = numpy_storage.NumpyState()
  # Set a dummy value on self._np_state.adds_to_buff so it gets included in
  # the first checkpoint (before the metric is first called). It is also used
  # so that the buffer is not over-populated by trajectories returned from
  # short episodes.
  self._np_state.adds_to_buff = np.array(0, dtype=float)
  super(CubeAverageScoreMetric, self).__init__(name, buffer_size=buffer_size, batch_size=batch_size)

def testSaveRestore(self):
  arrays = numpy_storage.NumpyState()
  checkpoint = tf.train.Checkpoint(numpy_arrays=arrays)
  arrays.x = np.ones([3, 4])
  directory = self.get_temp_dir()
  prefix = os.path.join(directory, 'ckpt')
  save_path = checkpoint.save(prefix)
  arrays.x[:] = 0.
  self.assertAllEqual(arrays.x, np.zeros([3, 4]))
  checkpoint.restore(save_path).assert_consumed()
  self.assertAllEqual(arrays.x, np.ones([3, 4]))
  second_checkpoint = tf.train.Checkpoint(
      numpy_arrays=numpy_storage.NumpyState())
  # Attributes of NumpyState objects are created automatically by restore().
  second_checkpoint.restore(save_path).assert_consumed()
  self.assertAllEqual(np.ones([3, 4]), second_checkpoint.numpy_arrays.x)

def __init__(self, name='AverageReturn', buffer_size=10, batch_size=None):
  """Creates an AverageControlCostMetric."""
  self._np_state = numpy_storage.NumpyState()
  # Set a dummy value on self._np_state.control_cost so it gets included in
  # the first checkpoint (before the metric is first called).
  self._np_state.control_cost = np.float64(0)
  super(AverageControlCostMetric, self).__init__(name, buffer_size=buffer_size, batch_size=batch_size)

def __init__(self, name='QMetric', buffer_size=10):
  super(QMetric, self).__init__(name)
  self._buffer = StatsNumpyDeque(maxlen=buffer_size, dtype=np.float64)
  self._count = StatsNumpyDeque(maxlen=buffer_size, dtype=np.float64)
  self._sumcount = StatsNumpyDeque(maxlen=buffer_size, dtype=np.float64)
  self._np_state = numpy_storage.NumpyState()
  self._np_state._most_recent_q = np.float64(-100)  # pylint: disable=protected-access
  self._np_state._most_recent_time = np.int64(0)  # pylint: disable=protected-access
  self.reset()

def __init__(self,
             name: Text = 'AverageEpisodeLength',
             buffer_size: types.Int = 10,
             batch_size: Optional[types.Int] = None):
  """Creates an AverageEpisodeLengthMetric."""
  self._np_state = numpy_storage.NumpyState()
  # Set a dummy value on self._np_state.episode_steps so it gets included in
  # the first checkpoint (before the metric is first called).
  self._np_state.episode_steps = np.float64(0)
  super(AverageEpisodeLengthMetric, self).__init__(
      name, buffer_size=buffer_size, batch_size=batch_size)

def __init__(self, name='MinitaurAverageMaxSpeed', buffer_size=10, batch_size=None):
  """Creates a metric for minitaur speed stats."""
  self._np_state = numpy_storage.NumpyState()
  # Set a dummy value on self._np_state.speed so it gets included in
  # the first checkpoint (before the metric is first called).
  self._np_state.speed = np.array(0, dtype=float)
  super(MinitaurAverageMaxSpeedMetric, self).__init__(name, buffer_size=buffer_size, batch_size=batch_size)

def __init__(self, max_episode_len=500, dtype=bool, name='AverageEarlyFailure',
             buffer_size=10, batch_size=None):
  """Creates an AverageEarlyFailureMetric."""
  self._np_state = numpy_storage.NumpyState()
  self._max_episode_len = max_episode_len
  # Set a dummy value on self._np_state.episode_steps so it gets included in
  # the first checkpoint (before the metric is first called).
  self._np_state.episode_steps = np.array(0, dtype=np.int32)
  super(AverageEarlyFailureMetric, self).__init__(
      name, buffer_size=buffer_size, batch_size=batch_size)

def __init__(self, name='DistributionEpisodeLength', buffer_size=10, batch_size=None):
  """Creates a DistributionEpisodeLengthMetric."""
  self._np_state = numpy_storage.NumpyState()
  # Set dummy values on self._np_state.episode_steps and episode_end_mask so
  # they get included in the first checkpoint (before the metric is first called).
  self._np_state.episode_steps = np.float64(0)
  self._np_state.episode_end_mask = np.float64(0)
  super(DistributionEpisodeLengthMetric, self).__init__(name, buffer_size=buffer_size, batch_size=batch_size)
  # Overwrite the buffer to enable more statistics computation.
  self._buffer = StatsNumpyDeque(maxlen=buffer_size, dtype=np.float64)

def __init__(self, name='DistributionReturn', buffer_size=10, batch_size=None):
  """Creates a DistributionReturnMetric."""
  self._np_state = numpy_storage.NumpyState()
  # Set dummy values on self._np_state.episode_return and episode_end_mask so
  # they get included in the first checkpoint (before the metric is first called).
  self._np_state.episode_return = np.float64(0)
  self._np_state.episode_end_mask = np.float64(0)
  super(DistributionReturnMetric, self).__init__(name, buffer_size=buffer_size, batch_size=batch_size)
  # Overwrite the buffer to enable more statistics computation.
  self._buffer = StatsNumpyDeque(maxlen=buffer_size, dtype=np.float64)

def __init__(self, n_agents, name='MultiagentAverageReturn', buffer_size=10, batch_size=None):
  """Creates an AverageReturnPyMetric."""
  self.n_agents = n_agents
  self._np_state = numpy_storage.NumpyState()
  # Set a dummy value on self._np_state.episode_return so it gets included in
  # the first checkpoint (before the metric is first called).
  self._np_state.episode_return = np.float64(0)
  self._agent_metrics = [
      py_metrics.AverageReturnMetric('AverageReturn%i' % i, buffer_size=buffer_size)
      for i in range(n_agents)
  ]
  super(AverageReturnPyMetric, self).__init__(name, buffer_size=buffer_size, batch_size=batch_size)

def __init__(self, name, num_envs, env_batch_size, buffer_size=None):
  """Creates an AsyncStreamingMetric.

  Args:
    name (str): Name of the metric.
    num_envs (int): Number of tf_agents.environments; each environment is a
      batched environment (contains multiple independent envs).
    env_batch_size (int): The size of each batched environment.
    buffer_size (int): The window size of data points to average over;
      defaults to max(env_batch_size, 10).
  """
  # The total number of independent environments is num_envs * env_batch_size.
  num_envs *= env_batch_size
  self._env_batch_size = env_batch_size
  self._np_state = numpy_storage.NumpyState()
  # Set a dummy value on self._np_state.episode_return so it gets included in
  # the first checkpoint (before the metric is first called).
  self._np_state.episode_return = np.float64(0)
  if buffer_size is None:
    buffer_size = max(env_batch_size, 10)
  super(AsyncStreamingMetric, self).__init__(buffer_size=buffer_size, num_envs=num_envs, name=name)

def __init__(self, data_spec, capacity):
  """Creates a PyUniformReplayBuffer.

  Args:
    data_spec: An ArraySpec or a list/tuple/nest of ArraySpecs describing a
      single item that can be stored in this buffer.
    capacity: The maximum number of items that can be stored in the buffer.
  """
  super(PyUniformReplayBuffer, self).__init__(data_spec, capacity)

  self._storage = numpy_storage.NumpyStorage(self._encoded_data_spec(), capacity)
  self._lock = threading.Lock()
  self._np_state = numpy_storage.NumpyState()

  # Adding elements to the replay buffer is done in a circular way.
  # Keeps track of the actual size of the replay buffer and the location
  # where to add new elements.
  self._np_state.size = np.int64(0)
  self._np_state.cur_id = np.int64(0)
  # Total number of items that went through the replay buffer.
  self._np_state.item_count = np.int64(0)

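# A minimal sketch (hypothetical helper, not part of PyUniformReplayBuffer's
# API) showing how the circular counters initialized above are typically
# advanced after one item has been written at index np_state.cur_id.
import numpy as np


def advance_circular_counters(np_state, capacity):
  """Advances size/cur_id/item_count for a circular buffer of `capacity` slots."""
  np_state.size = np.int64(min(int(np_state.size) + 1, capacity))    # grows until the buffer is full
  np_state.cur_id = np.int64((int(np_state.cur_id) + 1) % capacity)  # next write position wraps around
  np_state.item_count += 1                                           # total items ever added, never wraps
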
def __init__(self, name: Text = 'Counter'):
  super(CounterMetric, self).__init__(name)
  self._np_state = numpy_storage.NumpyState()
  self.reset()

def __init__(self, name: Text = 'NumberOfEpisodes'):
  super(NumberOfEpisodes, self).__init__(name)
  self._np_state = numpy_storage.NumpyState()
  self.reset()

def __init__(self, name: Text = 'EnvironmentSteps'):
  super(EnvironmentSteps, self).__init__(name)
  self._np_state = numpy_storage.NumpyState()
  self.reset()

def __init__(self, name='PyScoreMetric', buffer_size=10, batch_size=None):
  super(PyScoreMetric, self).__init__(name=name, buffer_size=buffer_size, batch_size=batch_size)
  self._np_state = numpy_storage.NumpyState()
  self._np_state.episode_score = np.float64(0)