Example #1
    def batched_request(self):
        """Batches requests and returns batched request."""
        if self._batched_request is not None:
            return self._batched_request

        # Request used as a filler for coroutines that have already
        # finished.
        filler = next(x for x in self._requests if x is not None)
        # Fill with 0s for easier debugging.
        filler = data.nested_map(np.zeros_like, filler)

        # Substitute the filler for Nones.
        self._requests = [
            x if x is not None else filler for x in self._requests
        ]

        def assert_not_scalar(x):
            assert np.array(x).shape, (
                'All arrays in a PredictRequest must be at least rank 1.')

        data.nested_map(assert_not_scalar, self._requests)

        def flatten_first_2_dims(x):
            return np.reshape(x, (-1, ) + x.shape[2:])

        # Stack instead of concatenate to ensure that all requests have
        # the same shape.
        self._batched_request = data.nested_stack(self._requests)
        # (n_agents, n_requests, ...) -> (n_agents * n_requests, ...)
        self._batched_request = data.nested_map(flatten_first_2_dims,
                                                self._batched_request)
        return self._batched_request
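
For orientation, here is a minimal sketch of the pytree helpers this snippet relies on. It is an assumption about their contract, not alpacka's actual implementation: nested_map applies a function to every leaf, and nested_stack stacks the corresponding leaves of a list of same-structure pytrees.

import numpy as np

def nested_map(fn, pytree):
    # Recurse into lists, tuples and dicts; anything else is a leaf.
    if isinstance(pytree, (list, tuple)):
        return type(pytree)(nested_map(fn, x) for x in pytree)
    if isinstance(pytree, dict):
        return {k: nested_map(fn, v) for k, v in pytree.items()}
    return fn(pytree)

def nested_stack(pytrees):
    # Stack the corresponding leaves of same-structure pytrees.
    first = pytrees[0]
    if isinstance(first, (list, tuple)):
        return type(first)(
            nested_stack([t[i] for t in pytrees]) for i in range(len(first)))
    if isinstance(first, dict):
        return {k: nested_stack([t[k] for t in pytrees]) for k in first}
    return np.stack(pytrees)

# Two requests from two agents, each a batch of 2 rank-1 arrays:
requests = [{'obs': np.zeros((2, 3))}, {'obs': np.ones((2, 3))}]
batched = nested_map(lambda x: np.reshape(x, (-1,) + x.shape[2:]),
                     nested_stack(requests))
assert batched['obs'].shape == (4, 3)  # (n_agents * n_requests, ...)
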
Example #2
    def add(self, stacked_datapoints):
        datapoint_shape = data.nested_map(lambda x: x.shape[1:],
                                          stacked_datapoints)
        if datapoint_shape != self._datapoint_shape:
            raise ValueError(
                'Datapoint shape mismatch: got {}, expected {}.'.format(
                    datapoint_shape, self._datapoint_shape))

        n_elems = data.choose_leaf(
            data.nested_map(lambda x: x.shape[0], stacked_datapoints))

        def insert_to_array(buf, elems):
            buf_size = buf.shape[0]
            assert elems.shape[0] == n_elems
            index = self._insert_index
            # Insert up to buf_size elements at the current index.
            buf[index:min(index + n_elems, buf_size)] = elems[:buf_size - index]
            # Insert whatever's left at the beginning of the buffer.
            buf[:max(index + n_elems - buf_size, 0)] = elems[buf_size - index:]

        data.nested_zip_with(insert_to_array,
                             (self._data_buffer, stacked_datapoints))
        if self._size < self._capacity:
            self._size = min(self._insert_index + n_elems, self._capacity)
        self._insert_index = (self._insert_index + n_elems) % self._capacity
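
A toy check of the two slice assignments in insert_to_array, using a plain integer ring buffer of capacity 5 with the insert index at 3 (all values here are illustrative):

import numpy as np

buf = np.zeros(5, dtype=int)
elems = np.array([1, 2, 3, 4])
index, n_elems, buf_size = 3, len(elems), len(buf)
# The first slice fills up to the end of the buffer...
buf[index:min(index + n_elems, buf_size)] = elems[:buf_size - index]
# ...and the second wraps the remainder to the front.
buf[:max(index + n_elems - buf_size, 0)] = elems[buf_size - index:]
assert list(buf) == [3, 4, 0, 1, 2]
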
Example #3
    def __init__(self, datapoint_signature, capacity):
        self._datapoint_shape = data.nested_map(lambda x: x.shape,
                                                datapoint_signature)
        self._capacity = capacity
        self._size = 0
        self._insert_index = 0

        def init_array(signature):
            shape = (self._capacity, ) + signature.shape
            return np.zeros(shape, dtype=signature.dtype)

        self._data_buffer = data.nested_map(init_array, datapoint_signature)
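
The signature leaves only need .shape and .dtype attributes. A hypothetical stand-in (alpacka's actual TensorSignature may differ) shows the preallocated buffers this constructor produces:

import collections
import numpy as np

# Hypothetical leaf type, for illustration only.
TensorSignature = collections.namedtuple('TensorSignature', ['shape', 'dtype'])

signature = {'observation': TensorSignature((4,), np.float32),
             'reward': TensorSignature((), np.float32)}
capacity = 8
buffers = {name: np.zeros((capacity,) + sig.shape, dtype=sig.dtype)
           for name, sig in signature.items()}
assert buffers['observation'].shape == (8, 4)
assert buffers['reward'].shape == (8,)
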
Example #4
    def calculate_position_visit_freq(self, state_visit_count):
        obs_labels = self._get_dense_obs_labels()
        room_loc_idxs = (obs_labels.index('.loc_y'), obs_labels.index('.loc_x'))
        agent_pos_idxs = (obs_labels.index('agent_y'),
                          obs_labels.index('agent_x'))
        keys_idxs = tuple(idx for idx, label in enumerate(obs_labels)
                          if 'key_' in label)
        nb_keys_total = len(keys_idxs)
        max_visits = 0
        assert nb_keys_total == len(self.keys)

        visits_counts = {room_loc: {} for room_loc in self.rooms}

        for state, state_freq in state_visit_count.items():
            obs = self.state2obs(state)
            room_loc = tuple(np.take(obs, room_loc_idxs))
            agent_pos = tuple(np.take(obs, agent_pos_idxs))
            keys_taken = np.sum(1 - np.take(obs, keys_idxs)).astype(int)

            if agent_pos not in visits_counts[room_loc]:
                visits_counts[room_loc][agent_pos] = np.zeros(
                    (1 + nb_keys_total, ))

            visits_counts[room_loc][agent_pos][keys_taken] += state_freq
            if visits_counts[room_loc][agent_pos][keys_taken] > max_visits:
                max_visits = visits_counts[room_loc][agent_pos][keys_taken]

        visits_frequencies = nested_map(lambda x: x / max_visits,
                                        visits_counts)
        return visits_frequencies
Example #5
    def predict(self, inputs):
        if self._network_signature is None:
            return inputs
        else:
            input_shapes = data.nested_map(lambda x: x.shape[1:], inputs)
            expected_shapes = data.nested_map(lambda x: x.shape,
                                              self._network_signature.input)
            assert input_shapes == expected_shapes, (
                f'Incorrect input shapes: {input_shapes} != {expected_shapes}.'
            )

            batch_size = data.choose_leaf(inputs).shape[0]
            return data.zero_pytree(self._network_signature.output,
                                    shape_prefix=(batch_size, ))
Example #6
    def __init__(self,
                 network_signature,
                 model_fn=mlp,
                 optimizer='adam',
                 loss='mean_squared_error',
                 loss_weights=None,
                 weight_decay=0.0,
                 metrics=None,
                 train_callbacks=None,
                 seed=None,
                 **compile_kwargs):
        super().__init__(network_signature)
        self._network_signature = network_signature
        self._model = model_fn(network_signature)
        self._add_weight_decay(self._model, weight_decay)

        if seed is not None:
            tf.random.set_seed(seed)

        metrics = metrics or []
        (loss, metrics) = data.nested_map(_wrap_loss, (loss, metrics))
        self._model.compile(optimizer=optimizer,
                            loss=loss,
                            loss_weights=loss_weights,
                            metrics=metrics,
                            **compile_kwargs)

        self.train_callbacks = train_callbacks or []
Example #7
    def add(self, stacked_datapoints):
        """Adds datapoints to the buffer.

        Args:
            stacked_datapoints (pytree): Transition object containing the
                datapoints, stacked along axis 0.
        """
        n_elems = data.choose_leaf(data.nested_map(
            lambda x: x.shape[0], stacked_datapoints
        ))

        def insert_to_array(buf, elems):
            buf_size = buf.shape[0]
            assert elems.shape[0] == n_elems
            index = self._insert_index
            # Insert up to buf_size at the current index.
            buf[index:min(index + n_elems, buf_size)] = elems[:buf_size - index]
            # Insert whatever's left at the beginning of the buffer.
            buf[:max(index + n_elems - buf_size, 0)] = elems[buf_size - index:]

        # Insert to all arrays in the pytree.
        data.nested_zip_with(
            insert_to_array, (self._data_buffer, stacked_datapoints)
        )
        if self._size < self._capacity:
            self._size = min(self._insert_index + n_elems, self._capacity)
        self._insert_index = (self._insert_index + n_elems) % self._capacity
Example #8
    def batched_request(self):
        if self._batched_request is not None:
            return self._batched_request

        def assert_not_scalar(x):
            assert np.array(x).shape, (
                'All arrays in a PredictRequest must be at least rank 1.'
            )

        data.nested_map(assert_not_scalar, self._requests)

        self._batched_request = data.nested_concatenate(self._requests)

        self._batch_sizes = [
            data.choose_leaf(request).shape[0]
            for request in self._requests
        ]
        return self._batched_request
Example #9
    def compute_metrics(self, episodes):
        """Computes environment related metrics."""
        nb_visited_rooms = []
        nb_keys_taken = []
        nb_doors_opened = []
        first_visits = {room_loc: [] for room_loc in self.rooms}
        observation_labels = self._get_dense_obs_labels()

        for episode in episodes:
            room_first_visits = data.nested_map(
                lambda idxs: idxs[-1],
                episode.transition_batch.env_info['room_first_visit']
            )
            nb_visited_rooms_e = 0
            for room, first_visit in room_first_visits.items():
                if first_visit is not None:
                    nb_visited_rooms_e += 1
                    first_visits[room].append(first_visit)
                    self.visited_rooms_history.add(room)
            nb_visited_rooms.append(nb_visited_rooms_e)

            last_observation = episode.transition_batch.next_observation[-1]
            keys_taken_mask = [
                1 - last_observation[idx]
                for idx, label in enumerate(observation_labels)
                if 'key_' in label
            ]
            nb_keys_taken_e = sum(keys_taken_mask)
            nb_keys_taken.append(nb_keys_taken_e)

            doors_opened_mask = [
                1 - last_observation[idx]
                for idx, label in enumerate(observation_labels)
                if 'door_' in label
            ]
            nb_doors_opened_e = sum(doors_opened_mask)
            nb_doors_opened.append(nb_doors_opened_e)

        metrics = {
            'mean_visited_rooms_in_episode': np.mean(nb_visited_rooms),
            'total_visited_rooms_in_epoch': sum(
                1 for first_visits_room in first_visits.values()
                if first_visits_room
            ),
            'total_visited_rooms_in_history': len(self.visited_rooms_history),
            'mean_keys_taken_in_episode': np.mean(nb_keys_taken),
            'mean_door_opened_in_episode': np.mean(nb_doors_opened),
        }
        for room_loc, visits in first_visits.items():
            metrics[f'visit_freq_{room_loc}'] = len(visits) / len(episodes)
            if len(visits) == 0:
                visits = [-1]
            metrics[f'min_first_visit_{room_loc}'] = min(visits)
            metrics[f'mean_first_visit_{room_loc}'] = np.mean(visits)

        return metrics
Example #10
    def batched_request(self):
        """Batches requests and returns batched request."""
        if self._batched_request is not None:
            return self._batched_request

        data.nested_map(_PredictionRequestBatcher._assert_not_scalar,
                        self._requests)

        # Stack instead of concatenate to ensure that all requests have
        # the same shape.
        batched_request_content = data.nested_stack(
            [request.content for request in self._requests])
        # (n_agents, n_requests, ...) -> (n_agents * n_requests, ...)
        batched_request_content = data.nested_map(
            _PredictionRequestBatcher._flatten_first_2_dims,
            batched_request_content)
        self._batched_request = Request(self._request_type,
                                        batched_request_content)
        return self._batched_request
Example #11
    def predict(self, inputs):
        """Returns the prediction for a given input.

        Args:
            inputs: (Agent-dependent) Batch of inputs to run prediction on.

        Returns:
            Agent-dependent: Network predictions.
        """
        return data.nested_map(lambda x: x.numpy(),
                               self._model.predict_on_batch(inputs))
Example #12
    def predict(self, inputs):
        some_leaf_shape = data.choose_leaf(inputs).shape
        assert some_leaf_shape, 'KerasNetwork only works on batched inputs.'
        batch_size = some_leaf_shape[0]

        def one_array(signature):
            return np.ones(shape=((batch_size, ) + signature.shape),
                           dtype=signature.dtype)

        masks = data.nested_map(one_array, self._network_signature.output)

        return self._model.predict_on_batch((inputs, masks))
Example #13
def _make_inputs(input_signature):
    """Initializes keras.Input layers for a given signature.

    Args:
        input_signature (pytree of TensorSignatures): Input signature.

    Returns:
        Pytree of tf.keras.Input layers.
    """
    def init_layer(signature):
        return keras.Input(shape=signature.shape, dtype='float32')

    return data.nested_map(init_layer, input_signature)
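
A quick sanity check of the keras.Input contract assumed above: the batch dimension stays implicit, so only per-datapoint shapes go into the signature.

import tensorflow as tf

layer = tf.keras.Input(shape=(4,), dtype='float32')
assert layer.shape.as_list() == [None, 4]  # leading None is the batch axis
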
Example #14
    def transform_model_outputs(self, observations, model_outputs):
        def reduce_fn(x):
            return np.mean(x, axis=-1)

        def take_fn(x):
            return np.take(x, self._index_mask, axis=-1)

        masked_model_outputs = data.nested_map(take_fn, model_outputs)
        ensemble_uncertainties = np.std(
            masked_model_outputs['next_observation'],
            axis=-1,
            dtype=np.float64)
        bonuses = np.mean(ensemble_uncertainties,
                          axis=tuple(range(1, ensemble_uncertainties.ndim)))
        reduced_model_outputs = data.nested_map(reduce_fn,
                                                masked_model_outputs)
        next_observations, rewards, dones, infos = (
            super().transform_model_outputs(
                observations, reduced_model_outputs))
        for info, bonus in zip(infos, bonuses):
            info['bonus'] = bonus
        return next_observations, rewards, dones, infos
Example #15
def zero_pytree(signature, shape_prefix=()):
    """Builds a zero-filled pytree of a given signature.

    Args:
        signature (pytree): Pytree of TensorSignature.
        shape_prefix (tuple): Shape to be prepended to each constructed array's
            shape.

    Returns:
        Pytree of a given signature with zero arrays as leaves.
    """
    return data.nested_map(
        lambda sig: np.zeros(shape=shape_prefix + sig.shape, dtype=sig.dtype),
        signature,
    )
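
A self-contained check of zero_pytree's contract, with a plain dict comprehension standing in for data.nested_map and a hypothetical namedtuple as the signature leaf:

import collections
import numpy as np

Sig = collections.namedtuple('Sig', ['shape', 'dtype'])

def zero_pytree(signature, shape_prefix=()):
    # Same construction as above; a dict comprehension replaces
    # data.nested_map in this illustrative sketch.
    return {k: np.zeros(shape_prefix + sig.shape, dtype=sig.dtype)
            for k, sig in signature.items()}

out = zero_pytree({'logits': Sig((7,), np.float32)}, shape_prefix=(32,))
assert out['logits'].shape == (32, 7)
assert not out['logits'].any()  # all zeros
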
Example #16
    def render_transition(transition):
        if transition is None:
            return {}
        return {
            'agent_info': render_agent_info(
                alpacka_data.nested_map(to_primitive, transition.agent_info)
            ),
            'action': env_renderer.render_action(transition.action),
            'reward': to_primitive(transition.reward),
        }
Example #17
    def sample(self, batch_size):
        """Samples a batch of datapoints.

        Args:
            batch_size (int): Number of datapoints to sample.

        Returns:
            Datapoint object with sampled datapoints stacked along the 0 axis.

        Raises:
            ValueError: If the buffer is empty.
        """
        if self._data_buffer is None:
            raise ValueError('Cannot sample from an empty buffer.')
        indices = np.random.randint(self._size, size=batch_size)
        return data.nested_map(lambda x: x[indices], self._data_buffer)
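
np.random.randint(self._size, ...) draws indices with replacement, and only from the filled prefix of the buffer. A toy equivalent on a plain array:

import numpy as np

buffer = np.arange(10) * 10   # capacity 10
size = 6                      # only the first 6 rows hold real data
indices = np.random.randint(size, size=4)
batch = buffer[indices]
assert batch.shape == (4,)
assert (batch < 60).all()     # never samples past the filled prefix
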
Example #18
    def __init__(self, datapoint_signature, capacity):
        """Initializes the replay buffer.

        Args:
            datapoint_signature (pytree): Pytree of TensorSignatures, defining
                the structure of data to be stored.
            capacity (int): Maximum size of the buffer.
        """
        self._capacity = capacity
        self._size = 0
        self._insert_index = 0

        def init_array(signature):
            shape = (self._capacity,) + signature.shape
            return np.zeros(shape, dtype=signature.dtype)
        self._data_buffer = data.nested_map(init_array, datapoint_signature)
Example #19
    def _log_transitions_with_false_positive_rewards(self, episodes):
        predicted_rewards = []
        for episode in episodes:
            predicted_rewards_i = np.empty_like(
                episode.transition_batch.reward
            )
            for t, (action_t, rewards_t) in enumerate(zip(
                episode.transition_batch.action,
                episode.transition_batch.agent_info['children_rewards']
            )):
                predicted_rewards_i[t] = rewards_t[action_t]
            predicted_rewards.append(predicted_rewards_i)
        false_positives_mask = [
            (predicted_reward *
             (1 - episode.transition_batch.reward))
            for episode, predicted_reward in zip(episodes, predicted_rewards)
        ]

        fp_transitions = [
            nested_map(
                functools.partial(
                    self._select_false_positive_transitions, fp_mask=fp_mask
                ),
                episode.transition_batch
            )
            for episode, fp_mask in zip(episodes, false_positives_mask)
            if fp_mask.any()
        ]

        images_logged = 0
        for fp_transition in fp_transitions:
            transition_visualizations = self._logging_env.visualize_transitions(
                fp_transition
            )

            for transition_viz in transition_visualizations:
                metric_logging.log_image(
                    f'episode_model/fp_transition_{self._epoch}',
                    images_logged, transition_viz
                )
                images_logged += 1

                if images_logged > 50:
                    break
            if images_logged > 50:
                break
Example #20
    def __init__(self,
                 network_signature,
                 temporal_diff_n,
                 gamma=1.0,
                 batch_size=64,
                 n_steps_per_epoch=1000,
                 replay_buffer_capacity=1000000,
                 replay_buffer_sampling_hierarchy=(),
                 polyak_coeff=None):
        """Initializes TDTrainer.

        Args:
            network_signature (pytree): Input signature for the network.
            temporal_diff_n (int or float): Temporal difference distance;
                np.inf is supported.
            gamma (float): Discount rate.
            batch_size (int): Batch size.
            n_steps_per_epoch (int): Number of optimizer steps to do per
                epoch.
            replay_buffer_capacity (int): Maximum size of the replay buffer.
            replay_buffer_sampling_hierarchy (tuple): Sequence of Episode
                attribute names, defining the sampling hierarchy.
            polyak_coeff (float): Polyak averaging coefficient.
        """
        super().__init__(network_signature)
        target = lambda episode: target_n_return(episode, temporal_diff_n,
                                                 gamma)
        self._target_fn = lambda episode: data.nested_map(
            lambda f: f(episode), target)
        self._batch_size = batch_size
        self._n_steps_per_epoch = n_steps_per_epoch

        td_target_signature = TDTargetData(
            cum_reward=network_signature.output,
            bootstrap_obs=network_signature.input,
            bootstrap_gamma=network_signature.output)
        datapoint_sig = (network_signature.input, td_target_signature)
        self._replay_buffer = replay_buffers.HierarchicalReplayBuffer(
            datapoint_sig,
            capacity=replay_buffer_capacity,
            hierarchy_depth=len(replay_buffer_sampling_hierarchy),
        )
        self._sampling_hierarchy = replay_buffer_sampling_hierarchy
        self._polyak_coeff = polyak_coeff
        self._target_network_params = None
        self._target_network = None
Example #21
    def unbatch_responses(self, batched_responses):
        def slice_responses(x, start_index, batch_size):
            return x[start_index:(start_index + batch_size)]

        unbatched_responses = []
        start_index = 0
        for batch_size in self._batch_sizes:
            unbatched_responses.append(data.nested_map(
                functools.partial(
                    slice_responses,
                    start_index=start_index,
                    batch_size=batch_size,
                ),
                batched_responses,
            ))
            start_index += batch_size

        return unbatched_responses
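
Examples #8 and #21 form a pair: requests are concatenated along axis 0, the per-request batch sizes are remembered, and responses are sliced back apart in the same order. A round-trip check with plain arrays standing in for the pytrees:

import numpy as np

requests = [np.zeros((2, 3)), np.ones((5, 3))]
batch_sizes = [r.shape[0] for r in requests]
batched = np.concatenate(requests)             # shape (7, 3)

start, unbatched = 0, []
for batch_size in batch_sizes:
    unbatched.append(batched[start:start + batch_size])
    start += batch_size
assert [u.shape for u in unbatched] == [(2, 3), (5, 3)]
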
Example #22
def _make_output_heads(hidden, output_signature, output_activation, zero_init):
    masks = _make_inputs(output_signature)

    def init_head(layer, signature, activation, mask):
        assert signature.dtype == np.float32
        depth = signature.shape[-1]
        kwargs = {'activation': activation}
        if zero_init:
            kwargs['kernel_initializer'] = 'zeros'
            kwargs['bias_initializer'] = 'zeros'
        head = keras.layers.Dense(depth, **kwargs)(layer)
        return AddMask()((head, mask))

    if tf.is_tensor(hidden):
        hidden = data.nested_map(lambda _: hidden, output_signature)

    heads = data.nested_zip_with(
        init_head, (hidden, output_signature, output_activation, masks))
    return (heads, masks)
Example #23
    def __init__(
            self,
            network_signature,
            input=input_observation,
            target=target_solved,
            mask=None,
            batch_size=64,
            n_steps_per_epoch=1000,
            replay_buffer_capacity=1000000,
            replay_buffer_sampling_hierarchy=(),
    ):
        super().__init__(network_signature)

        def build_episode_to_pytree_mapper(functions_pytree):
            return lambda episode: data.nested_map(lambda f: f(episode),
                                                   functions_pytree)

        self._input_fn = build_episode_to_pytree_mapper(input)
        self._target_fn = build_episode_to_pytree_mapper(target)

        if mask is None:
            mask = data.nested_map(lambda _: mask_one, target)

        self._mask_fn = lambda episode: data.nested_zip_with(
            lambda f, target: f(episode, target),
            (mask, self._target_fn(episode)))

        self._batch_size = batch_size
        self._n_steps_per_epoch = n_steps_per_epoch

        datapoint_sig = (
            network_signature.input,
            network_signature.output,
            network_signature.output,
        )
        self._replay_buffer = replay_buffers.HierarchicalReplayBuffer(
            datapoint_sig,
            capacity=replay_buffer_capacity,
            hierarchy_depth=len(replay_buffer_sampling_hierarchy),
        )
        self._sampling_hierarchy = replay_buffer_sampling_hierarchy
Example #24
    def __init__(self, datapoint_spec, capacity):
        """Initializes the replay buffer.

        Args:
            datapoint_spec (pytree): Pytree of shape tuples, defining the
                structure of data to be stored.
            capacity (int): Maximum size of the buffer.
        """
        self._capacity = capacity
        self._size = 0
        self._insert_index = 0

        def init_array(shape):
            shape = (self._capacity,) + shape
            return np.zeros(shape)
        self._data_buffer = data.nested_map(
            init_array, datapoint_spec,
            # datapoint_spec has shape tuples at its leaves; we don't want to
            # map over them, so we stop one level higher.
            stop_fn=data.is_last_level,
        )
Example #25
    def __init__(
            self,
            network_signature,
            target=target_solved,
            batch_size=64,
            n_steps_per_epoch=1000,
            replay_buffer_capacity=1000000,
            replay_buffer_sampling_hierarchy=(),
    ):
        """Initializes SupervisedTrainer.

        Args:
            network_signature (pytree): Input signature for the network.
            target (pytree): Pytree of functions episode -> target for
                determining the targets for network training. The structure of
                the tree should reflect the structure of a target.
            batch_size (int): Batch size.
            n_steps_per_epoch (int): Number of optimizer steps to do per
                epoch.
            replay_buffer_capacity (int): Maximum size of the replay buffer.
            replay_buffer_sampling_hierarchy (tuple): Sequence of Episode
                attribute names, defining the sampling hierarchy.
        """
        super().__init__(network_signature)
        self._target_fn = lambda episode: data.nested_map(
            lambda f: f(episode), target)
        self._batch_size = batch_size
        self._n_steps_per_epoch = n_steps_per_epoch

        # (input, target)
        datapoint_sig = (network_signature.input, network_signature.output)
        self._replay_buffer = replay_buffers.HierarchicalReplayBuffer(
            datapoint_sig,
            capacity=replay_buffer_capacity,
            hierarchy_depth=len(replay_buffer_sampling_hierarchy),
        )
        self._sampling_hierarchy = replay_buffer_sampling_hierarchy
Example #26
def _make_inputs(input_signature):
    def init_layer(signature):
        return keras.Input(shape=signature.shape, dtype=signature.dtype)

    return data.nested_map(init_layer, input_signature)
Example #27
def shapes(tensors):
    return data.nested_map(lambda x: x.shape, tensors)
Example #28
def dtypes(tensors):
    return data.nested_map(lambda x: x.dtype, tensors)
Example #29
    def sample(self, batch_size):
        if self._data_buffer is None:
            raise ValueError('Cannot sample from an empty buffer.')
        indices = np.random.randint(self._size, size=batch_size)
        return data.nested_map(lambda x: x[indices], self._data_buffer)
Example #30
    def __iter__(self):
        return (data.nested_map(lambda x, idx=index: x[idx], self._data_buffer)
                for index in range(self._size))
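
The idx=index default argument pins the loop variable at lambda-creation time; a bare closure would late-bind and see only the final index. A minimal demonstration of the pitfall this avoids:

# Bare closures all share the comprehension variable's final value...
fns_late = [lambda xs: xs[i] for i in range(3)]
# ...while a default argument captures the value per iteration.
fns_bound = [lambda xs, i=i: xs[i] for i in range(3)]
letters = ['a', 'b', 'c']
assert [f(letters) for f in fns_late] == ['c', 'c', 'c']
assert [f(letters) for f in fns_bound] == ['a', 'b', 'c']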