def batched_request(self):
    """Builds, caches and returns the batched form of the stored requests.

    The result is memoized in ``self._batched_request``; once it is set,
    later calls return it without recomputation.

    As a side effect, ``self._requests`` is rebound to a list in which every
    ``None`` entry is replaced by a zero-filled copy of the first
    non-``None`` request.

    Returns:
        The requests stacked with ``data.nested_stack``, with the first two
        dimensions of every array merged into one.
    """
    cached = self._batched_request
    if cached is not None:
        return cached

    def assert_not_scalar(x):
        assert np.array(x).shape, (
            'All arrays in a PredictRequest must be at least rank 1.')

    def flatten_first_2_dims(x):
        # (n_agents, n_requests, ...) -> (n_agents * n_requests, ...)
        return np.reshape(x, (-1, ) + x.shape[2:])

    # Coroutines that have already finished leave None in their slot. Use a
    # zeroed copy of a real request as a stand-in; zeros are easier to spot
    # while debugging.
    template = next(req for req in self._requests if req is not None)
    filler = data.nested_map(np.zeros_like, template)
    self._requests = [
        filler if req is None else req for req in self._requests
    ]

    data.nested_map(assert_not_scalar, self._requests)

    # Stacking (rather than concatenating) requires all requests to have the
    # same shape.
    self._batched_request = data.nested_stack(self._requests)
    self._batched_request = data.nested_map(
        flatten_first_2_dims, self._batched_request)
    return self._batched_request
def add(self, stacked_datapoints):
    """Inserts datapoints, stacked along axis 0, into the circular buffer.

    Args:
        stacked_datapoints (pytree): Arrays whose trailing shape
            (``shape[1:]``) must equal the per-datapoint shape recorded in
            ``self._datapoint_shape``.

    Raises:
        ValueError: If the per-datapoint shapes do not match the expected
            ones.
    """
    got_shape = data.nested_map(lambda leaf: leaf.shape[1:],
                                stacked_datapoints)
    if got_shape != self._datapoint_shape:
        raise ValueError(
            'Datapoint shape mismatch: got {}, expected {}.'.format(
                got_shape, self._datapoint_shape))

    leading_dims = data.nested_map(lambda leaf: leaf.shape[0],
                                   stacked_datapoints)
    n_elems = data.choose_leaf(leading_dims)
    start = self._insert_index

    def write_wrapped(buf, elems):
        buf_size = buf.shape[0]
        assert elems.shape[0] == n_elems
        room = buf_size - start
        stop = min(start + n_elems, buf_size)
        spill = max(start + n_elems - buf_size, 0)
        # Fill from the insert position up to the end of the buffer...
        buf[start:stop] = elems[:room]
        # ...and wrap whatever did not fit around to the beginning.
        buf[:spill] = elems[room:]

    data.nested_zip_with(write_wrapped,
                         (self._data_buffer, stacked_datapoints))

    end = start + n_elems
    if self._size < self._capacity:
        self._size = min(end, self._capacity)
    self._insert_index = end % self._capacity
def __init__(self, datapoint_signature, capacity):
    """Allocates a zero-filled buffer for ``capacity`` datapoints.

    Args:
        datapoint_signature (pytree): Leaves expose ``shape`` and ``dtype``;
            one array of shape ``(capacity,) + leaf.shape`` is allocated per
            leaf.
        capacity (int): Number of datapoint slots in the buffer.
    """
    self._capacity = capacity
    self._size = 0
    self._insert_index = 0
    # Kept so that incoming datapoints can be validated against it.
    self._datapoint_shape = data.nested_map(
        lambda signature: signature.shape, datapoint_signature)

    def allocate(signature):
        return np.zeros((self._capacity, ) + signature.shape,
                        dtype=signature.dtype)

    self._data_buffer = data.nested_map(allocate, datapoint_signature)
def calculate_position_visit_freq(self, state_visit_count):
    """Aggregates state visit counts per room, agent position and keys taken.

    Args:
        state_visit_count (dict): Maps a state to its visit count. Each
            state is converted to an observation with ``self.state2obs``.

    Returns:
        Nested dict ``{room_loc: {agent_pos: array}}`` where the array has
        ``1 + number_of_keys`` entries, indexed by the number of keys taken,
        and every entry is the accumulated count divided by the largest
        accumulated count found.
    """
    labels = self._get_dense_obs_labels()
    room_loc_idxs = (labels.index('.loc_y'), labels.index('.loc_x'))
    agent_pos_idxs = (labels.index('agent_y'), labels.index('agent_x'))
    keys_idxs = tuple(
        position for position, label in enumerate(labels) if 'key_' in label)
    nb_keys_total = len(keys_idxs)
    assert nb_keys_total == len(self.keys)

    max_visits = 0
    visits_counts = {room_loc: {} for room_loc in self.rooms}
    for state, state_freq in state_visit_count.items():
        obs = self.state2obs(state)
        room_loc = tuple(np.take(obs, room_loc_idxs))
        agent_pos = tuple(np.take(obs, agent_pos_idxs))
        # Key entries are subtracted from 1 before summing to count the keys
        # taken.
        keys_taken = np.sum(1 - np.take(obs, keys_idxs)).astype(int)

        room_counts = visits_counts[room_loc]
        if agent_pos not in room_counts:
            room_counts[agent_pos] = np.zeros((1 + nb_keys_total, ))
        position_counts = room_counts[agent_pos]
        position_counts[keys_taken] += state_freq
        if position_counts[keys_taken] > max_visits:
            max_visits = position_counts[keys_taken]

    return nested_map(lambda x: x / max_visits, visits_counts)
def predict(self, inputs):
    """Returns a placeholder prediction for a batch of inputs.

    Args:
        inputs (pytree): Batched inputs; axis 0 of every array is the batch
            axis.

    Returns:
        ``inputs`` unchanged when no network signature is set; otherwise a
        zero-filled pytree built by ``data.zero_pytree`` from the output
        signature, prefixed with the batch size.
    """
    signature = self._network_signature
    if signature is None:
        return inputs

    # Compare per-example shapes (batch axis stripped) with the signature.
    input_shapes = data.nested_map(lambda x: x.shape[1:], inputs)
    expected_shapes = data.nested_map(lambda x: x.shape, signature.input)
    assert input_shapes == expected_shapes, (
        f'Incorrect input shapes: {input_shapes} != {expected_shapes}.'
    )

    batch_size = data.choose_leaf(inputs).shape[0]
    return data.zero_pytree(signature.output, shape_prefix=(batch_size, ))
def __init__(
    self,
    network_signature,
    model_fn=mlp,
    optimizer='adam',
    loss='mean_squared_error',
    loss_weights=None,
    weight_decay=0.0,
    metrics=None,
    train_callbacks=None,
    seed=None,
    **compile_kwargs
):
    """Builds and compiles the Keras model.

    Args:
        network_signature: Signature passed to the base class and to
            ``model_fn``.
        model_fn (callable): Called as ``model_fn(network_signature)`` to
            build the model.
        optimizer: Forwarded to ``model.compile``.
        loss (pytree): Losses; every leaf is wrapped with ``_wrap_loss``
            before being forwarded to ``model.compile``.
        loss_weights: Forwarded to ``model.compile``.
        weight_decay: Passed to ``self._add_weight_decay`` together with the
            model.
        metrics (pytree): Metrics, wrapped like ``loss``. ``None`` (or any
            falsy value) means no metrics.
        train_callbacks (list): Stored in ``self.train_callbacks``; ``None``
            means an empty list.
        seed (int): If not ``None``, the global TensorFlow seed is set to it
            before the model is built.
        **compile_kwargs: Extra keyword arguments for ``model.compile``.
    """
    super().__init__(network_signature)
    self._network_signature = network_signature

    # Seed before the model is constructed: seeding afterwards cannot
    # influence the initial weights that model construction creates.
    if seed is not None:
        tf.random.set_seed(seed)

    self._model = model_fn(network_signature)
    self._add_weight_decay(self._model, weight_decay)

    metrics = metrics or []
    (loss, metrics) = data.nested_map(_wrap_loss, (loss, metrics))
    self._model.compile(
        optimizer=optimizer,
        loss=loss,
        loss_weights=loss_weights,
        metrics=metrics,
        **compile_kwargs
    )

    self.train_callbacks = train_callbacks or []
def add(self, stacked_datapoints):
    """Adds datapoints to the buffer.

    The buffer is circular: writes start at the current insert index and
    wrap around to the beginning when they reach the end.

    Args:
        stacked_datapoints (pytree): Transition object containing the
            datapoints, stacked along axis 0.
    """
    batch_dims = data.nested_map(lambda leaf: leaf.shape[0],
                                 stacked_datapoints)
    n_elems = data.choose_leaf(batch_dims)
    write_at = self._insert_index

    def copy_into(buf, elems):
        buf_size = buf.shape[0]
        assert elems.shape[0] == n_elems
        free_tail = buf_size - write_at
        tail_stop = min(write_at + n_elems, buf_size)
        wrapped = max(write_at + n_elems - buf_size, 0)
        # Part that fits between the insert index and the end of the buffer.
        buf[write_at:tail_stop] = elems[:free_tail]
        # Remainder goes to the beginning of the buffer.
        buf[:wrapped] = elems[free_tail:]

    # Apply the copy to every array in the pytree.
    data.nested_zip_with(copy_into, (self._data_buffer, stacked_datapoints))

    write_end = write_at + n_elems
    if self._size < self._capacity:
        self._size = min(write_end, self._capacity)
    self._insert_index = write_end % self._capacity
def batched_request(self):
    """Builds, caches and returns the concatenation of the stored requests.

    On the first call this also records, in ``self._batch_sizes``, the size
    of axis 0 of one leaf of every request.

    Returns:
        Result of ``data.nested_concatenate`` over ``self._requests``.
    """
    if self._batched_request is None:

        def assert_not_scalar(x):
            assert np.array(x).shape, (
                'All arrays in a PredictRequest must be at least rank 1.'
            )

        data.nested_map(assert_not_scalar, self._requests)

        self._batched_request = data.nested_concatenate(self._requests)
        sizes = []
        for request in self._requests:
            sizes.append(data.choose_leaf(request).shape[0])
        self._batch_sizes = sizes

    return self._batched_request
def compute_metrics(self, episodes):
    """Computes environment related metrics.

    Args:
        episodes (list): Episodes whose ``transition_batch`` provides
            ``env_info['room_first_visit']`` and ``next_observation``.

    Returns:
        dict: Aggregate room, key and door statistics, plus per-room visit
        frequency and min/mean first-visit values (computed over ``[-1]``
        for rooms that were never visited).
    """
    first_visits = {room_loc: [] for room_loc in self.rooms}
    observation_labels = self._get_dense_obs_labels()
    key_idxs = [
        idx for idx, label in enumerate(observation_labels)
        if 'key_' in label
    ]
    door_idxs = [
        idx for idx, label in enumerate(observation_labels)
        if 'door_' in label
    ]

    nb_visited_rooms = []
    nb_keys_taken = []
    nb_doors_opened = []
    for episode in episodes:
        # Only the last entry of each room's first-visit record is used.
        room_first_visits = data.nested_map(
            lambda idxs: idxs[-1],
            episode.transition_batch.env_info['room_first_visit']
        )
        visited_in_episode = 0
        for room, first_visit in room_first_visits.items():
            if first_visit is None:
                continue
            visited_in_episode += 1
            first_visits[room].append(first_visit)
            self.visited_rooms_history.add(room)
        nb_visited_rooms.append(visited_in_episode)

        last_observation = episode.transition_batch.next_observation[-1]
        # Entries are subtracted from 1 before summing to obtain the counts.
        nb_keys_taken.append(
            sum(1 - last_observation[idx] for idx in key_idxs))
        nb_doors_opened.append(
            sum(1 - last_observation[idx] for idx in door_idxs))

    metrics = {
        'mean_visited_rooms_in_episode': np.mean(nb_visited_rooms),
        'total_visited_rooms_in_epoch': sum(
            1 for room_visits in first_visits.values()
            if len(room_visits) > 0
        ),
        'total_visited_rooms_in_history': len(self.visited_rooms_history),
        'mean_keys_taken_in_episode': np.mean(nb_keys_taken),
        'mean_door_opened_in_episode': np.mean(nb_doors_opened),
    }

    for room_loc, visits in first_visits.items():
        metrics[f'visit_freq_{room_loc}'] = len(visits) / len(episodes)
        # -1 stands in for "never visited" in the min/mean statistics.
        reported = visits if len(visits) > 0 else [-1]
        metrics[f'min_first_visit_{room_loc}'] = min(reported)
        metrics[f'mean_first_visit_{room_loc}'] = np.mean(reported)

    return metrics
def batched_request(self):
    """Batches requests and returns batched request.

    The batched request is built on the first call and cached in
    ``self._batched_request``.

    Returns:
        Request: A request of type ``self._request_type`` whose content is
        the stacked request contents with the first two dimensions merged.
    """
    if self._batched_request is None:
        data.nested_map(
            _PredictionRequestBatcher._assert_not_scalar, self._requests)

        # Stack instead of concatenate to ensure that all requests have
        # the same shape.
        contents = [request.content for request in self._requests]
        stacked = data.nested_stack(contents)

        # (n_agents, n_requests, ...) -> (n_agents * n_requests, ...)
        flattened = data.nested_map(
            _PredictionRequestBatcher._flatten_first_2_dims, stacked)

        self._batched_request = Request(self._request_type, flattened)

    return self._batched_request
def predict(self, inputs):
    """Returns the prediction for a given input.

    Args:
        inputs: (Agent-dependent) Batch of inputs to run prediction on.

    Returns:
        Agent-dependent: Network predictions, obtained by calling
        ``.numpy()`` on every leaf returned by the model.
    """
    raw_outputs = self._model.predict_on_batch(inputs)
    return data.nested_map(lambda tensor: tensor.numpy(), raw_outputs)
def predict(self, inputs):
    """Runs the model on a batch of inputs with all-ones masks.

    Args:
        inputs (pytree): Batched inputs; axis 0 of one leaf is used as the
            batch size.

    Returns:
        Whatever ``self._model.predict_on_batch`` returns for
        ``(inputs, masks)``.
    """
    leaf_shape = data.choose_leaf(inputs).shape
    assert leaf_shape, 'KerasNetwork only works on batched inputs.'
    batch_prefix = (leaf_shape[0], )

    def ones_like_output(signature):
        return np.ones(shape=(batch_prefix + signature.shape),
                       dtype=signature.dtype)

    # One mask of ones per output, shaped like that output for this batch.
    masks = data.nested_map(ones_like_output, self._network_signature.output)
    return self._model.predict_on_batch((inputs, masks))
def _make_inputs(input_signature):
    """Initializes keras.Input layers for a given signature.

    Every layer is created with dtype ``'float32'``; only the ``shape`` of
    each signature leaf is used.

    Args:
        input_signature (pytree of TensorSignatures): Input signature.

    Returns:
        Pytree of tf.keras.Input layers.
    """
    return data.nested_map(
        lambda signature: keras.Input(shape=signature.shape,
                                      dtype='float32'),
        input_signature,
    )
def transform_model_outputs(self, observations, model_outputs):
    """Reduces model outputs over the last axis and attaches a bonus.

    The entries selected by ``self._index_mask`` along the last axis
    (presumably the ensemble-member axis, to be confirmed against the model)
    are averaged and handed to the parent implementation. The standard
    deviation of ``'next_observation'`` over that axis, averaged over all
    axes but the first, is stored as ``'bonus'`` in each returned info dict.

    Args:
        observations: Forwarded unchanged to the parent implementation.
        model_outputs (pytree): Must contain a ``'next_observation'`` entry.

    Returns:
        tuple: ``(next_observations, rewards, dones, infos)`` as returned by
        the parent implementation, with ``infos`` updated in place.
    """
    def select(x):
        return np.take(x, self._index_mask, axis=-1)

    def average(x):
        return np.mean(x, axis=-1)

    selected_outputs = data.nested_map(select, model_outputs)

    spread = np.std(
        selected_outputs['next_observation'], axis=-1, dtype=np.float64)
    trailing_axes = tuple(range(1, spread.ndim))
    bonuses = np.mean(spread, axis=trailing_axes)

    averaged_outputs = data.nested_map(average, selected_outputs)
    parent = super(TrainableEnsembleModelEnv, self)
    next_observations, rewards, dones, infos = (
        parent.transform_model_outputs(observations, averaged_outputs))

    for info, bonus in zip(infos, bonuses):
        info.update({'bonus': bonus})

    return next_observations, rewards, dones, infos
def zero_pytree(signature, shape_prefix=()):
    """Builds a zero-filled pytree of a given signature.

    Args:
        signature (pytree): Pytree of TensorSignature.
        shape_prefix (tuple): Shape to be prepended to each constructed
            array's shape.

    Returns:
        Pytree of a given signature with zero arrays as leaves.
    """
    def zeros_for(leaf_signature):
        full_shape = shape_prefix + leaf_signature.shape
        return np.zeros(shape=full_shape, dtype=leaf_signature.dtype)

    return data.nested_map(zeros_for, signature)
def render_transition(transition):
    """Renders a transition as a dict of primitive/rendered fields.

    Args:
        transition: Object with ``agent_info``, ``action`` and ``reward``
            attributes, or ``None``.

    Returns:
        dict: Empty for ``None``; otherwise has the keys ``'agent_info'``,
        ``'action'`` and ``'reward'``.
    """
    if transition is None:
        return {}

    primitive_agent_info = alpacka_data.nested_map(
        to_primitive, transition.agent_info)
    return {
        'agent_info': render_agent_info(primitive_agent_info),
        'action': env_renderer.render_action(transition.action),
        'reward': to_primitive(transition.reward),
    }
def sample(self, batch_size):
    """Samples a batch of datapoints.

    Indices are drawn uniformly, with replacement, from the filled part of
    the buffer.

    Args:
        batch_size (int): Number of datapoints to sample.

    Returns:
        Datapoint object with sampled datapoints stacked along the 0 axis.

    Raises:
        ValueError: If the buffer is empty.
    """
    # A buffer that was allocated but never written to has _size == 0; check
    # for that explicitly so the caller gets this message instead of the
    # unrelated error np.random.randint raises for an empty range.
    if self._data_buffer is None or self._size == 0:
        raise ValueError('Cannot sample from an empty buffer.')
    indices = np.random.randint(self._size, size=batch_size)
    return data.nested_map(lambda x: x[indices], self._data_buffer)
def __init__(self, datapoint_signature, capacity):
    """Initializes the replay buffer.

    One zero-filled array of shape ``(capacity,) + signature.shape`` is
    allocated for every leaf of the signature.

    Args:
        datapoint_signature (pytree): Pytree of TensorSignatures, defining
            the structure of data to be stored.
        capacity (int): Maximum size of the buffer.
    """
    self._capacity = capacity
    # Nothing stored yet; the next write goes to slot 0.
    self._size = 0
    self._insert_index = 0

    def allocate(signature):
        return np.zeros(
            (self._capacity,) + signature.shape, dtype=signature.dtype)

    self._data_buffer = data.nested_map(allocate, datapoint_signature)
def _log_transitions_with_false_positive_rewards(self, episodes):
    """Logs images of transitions whose predicted reward was a false positive.

    For every transition the predicted reward is read from
    ``agent_info['children_rewards']`` at the index of the taken action. The
    false-positive mask is ``predicted * (1 - actual_reward)``; episodes with
    an all-zero mask are skipped. Logging stops once more than 50 images
    have been logged.

    Args:
        episodes (list): Episodes providing ``transition_batch``.
    """
    fp_transitions = []
    for episode in episodes:
        batch = episode.transition_batch

        # Reward the model predicted for the action that was actually taken.
        predicted = np.empty_like(batch.reward)
        taken_and_predicted = zip(
            batch.action, batch.agent_info['children_rewards'])
        for t, (action_t, rewards_t) in enumerate(taken_and_predicted):
            predicted[t] = rewards_t[action_t]

        fp_mask = predicted * (1 - batch.reward)
        if not fp_mask.any():
            continue
        select = functools.partial(
            self._select_false_positive_transitions, fp_mask=fp_mask)
        fp_transitions.append(nested_map(select, batch))

    images_logged = 0
    for fp_transition in fp_transitions:
        visualizations = self._logging_env.visualize_transitions(
            fp_transition)
        for transition_viz in visualizations:
            metric_logging.log_image(
                f'episode_model/fp_transition_{self._epoch}',
                images_logged, transition_viz
            )
            images_logged += 1
            if images_logged > 50:
                # Cap on the number of images logged per call.
                return
def __init__(
    self,
    network_signature,
    temporal_diff_n,
    gamma=1.0,
    batch_size=64,
    n_steps_per_epoch=1000,
    replay_buffer_capacity=1000000,
    replay_buffer_sampling_hierarchy=(),
    polyak_coeff=None,
):
    """Initializes TDTrainer.

    Args:
        network_signature (pytree): Input signature for the network.
        temporal_diff_n: temporal difference distance, np.inf is supported
        gamma: discount rate
        batch_size (int): Batch size.
        n_steps_per_epoch (int): Number of optimizer steps to do per epoch.
        replay_buffer_capacity (int): Maximum size of the replay buffer.
        replay_buffer_sampling_hierarchy (tuple): Sequence of Episode
            attribute names, defining the sampling hierarchy.
        polyak_coeff: polyak averaging coefficient
    """
    super().__init__(network_signature)

    def target(episode):
        return target_n_return(episode, temporal_diff_n, gamma)

    def compute_target(episode):
        return data.nested_map(lambda f: f(episode), target)

    self._target_fn = compute_target
    self._batch_size = batch_size
    self._n_steps_per_epoch = n_steps_per_epoch
    self._sampling_hierarchy = replay_buffer_sampling_hierarchy
    self._polyak_coeff = polyak_coeff
    # Not created here; both start out as None.
    self._target_network_params = None
    self._target_network = None

    # Stored datapoints are (network input, TD target data) pairs.
    td_target_signature = TDTargetData(
        cum_reward=network_signature.output,
        bootstrap_obs=network_signature.input,
        bootstrap_gamma=network_signature.output,
    )
    self._replay_buffer = replay_buffers.HierarchicalReplayBuffer(
        (network_signature.input, td_target_signature),
        capacity=replay_buffer_capacity,
        hierarchy_depth=len(replay_buffer_sampling_hierarchy),
    )
def unbatch_responses(self, batched_responses):
    """Splits batched responses back into one response per request.

    Consecutive slices along axis 0 are taken from every leaf, with lengths
    given by ``self._batch_sizes``.

    Args:
        batched_responses (pytree): Responses for the batched request.

    Returns:
        list: One pytree per entry of ``self._batch_sizes``, in order.
    """
    unbatched_responses = []
    offset = 0
    for size in self._batch_sizes:
        window = slice(offset, offset + size)
        # Bind the window as a default so each lambda keeps its own slice.
        unbatched_responses.append(data.nested_map(
            lambda x, window=window: x[window],
            batched_responses,
        ))
        offset += size
    return unbatched_responses
def _make_output_heads(hidden, output_signature, output_activation,
                       zero_init):
    """Creates one masked Dense output head per leaf of the signature.

    Args:
        hidden: Either a single tensor, which is then shared by all heads,
            or a pytree of tensors matching ``output_signature``.
        output_signature (pytree): Leaves must have dtype ``np.float32``;
            the last dimension of ``shape`` is the head width.
        output_activation (pytree): Activation for each head.
        zero_init (bool): If true, kernels and biases are initialized with
            zeros.

    Returns:
        tuple: ``(heads, masks)`` where ``masks`` are the input layers
        created by ``_make_inputs(output_signature)``.
    """
    masks = _make_inputs(output_signature)

    initializer_kwargs = {}
    if zero_init:
        initializer_kwargs = {
            'kernel_initializer': 'zeros',
            'bias_initializer': 'zeros',
        }

    def build_head(layer, signature, activation, mask):
        assert signature.dtype == np.float32
        dense = keras.layers.Dense(
            signature.shape[-1], activation=activation, **initializer_kwargs)
        head = dense(layer)
        return AddMask()((head, mask))

    if tf.is_tensor(hidden):
        # A single hidden tensor feeds every head.
        shared_hidden = hidden
        hidden = data.nested_map(lambda _: shared_hidden, output_signature)

    heads = data.nested_zip_with(
        build_head, (hidden, output_signature, output_activation, masks))
    return (heads, masks)
def __init__(
    self,
    network_signature,
    input=input_observation,
    target=target_solved,
    mask=None,
    batch_size=64,
    n_steps_per_epoch=1000,
    replay_buffer_capacity=1000000,
    replay_buffer_sampling_hierarchy=(),
):
    """Initializes the trainer.

    Args:
        network_signature: Network signature; its ``input`` and ``output``
            define the replay buffer datapoints.
        input (pytree): Functions ``episode -> input``.
        target (pytree): Functions ``episode -> target``.
        mask (pytree): Functions ``(episode, target) -> mask``. When
            ``None``, ``mask_one`` is used for every leaf of ``target``.
        batch_size (int): Batch size.
        n_steps_per_epoch (int): Number of steps per epoch.
        replay_buffer_capacity (int): Capacity of the replay buffer.
        replay_buffer_sampling_hierarchy (tuple): Its length sets the
            hierarchy depth of the replay buffer.
    """
    super().__init__(network_signature)

    def episode_mapper(functions_pytree):
        def apply_to_episode(episode):
            return data.nested_map(lambda f: f(episode), functions_pytree)
        return apply_to_episode

    self._input_fn = episode_mapper(input)
    self._target_fn = episode_mapper(target)

    mask_fns = mask
    if mask_fns is None:
        mask_fns = data.nested_map(lambda _: mask_one, target)

    def compute_mask(episode):
        return data.nested_zip_with(
            lambda f, target: f(episode, target),
            (mask_fns, self._target_fn(episode)))

    self._mask_fn = compute_mask

    self._batch_size = batch_size
    self._n_steps_per_epoch = n_steps_per_epoch
    self._sampling_hierarchy = replay_buffer_sampling_hierarchy

    # (input, target, mask); targets and masks share the output signature.
    self._replay_buffer = replay_buffers.HierarchicalReplayBuffer(
        (
            network_signature.input,
            network_signature.output,
            network_signature.output,
        ),
        capacity=replay_buffer_capacity,
        hierarchy_depth=len(replay_buffer_sampling_hierarchy),
    )
def __init__(self, datapoint_spec, capacity):
    """Initializes the replay buffer.

    Args:
        datapoint_spec (pytree): Pytree of shape tuples, defining the
            structure of data to be stored.
        capacity (int): Maximum size of the buffer.
    """
    self._capacity = capacity
    self._size = 0
    self._insert_index = 0

    def allocate(shape):
        return np.zeros((self._capacity,) + shape)

    # The leaves of datapoint_spec are shape tuples, which must be passed to
    # `allocate` whole rather than mapped over, hence the stop one level
    # above the bottom.
    self._data_buffer = data.nested_map(
        allocate, datapoint_spec, stop_fn=data.is_last_level)
def __init__(
    self,
    network_signature,
    target=target_solved,
    batch_size=64,
    n_steps_per_epoch=1000,
    replay_buffer_capacity=1000000,
    replay_buffer_sampling_hierarchy=(),
):
    """Initializes SupervisedTrainer.

    Args:
        network_signature (pytree): Input signature for the network.
        target (pytree): Pytree of functions episode -> target for
            determining the targets for network training. The structure of
            the tree should reflect the structure of a target.
        batch_size (int): Batch size.
        n_steps_per_epoch (int): Number of optimizer steps to do per epoch.
        replay_buffer_capacity (int): Maximum size of the replay buffer.
        replay_buffer_sampling_hierarchy (tuple): Sequence of Episode
            attribute names, defining the sampling hierarchy.
    """
    super().__init__(network_signature)

    def compute_target(episode):
        return data.nested_map(lambda f: f(episode), target)

    self._target_fn = compute_target
    self._batch_size = batch_size
    self._n_steps_per_epoch = n_steps_per_epoch
    self._sampling_hierarchy = replay_buffer_sampling_hierarchy

    # Datapoints are (input, target) pairs.
    self._replay_buffer = replay_buffers.HierarchicalReplayBuffer(
        (network_signature.input, network_signature.output),
        capacity=replay_buffer_capacity,
        hierarchy_depth=len(replay_buffer_sampling_hierarchy),
    )
def _make_inputs(input_signature):
    """Creates a keras.Input layer for every leaf of the signature.

    Args:
        input_signature (pytree): Leaves expose ``shape`` and ``dtype``,
            both of which are forwarded to ``keras.Input``.

    Returns:
        Pytree of ``keras.Input`` layers with the signature's structure.
    """
    return data.nested_map(
        lambda signature: keras.Input(shape=signature.shape,
                                      dtype=signature.dtype),
        input_signature,
    )
def shapes(tensors):
    """Returns a pytree holding the ``shape`` of every leaf of ``tensors``."""
    def shape_of(tensor):
        return tensor.shape

    return data.nested_map(shape_of, tensors)
def dtypes(tensors):
    """Returns a pytree holding the ``dtype`` of every leaf of ``tensors``."""
    def dtype_of(tensor):
        return tensor.dtype

    return data.nested_map(dtype_of, tensors)
def sample(self, batch_size):
    """Samples a batch of datapoints uniformly, with replacement.

    Args:
        batch_size (int): Number of datapoints to sample.

    Returns:
        Pytree shaped like the data buffer, with the sampled datapoints
        stacked along axis 0.

    Raises:
        ValueError: If the buffer is empty.
    """
    # An allocated-but-unfilled buffer has _size == 0. Without this check
    # the caller would get np.random.randint's error about an empty range
    # instead of the message below.
    if self._data_buffer is None or self._size == 0:
        raise ValueError('Cannot sample from an empty buffer.')
    indices = np.random.randint(self._size, size=batch_size)
    return data.nested_map(lambda x: x[indices], self._data_buffer)
def __iter__(self):
    """Iterates over the stored datapoints in buffer order.

    Returns:
        Generator yielding, for each index below ``self._size``, the pytree
        obtained by indexing every array of the data buffer at that index.
    """
    def datapoint_at(position):
        return data.nested_map(lambda x: x[position], self._data_buffer)

    # A generator expression, not a generator function: the range (and thus
    # self._size) is evaluated when __iter__ is called.
    return (datapoint_at(position) for position in range(self._size))