def dense_bitwise_categorical_fun(action_space, config, observations):
  """Dense network with bitwise input and categorical output.

  Each integer observation value is embedded via its 8-bit binary
  representation before being fed to the dense layers.

  Args:
    action_space: gym-style action space; `action_space.n` sets logit width.
    config: unused (kept for interface compatibility with other net fns).
    observations: integer tensor of shape [batch, time, ...obs dims...].

  Returns:
    NetworkOutput with a Categorical policy, a value estimate of shape
    [batch, time], and an identity action post-processing fn.
  """
  del config
  obs_shape = common_layers.shape_list(observations)
  # Merge batch and time so per-frame ops see [batch * time, ...].
  x = tf.reshape(observations, [-1] + obs_shape[2:])
  with tf.variable_scope("network_parameters"):
    with tf.variable_scope("dense_bitwise"):
      x = discretization.int_to_bit_embed(x, 8, 32)
      # Restore [batch, time, flattened features].
      flat_x = tf.reshape(x, [
          obs_shape[0], obs_shape[1],
          functools.reduce(operator.mul, x.shape.as_list()[1:], 1)
      ])
      x = tf.contrib.layers.fully_connected(flat_x, 256, tf.nn.relu)
      # Bug fix: chain the layers. The original fed `flat_x` into the
      # 128-unit layer as well, leaving the 256-unit layer disconnected.
      x = tf.contrib.layers.fully_connected(x, 128, tf.nn.relu)
      logits = tf.contrib.layers.fully_connected(x, action_space.n,
                                                 activation_fn=None)
      value = tf.contrib.layers.fully_connected(
          x, 1, activation_fn=None)[..., 0]
      policy = tf.contrib.distributions.Categorical(logits=logits)
  return NetworkOutput(policy, value, lambda a: a)
def bottom(self, x):
  """Embeds integer pixel frames bitwise, then projects to body depth.

  Args:
    x: integer tensor of video frames with values in [0, 256).

  Returns:
    Float tensor projected to `self._body_input_depth` channels.
  """
  frames = x
  with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
    common_layers.summarize_video(frames, "bottom")
    # Pixels must fit in 8 bits for the bitwise embedding below.
    assert self.top_dimensionality == 256
    bits = discretization.int_to_bit_embed(
        frames, 8, self.PIXEL_EMBEDDING_SIZE)
    # Linear projection of the bit embedding into the model's input depth.
    return tf.layers.dense(
        bits, self._body_input_depth, name="merge_pixel_embedded_frames")
def bottom(self, inputs):
  """Bitwise-embeds target frames, folds time into channels, and projects.

  Args:
    inputs: integer tensor of video frames with values in [0, 256).

  Returns:
    Float tensor with time folded into channels, projected to
    `self._body_input_depth`.
  """
  # NOTE(review): unlike sibling bottoms, this scope has no
  # reuse=tf.AUTO_REUSE — confirm whether that is intentional.
  with tf.variable_scope(self.name):
    common_layers.summarize_video(inputs, "targets_bottom")
    # The 8-bit embedding below requires byte-valued pixels.
    assert self.top_dimensionality == 256
    bits = discretization.int_to_bit_embed(
        inputs, 8, self.PIXEL_EMBEDDING_SIZE)
    # Move the time axis into channels before the projection.
    folded = common_layers.time_to_channels(bits)
    return tf.layers.dense(
        folded, self._body_input_depth, name="merge_pixel_embedded_frames")
def targets_bottom(self, x):  # pylint: disable=arguments-differ
  """Bitwise-embeds target frames, folds time into channels, and projects.

  Args:
    x: integer tensor of target video frames with values in [0, 256).

  Returns:
    Float tensor projected to `self._model_hparams.hidden_size`.
  """
  frames = x
  with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
    common_layers.summarize_video(frames, "targets_bottom")
    # Byte-valued pixels are required by the 8-bit embedding.
    assert self.top_dimensionality == 256
    bits = discretization.int_to_bit_embed(
        frames, 8, self.PIXEL_EMBEDDING_SIZE)
    # Fold the time dimension into channels, then project.
    folded = common_layers.time_to_channels(bits)
    return tf.layers.dense(
        folded, self._model_hparams.hidden_size,
        name="merge_pixel_embedded_frames")
def bottom(self, x):
  """Embeds integer pixel frames bitwise and projects to hidden size.

  Args:
    x: integer tensor of video frames with values in [0, 256).

  Returns:
    Float tensor projected to `self._model_hparams.hidden_size` channels.
  """
  frames = x
  with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
    common_layers.summarize_video(frames, "bottom")
    # The 8-bit embedding assumes byte-valued pixels.
    assert self._vocab_size == 256
    bits = discretization.int_to_bit_embed(
        frames, 8, self.PIXEL_EMBEDDING_SIZE)
    # Project the bit embedding into the model's hidden size.
    return tf.layers.dense(
        bits, self._model_hparams.hidden_size,
        name="merge_pixel_embedded_frames")
def body(self, features):
  """Dense policy/value head over bitwise-embedded flat observations.

  Args:
    features: dict with key "inputs" holding integer observations.

  Returns:
    Dict with "target_policy" logits (width = problem.num_actions) and a
    scalar "target_value" per example.
  """
  observations = features["inputs"]
  flat_x = tf.layers.flatten(observations)
  with tf.variable_scope("dense_bitwise"):
    # Embed each integer via its 8-bit binary representation.
    flat_x = discretization.int_to_bit_embed(flat_x, 8, 32)
    x = tf.layers.dense(flat_x, 256, activation=tf.nn.relu)
    # Bug fix: chain the layers. The original fed `flat_x` into the
    # 128-unit layer as well, leaving the 256-unit layer disconnected.
    x = tf.layers.dense(x, 128, activation=tf.nn.relu)
    logits = tf.layers.dense(x, self.hparams.problem.num_actions)
    value = tf.layers.dense(x, 1)[..., 0]
  return {"target_policy": logits, "target_value": value}
def targets_bottom(self, x):  # pylint: disable=arguments-differ
  """Bitwise-embeds target frames, folds time into channels, and projects.

  Args:
    x: integer tensor of target video frames with values in [0, 256).

  Returns:
    Float tensor projected to `self._body_input_depth` channels.
  """
  frames = x
  with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
    common_layers.summarize_video(frames, "targets_bottom")
    # Byte-valued pixels are required for the 8-bit embedding.
    assert self.top_dimensionality == 256
    bits = discretization.int_to_bit_embed(
        frames, 8, self.PIXEL_EMBEDDING_SIZE)
    # Fold time into channels before the linear projection.
    folded = common_layers.time_to_channels(bits)
    return tf.layers.dense(
        folded, self._body_input_depth, name="merge_pixel_embedded_frames")
def body(self, features):
  """Dense action/value head over bitwise-embedded observations.

  Args:
    features: dict with key "inputs" holding integer observations of
      shape [batch, time, ...obs dims...].

  Returns:
    Dict with "target_action" logits and per-step scalar "target_value".
  """
  observations = features["inputs"]
  obs_shape = common_layers.shape_list(observations)
  # Merge batch and time so per-frame ops see [batch * time, ...].
  x = tf.reshape(observations, [-1] + obs_shape[2:])
  with tf.variable_scope("dense_bitwise"):
    x = discretization.int_to_bit_embed(x, 8, 32)
    # Restore [batch, time, flattened features].
    flat_x = tf.reshape(
        x, [obs_shape[0], obs_shape[1],
            functools.reduce(operator.mul, x.shape.as_list()[1:], 1)])
    x = tf.contrib.layers.fully_connected(flat_x, 256, tf.nn.relu)
    # Bug fix: chain the layers. The original fed `flat_x` into the
    # 128-unit layer as well, leaving the 256-unit layer disconnected.
    x = tf.contrib.layers.fully_connected(x, 128, tf.nn.relu)
    logits = tf.contrib.layers.fully_connected(
        x, self._get_num_actions(features), activation_fn=None
    )
    value = tf.contrib.layers.fully_connected(
        x, 1, activation_fn=None)[..., 0]
  return {"target_action": logits, "target_value": value}
def dense_bitwise_categorical_fun(action_space, config, observations):
  """Dense network with bitwise input and categorical output.

  Each integer observation value is embedded via its 8-bit binary
  representation before being fed to the dense layers.

  Args:
    action_space: gym-style action space; `action_space.n` sets logit width.
    config: unused (kept for interface compatibility with other net fns).
    observations: integer tensor of shape [batch, time, ...obs dims...].

  Returns:
    NetworkOutput with a tfp Categorical policy, a value estimate of shape
    [batch, time], and an identity action post-processing fn.
  """
  del config
  obs_shape = common_layers.shape_list(observations)
  # Merge batch and time so per-frame ops see [batch * time, ...].
  x = tf.reshape(observations, [-1] + obs_shape[2:])
  with tf.variable_scope("network_parameters"):
    with tf.variable_scope("dense_bitwise"):
      x = discretization.int_to_bit_embed(x, 8, 32)
      # Restore [batch, time, flattened features].
      flat_x = tf.reshape(
          x, [obs_shape[0], obs_shape[1],
              functools.reduce(operator.mul, x.shape.as_list()[1:], 1)])
      x = tf.contrib.layers.fully_connected(flat_x, 256, tf.nn.relu)
      # Bug fix: chain the layers. The original fed `flat_x` into the
      # 128-unit layer as well, leaving the 256-unit layer disconnected.
      x = tf.contrib.layers.fully_connected(x, 128, tf.nn.relu)
      logits = tf.contrib.layers.fully_connected(x, action_space.n,
                                                 activation_fn=None)
      value = tf.contrib.layers.fully_connected(
          x, 1, activation_fn=None)[..., 0]
      policy = tfp.distributions.Categorical(logits=logits)
  return NetworkOutput(policy, value, lambda a: a)