def _build_layers_v2(self, input_dict: dict, num_outputs: int, config: dict):
    """Build an FC net with a dropout layer after every hidden layer.

    Returns:
        (output, last_layer): the linear projection to ``num_outputs``
        and the last hidden (post-dropout) tensor.
    """
    import tensorflow.contrib.slim as slim
    with tf.name_scope("fc_net"):
        hidden = input_dict['obs']
        act_fn = get_activation_fn(config.get("fcnet_activation"))
        # NOTE(review): config.get("fcnet_dropout_rate") is None when the
        # key is absent -- confirm callers always supply a rate.
        for idx, width in enumerate(config.get("fcnet_hiddens"), 1):
            hidden = slim.fully_connected(
                inputs=hidden,
                num_outputs=width,
                weights_initializer=normc_initializer(1.0),
                activation_fn=act_fn,
                scope="fc{}".format(idx),
            )
            # Dropout is only active when is_training is true.
            hidden = tf.layers.dropout(
                inputs=hidden,
                rate=config.get("fcnet_dropout_rate"),
                training=input_dict['is_training'],
                name="dropout{}".format(idx),
            )
        output = slim.fully_connected(
            inputs=hidden,
            num_outputs=num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out",
        )
        return output, hidden
def _init(self, inputs, num_outputs, options):
    """Build a fully connected net over flattened inputs.

    Args:
        inputs: Input tensor of shape [BATCH, flattened obs size].
        num_outputs: Width of the final linear output layer.
        options: Model options ("fcnet_hiddens", "fcnet_activation").

    Returns:
        (output, last_layer) tuple of tensors.

    Raises:
        ValueError: If "fcnet_activation" is not "tanh" or "relu".
    """
    hiddens = options.get("fcnet_hiddens", [256, 256])
    fcnet_activation = options.get("fcnet_activation", "tanh")
    if fcnet_activation == "tanh":
        activation = tf.nn.tanh
    elif fcnet_activation == "relu":
        activation = tf.nn.relu
    else:
        # Bug fix: an unsupported value used to fall through, leaving
        # `activation` unbound and raising UnboundLocalError below.
        raise ValueError(
            "Unsupported fcnet_activation: {}".format(fcnet_activation))
    print("Constructing fcnet {} {}".format(hiddens, activation))
    with tf.name_scope("fc_net"):
        i = 1
        last_layer = inputs
        for size in hiddens:
            last_layer = slim.fully_connected(
                last_layer,
                size,
                weights_initializer=normc_initializer(1.0),
                activation_fn=activation,
                scope="fc{}".format(i))
            i += 1
        output = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")
        return output, last_layer
def _build_layers(self, inputs, num_outputs, options):
    """Process the flattened inputs.

    Note that dict inputs will be flattened into a vector. To define a
    model that processes the components separately, use
    _build_layers_v2().
    """
    hiddens = options.get("fcnet_hiddens")
    activation = get_activation_fn(options.get("fcnet_activation"))
    with tf.name_scope("fc_net"):
        last_layer = inputs
        for idx, size in enumerate(hiddens, start=1):
            last_layer = slim.fully_connected(
                last_layer,
                size,
                weights_initializer=normc_initializer(1.0),
                activation_fn=activation,
                scope="fc{}".format(idx))
        # Final linear head (no activation, small init).
        output = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")
        return output, last_layer
def _build_layers(self, inputs, num_outputs, options):
    """FC net with defaults [256, 256] hiddens and tanh activation."""
    hidden_sizes = options.get("fcnet_hiddens", [256, 256])
    act_fn = get_activation_fn(options.get("fcnet_activation", "tanh"))
    with tf.name_scope("fc_net"):
        net = inputs
        layer_index = 1
        for width in hidden_sizes:
            net = slim.fully_connected(
                net,
                width,
                weights_initializer=normc_initializer(1.0),
                activation_fn=act_fn,
                scope="fc%d" % layer_index)
            layer_index += 1
        logits = slim.fully_connected(
            net,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")
        return logits, net
def _init(self, inputs, num_outputs, options):
    """Build a fully connected net.

    Args:
        inputs: Input tensor of shape [BATCH, flattened obs size].
        num_outputs: Width of the final linear output layer.
        options: Model options ("fcnet_hiddens", "fcnet_activation").

    Returns:
        (output, last_layer) tuple of tensors.

    Raises:
        ValueError: If "fcnet_activation" is not "tanh" or "relu".
    """
    hiddens = options.get("fcnet_hiddens", [256, 256])
    fcnet_activation = options.get("fcnet_activation", "tanh")
    if fcnet_activation == "tanh":
        activation = tf.nn.tanh
    elif fcnet_activation == "relu":
        activation = tf.nn.relu
    else:
        # Bug fix: an unsupported value used to fall through, leaving
        # `activation` unbound and raising UnboundLocalError below.
        raise ValueError(
            "Unsupported fcnet_activation: {}".format(fcnet_activation))
    with tf.name_scope("fc_net"):
        i = 1
        last_layer = inputs
        for size in hiddens:
            label = "fc{}".format(i)
            last_layer = slim.fully_connected(
                last_layer,
                size,
                weights_initializer=normc_initializer(1.0),
                activation_fn=activation,
                scope=label)
            i += 1
        label = "fc_out"
        output = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope=label)
        return output, last_layer
def _build_layers(self, inputs, num_outputs, options):
    """Two ReLU conv layers, a 256-unit dense layer, then a linear head."""
    with tf.name_scope("KhanElibolModel"):
        conv1 = layers.conv2d(inputs, 16, (4, 4), activation=tf.nn.relu)
        conv2 = layers.conv2d(conv1, 32, (2, 2), activation=tf.nn.relu)
        flat = flatten(conv2)
        hidden = layers.dense(
            flat,
            256,
            kernel_initializer=normc_initializer(0.01),
            activation=tf.nn.relu)
        output = layers.dense(
            hidden,
            num_outputs,
            kernel_initializer=normc_initializer(0.01),
            activation=None)
        return output, hidden
def _build_layers(self, inputs, num_outputs, options):
    """Process the flattened inputs.

    Note that dict inputs will be flattened into a vector. To define a
    model that processes the components separately, use
    _build_layers_v2().
    """
    import tensorflow.contrib.slim as slim
    hidden_sizes = options.get("fcnet_hiddens")
    act_fn = get_activation_fn(options.get("fcnet_activation"))
    with tf.name_scope("fc_net"):
        net = inputs
        for idx, width in enumerate(hidden_sizes, start=1):
            net = slim.fully_connected(
                net,
                width,
                weights_initializer=normc_initializer(1.0),
                activation_fn=act_fn,
                scope="fc{}".format(idx))
        logits = slim.fully_connected(
            net,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")
        return logits, net
def _build_layers(self, inputs, num_outputs, options):
    """Define the layers of a custom model.

    Arguments:
        input_dict (dict): Dictionary of input tensors, including "obs",
            "prev_action", "prev_reward".
        num_outputs (int): Output tensor must be of size
            [BATCH_SIZE, num_outputs].
        options (dict): Model options.
    """
    # Fall back to the project-wide Config defaults when unset.
    hidden_sizes = options.get("fcnet_hiddens", Config.fcnet_hiddens)
    act_fn = get_activation_fn(
        options.get("fcnet_activation", Config.fcnet_activation))
    with tf.name_scope("fc_net"):
        net = inputs
        for idx, width in enumerate(hidden_sizes, start=1):
            net = slim.fully_connected(
                net,
                width,
                weights_initializer=normc_initializer(1.0),
                activation_fn=act_fn,
                scope="fc{}".format(idx))
        logits = slim.fully_connected(
            net,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")
        return logits, net
def _build_layers_v2(self, input_dict, num_outputs, options):
    """FC net over obs["obs"]; logits are masked by obs["action_mask"]."""
    action_mask = input_dict["obs"]["action_mask"]
    if num_outputs != action_mask.shape[1].value:
        raise ValueError(
            "This model assumes num outputs is equal to max avail actions",
            num_outputs, action_mask)

    # Standard FC net component.
    net = input_dict["obs"]["obs"]
    for idx, width in enumerate([256, 256]):
        net = slim.fully_connected(
            net,
            width,
            weights_initializer=normc_initializer(1.0),
            activation_fn=tf.nn.tanh,
            scope="fc{}".format(idx))
    action_logits = slim.fully_connected(
        net,
        num_outputs,
        weights_initializer=normc_initializer(0.01),
        activation_fn=None,
        scope="fc_out")

    # Mask out invalid actions (use tf.float32.min for stability)
    inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min)
    return inf_mask + action_logits, net
def _build_layers_v2(self, input_dict, num_outputs, options):
    """FC net over obs["real_obs"] with action masking.

    When num_outputs == 1 (e.g. a value branch) the mask is skipped.
    """
    mask = input_dict["obs"]["action_mask"]
    net = input_dict["obs"]["real_obs"]
    for idx, width in enumerate(options["fcnet_hiddens"]):
        net = slim.fully_connected(
            net,
            width,
            weights_initializer=normc_initializer(1.0),
            activation_fn=tf.nn.tanh,
            scope="fc{}".format(idx))
    action_logits = slim.fully_connected(
        net,
        num_outputs,
        weights_initializer=normc_initializer(0.01),
        activation_fn=None,
        scope="fc_out")
    if num_outputs == 1:
        return action_logits, net
    # Mask out invalid actions (use tf.float32.min for stability)
    inf_mask = tf.maximum(tf.math.log(mask), tf.float32.min)
    return inf_mask + action_logits, net
def _build_layers(self, inputs, num_outputs, options):
    """Process the flattened inputs.

    Note that dict inputs will be flattened into a vector. To define a
    model that processes the components separately, use
    _build_layers_v2().
    """
    hidden_sizes = options.get("fcnet_hiddens")
    act_fn = get_activation_fn(options.get("fcnet_activation"))
    with tf.name_scope("fc_net"):
        net = inputs
        for idx, width in enumerate(hidden_sizes, start=1):
            net = tf.layers.dense(
                net,
                width,
                kernel_initializer=normc_initializer(1.0),
                activation=act_fn,
                name="fc{}".format(idx))
        logits = tf.layers.dense(
            net,
            num_outputs,
            kernel_initializer=normc_initializer(0.01),
            activation=None,
            name="fc_out")
        return logits, net
def vf_template(last_layer, input_dict):
    """Build the value-function head, shape [BATCH].

    Closure over the enclosing scope: uses `self.variable_scope`,
    `model_config`, `obs_space`, `action_space`, `legacy_model_cls`
    and `logger` from the surrounding function.
    """
    with tf.variable_scope(self.variable_scope):
        with tf.variable_scope("value_function"):
            # Simple case: sharing the feature layer
            if model_config["vf_share_layers"]:
                return tf.reshape(
                    linear(last_layer, 1, "value_function",
                           normc_initializer(1.0)), [-1])
            # Create a new separate model with no RNN state, etc.
            branch_model_config = model_config.copy()
            branch_model_config["free_log_std"] = False
            if branch_model_config["use_lstm"]:
                # Force the value branch to be non-recurrent.
                branch_model_config["use_lstm"] = False
                logger.warning(
                    "It is not recommended to use a LSTM model "
                    "with vf_share_layers=False (consider "
                    "setting it to True). If you want to not "
                    "share layers, you can implement a custom "
                    "LSTM model that overrides the "
                    "value_function() method.")
            # Separate 1-output branch model; its outputs become the value.
            branch_instance = legacy_model_cls(
                input_dict,
                obs_space,
                action_space,
                1,
                branch_model_config,
                state_in=None,
                seq_lens=None)
            return tf.reshape(branch_instance.outputs, [-1])
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Masked FC model over the "db" component of a dict observation.

    Builds an inner FullyConnectedNetwork over obs["db"], adds one more
    linear layer, and masks the logits with obs["action_mask"].

    Returns:
        (masked_logits, last_layer) tuple of tensors.
    """
    action_mask = input_dict["obs"]["action_mask"]
    self.obs_space = Box(0, 1, shape=(3024, ), dtype=np.float32)  # 28*108
    # The inner model consumes only the "db" part of the observation.
    input_dict["obs"] = input_dict["obs"]["db"]
    # Bug fix: copy before modifying so the caller's shared options dict
    # is not mutated; also removed leftover debug print() calls.
    options = dict(options)
    options["fcnet_hiddens"] = [num_outputs * 2 * 2 * 2, num_outputs * 2]
    self.fcnet = FullyConnectedNetwork(input_dict, self.obs_space,
                                       self.action_space, num_outputs,
                                       options)
    last_layer = self.fcnet.last_layer
    output = slim.fully_connected(
        last_layer,
        num_outputs,
        weights_initializer=normc_initializer(0.01),
        activation_fn=None,
        scope="fc_out2")
    # Mask out invalid actions (tf.float32.min keeps gradients finite).
    inf_mask = tf.maximum(tf.math.log(action_mask), tf.float32.min)
    masked_logits = inf_mask + output
    return masked_logits, last_layer
def value_function(self):
    """Return the value head output, shape [BATCH].

    Shares the feature layer when model_config["vf_share_layers"] is
    set; otherwise builds a separate non-LSTM branch of the legacy
    model with a single output.
    """
    assert self.cur_instance, "must call forward first"
    with self._branch_variable_scope("value_function"):
        # Simple case: sharing the feature layer
        if self.model_config["vf_share_layers"]:
            return tf.reshape(
                linear(self.cur_instance.last_layer, 1, "value_function",
                       normc_initializer(1.0)), [-1])
        # Create a new separate model with no RNN state, etc.
        branch_model_config = self.model_config.copy()
        branch_model_config["free_log_std"] = False
        if branch_model_config["use_lstm"]:
            # Force the value branch to be non-recurrent.
            branch_model_config["use_lstm"] = False
            logger.warning(
                "It is not recommended to use a LSTM model with "
                "vf_share_layers=False (consider setting it to True). "
                "If you want to not share layers, you can implement "
                "a custom LSTM model that overrides the "
                "value_function() method.")
        branch_instance = self.legacy_model_cls(
            self.cur_instance.input_dict,
            self.obs_space,
            self.action_space,
            1,
            branch_model_config,
            state_in=None,
            seq_lens=None)
        return tf.reshape(branch_instance.outputs, [-1])
def _setup_graph(self, ob_space, ac_space):
    """Build the LSTM policy graph: model, value head, sampler, step var."""
    self.x = tf.placeholder(tf.float32, [None] + list(ob_space.shape))
    dist_class, self.logit_dim = ModelCatalog.get_action_dist(
        ac_space, self.config["model"])
    self._model = LSTM(self.x, self.logit_dim, {})
    self.state_in = self._model.state_in
    self.state_out = self._model.state_out
    self.logits = self._model.outputs
    self.action_dist = dist_class(self.logits)
    # with tf.variable_scope("vf"):
    #     vf_model = ModelCatalog.get_model(self.x, 1)
    # Value head: linear projection of the model's last hidden layer.
    self.vf = tf.reshape(
        linear(self._model.last_layer, 1, "value", normc_initializer(1.0)),
        [-1])
    self.sample = self.action_dist.sample()
    self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      tf.get_variable_scope().name)
    # Non-trainable step counter shared across the graph.
    self.global_step = tf.get_variable(
        "global_step", [],
        tf.int32,
        initializer=tf.constant_initializer(0, dtype=tf.int32),
        trainable=False)
def __init__(self,
             rnn_type: Type[tf.keras.layers.RNN],
             num_outputs: int,
             fcnet_hiddens: Sequence[int],
             fcnet_activation: str,
             conv_filters: Optional[Sequence[ConvLayerSpec]] = None,
             conv_activation: str = 'relu',
             lstm_cell_size: int = 256,
             lstm_use_prev_action_reward: bool = False,
             recurrent_args: Optional[Dict[str, Any]] = None,
             **options):
    """Recurrent model: base conv/FC stack plus an RNN and a linear head."""
    super().__init__(num_outputs, fcnet_hiddens, fcnet_activation,
                     conv_filters, conv_activation, **options)
    self._recurrent = True
    self._lstm_cell_size = lstm_cell_size
    self._lstm_use_prev_action_reward = lstm_use_prev_action_reward
    # Extra keyword args forwarded verbatim to the RNN constructor.
    rnn_kwargs = dict(recurrent_args) if recurrent_args is not None else {}
    self.rnn = rnn_type(lstm_cell_size,
                        return_state=True,
                        return_sequences=True,
                        **rnn_kwargs)
    self.output_layer = Dense(
        num_outputs, kernel_initializer=normc_initializer(0.01))
def __init__(self, observation_space, action_space, config):
    """Build the A3C policy graph and initialize TFPolicyGraph.

    Raises:
        UnsupportedSpaceException: If the action space is neither Box
            nor Discrete.
    """
    config = dict(ray.rllib.agents.a3c.a3c.DEFAULT_CONFIG, **config)
    self.config = config
    self.sess = tf.get_default_session()

    # Setup the policy
    self.observations = tf.placeholder(
        tf.float32, [None] + list(observation_space.shape))
    dist_class, logit_dim = ModelCatalog.get_action_dist(
        action_space, self.config["model"])
    self.model = ModelCatalog.get_model(self.observations, logit_dim,
                                        self.config["model"])
    action_dist = dist_class(self.model.outputs)
    # Value head: linear projection of the model's last hidden layer.
    self.vf = tf.reshape(
        linear(self.model.last_layer, 1, "value", normc_initializer(1.0)),
        [-1])
    self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      tf.get_variable_scope().name)

    # Setup the policy loss
    if isinstance(action_space, gym.spaces.Box):
        # Continuous actions: one float per action dimension.
        ac_size = action_space.shape[0]
        actions = tf.placeholder(tf.float32, [None, ac_size], name="ac")
    elif isinstance(action_space, gym.spaces.Discrete):
        actions = tf.placeholder(tf.int64, [None], name="ac")
    else:
        raise UnsupportedSpaceException(
            "Action space {} is not supported for A3C.".format(
                action_space))
    advantages = tf.placeholder(tf.float32, [None], name="advantages")
    v_target = tf.placeholder(tf.float32, [None], name="v_target")
    self.loss = A3CLoss(action_dist, actions, advantages, v_target,
                        self.vf, self.config["vf_loss_coeff"],
                        self.config["entropy_coeff"])

    # Initialize TFPolicyGraph
    loss_in = [
        ("obs", self.observations),
        ("actions", actions),
        ("advantages", advantages),
        ("value_targets", v_target),
    ]
    TFPolicyGraph.__init__(
        self,
        observation_space,
        action_space,
        self.sess,
        obs_input=self.observations,
        action_sampler=action_dist.sample(),
        loss=self.loss.total_loss,
        loss_inputs=loss_in,
        state_inputs=self.model.state_in,
        state_outputs=self.model.state_out,
        seq_lens=self.model.seq_lens,
        max_seq_len=self.config["model"]["max_seq_len"])

    self.sess.run(tf.global_variables_initializer())
def _build_layers(self, inputs, num_outputs, options):
    """Flatten inputs, apply a dense stack, then a linear output layer."""
    hidden_sizes = options.get("fcnet_hiddens", [256, 256])
    act_fn = get_activation_fn(options.get("fcnet_activation", "relu"))
    with tf.name_scope("fc_net"):
        net = flatten(inputs)
        for width in hidden_sizes:
            net = layers.dense(
                net,
                width,
                kernel_initializer=normc_initializer(1.0),
                activation=act_fn)
        logits = layers.dense(
            net,
            num_outputs,
            kernel_initializer=normc_initializer(1.0),
            activation=None)
        return logits, net
def __init__(self,
             layer_units=None,
             activation=None,
             custom_params=None,
             vf_share_layers=False,
             dummy=False):
    """Keras model built from Dense layers stored as attributes fc_0..fc_n.

    layer_units: list, a list of the number of units of all layers
        except the input layer
    activation: activation for every layer except the last (linear) one.
    custom_params: optional per-layer parameter sets; when given, each
        layer is built from custom_params[i] instead of an initializer.
    vf_share_layers: also build a 1-unit "fc_vf" value head.
    dummy: if True, build nothing (placeholder instance).
    """
    keras_models.Model.__init__(self)
    if dummy:
        return
    assert layer_units is not None and activation is not None

    def _get_initializer(i, n):
        # Hidden layers use normc(1.0); the final layer uses normc(0.01).
        if i < len(n) - 1:
            return normc_initializer(1.0)
        else:
            return normc_initializer(0.01)

    if not custom_params:
        for i, size in enumerate(layer_units):
            name = f"fc_{i}"
            layer = Dense(size,
                          activation=(activation
                                      if i < len(layer_units) - 1 else None),
                          kernel_initializer=_get_initializer(
                              i, layer_units),
                          name=name)
            setattr(self, name, layer)
        if vf_share_layers:
            name = f"fc_vf"
            layer = Dense(1,
                          activation=None,
                          kernel_initializer=normc_initializer(1.0),
                          name=name)
            setattr(self, name, layer)
    else:
        # With custom params, expect one extra entry for the value head.
        if vf_share_layers:
            assert len(layer_units) == len(custom_params) - 1
        else:
            assert len(layer_units) == len(custom_params)
        for i, size in enumerate(layer_units):
            name = f"fc_{i}"
            # NOTE(review): assumes Dense here is a project-local class
            # accepting `custom_params` -- not tf.keras.layers.Dense.
            layer = Dense(custom_params=custom_params[i],
                          activation=(activation
                                      if i < len(layer_units) - 1 else None),
                          name=name)
            setattr(self, name, layer)
        if vf_share_layers:
            name = f"fc_vf"
            layer = Dense(custom_params=custom_params[-1],
                          activation=None,
                          name=name)
            setattr(self, name, layer)
    self._vf_share_layers = vf_share_layers
def _build_layers(self, inputs, num_outputs, _):
    """Single linear layer; the returned last_layer is the raw input."""
    with tf.name_scope("linear"):
        logits = slim.fully_connected(
            inputs,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
        )
        return logits, inputs
def _init(self, inputs, num_outputs, options):
    """Single linear projection of the inputs (no hidden layers)."""
    with tf.name_scope("linear"):
        logits = slim.fully_connected(
            inputs,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="linear_out")
        return logits, inputs
def _init(self, inputs, num_outputs, options):
    """Small ELU conv stack followed by two linear heads.

    NOTE(review): both fc1 and fc2 are computed from the flattened conv
    output `x` -- fc2 does NOT stack on top of fc1. Confirm this is
    intentional (fc1 is returned as "last_layer" as a separate branch).
    """
    x = inputs
    with tf.name_scope("convnet"):
        # Four stride-2 3x3 conv layers with ELU activations.
        for i in range(4):
            x = tf.nn.elu(conv2d(x, 32, "l{}".format(i+1), [3, 3], [2, 2]))
        r, c = x.shape[1].value, x.shape[2].value
        x = tf.reshape(x, [-1, r*c*32])
        fc1 = linear(x, 256, "fc1")
        fc2 = linear(x, num_outputs, "fc2", normc_initializer(0.01))
        return fc2, fc1
def value_function(self):
    """Builds the value function output.

    This method can be overridden to customize the implementation of
    the value function (e.g., not sharing hidden layers).

    Returns:
        Tensor of size [BATCH_SIZE] for the value function.
    """
    value = linear(self.last_layer, 1, "value", normc_initializer(1.0))
    return tf.reshape(value, [-1])
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Two tanh dense layers, each followed by batch norm, then linear out."""
    net = input_dict["obs"]
    for idx, width in enumerate([256, 256]):
        net = tf.layers.dense(
            net,
            width,
            kernel_initializer=normc_initializer(1.0),
            activation=tf.nn.tanh,
            name="fc{}".format(idx))
        # Batch norm keys off is_training so statistics update only during
        # training passes.
        net = tf.layers.batch_normalization(
            net, training=input_dict["is_training"])
    output = tf.layers.dense(
        net,
        num_outputs,
        kernel_initializer=normc_initializer(0.01),
        activation=None,
        name="fc_out")
    return output, net
def value_function(self):
    """Builds the value function output.

    This method can be overridden to customize the implementation of
    the value function (e.g., not sharing hidden layers).

    Returns:
        Tensor of size [BATCH_SIZE] for the value function.
    """
    flat_shape = [-1]
    return tf.reshape(
        linear(self.last_layer, 1, "value", normc_initializer(1.0)),
        flat_shape)
def __init__(self, obs_space, action_space, num_outputs, model_config,
             name, **kw):
    """Keras Q-model: one 128-unit ReLU hidden layer feeding the head.

    The core layers defined here are reused by the other output heads
    of DistributionalQModel.
    """
    super(MyKerasQModel, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name,
                                        **kw)
    self.inputs = tf.keras.layers.Input(
        shape=obs_space.shape, name="observations")
    hidden = tf.keras.layers.Dense(
        128,
        name="my_layer1",
        activation=tf.nn.relu,
        kernel_initializer=normc_initializer(1.0))(self.inputs)
    head = tf.keras.layers.Dense(
        num_outputs,
        name="my_out",
        activation=tf.nn.relu,
        kernel_initializer=normc_initializer(1.0))(hidden)
    self.base_model = tf.keras.Model(self.inputs, head)
    self.register_variables(self.base_model.variables)
def _build_layers_v2(self, input_dict, num_outputs, options):
    """LSTM model over obs (optionally concatenated with prev action/reward).

    Returns:
        (logits, last_layer) where last_layer is the LSTM output
        reshaped to [BATCH, cell_size].
    """
    cell_size = options.get("lstm_cell_size")
    if options.get("lstm_use_prev_action_reward"):
        # Append flattened previous actions and rewards to the obs vector.
        action_dim = int(
            np.product(
                input_dict["prev_actions"].get_shape().as_list()[1:]))
        features = tf.concat(
            [
                input_dict["obs"],
                tf.reshape(
                    tf.cast(input_dict["prev_actions"], tf.float32),
                    [-1, action_dim]),
                tf.reshape(input_dict["prev_rewards"], [-1, 1]),
            ],
            axis=1)
    else:
        features = input_dict["obs"]
    last_layer = add_time_dimension(features, self.seq_lens)

    # Setup the LSTM cell
    lstm = rnn.BasicLSTMCell(cell_size, state_is_tuple=True)
    self.state_init = [
        np.zeros(lstm.state_size.c, np.float32),
        np.zeros(lstm.state_size.h, np.float32)
    ]

    # Setup LSTM inputs (reuse provided state placeholders if present)
    if self.state_in:
        c_in, h_in = self.state_in
    else:
        c_in = tf.placeholder(
            tf.float32, [None, lstm.state_size.c], name="c")
        h_in = tf.placeholder(
            tf.float32, [None, lstm.state_size.h], name="h")
        self.state_in = [c_in, h_in]

    # Setup LSTM outputs
    state_in = rnn.LSTMStateTuple(c_in, h_in)
    lstm_out, lstm_state = tf.nn.dynamic_rnn(
        lstm,
        last_layer,
        initial_state=state_in,
        sequence_length=self.seq_lens,
        time_major=False,
        dtype=tf.float32)
    self.state_out = list(lstm_state)

    # Compute outputs
    last_layer = tf.reshape(lstm_out, [-1, cell_size])
    logits = linear(last_layer, num_outputs, "action",
                    normc_initializer(0.01))
    return logits, last_layer
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Two tanh slim FC layers with batch norm after each, then linear out."""
    hidden_sizes = [256, 256]
    net = input_dict["obs"]
    idx = 0
    while idx < len(hidden_sizes):
        net = slim.fully_connected(
            net,
            hidden_sizes[idx],
            weights_initializer=normc_initializer(1.0),
            activation_fn=tf.nn.tanh,
            scope="fc{}".format(idx))
        # Batch norm tracks statistics only while is_training is true.
        net = tf.layers.batch_normalization(
            net, training=input_dict["is_training"])
        idx += 1
    output = slim.fully_connected(
        net,
        num_outputs,
        weights_initializer=normc_initializer(0.01),
        activation_fn=None,
        scope="fc_out")
    return output, net
def _build_layers_v2(self, input_dict, num_outputs, options):
    """LSTM model over obs, optionally with prev action/reward features.

    Returns:
        (logits, last_layer); last_layer has shape [BATCH, cell_size].
    """
    cell_size = options.get("lstm_cell_size")
    if options.get("lstm_use_prev_action_reward"):
        # Flatten prev actions to [BATCH, action_dim] and append them and
        # the prev rewards to the observation features.
        action_dim = int(
            np.product(
                input_dict["prev_actions"].get_shape().as_list()[1:]))
        features = tf.concat(
            [
                input_dict["obs"],
                tf.reshape(
                    tf.cast(input_dict["prev_actions"], tf.float32),
                    [-1, action_dim]),
                tf.reshape(input_dict["prev_rewards"], [-1, 1]),
            ],
            axis=1)
    else:
        features = input_dict["obs"]
    last_layer = add_time_dimension(features, self.seq_lens)

    # Setup the LSTM cell
    lstm = rnn.BasicLSTMCell(cell_size, state_is_tuple=True)
    self.state_init = [
        np.zeros(lstm.state_size.c, np.float32),
        np.zeros(lstm.state_size.h, np.float32)
    ]

    # Setup LSTM inputs (create placeholders if no state was provided)
    if self.state_in:
        c_in, h_in = self.state_in
    else:
        c_in = tf.placeholder(
            tf.float32, [None, lstm.state_size.c], name="c")
        h_in = tf.placeholder(
            tf.float32, [None, lstm.state_size.h], name="h")
        self.state_in = [c_in, h_in]

    # Setup LSTM outputs
    state_in = rnn.LSTMStateTuple(c_in, h_in)
    lstm_out, lstm_state = tf.nn.dynamic_rnn(
        lstm,
        last_layer,
        initial_state=state_in,
        sequence_length=self.seq_lens,
        time_major=False,
        dtype=tf.float32)
    self.state_out = list(lstm_state)

    # Compute outputs
    last_layer = tf.reshape(lstm_out, [-1, cell_size])
    logits = linear(last_layer, num_outputs, "action",
                    normc_initializer(0.01))
    return logits, last_layer
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Conv + FC net; optionally appends smoothed rewards to the output.

    A list observation is interpreted as [image, smoothed_rewards].
    """
    obs = input_dict["obs"]
    smoothed_rews = None
    if isinstance(obs, list):
        smoothed_rews = obs[1]
        obs = obs[0]
    with tf.name_scope("custom_net"):
        conv = slim.conv2d(
            obs, 6, [3, 3], 1, activation_fn=tf.nn.relu, scope="conv")
        net = flatten(conv)
        for idx, width in enumerate([32, 32], start=1):
            net = slim.fully_connected(
                net,
                width,
                weights_initializer=normc_initializer(1.0),
                activation_fn=tf.nn.relu,
                scope="fc{}".format(idx))
        output = slim.fully_connected(
            net,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")
        if smoothed_rews is not None:
            output = tf.concat([output, smoothed_rews], axis=-1)
        return output, net
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Action-embedding model with invalid-action masking."""
    # Extract the available actions tensor from the observation.
    obs = input_dict["obs"]
    avail_actions = obs["avail_actions"]
    action_mask = obs["action_mask"]
    action_embed_size = avail_actions.shape[2].value
    if num_outputs != avail_actions.shape[1].value:
        raise ValueError(
            "This model assumes num outputs is equal to max avail actions",
            num_outputs, avail_actions)

    # Standard FC net component.
    net = obs["cart"]
    for idx, width in enumerate([256, 256]):
        net = slim.fully_connected(
            net,
            width,
            weights_initializer=normc_initializer(1.0),
            activation_fn=tf.nn.tanh,
            scope="fc{}".format(idx))
    embed = slim.fully_connected(
        net,
        action_embed_size,
        weights_initializer=normc_initializer(0.01),
        activation_fn=None,
        scope="fc_out")

    # Expand the model output to [BATCH, 1, EMBED_SIZE]; avail_actions is
    # [BATCH, MAX_ACTIONS, EMBED_SIZE], so the product + reduce_sum is a
    # batch dot product producing [BATCH, MAX_ACTIONS] logits.
    intent_vector = tf.expand_dims(embed, 1)
    action_logits = tf.reduce_sum(avail_actions * intent_vector, axis=2)

    # Mask out invalid actions (use tf.float32.min for stability)
    inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min)
    return inf_mask + action_logits, net
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Dot-product action-embedding model with action masking."""
    # Extract the available actions tensor from the observation.
    avail_actions = input_dict["obs"]["avail_actions"]
    action_mask = input_dict["obs"]["action_mask"]
    action_embed_size = avail_actions.shape[2].value
    if num_outputs != avail_actions.shape[1].value:
        raise ValueError(
            "This model assumes num outputs is equal to max avail actions",
            num_outputs, avail_actions)

    # Standard FC net component over the "cart" observation.
    hidden_sizes = [256, 256]
    last_layer = input_dict["obs"]["cart"]
    layer_idx = 0
    for size in hidden_sizes:
        last_layer = slim.fully_connected(
            last_layer,
            size,
            weights_initializer=normc_initializer(1.0),
            activation_fn=tf.nn.tanh,
            scope="fc{}".format(layer_idx))
        layer_idx += 1
    output = slim.fully_connected(
        last_layer,
        action_embed_size,
        weights_initializer=normc_initializer(0.01),
        activation_fn=None,
        scope="fc_out")

    # Expand output to [BATCH, 1, EMBED_SIZE] and dot with the
    # [BATCH, MAX_ACTIONS, EMBED_SIZE] avail-actions tensor.
    intent_vector = tf.expand_dims(output, 1)
    action_logits = tf.reduce_sum(avail_actions * intent_vector, axis=2)

    # Mask out invalid actions (use tf.float32.min for stability)
    inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min)
    masked_logits = inf_mask + action_logits
    return masked_logits, last_layer
def __init__(self,
             num_outputs: int,
             fcnet_hiddens: Sequence[int],
             fcnet_activation: str,
             conv_filters: Optional[Sequence[ConvLayerSpec]] = None,
             conv_activation: str = 'relu',
             **options):
    """Feed-forward model: optional conv stack, dense stack, linear head."""
    super().__init__()
    self._num_outputs = num_outputs
    self._fcnet_hiddens = fcnet_hiddens
    self._fcnet_activation = fcnet_activation
    self._use_conv = conv_filters is not None
    self._conv_filters = conv_filters
    self._conv_activation = conv_activation
    self._recurrent = False
    self._options = options

    if conv_filters is not None:
        # Transpose the per-layer specs into parallel tuples.
        filters, kernel_size, strides = list(zip(*conv_filters))
        self.conv_layer = Conv2DStack(filters,
                                      kernel_size,
                                      strides,
                                      padding='valid',
                                      activation=conv_activation,
                                      flatten_output=True)

    self.dense_layer = DenseStack(
        fcnet_hiddens,
        kernel_initializer=normc_initializer(1.0),
        activation=fcnet_activation,
        output_activation=fcnet_activation)

    # WARNING: DO NOT CHANGE KERNEL INITIALIZER!!!
    # PPO/Gradient based methods are extremely senstive to this and will
    # break. Don't alter this unless you're sure you know what you're
    # doing.
    self.output_layer = Dense(
        num_outputs, kernel_initializer=normc_initializer(0.01))
def __init__(self, obs_space, action_space, num_outputs, model_config,
             name):
    """Keras model: one shared hidden layer feeding policy + value heads."""
    super(MyKerasModel, self).__init__(obs_space, action_space,
                                       num_outputs, model_config, name)
    self.inputs = tf.keras.layers.Input(
        shape=obs_space.shape, name="observations")
    shared = tf.keras.layers.Dense(
        128,
        name="my_layer1",
        activation=tf.nn.relu,
        kernel_initializer=normc_initializer(1.0))(self.inputs)
    policy_out = tf.keras.layers.Dense(
        num_outputs,
        name="my_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(shared)
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(shared)
    self.base_model = tf.keras.Model(self.inputs, [policy_out, value_out])
    self.register_variables(self.base_model.variables)
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Deep narrow FC net over real_obs; logits masked by action_mask.

    NOTE(review): the output width is the module-level ACTIONS constant,
    not num_outputs -- confirm the two always agree.
    """
    action_mask = input_dict["obs"]["action_mask"]
    # Standard FC net component.
    net = input_dict["obs"]["real_obs"]
    # hiddens = [256, 256]
    for idx, width in enumerate([20, 20, 15, 15, 10, 10, 9, 9]):
        net = slim.fully_connected(
            net,
            width,
            weights_initializer=normc_initializer(1.0),
            activation_fn=tf.nn.tanh,
            scope="fc{}".format(idx))
    logits = slim.fully_connected(
        net,
        ACTIONS,
        weights_initializer=normc_initializer(0.01),
        activation_fn=None,
        scope="fc_out")
    # Invalid actions get tf.float32.min (not -inf, for stability).
    inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min)
    masked_logits = logits + inf_mask
    return masked_logits, net
def _setup_graph(self, ob_space, ac_space):
    """Build the policy graph: model, action dist, value head, step var.

    NOTE(review): uses `list(ob_space)` (not ob_space.shape) -- assumes
    ob_space is already a shape sequence here; confirm against callers.
    """
    self.x = tf.placeholder(tf.float32, [None] + list(ob_space))
    dist_class, self.logit_dim = ModelCatalog.get_action_dist(ac_space)
    self._model = ModelCatalog.get_model(
        self.registry, self.x, self.logit_dim, self.config["model"])
    self.logits = self._model.outputs
    self.curr_dist = dist_class(self.logits)
    # Value head: linear projection of the model's last hidden layer.
    self.vf = tf.reshape(linear(self._model.last_layer, 1, "value",
                                normc_initializer(1.0)), [-1])
    self.sample = self.curr_dist.sample()
    self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      tf.get_variable_scope().name)
    # Non-trainable global step counter.
    self.global_step = tf.get_variable(
        "global_step", [],
        tf.int32,
        initializer=tf.constant_initializer(0, dtype=tf.int32),
        trainable=False)
def _init(self, inputs, num_outputs, options):
    """Conv stack + single-batch LSTM over the time dimension.

    Returns:
        (logits, x) where x is the LSTM output reshaped to
        [TIME, cell_size].
    """
    # rnn module layout moved in TF 1.0; branch on the installed version.
    use_tf100_api = (distutils.version.LooseVersion(tf.VERSION) >=
                     distutils.version.LooseVersion("1.0.0"))
    self.x = x = inputs
    for i in range(4):
        x = tf.nn.elu(conv2d(x, 32, "l{}".format(i + 1), [3, 3], [2, 2]))
    # Introduce a "fake" batch dimension of 1 after flatten so that we can
    # do LSTM over the time dim.
    x = tf.expand_dims(flatten(x), [0])

    size = 256
    if use_tf100_api:
        lstm = rnn.BasicLSTMCell(size, state_is_tuple=True)
    else:
        lstm = rnn.rnn_cell.BasicLSTMCell(size, state_is_tuple=True)
    # Sequence length = number of input frames (the original batch dim).
    step_size = tf.shape(self.x)[:1]

    c_init = np.zeros((1, lstm.state_size.c), np.float32)
    h_init = np.zeros((1, lstm.state_size.h), np.float32)
    self.state_init = [c_init, h_init]
    c_in = tf.placeholder(tf.float32, [1, lstm.state_size.c])
    h_in = tf.placeholder(tf.float32, [1, lstm.state_size.h])
    self.state_in = [c_in, h_in]

    if use_tf100_api:
        state_in = rnn.LSTMStateTuple(c_in, h_in)
    else:
        state_in = rnn.rnn_cell.LSTMStateTuple(c_in, h_in)
    lstm_out, lstm_state = tf.nn.dynamic_rnn(lstm,
                                             x,
                                             initial_state=state_in,
                                             sequence_length=step_size,
                                             time_major=False)
    lstm_c, lstm_h = lstm_state
    x = tf.reshape(lstm_out, [-1, size])
    logits = linear(x, num_outputs, "action", normc_initializer(0.01))
    # Keep only the first batch row of the final LSTM state.
    self.state_out = [lstm_c[:1, :], lstm_h[:1, :]]
    return logits, x
def _build_layers(self, inputs, num_outputs, options):
    """Carla model: conv stack over the image slice plus an FC metrics branch.

    The flat input vector is laid out as [image pixels..., 5 + 2 metric
    values]; the image part is reshaped back to `image_shape` and run
    through conv layers, the metrics part through one FC layer, and the
    two branches are concatenated before the final FC stack.

    Returns:
        (output, last_layer) tuple of tensors.

    Raises:
        ValueError: If "fcnet_activation" is not "tanh" or "relu".
    """
    # Parse options
    image_shape = options["custom_options"]["image_shape"]
    convs = options.get("conv_filters", [
        [16, [8, 8], 4],
        [32, [5, 5], 3],
        [32, [5, 5], 2],
        [512, [10, 10], 1],
    ])
    hiddens = options.get("fcnet_hiddens", [64])
    fcnet_activation = options.get("fcnet_activation", "tanh")
    if fcnet_activation == "tanh":
        activation = tf.nn.tanh
    elif fcnet_activation == "relu":
        activation = tf.nn.relu
    else:
        # Bug fix: an unsupported value used to fall through, leaving
        # `activation` unbound and raising UnboundLocalError later.
        raise ValueError(
            "Unsupported fcnet_activation: {}".format(fcnet_activation))

    # Sanity checks
    image_size = np.product(image_shape)
    expected_shape = [image_size + 5 + 2]
    assert inputs.shape.as_list()[1:] == expected_shape, \
        (inputs.shape.as_list()[1:], expected_shape)

    # Reshape the input vector back into its components
    vision_in = tf.reshape(inputs[:, :image_size],
                           [tf.shape(inputs)[0]] + image_shape)
    metrics_in = inputs[:, image_size:]
    print("Vision in shape", vision_in)
    print("Metrics in shape", metrics_in)

    # Setup vision layers
    with tf.name_scope("carla_vision"):
        for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
            vision_in = slim.conv2d(
                vision_in,
                out_size,
                kernel,
                stride,
                scope="conv{}".format(i))
        # Final conv uses VALID padding so the spatial dims collapse.
        out_size, kernel, stride = convs[-1]
        vision_in = slim.conv2d(
            vision_in,
            out_size,
            kernel,
            stride,
            padding="VALID",
            scope="conv_out")
        vision_in = tf.squeeze(vision_in, [1, 2])

    # Setup metrics layer
    with tf.name_scope("carla_metrics"):
        metrics_in = slim.fully_connected(
            metrics_in,
            64,
            weights_initializer=xavier_initializer(),
            activation_fn=activation,
            scope="metrics_out")

    print("Shape of vision out is", vision_in.shape)
    print("Shape of metric out is", metrics_in.shape)

    # Combine the metrics and vision inputs
    with tf.name_scope("carla_out"):
        i = 1
        last_layer = tf.concat([vision_in, metrics_in], axis=1)
        print("Shape of concatenated out is", last_layer.shape)
        for size in hiddens:
            last_layer = slim.fully_connected(
                last_layer,
                size,
                weights_initializer=xavier_initializer(),
                activation_fn=activation,
                scope="fc{}".format(i))
            i += 1
        output = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")

    return output, last_layer
def _build_layers_v2(self, input_dict, num_outputs, options):
    """LSTM test model that snapshots its RNN inputs/outputs via a spy op."""

    def spy(sequences, state_in, state_out, seq_lens):
        """Capture one training batch's RNN tensors for the test suite."""
        if len(sequences) == 1:
            return 0  # don't capture inference inputs
        # TF runs this function in an isolated context, so we have to use
        # redis to communicate back to our suite
        ray.experimental.internal_kv._internal_kv_put(
            "rnn_spy_in_{}".format(RNNSpyModel.capture_index),
            pickle.dumps({
                "sequences": sequences,
                "state_in": state_in,
                "state_out": state_out,
                "seq_lens": seq_lens
            }),
            overwrite=True)
        RNNSpyModel.capture_index += 1
        return 0

    features = input_dict["obs"]
    cell_size = 3
    last_layer = add_time_dimension(features, self.seq_lens)

    # Setup the LSTM cell
    lstm = rnn.BasicLSTMCell(cell_size, state_is_tuple=True)
    self.state_init = [
        np.zeros(lstm.state_size.c, np.float32),
        np.zeros(lstm.state_size.h, np.float32)
    ]

    # Setup LSTM inputs (reuse provided state placeholders if present)
    if self.state_in:
        c_in, h_in = self.state_in
    else:
        c_in = tf.placeholder(
            tf.float32, [None, lstm.state_size.c], name="c")
        h_in = tf.placeholder(
            tf.float32, [None, lstm.state_size.h], name="h")
        self.state_in = [c_in, h_in]

    # Setup LSTM outputs
    state_in = rnn.LSTMStateTuple(c_in, h_in)
    lstm_out, lstm_state = tf.nn.dynamic_rnn(
        lstm,
        last_layer,
        initial_state=state_in,
        sequence_length=self.seq_lens,
        time_major=False,
        dtype=tf.float32)
    self.state_out = list(lstm_state)

    # Stateful py_func so the spy runs on every forward pass.
    spy_fn = tf.py_func(
        spy,
        [
            last_layer,
            self.state_in,
            self.state_out,
            self.seq_lens,
        ],
        tf.int64,
        stateful=True)

    # Compute outputs; the control dependency forces the spy to execute.
    with tf.control_dependencies([spy_fn]):
        last_layer = tf.reshape(lstm_out, [-1, cell_size])
        logits = linear(last_layer, num_outputs, "action",
                        normc_initializer(0.01))
    return logits, last_layer