Example #1
    def _build_layers_v2(self, input_dict, num_outputs, options):
        mask = input_dict["obs"]["action_mask"]

        last_layer = input_dict["obs"]["real_obs"]
        hiddens = options["fcnet_hiddens"]
        for i, size in enumerate(hiddens):
            label = "fc{}".format(i)
            last_layer = tf.layers.dense(
                last_layer,
                size,
                kernel_initializer=normc_initializer(1.0),
                activation=tf.nn.relu,
                name=label)
        action_logits = tf.layers.dense(
            last_layer,
            num_outputs,
            kernel_initializer=normc_initializer(0.01),
            activation=None,
            name="fc_out")

        if num_outputs == 1:
            return action_logits, last_layer

        # Mask out invalid actions (use tf.float32.min for stability)
        inf_mask = tf.maximum(tf.log(mask), tf.float32.min)
        masked_logits = inf_mask + action_logits

        return masked_logits, last_layer
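The key trick in Example #1 is adding tf.maximum(tf.log(mask), tf.float32.min) to the logits: invalid actions receive a huge negative logit, so the softmax assigns them (numerically) zero probability. A minimal NumPy sketch of the same arithmetic, independent of RLlib and purely illustrative:

import numpy as np

# Illustrative only: reproduce the masking arithmetic from the model above.
mask = np.array([1.0, 0.0, 1.0], dtype=np.float32)     # action 1 is invalid
logits = np.array([0.5, 2.0, -0.3], dtype=np.float32)

with np.errstate(divide="ignore"):
    # log(0) = -inf; clamp to the float32 minimum for numerical stability,
    # mirroring tf.maximum(tf.log(mask), tf.float32.min).
    inf_mask = np.maximum(np.log(mask), np.finfo(np.float32).min)

masked_logits = logits + inf_mask
probs = np.exp(masked_logits - masked_logits.max())
probs /= probs.sum()
print(probs)  # the probability of the masked action underflows to 0.0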
Example #2
    def create_inverse_model(self, model_config, encoder):
        """
        Create the inverse submodel of the SCM.
        Inputs:[Encoded state at t,
                Encoded state at t - 1,
                Actions at t - 1,
                MOA LSTM output at t - 1]
        Output: Predicted social influence reward at t - 1
        :param model_config: The model config dict.
        :param encoder: The SCM encoder submodel.
        :return: A new inverse model.
        """
        encoder_output_size = encoder.output_shape[-1]
        inputs = [
            self.create_encoded_input_layer(encoder_output_size, "encoded_input_now"),
            self.create_encoded_input_layer(encoder_output_size, "encoded_input_next"),
            self.create_action_input_layer(self.action_space.n, self.num_other_agents + 1),
            self.create_lstm_input_layer(model_config),
        ]
        inputs_concatenated = tf.keras.layers.concatenate(inputs)
        activation = get_activation_fn(model_config.get("fcnet_activation"))

        fc_layer = tf.keras.layers.Dense(
            32, name="fc_forward", activation=activation, kernel_initializer=normc_initializer(1.0),
        )(inputs_concatenated)

        output_layer = tf.keras.layers.Dense(
            1, activation="relu", kernel_initializer=normc_initializer(1.0),
        )(fc_layer)

        return tf.keras.Model(inputs, output_layer, name="SCM_Inverse_Model")
Example #3
    def _build_layers_v2(self, parameters, outs, args):

        obs_real_obs = parameters["obs"]["real_obs"]
        fcnet_hiddens = args["fcnet_hiddens"]
        obs_action_mask = parameters["obs"]["action_mask"]

        for i, size in enumerate(fcnet_hiddens):
            label = "fc{}".format(i)
            obs_real_obs = slim.fully_connected(
                obs_real_obs,
                size,
                weights_initializer=normc_initializer(1.0),
                activation_fn=tf.nn.tanh,
                scope=label)

        action_logits = slim.fully_connected(
            obs_real_obs,
            outs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")

        if outs == 1:
            return action_logits, obs_real_obs

        mask = tf.maximum(tf.log(obs_action_mask), tf.float32.min)
        logits = mask + action_logits

        return logits, obs_real_obs
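Every example on this page relies on normc_initializer from RLlib (ray.rllib.models.tf.misc in recent versions). If you need a self-contained stand-in, the column-normalized Gaussian initializer is commonly implemented along these lines (a sketch, not the library source):

import numpy as np
import tensorflow as tf

def normc_initializer(std=1.0):
    """Column-normalized Gaussian initializer (sketch of RLlib's TF variant)."""
    def _initializer(shape, dtype=None, partition_info=None):
        out = np.random.randn(*shape).astype(np.float32)
        # Rescale so each column has an L2 norm equal to `std`.
        out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
        return tf.constant(out)
    return _initializer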
Example #4
File: model.py  Project: oxwhirl/smac
    def _build_layers_v2(self, input_dict, num_outputs, options):
        action_mask = input_dict["obs"]["action_mask"]
        if num_outputs != action_mask.shape[1].value:
            raise ValueError(
                "This model assumes num outputs is equal to max avail actions",
                num_outputs,
                action_mask,
            )

        # Standard fully connected network
        last_layer = input_dict["obs"]["obs"]
        hiddens = options.get("fcnet_hiddens")
        for i, size in enumerate(hiddens):
            label = "fc{}".format(i)
            last_layer = tf.layers.dense(
                last_layer,
                size,
                kernel_initializer=normc_initializer(1.0),
                activation=tf.nn.tanh,
                name=label,
            )
        action_logits = tf.layers.dense(
            last_layer,
            num_outputs,
            kernel_initializer=normc_initializer(0.01),
            activation=None,
            name="fc_out",
        )

        # Mask out invalid actions (use tf.float32.min for stability)
        inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min)
        masked_logits = inf_mask + action_logits

        return masked_logits, last_layer
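To use a masking model like the one in Example #4, it must be registered with RLlib's ModelCatalog and referenced from the trainer config. A hedged sketch; the registry key and the class name MaskedActionsModel are illustrative stand-ins, not taken from the oxwhirl/smac project:

from ray.rllib.models import ModelCatalog

# MaskedActionsModel stands in for whichever class defines the
# _build_layers_v2 shown above.
ModelCatalog.register_custom_model("mask_model", MaskedActionsModel)

config = {
    "model": {
        "custom_model": "mask_model",
        "fcnet_hiddens": [256, 256],  # read via options.get("fcnet_hiddens")
    },
}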
Example #5
    def _build_layers_v2(self, input_dict, num_outputs, options):

        inputs = input_dict["obs"]

        hiddens = [32, 32]
        with tf.name_scope("custom_net"):
            inputs = slim.conv2d(inputs,
                                 6, [3, 3],
                                 1,
                                 activation_fn=tf.nn.relu,
                                 scope="conv")
            last_layer = flatten(inputs)
            i = 1
            for size in hiddens:
                label = "fc{}".format(i)
                last_layer = slim.fully_connected(
                    last_layer,
                    size,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=tf.nn.relu,
                    scope=label)
                i += 1
            output = slim.fully_connected(
                last_layer,
                num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope="fc_out")
            return output, last_layer
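Example #5 is written against the TF1-era slim/tf.layers API. A rough tf.keras equivalent of the same graph, for comparison (illustrative; the normc weight initializers are omitted for brevity, and the observation is assumed to be an image of shape (H, W, C)):

import tensorflow as tf

def build_custom_net(obs_shape, num_outputs):
    # Same structure as Example #5: one 3x3 conv, flatten, two 32-unit
    # hidden layers, then a linear output head.
    inputs = tf.keras.layers.Input(shape=obs_shape, name="obs")
    x = tf.keras.layers.Conv2D(6, (3, 3), strides=1, padding="same",
                               activation="relu", name="conv")(inputs)
    x = tf.keras.layers.Flatten()(x)
    for i, size in enumerate([32, 32], start=1):
        x = tf.keras.layers.Dense(size, activation="relu",
                                  name="fc{}".format(i))(x)
    output = tf.keras.layers.Dense(num_outputs, activation=None,
                                   name="fc_out")(x)
    return tf.keras.Model(inputs, [output, x])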
Example #6
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name)
        inputs = tf.keras.layers.Input(shape=obs_space.shape, name="inputs")
        is_training = tf.keras.layers.Input(shape=(),
                                            dtype=tf.bool,
                                            batch_size=1,
                                            name="is_training")
        last_layer = inputs
        hiddens = [256, 256]
        for i, size in enumerate(hiddens):
            label = "fc{}".format(i)
            last_layer = tf.keras.layers.Dense(
                units=size,
                kernel_initializer=normc_initializer(1.0),
                activation=tf.nn.tanh,
                name=label)(last_layer)
            # Add a batch norm layer
            last_layer = tf.keras.layers.BatchNormalization()(
                last_layer, training=is_training[0])
        output = tf.keras.layers.Dense(
            units=self.num_outputs,
            kernel_initializer=normc_initializer(0.01),
            activation=None,
            name="fc_out")(last_layer)
        value_out = tf.keras.layers.Dense(
            units=1,
            kernel_initializer=normc_initializer(0.01),
            activation=None,
            name="value_out")(last_layer)

        self.base_model = tf.keras.models.Model(inputs=[inputs, is_training],
                                                outputs=[output, value_out])
        self.register_variables(self.base_model.variables)
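Example #6 only builds the two-input Keras graph (observations plus an is_training flag). One plausible forward()/value_function() pair to go with it, sketched after the SampleBatch handling shown in Example #13 (assumes SampleBatch is imported from ray.rllib.policy.sample_batch and that the flag arrives as a scalar tensor):

    def forward(self, input_dict, state, seq_lens):
        # Feed both inputs expected by the Keras graph above; the training
        # flag is reshaped to match the (1,)-shaped `is_training` Input.
        if isinstance(input_dict, SampleBatch):
            is_training = input_dict.is_training
        else:
            is_training = input_dict["is_training"]
        out, self._value_out = self.base_model(
            [input_dict["obs"], tf.reshape(is_training, [1])])
        return out, []

    def value_function(self):
        return tf.reshape(self._value_out, [-1])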
Example #7
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kw):
        dropout_rate = 0.2
        num_outputs = 5
        hidden_dim = 10
        tf = try_import_tf()
        super(DQNModel, self).__init__(obs_space, action_space, num_outputs,
                                       model_config, name, **kw)
        # Define the core model layers which will be used by the other
        # output heads of DistributionalQModel
        self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                            name="observations")
        layer_0 = tf.keras.layers.Dropout(rate=dropout_rate,
                                          name="my_layer0")(self.inputs)
        layer_1 = tf.keras.layers.Dense(
            hidden_dim,
            name="my_layer1",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0))(layer_0)
        layer_2 = tf.keras.layers.Dropout(rate=dropout_rate,
                                          name="my_layer2")(layer_1)

        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="my_out",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0))(layer_2)

        self.base_model = tf.keras.Model(inputs=self.inputs, outputs=layer_out)
        self.register_variables(self.base_model.variables)
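A side note on the Dropout layers in Example #7: Keras Dropout passes inputs through unchanged at inference time and only drops (and rescales) units when called with training=True. A tiny standalone check, unrelated to RLlib:

import numpy as np
import tensorflow as tf

x = np.ones((1, 8), dtype=np.float32)
drop = tf.keras.layers.Dropout(rate=0.5)

print(drop(x).numpy())                 # inference: values pass through unchanged
print(drop(x, training=True).numpy())  # training: ~half zeroed, rest scaled by 1/(1 - rate)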
Example #8
    def __init__(
        self,
        obs_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        num_outputs: int,
        model_config: ModelConfigDict,
        name: str,
    ):
        super(ConvFCNet, self).__init__(obs_space, action_space, num_outputs,
                                        model_config, name)

        inputs_conv = tf.keras.layers.Input(shape=(11, 11, 9), )
        inputs_dense = tf.keras.layers.Input(shape=(1260 - 11 * 11 * 9, ), )
        feats = Encoder(128)((inputs_conv, inputs_dense))

        logits_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            kernel_initializer=normc_initializer(0.01),
        )(feats)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            kernel_initializer=normc_initializer(0.01),
        )(feats)

        self.base_model = tf.keras.Model(
            inputs=[inputs_conv, inputs_dense],
            outputs=[logits_out, value_out],
        )
        print(self.base_model.summary())
        self.register_variables(self.base_model.variables)

        self._value_out = None
Example #9
    def _build_value_model(self, model_config: ModelConfigDict):
        """Build value model with given model configuration
        model_config = {'activation': str, 'hiddens': Sequence}
        """
        activation = get_activation_fn(model_config.get("activation"))
        hiddens = model_config.get("hiddens", [])
        inputs = tf.keras.layers.Input(
            shape=(np.product(self.critic_preprocessor.shape),), name="value-inputs"
        )

        last_layer = inputs
        for i, size in enumerate(hiddens):
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0),
            )(last_layer)

        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01),
        )(last_layer)

        return tf.keras.Model(inputs, [value_out])
Example #10
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name):
     super(MyKerasModel, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)
     self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                         name="observations")
     layer_1 = tf.keras.layers.Dense(
         16,
         name="layer1",
         activation=tf.nn.relu,
         kernel_initializer=normc_initializer(1.0),
     )(self.inputs)
     layer_out = tf.keras.layers.Dense(
         num_outputs,
         name="out",
         activation=None,
         kernel_initializer=normc_initializer(0.01),
     )(layer_1)
     if self.model_config["vf_share_layers"]:
         value_out = tf.keras.layers.Dense(
             1,
             name="value",
             activation=None,
             kernel_initializer=normc_initializer(0.01),
         )(layer_1)
         self.base_model = tf.keras.Model(self.inputs,
                                          [layer_out, value_out])
     else:
         self.base_model = tf.keras.Model(self.inputs, layer_out)
Example #11
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name):
     super(TestKerasModel, self).__init__(obs_space, action_space,
                                          num_outputs, model_config, name)
     self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                         name="observations")
     layer_1 = tf.keras.layers.Dense(
         256,
         name="my_layer1",
         activation=tf.nn.relu,
         kernel_initializer=normc_initializer(1.0))(self.inputs)
     layer_2 = tf.keras.layers.Dense(
         256,
         name="my_layer2",
         activation=tf.nn.relu,
         kernel_initializer=normc_initializer(1.0))(layer_1)
     layer_out = tf.keras.layers.Dense(
         num_outputs,
         name="my_out",
         activation=None,
         kernel_initializer=normc_initializer(0.01))(layer_2)
     value_out = tf.keras.layers.Dense(
         1,
         name="value_out",
         activation=None,
         kernel_initializer=normc_initializer(0.01))(layer_2)
     self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
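Example #11 shows only the constructor; a TFModelV2 subclass normally pairs such a Keras graph with forward() and value_function() overrides along these lines (a sketch following RLlib's custom Keras model example, assuming self._value_out is cached in forward()):

 def forward(self, input_dict, state, seq_lens):
     # The Keras model above returns [logits, value]; cache the value branch
     # so value_function() can report it.
     model_out, self._value_out = self.base_model(input_dict["obs"])
     return model_out, state

 def value_function(self):
     return tf.reshape(self._value_out, [-1])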
Example #12
 def __init__(self,
              obs_space,
              action_space,
              num_outputs,
              model_config,
              name="my_model"):
     super(MLPModelV2, self).__init__(obs_space, action_space, num_outputs,
                                      model_config, name)
     # Simplified to one layer.
     input_layer = tf.keras.layers.Input(obs_space.shape,
                                         dtype=obs_space.dtype)
     layer_1 = tf.keras.layers.Dense(
         400, activation="relu",
         kernel_initializer=normc_initializer(1.0))(input_layer)
     layer_2 = tf.keras.layers.Dense(
         300, activation="relu",
         kernel_initializer=normc_initializer(1.0))(layer_1)
     output = tf.keras.layers.Dense(
         num_outputs,
         activation=None,
         kernel_initializer=normc_initializer(0.01))(layer_2)
     value_out = tf.keras.layers.Dense(
         1,
         activation=None,
         name="value_out",
         kernel_initializer=normc_initializer(0.01))(layer_2)
     self.base_model = tf.keras.Model(input_layer, [output, value_out])
     self.register_variables(self.base_model.variables)
Example #13
    def forward(self, input_dict, state, seq_lens):
        last_layer = input_dict["obs"]
        hiddens = [256, 256]
        with tf1.variable_scope("model", reuse=tf1.AUTO_REUSE):
            if isinstance(input_dict, SampleBatch):
                is_training = input_dict.is_training
            else:
                is_training = input_dict["is_training"]
            for i, size in enumerate(hiddens):
                last_layer = tf1.layers.dense(
                    last_layer,
                    size,
                    kernel_initializer=normc_initializer(1.0),
                    activation=tf.nn.tanh,
                    name="fc{}".format(i),
                )
                # Add a batch norm layer
                last_layer = tf1.layers.batch_normalization(
                    last_layer, training=is_training, name="bn_{}".format(i))

            output = tf1.layers.dense(
                last_layer,
                self.num_outputs,
                kernel_initializer=normc_initializer(0.01),
                activation=None,
                name="out",
            )
            self._value_out = tf1.layers.dense(
                last_layer,
                1,
                kernel_initializer=normc_initializer(1.0),
                activation=None,
                name="vf",
            )

        # Register variables.
        # NOTE: This is not the recommended way of doing things. We would
        # prefer creating keras-style Layers like it's done in the
        # `KerasBatchNormModel` class above and then have TFModelV2 auto-detect
        # the created vars. However, since there is a bug
        # in keras/tf that prevents us from using that KerasBatchNormModel
        # example (see comments above), we do variable registration the old,
        # manual way for this example Model here.
        if not self._registered:
            # Register already auto-detected variables (from the wrapping
            # Model, e.g. DQNTFModel).
            self.register_variables(self.variables())
            # Then register everything we added to the graph in this `forward`
            # call.
            self.register_variables(
                tf1.get_collection(tf1.GraphKeys.TRAINABLE_VARIABLES,
                                   scope=".+/model/.+"))
            self._registered = True

        return output, []
Example #14
    def __init__(
        self,
        obs_space,
        action_space,
        num_outputs,
        model_config,
        name,
        hiddens_size=128,
        cell_size=128,
    ):
        super(RNNModel, self).__init__(obs_space, action_space, num_outputs,
                                       model_config, name)
        self.cell_size = cell_size

        input_layer = tf.keras.layers.Input(shape=(None, obs_space.shape[0]),
                                            name="inputs")
        state_in_h = tf.keras.layers.Input(shape=(cell_size, ), name="h")
        state_in_c = tf.keras.layers.Input(shape=(cell_size, ), name="c")
        seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)

        dense1 = DenseLayer(hiddens_size)(input_layer)
        dense2 = DenseLayer(hiddens_size)(dense1)
        lstm_out, state_h, state_c = tf.keras.layers.LSTM(
            cell_size,
            return_sequences=True,
            return_state=True,
            name="lstm",
        )(
            inputs=dense2,
            mask=tf.sequence_mask(seq_in),
            initial_state=[state_in_h, state_in_c],
        )
        lstm_out = tf.keras.layers.LayerNormalization()(lstm_out)

        logits = tf.keras.layers.Dense(
            self.num_outputs,
            name="logits",
            kernel_initializer=normc_initializer(0.01),
        )(lstm_out)

        values = tf.keras.layers.Dense(
            1,
            activation=None,
            name="values",
            kernel_initializer=normc_initializer(0.01),
        )(lstm_out)

        # Create the RNN model
        self.rnn_model = tf.keras.Model(
            inputs=[input_layer, seq_in, state_in_h, state_in_c],
            outputs=[logits, values, state_h, state_c],
        )
        self.register_variables(self.rnn_model.variables)
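Example #14 likewise shows only the constructor. A recurrent ModelV2 usually also overrides forward_rnn(), get_initial_state() and value_function(); a sketch following RLlib's RNN model example (assumes numpy is imported as np and that self._value_out is cached here):

    def forward_rnn(self, inputs, state, seq_lens):
        # Matches the Keras model's input order [inputs, seq_in, h, c] and
        # output order [logits, values, state_h, state_c].
        model_out, self._value_out, h, c = self.rnn_model(
            [inputs, seq_lens] + state)
        return model_out, [h, c]

    def get_initial_state(self):
        return [
            np.zeros(self.cell_size, np.float32),
            np.zeros(self.cell_size, np.float32),
        ]

    def value_function(self):
        return tf.reshape(self._value_out, [-1])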
Example #15
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kwargs):
        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name, **kwargs)

        conv_filters = model_config['conv_filters']
        self.is_conv = bool(conv_filters)
        orig_shape = obs_space.original_space['board']
        new_shape = orig_shape.shape + (1, ) if self.is_conv else (np.prod(
            orig_shape.shape), )

        self.inputs = tf.keras.layers.Input(shape=new_shape,
                                            name='observations')
        last_layer = self.inputs

        if self.is_conv:
            conv_activation = get_activation_fn(
                model_config['conv_activation'])
            for i, (filters, kernel_size,
                    stride) in enumerate(conv_filters, 1):
                last_layer = tf.keras.layers.Conv2D(filters,
                                                    kernel_size,
                                                    stride,
                                                    name="conv{}".format(i),
                                                    activation=conv_activation,
                                                    padding='same')(last_layer)
            last_layer = tf.keras.layers.Flatten()(last_layer)

        fc_activation = get_activation_fn(model_config['fcnet_activation'])
        for i, size in enumerate(model_config['fcnet_hiddens'], 1):
            last_layer = tf.keras.layers.Dense(
                size,
                name='fc{}'.format(i),
                activation=fc_activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)

        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="my_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)

        self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
        self.register_variables(self.base_model.variables)
        self._value_out = None
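Example #15 builds its graph from obs_space.original_space['board'] but omits the accompanying forward(). A hypothetical sketch, inferred only from the shape handling in __init__ above (the real project may differ):

    def forward(self, input_dict, state, seq_lens):
        obs = input_dict["obs"]["board"]
        if self.is_conv:
            # Convolutional path: append the trailing channel dimension.
            obs = tf.expand_dims(obs, -1)
        else:
            # Dense path: flatten the board into one vector per sample.
            obs = tf.reshape(obs, [-1, int(np.prod(obs.shape[1:]))])
        model_out, self._value_out = self.base_model(obs)
        return model_out, state

    def value_function(self):
        return tf.reshape(self._value_out, [-1])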
Example #16
    def _build_layers(self, inputs, num_outputs, options):
        """Process the flattened inputs.

        Note that dict inputs will be flattened into a vector. To define a
        model that processes the components separately, use _build_layers_v2().
        """
        # Soft deprecate this class. All Models should use the ModelV2
        # API from here on.
        deprecation_warning("Model->FullyConnectedNetwork",
                            "ModelV2->FullyConnectedNetwork",
                            error=False)

        hiddens = options.get("fcnet_hiddens")
        activation = get_activation_fn(options.get("fcnet_activation"))

        if len(inputs.shape) > 2:
            inputs = tf.layers.flatten(inputs)

        with tf.name_scope("fc_net"):
            i = 1
            last_layer = inputs
            for size in hiddens:
                # skip final linear layer
                if options.get("no_final_linear") and i == len(hiddens):
                    output = tf.layers.dense(
                        last_layer,
                        num_outputs,
                        kernel_initializer=normc_initializer(1.0),
                        activation=activation,
                        name="fc_out")
                    return output, output

                label = "fc{}".format(i)
                last_layer = tf.layers.dense(
                    last_layer,
                    size,
                    kernel_initializer=normc_initializer(1.0),
                    activation=activation,
                    name=label)
                i += 1

            output = tf.layers.dense(
                last_layer,
                num_outputs,
                kernel_initializer=normc_initializer(0.01),
                activation=None,
                name="fc_out")
            return output, last_layer
Example #17
 def __init__(self, dim: int, **kwargs):
     super().__init__(**kwargs)
     self.dense = tf.keras.layers.Dense(
         dim,
         kernel_initializer=normc_initializer(1.0),
     )
     self.norm = tf.keras.layers.LayerNormalization()
Example #18
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        # TODO: (sven) Support Dicts as well.
        assert isinstance(obs_space.original_space, (Tuple)), \
            "`obs_space.original_space` must be Tuple!"

        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name)

        # Build the CNN(s) given obs_space's image components.
        self.cnns = {}
        concat_size = 0
        for i, component in enumerate(obs_space.original_space):
            # Image space.
            if len(component.shape) == 3:
                config = {
                    "conv_filters":
                    model_config.get("conv_filters",
                                     get_filter_config(component.shape)),
                    "conv_activation":
                    model_config.get("conv_activation"),
                }
                cnn = ModelCatalog.get_model_v2(component,
                                                action_space,
                                                num_outputs=None,
                                                model_config=config,
                                                framework="tf",
                                                name="cnn_{}".format(i))
                concat_size += cnn.num_outputs
                self.cnns[i] = cnn
            # Discrete inputs -> One-hot encode.
            elif isinstance(component, Discrete):
                concat_size += component.n
            # TODO: (sven) Multidiscrete (see e.g. our auto-LSTM wrappers).
            # Everything else (1D Box).
            else:
                assert len(component.shape) == 1, \
                    "Only input Box 1D or 3D spaces allowed!"
                concat_size += component.shape[-1]

        self.logits_and_value_model = None
        self._value_out = None
        if num_outputs:
            # Action-distribution head.
            concat_layer = tf.keras.layers.Input((concat_size, ))
            logits_layer = tf.keras.layers.Dense(
                num_outputs,
                activation=tf.keras.activations.linear,
                name="logits")(concat_layer)

            # Create the value branch model.
            value_layer = tf.keras.layers.Dense(
                1,
                name="value_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(concat_layer)
            self.logits_and_value_model = tf.keras.models.Model(
                concat_layer, [logits_layer, value_layer])
        else:
            self.num_outputs = concat_size
Example #19
        def value_function(self):
            assert self.cur_instance, "must call forward first"

            with self._branch_variable_scope("value_function"):
                # Simple case: sharing the feature layer
                if self.model_config["vf_share_layers"]:
                    return tf.reshape(
                        linear(self.cur_instance.last_layer, 1,
                               "value_function", normc_initializer(1.0)), [-1])

                # Create a new separate model with no RNN state, etc.
                branch_model_config = self.model_config.copy()
                branch_model_config["free_log_std"] = False
                if branch_model_config["use_lstm"]:
                    branch_model_config["use_lstm"] = False
                    logger.warning(
                        "It is not recommended to use a LSTM model with "
                        "vf_share_layers=False (consider setting it to True). "
                        "If you want to not share layers, you can implement "
                        "a custom LSTM model that overrides the "
                        "value_function() method.")
                branch_instance = self.legacy_model_cls(
                    self.cur_instance.input_dict,
                    self.obs_space,
                    self.action_space,
                    1,
                    branch_model_config,
                    state_in=None,
                    seq_lens=None)
                return tf.reshape(branch_instance.outputs, [-1])
Example #20
    def _build_layers_v2(self, input_dict, num_outputs, options):
        # Hard deprecate this class. All Models should use the ModelV2
        # API from here on.
        deprecation_warning("Model->LSTM", "RecurrentNetwork", error=False)

        cell_size = options.get("lstm_cell_size")
        if options.get("lstm_use_prev_action_reward"):
            action_dim = int(
                np.product(
                    input_dict["prev_actions"].get_shape().as_list()[1:]))
            features = tf.concat(
                [
                    input_dict["obs"],
                    tf.reshape(
                        tf.cast(input_dict["prev_actions"], tf.float32),
                        [-1, action_dim]),
                    tf.reshape(input_dict["prev_rewards"], [-1, 1]),
                ],
                axis=1)
        else:
            features = input_dict["obs"]
        last_layer = add_time_dimension(features, self.seq_lens)

        # Setup the LSTM cell
        lstm = tf1.nn.rnn_cell.LSTMCell(cell_size, state_is_tuple=True)
        self.state_init = [
            np.zeros(lstm.state_size.c, np.float32),
            np.zeros(lstm.state_size.h, np.float32)
        ]

        # Setup LSTM inputs
        if self.state_in:
            c_in, h_in = self.state_in
        else:
            c_in = tf1.placeholder(
                tf.float32, [None, lstm.state_size.c], name="c")
            h_in = tf1.placeholder(
                tf.float32, [None, lstm.state_size.h], name="h")
            self.state_in = [c_in, h_in]

        # Setup LSTM outputs
        state_in = tf1.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
        lstm_out, lstm_state = tf1.nn.dynamic_rnn(
            lstm,
            last_layer,
            initial_state=state_in,
            sequence_length=self.seq_lens,
            time_major=False,
            dtype=tf.float32)

        self.state_out = list(lstm_state)

        # Compute outputs
        last_layer = tf.reshape(lstm_out, [-1, cell_size])
        logits = linear(last_layer, num_outputs, "action",
                        normc_initializer(0.01))
        return logits, last_layer
Example #21
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name="atari_model"):
     super(AtariModel, self).__init__(obs_space, action_space, num_outputs, model_config,
                      name)
     inputs  = tf.keras.layers.Input(shape=(84,84,4), name='observations')
     inputs2 = tf.keras.layers.Input(shape=(2,), name="agent_indicator")
     # Convolutions on the frames on the screen
     layer1 = tf.keras.layers.Conv2D(
             32,
             [8, 8],
             strides=(4, 4),
             activation="relu",
             data_format='channels_last')(inputs)
     layer2 = tf.keras.layers.Conv2D(
             64,
             [4, 4],
             strides=(2, 2),
             activation="relu",
             data_format='channels_last')(layer1)
     layer3 = tf.keras.layers.Conv2D(
             64,
             [3, 3],
             strides=(1, 1),
             activation="relu",
             data_format='channels_last')(layer2)
     layer4 = tf.keras.layers.Flatten()(layer3)
     concat_layer = tf.keras.layers.Concatenate()([layer4, inputs2])
     layer5 = tf.keras.layers.Dense(
             512,
             activation="relu",
             kernel_initializer=normc_initializer(1.0))(concat_layer)
     action = tf.keras.layers.Dense(
             num_outputs,
             activation="linear",
             name="actions",
             kernel_initializer=normc_initializer(0.01))(layer5)
     value_out = tf.keras.layers.Dense(
             1,
             activation=None,
             name="value_out",
             kernel_initializer=normc_initializer(0.01))(layer5)
     self.base_model = tf.keras.Model([inputs, inputs2], [action, value_out])
     self.register_variables(self.base_model.variables)
Example #22
    def _build_layers(self, inputs, num_outputs, options):
        """Process the flattened inputs.

        Note that dict inputs will be flattened into a vector. To define a
        model that processes the components separately, use _build_layers_v2().
        """

        hiddens = options.get("fcnet_hiddens")
        activation = get_activation_fn(options.get("fcnet_activation"))

        if len(inputs.shape) > 2:
            inputs = tf.layers.flatten(inputs)

        with tf.name_scope("fc_net"):
            i = 1
            last_layer = inputs
            for size in hiddens:
                # skip final linear layer
                if options.get("no_final_linear") and i == len(hiddens):
                    output = tf.layers.dense(
                        last_layer,
                        num_outputs,
                        kernel_initializer=normc_initializer(1.0),
                        activation=activation,
                        name="fc_out")
                    return output, output

                label = "fc{}".format(i)
                last_layer = tf.layers.dense(
                    last_layer,
                    size,
                    kernel_initializer=normc_initializer(1.0),
                    activation=activation,
                    name=label)
                i += 1

            output = tf.layers.dense(
                last_layer,
                num_outputs,
                kernel_initializer=normc_initializer(0.01),
                activation=None,
                name="fc_out")
            return output, last_layer
Example #23
File: mlp_net.py  Project: ntalele/seagul
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name):
     print(obs_space)
     super(Linear, self).__init__(obs_space, action_space, num_outputs,
                                  model_config, name)
     self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                         name="observations")
     layer_out = tf.keras.layers.Dense(
         num_outputs,
         name="my_out",
         activation=None,
         kernel_initializer=normc_initializer(0.01))(self.inputs)
     value_out = tf.keras.layers.Dense(
         1,
         name="value_out",
         activation=None,
         kernel_initializer=normc_initializer(0.01))(self.inputs)
     self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
     self.register_variables(self.base_model.variables)
Example #24
        def build_primary_layers(prefix: str, obs_in: tf.Tensor,
                                 state_in: tf.Tensor):
            # encapsulated in a function to either be called once for shared policy/vf or twice for separate policy/vf

            _last_layer = obs_in

            # for i, (out_size, kernel, stride) in enumerate(cnn_filters):
            #     _last_layer = maybe_td(tf.keras.layers.Conv2D(
            #         filters=out_size,
            #         kernel_size=kernel,
            #         strides=stride,
            #         activation=conv_activation,
            #         padding="same",
            #         name="{}_conv_{}".format(prefix, i)))(_last_layer)
            #
            state_out = state_in
            # if self.use_lstm and not self.fake_lstm:
            #     for i, (out_size, kernel, stride) in enumerate(lstm_filters):
            #         if i > 0:
            #             raise NotImplementedError("Only single lstm layers are implemented right now")
            #
            #         _last_layer, *state_out = tf.keras.layers.ConvLSTM2D(
            #             filters=out_size,
            #             kernel_size=kernel,
            #             strides=stride,
            #             activation=conv_activation,
            #             padding="same",
            #             return_sequences=True,
            #             return_state=True,
            #             name="{}_convlstm".format(prefix))(
            #             inputs=_last_layer,
            #             mask=tf.sequence_mask(seq_lens_in),
            #             initial_state=state_in)

            for i, size in enumerate(model_config['fcnet_hiddens']):
                _last_layer = maybe_td(
                    tf.keras.layers.Dense(size,
                                          name="{}_fc_{}".format(prefix, i),
                                          activation=conv_activation,
                                          kernel_initializer=normc_initializer(
                                              1.0)))(_last_layer)

            # state_out = state_in
            # if self.use_lstm:
            #     _last_layer = maybe_td(tf.keras.layers.Flatten())(_last_layer)
            #     _last_layer, *state_out = tf.keras.layers.LSTM(
            #         units=64,
            #         return_sequences=True,
            #         return_state=True,
            #         name="{}_lstm".format(prefix))(
            #         inputs=_last_layer,
            #         mask=tf.sequence_mask(seq_lens_in),
            #         initial_state=state_in)

            return _last_layer, state_out
Example #25
    def __init__(
        self,
        obs_space,
        action_space,
        num_outputs,
        model_config,
        name,
        cell_size=64,
    ):
        """
        Create a LSTM with an actor-critic output: an output head with size num_outputs for the
        policy, and an output head of size 1 for the value function.
        :param obs_space: The size of the previous layer.
        :param action_space: The amount of actions available to the agent.
        :param num_outputs: The amount of actions available to the agent.
        :param model_config: The config dict for the model, unused.
        :param name: The name of the model.
        :param cell_size: The amount of LSTM units.
        """
        super(ActorCriticLSTM, self).__init__(obs_space, action_space,
                                              num_outputs, model_config, name)

        self.cell_size = cell_size

        input_layer = tf.keras.layers.Input(shape=(None, obs_space),
                                            name="inputs")

        state_in_h = tf.keras.layers.Input(shape=(cell_size, ), name="h")
        state_in_c = tf.keras.layers.Input(shape=(cell_size, ), name="c")
        seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)

        lstm_out, state_h, state_c = tf.keras.layers.LSTM(
            cell_size, return_sequences=True, return_state=True, name="lstm")(
                inputs=input_layer,
                mask=tf.sequence_mask(seq_in),
                initial_state=[state_in_h, state_in_c],
            )

        # Postprocess LSTM output with another hidden layer and compute values
        logits = tf.keras.layers.Dense(self.num_outputs,
                                       activation=tf.keras.activations.linear,
                                       name=name)(lstm_out)

        inputs = [input_layer, seq_in, state_in_h, state_in_c]
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01),
        )(lstm_out)
        outputs = [logits, value_out, state_h, state_c]

        self.rnn_model = tf.keras.Model(inputs=inputs,
                                        outputs=outputs,
                                        name="Actor_Critic_Model")
Example #26
File: model.py  Project: xiaming9880/ray
    def value_function(self):
        """Builds the value function output.

        This method can be overridden to customize the implementation of the
        value function (e.g., not sharing hidden layers).

        Returns:
            Tensor of size [BATCH_SIZE] for the value function.
        """
        return tf.reshape(
            linear(self.last_layer, 1, "value", normc_initializer(1.0)), [-1])
Example #27
 def _build_layers_v2(self, input_dict, num_outputs, options):
     last_layer = input_dict["obs"]
     hiddens = [256, 256]
     for i, size in enumerate(hiddens):
         label = "fc{}".format(i)
         last_layer = tf.layers.dense(
             last_layer,
             size,
             kernel_initializer=normc_initializer(1.0),
             activation=tf.nn.tanh,
             name=label)
         # Add a batch norm layer
         last_layer = tf.layers.batch_normalization(
             last_layer, training=input_dict["is_training"])
     output = tf.layers.dense(last_layer,
                              num_outputs,
                              kernel_initializer=normc_initializer(0.01),
                              activation=None,
                              name="fc_out")
     return output, last_layer
Example #28
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        super(MyModel, self).__init__(obs_space, action_space, num_outputs,
                                      model_config, name)
        self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                            name="observations")
        activation = tf.nn.tanh
        last_layer = layer_out = self.inputs
        i = 1
        hiddens = [256, 256, 256]
        for size in hiddens:
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
            i += 1
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)

        # build a parallel set of hidden layers for the value net
        last_layer = self.inputs
        i = 1
        for size in hiddens:
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_value_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
            i += 1

        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)
        self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
        self.register_variables(self.base_model.variables)
Example #29
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kw):
        super(MyKerasQModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name, **kw)

        # Define the core model layers which will be used by the other
        # output heads of DistributionalQModel
        self.inputs = tf.keras.layers.Input(
            shape=obs_space.shape, name="observations")
        layer_1 = tf.keras.layers.Dense(
            128,
            name="my_layer1",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0))(self.inputs)
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="my_out",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0))(layer_1)
        self.base_model = tf.keras.Model(self.inputs, layer_out)
        self.register_variables(self.base_model.variables)
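As with the other custom models, MyKerasQModel from Example #29 has to be registered before a trainer can pick it up. A hedged usage sketch (the registry key and environment are illustrative):

from ray import tune
from ray.rllib.models import ModelCatalog

ModelCatalog.register_custom_model("keras_q_model", MyKerasQModel)

tune.run(
    "DQN",
    config={
        "env": "CartPole-v0",
        "framework": "tf",
        "model": {"custom_model": "keras_q_model"},
    },
)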
Example #30
    def init(self):
        _board = tf.keras.layers.Input(shape=[11, 11, 4], name="board")
        _attribute = tf.keras.layers.Input(shape=[4], name="attribute")

        net = _board
        net = tf.keras.layers.Conv2D(32, 5, strides=2, padding="same", activation=tf.nn.relu,
                                     kernel_initializer=normc_initializer(0.01))(net)
        net = tf.keras.layers.BatchNormalization()(net)

        net = tf.keras.layers.Conv2D(64, 3, strides=1, padding="valid", activation=tf.nn.relu,
                                     kernel_initializer=normc_initializer(0.01))(net)
        net = tf.keras.layers.BatchNormalization()(net)

        net = tf.keras.layers.Conv2D(128, 3, strides=1, padding="valid", activation=tf.nn.relu,
                                     kernel_initializer=normc_initializer(0.01))(net)
        net = tf.keras.layers.BatchNormalization()(net)
        net = tf.keras.layers.Conv2D(128, 2, strides=1, padding="valid", activation=tf.nn.relu,
                                     kernel_initializer=normc_initializer(0.01))(net)
        net = tf.keras.layers.BatchNormalization()(net)
        net = tf.reshape(net, (-1, net.shape[-1]))

        net = tf.concat([net, _attribute], axis=1)

        net = tf.keras.layers.Dense(1024, activation=tf.nn.relu, kernel_initializer=normc_initializer(0.01))(net)
        # net = tf.keras.layers.Dense(128, activation=tf.nn.relu, kernel_initializer=normc_initializer(0.01))(net)
        # net = tf.keras.layers.BatchNormalization()(net)
        net = tf.keras.layers.Dense(1024, activation=tf.nn.relu, kernel_initializer=normc_initializer(0.01))(net)
        # net = tf.keras.layers.Dense(64, activation=tf.nn.relu, kernel_initializer=normc_initializer(0.01))(net)
        # net = tf.keras.layers.BatchNormalization()(net)

        # Use the same normc initializer as the layers above (the original
        # referenced an undefined `kernel_initializer` name).
        action_out = tf.keras.layers.Dense(
            self.num_outputs, kernel_initializer=normc_initializer(0.01))(net)
        value_out = tf.keras.layers.Dense(
            1, kernel_initializer=normc_initializer(0.01))(net)

        self.base_model = tf.keras.Model([_board, _attribute], [action_out, value_out])
        self.register_variables(self.base_model.variables)