Example #1
    def _build_layers_v2(self, input_dict: dict, num_outputs: int, config: dict):
        import tensorflow.contrib.slim as slim

        with tf.name_scope("fc_net"):
            last_layer = input_dict['obs']
            activation = get_activation_fn(config.get("fcnet_activation"))
            for i, size in enumerate(config.get("fcnet_hiddens"), 1):
                last_layer = slim.fully_connected(
                    inputs=last_layer,
                    num_outputs=size,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=activation,
                    scope="fc{}".format(i),
                )
                last_layer = tf.layers.dropout(
                    inputs=last_layer,
                    rate=config.get("fcnet_dropout_rate"),
                    training=input_dict['is_training'],
                    name="dropout{}".format(i),
                )
            output = slim.fully_connected(
                inputs=last_layer,
                num_outputs=num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope="fc_out",
            )
            return output, last_layer
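Note: a model like the one above is hooked up through RLlib's legacy model catalog. The sketch below is illustrative, not from the source; the name "dropout_fcnet" and the DropoutFCNet class are hypothetical stand-ins for whatever class defines _build_layers_v2 above, and "fcnet_dropout_rate" mirrors the key read via config.get().

    # Minimal registration sketch for the legacy RLlib Model API (assumed setup).
    from ray.rllib.models import ModelCatalog

    ModelCatalog.register_custom_model("dropout_fcnet", DropoutFCNet)

    trainer_config = {
        "model": {
            "custom_model": "dropout_fcnet",
            "fcnet_hiddens": [256, 256],
            "fcnet_activation": "tanh",
            "fcnet_dropout_rate": 0.1,  # consumed by config.get() in the model
        },
    }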
Example #2
    def _init(self, inputs, num_outputs, options):
        hiddens = options.get("fcnet_hiddens", [256, 256])
        fcnet_activation = options.get("fcnet_activation", "tanh")
        if fcnet_activation == "tanh":
            activation = tf.nn.tanh
        elif fcnet_activation == "relu":
            activation = tf.nn.relu
        else:
            raise ValueError(
                "Unsupported fcnet_activation: {}".format(fcnet_activation))
        print("Constructing fcnet {} {}".format(hiddens, activation))

        with tf.name_scope("fc_net"):
            i = 1
            last_layer = inputs
            for size in hiddens:
                last_layer = slim.fully_connected(
                    last_layer,
                    size,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=activation,
                    scope="fc{}".format(i))
                i += 1
            output = slim.fully_connected(
                last_layer,
                num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope="fc_out")
            return output, last_layer
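Note: several examples in this file map the activation string to a TF op by hand; the get_activation_fn helper used elsewhere presumably does the same. A minimal sketch of such a helper, assuming only "tanh", "relu", and "linear" need support:

    # Sketch of a string-to-activation helper (assumed behavior).
    def get_activation_fn(name):
        activations = {"tanh": tf.nn.tanh, "relu": tf.nn.relu, "linear": None}
        if name not in activations:
            raise ValueError("Unknown activation: {}".format(name))
        return activations[name]  # None means a linear (identity) output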
Example #3
    def _build_layers(self, inputs, num_outputs, options):
        """Process the flattened inputs.

        Note that dict inputs will be flattened into a vector. To define a
        model that processes the components separately, use _build_layers_v2().
        """

        hiddens = options.get("fcnet_hiddens")
        activation = get_activation_fn(options.get("fcnet_activation"))

        with tf.name_scope("fc_net"):
            i = 1
            last_layer = inputs
            for size in hiddens:
                label = "fc{}".format(i)
                last_layer = slim.fully_connected(
                    last_layer,
                    size,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=activation,
                    scope=label)
                i += 1
            label = "fc_out"
            output = slim.fully_connected(
                last_layer,
                num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope=label)
            return output, last_layer
Example #4
    def _build_layers(self, inputs, num_outputs, options):
        hiddens = options.get("fcnet_hiddens", [256, 256])

        activation = get_activation_fn(options.get("fcnet_activation", "tanh"))

        with tf.name_scope("fc_net"):
            i = 1
            last_layer = inputs
            for size in hiddens:
                label = "fc{}".format(i)
                last_layer = slim.fully_connected(
                    last_layer,
                    size,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=activation,
                    scope=label)
                i += 1
            label = "fc_out"
            output = slim.fully_connected(
                last_layer,
                num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope=label)
            return output, last_layer
Example #5
    def _init(self, inputs, num_outputs, options):
        hiddens = options.get("fcnet_hiddens", [256, 256])

        fcnet_activation = options.get("fcnet_activation", "tanh")
        if fcnet_activation == "tanh":
            activation = tf.nn.tanh
        elif fcnet_activation == "relu":
            activation = tf.nn.relu
        else:
            raise ValueError(
                "Unsupported fcnet_activation: {}".format(fcnet_activation))

        with tf.name_scope("fc_net"):
            i = 1
            last_layer = inputs
            for size in hiddens:
                label = "fc{}".format(i)
                last_layer = slim.fully_connected(
                    last_layer, size,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=activation,
                    scope=label)
                i += 1
            label = "fc_out"
            output = slim.fully_connected(
                last_layer, num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None, scope=label)
            return output, last_layer
Example #6
    def _build_layers(self, inputs, num_outputs, options):
        with tf.name_scope("KhanElibolModel"):
            last_layer = layers.conv2d(
                    inputs,
                    16,
                    (4, 4),
                    activation=tf.nn.relu)

            last_layer = layers.conv2d(
                    last_layer,
                    32,
                    (2, 2),
                    activation=tf.nn.relu)

            last_layer = flatten(last_layer)
            last_layer = layers.dense(
                    last_layer,
                    256,
                    kernel_initializer=normc_initializer(0.01),
                    activation=tf.nn.relu)
            output = layers.dense(
                    last_layer,
                    num_outputs,
                    kernel_initializer=normc_initializer(0.01),
                    activation=None)
            return output, last_layer
Example #7
    def _build_layers(self, inputs, num_outputs, options):
        """Process the flattened inputs.

        Note that dict inputs will be flattened into a vector. To define a
        model that processes the components separately, use _build_layers_v2().
        """

        import tensorflow.contrib.slim as slim

        hiddens = options.get("fcnet_hiddens")
        activation = get_activation_fn(options.get("fcnet_activation"))

        with tf.name_scope("fc_net"):
            i = 1
            last_layer = inputs
            for size in hiddens:
                label = "fc{}".format(i)
                last_layer = slim.fully_connected(
                    last_layer,
                    size,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=activation,
                    scope=label)
                i += 1
            label = "fc_out"
            output = slim.fully_connected(
                last_layer,
                num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope=label)
            return output, last_layer
Example #8
    def _build_layers(self, inputs, num_outputs, options):
        """Define the layers of a custom model.

        Arguments:
            inputs (Tensor): Flattened input tensor of observations.
            num_outputs (int): Output tensor must be of size
                [BATCH_SIZE, num_outputs].
            options (dict): Model options.
        """
        hiddens = options.get("fcnet_hiddens", Config.fcnet_hiddens)
        activation = get_activation_fn(
            options.get("fcnet_activation", Config.fcnet_activation))

        with tf.name_scope("fc_net"):
            i = 1
            last_layer = inputs
            for size in hiddens:
                label = "fc{}".format(i)
                last_layer = slim.fully_connected(
                    last_layer,
                    size,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=activation,
                    scope=label)
                i += 1
            label = "fc_out"
            output = slim.fully_connected(
                last_layer,
                num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope=label)
            return output, last_layer
Example #9
    def _build_layers_v2(self, input_dict, num_outputs, options):
        action_mask = input_dict["obs"]["action_mask"]
        if num_outputs != action_mask.shape[1].value:
            raise ValueError(
                "This model assumes num outputs is equal to max avail actions",
                num_outputs, action_mask)

        # Standard FC net component.
        last_layer = input_dict["obs"]["obs"]
        hiddens = [256, 256]
        for i, size in enumerate(hiddens):
            label = "fc{}".format(i)
            last_layer = slim.fully_connected(
                last_layer,
                size,
                weights_initializer=normc_initializer(1.0),
                activation_fn=tf.nn.tanh,
                scope=label)
        action_logits = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")

        # Mask out invalid actions (use tf.float32.min for stability)
        inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min)
        masked_logits = inf_mask + action_logits

        return masked_logits, last_layer
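Note: the clamp to tf.float32.min matters because log(0) is -inf, and -inf logits can produce NaNs in the softmax gradient; clamping keeps the logits finite while still driving the masked actions' probabilities to zero. A NumPy illustration (not from the source):

    # Illustrative only: how the additive mask suppresses invalid actions.
    import numpy as np

    action_mask = np.array([1.0, 0.0, 1.0], dtype=np.float32)
    logits = np.array([0.5, 2.0, -0.3], dtype=np.float32)

    # log(1) = 0 for valid actions; log(0) = -inf, clamped to float32 min.
    inf_mask = np.maximum(np.log(action_mask), np.finfo(np.float32).min)
    masked = logits + inf_mask

    probs = np.exp(masked) / np.exp(masked).sum()  # probs[1] is exactly 0.0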
Example #10
    def _build_layers_v2(self, input_dict, num_outputs, options):
        mask = input_dict["obs"]["action_mask"]

        last_layer = input_dict["obs"]["real_obs"]
        hiddens = options["fcnet_hiddens"]
        for i, size in enumerate(hiddens):
            label = "fc{}".format(i)
            last_layer = slim.fully_connected(
                last_layer,
                size,
                weights_initializer=normc_initializer(1.0),
                activation_fn=tf.nn.tanh,
                scope=label)
        action_logits = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")

        if num_outputs == 1:
            return action_logits, last_layer

        # Mask out invalid actions (use tf.float32.min for stability)
        inf_mask = tf.maximum(tf.math.log(mask), tf.float32.min)
        masked_logits = inf_mask + action_logits

        return masked_logits, last_layer
Example #11
    def _build_layers(self, inputs, num_outputs, options):
        """Process the flattened inputs.

        Note that dict inputs will be flattened into a vector. To define a
        model that processes the components separately, use _build_layers_v2().
        """

        hiddens = options.get("fcnet_hiddens")
        activation = get_activation_fn(options.get("fcnet_activation"))

        with tf.name_scope("fc_net"):
            i = 1
            last_layer = inputs
            for size in hiddens:
                label = "fc{}".format(i)
                last_layer = tf.layers.dense(
                    last_layer,
                    size,
                    kernel_initializer=normc_initializer(1.0),
                    activation=activation,
                    name=label)
                i += 1
            label = "fc_out"
            output = tf.layers.dense(
                last_layer,
                num_outputs,
                kernel_initializer=normc_initializer(0.01),
                activation=None,
                name=label)
            return output, last_layer
Example #12
            def vf_template(last_layer, input_dict):
                with tf.variable_scope(self.variable_scope):
                    with tf.variable_scope("value_function"):
                        # Simple case: sharing the feature layer
                        if model_config["vf_share_layers"]:
                            return tf.reshape(
                                linear(last_layer, 1, "value_function",
                                       normc_initializer(1.0)), [-1])

                        # Create a new separate model with no RNN state, etc.
                        branch_model_config = model_config.copy()
                        branch_model_config["free_log_std"] = False
                        if branch_model_config["use_lstm"]:
                            branch_model_config["use_lstm"] = False
                            logger.warning(
                                "It is not recommended to use a LSTM model "
                                "with vf_share_layers=False (consider "
                                "setting it to True). If you want to not "
                                "share layers, you can implement a custom "
                                "LSTM model that overrides the "
                                "value_function() method.")
                        branch_instance = legacy_model_cls(
                            input_dict,
                            obs_space,
                            action_space,
                            1,
                            branch_model_config,
                            state_in=None,
                            seq_lens=None)
                        return tf.reshape(branch_instance.outputs, [-1])
Example #13
    def _build_layers_v2(self, input_dict, num_outputs, options):
        print(options)
        print(num_outputs)
        print(input_dict)
        action_mask = input_dict["obs"]["action_mask"]
        self.obs_space = Box(0, 1, shape=(3024,), dtype=np.float32)  # 28 * 108
        input_dict["obs"] = input_dict["obs"]["db"]

        options["fcnet_hiddens"] = [num_outputs * 2 * 2 * 2, num_outputs * 2]

        self.fcnet = FullyConnectedNetwork(input_dict, self.obs_space,
                                           self.action_space, num_outputs,
                                           options)
        last_layer = self.fcnet.last_layer
        label = "fc_out2"
        output = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope=label)

        inf_mask = tf.maximum(tf.math.log(action_mask), tf.float32.min)

        masked_logits = inf_mask + output
        return masked_logits, last_layer
Example #14
        def value_function(self):
            assert self.cur_instance, "must call forward first"

            with self._branch_variable_scope("value_function"):
                # Simple case: sharing the feature layer
                if self.model_config["vf_share_layers"]:
                    return tf.reshape(
                        linear(self.cur_instance.last_layer, 1,
                               "value_function", normc_initializer(1.0)), [-1])

                # Create a new separate model with no RNN state, etc.
                branch_model_config = self.model_config.copy()
                branch_model_config["free_log_std"] = False
                if branch_model_config["use_lstm"]:
                    branch_model_config["use_lstm"] = False
                    logger.warning(
                        "It is not recommended to use a LSTM model with "
                        "vf_share_layers=False (consider setting it to True). "
                        "If you want to not share layers, you can implement "
                        "a custom LSTM model that overrides the "
                        "value_function() method.")
                branch_instance = self.legacy_model_cls(
                    self.cur_instance.input_dict,
                    self.obs_space,
                    self.action_space,
                    1,
                    branch_model_config,
                    state_in=None,
                    seq_lens=None)
                return tf.reshape(branch_instance.outputs, [-1])
Example #15
    def _setup_graph(self, ob_space, ac_space):
        self.x = tf.placeholder(tf.float32, [None] + list(ob_space.shape))
        dist_class, self.logit_dim = ModelCatalog.get_action_dist(
            ac_space, self.config["model"])
        self._model = LSTM(self.x, self.logit_dim, {})

        self.state_in = self._model.state_in
        self.state_out = self._model.state_out

        self.logits = self._model.outputs
        self.action_dist = dist_class(self.logits)
        # with tf.variable_scope("vf"):
        #     vf_model = ModelCatalog.get_model(self.x, 1)
        self.vf = tf.reshape(
            linear(self._model.last_layer, 1, "value", normc_initializer(1.0)),
            [-1])

        self.sample = self.action_dist.sample()
        self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          tf.get_variable_scope().name)
        self.global_step = tf.get_variable("global_step", [],
                                           tf.int32,
                                           initializer=tf.constant_initializer(
                                               0, dtype=tf.int32),
                                           trainable=False)
Example #16
    def __init__(self,
                 rnn_type: Type[tf.keras.layers.RNN],
                 num_outputs: int,
                 fcnet_hiddens: Sequence[int],
                 fcnet_activation: str,
                 conv_filters: Optional[Sequence[ConvLayerSpec]] = None,
                 conv_activation: str = 'relu',
                 lstm_cell_size: int = 256,
                 lstm_use_prev_action_reward: bool = False,
                 recurrent_args: Optional[Dict[str, Any]] = None,
                 **options):
        super().__init__(num_outputs, fcnet_hiddens, fcnet_activation,
                         conv_filters, conv_activation, **options)

        self._recurrent = True

        self._lstm_cell_size = lstm_cell_size
        self._lstm_use_prev_action_reward = lstm_use_prev_action_reward

        if recurrent_args is None:
            recurrent_args = {}

        self.rnn = rnn_type(lstm_cell_size,
                            return_state=True,
                            return_sequences=True,
                            **recurrent_args)

        self.output_layer = Dense(num_outputs,
                                  kernel_initializer=normc_initializer(0.01))
Example #17
    def __init__(self, observation_space, action_space, config):
        config = dict(ray.rllib.agents.a3c.a3c.DEFAULT_CONFIG, **config)
        self.config = config
        self.sess = tf.get_default_session()

        # Setup the policy
        self.observations = tf.placeholder(
            tf.float32, [None] + list(observation_space.shape))
        dist_class, logit_dim = ModelCatalog.get_action_dist(
            action_space, self.config["model"])
        self.model = ModelCatalog.get_model(self.observations, logit_dim,
                                            self.config["model"])
        action_dist = dist_class(self.model.outputs)
        self.vf = tf.reshape(
            linear(self.model.last_layer, 1, "value", normc_initializer(1.0)),
            [-1])
        self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          tf.get_variable_scope().name)

        # Setup the policy loss
        if isinstance(action_space, gym.spaces.Box):
            ac_size = action_space.shape[0]
            actions = tf.placeholder(tf.float32, [None, ac_size], name="ac")
        elif isinstance(action_space, gym.spaces.Discrete):
            actions = tf.placeholder(tf.int64, [None], name="ac")
        else:
            raise UnsupportedSpaceException(
                "Action space {} is not supported for A3C.".format(
                    action_space))
        advantages = tf.placeholder(tf.float32, [None], name="advantages")
        v_target = tf.placeholder(tf.float32, [None], name="v_target")
        self.loss = A3CLoss(action_dist, actions, advantages, v_target,
                            self.vf, self.config["vf_loss_coeff"],
                            self.config["entropy_coeff"])

        # Initialize TFPolicyGraph
        loss_in = [
            ("obs", self.observations),
            ("actions", actions),
            ("advantages", advantages),
            ("value_targets", v_target),
        ]
        TFPolicyGraph.__init__(
            self,
            observation_space,
            action_space,
            self.sess,
            obs_input=self.observations,
            action_sampler=action_dist.sample(),
            loss=self.loss.total_loss,
            loss_inputs=loss_in,
            state_inputs=self.model.state_in,
            state_outputs=self.model.state_out,
            seq_lens=self.model.seq_lens,
            max_seq_len=self.config["model"]["max_seq_len"])

        self.sess.run(tf.global_variables_initializer())
Example #18
    def _build_layers(self, inputs, num_outputs, options):
        hiddens = options.get("fcnet_hiddens", [256, 256])
        activation = get_activation_fn(options.get("fcnet_activation", "relu"))

        with tf.name_scope("fc_net"):
            last_layer = flatten(inputs)
            for size in hiddens:
                last_layer = layers.dense(
                        last_layer,
                        size,
                        kernel_initializer=normc_initializer(1.0),
                        activation=activation)
            output = layers.dense(
                    last_layer,
                    num_outputs,
                    kernel_initializer=normc_initializer(1.0),
                    activation=None)
            return output, last_layer
Example #19
    def __init__(self,
                 layer_units=None,
                 activation=None,
                 custom_params=None,
                 vf_share_layers=False,
                 dummy=False):
        """
            layer_units: list, a list of the number of units of all layers
                except the input layer
        """
        keras_models.Model.__init__(self)
        if dummy:
            return
        assert layer_units is not None and activation is not None

        def _get_initializer(i, n):
            if i < len(n) - 1:
                return normc_initializer(1.0)
            else:
                return normc_initializer(0.01)

        if not custom_params:
            for i, size in enumerate(layer_units):
                name = f"fc_{i}"
                layer = Dense(size,
                              activation=(activation if
                                          i < len(layer_units) - 1 else None),
                              kernel_initializer=_get_initializer(
                                  i, layer_units),
                              name=name)
                setattr(self, name, layer)
            if vf_share_layers:
                name = f"fc_vf"
                layer = Dense(1,
                              activation=None,
                              kernel_initializer=normc_initializer(1.0),
                              name=name)
                setattr(self, name, layer)
        else:
            if vf_share_layers:
                assert len(layer_units) == len(custom_params) - 1
            else:
                assert len(layer_units) == len(custom_params)
            for i, size in enumerate(layer_units):
                name = f"fc_{i}"
                layer = Dense(custom_params=custom_params[i],
                              activation=(activation if
                                          i < len(layer_units) - 1 else None),
                              name=name)
                setattr(self, name, layer)
            if vf_share_layers:
                name = f"fc_vf"
                layer = Dense(custom_params=custom_params[-1],
                              activation=None,
                              name=name)
                setattr(self, name, layer)
        self._vf_share_layers = vf_share_layers
Example #20
 def _build_layers(self, inputs, num_outputs, _):
     with tf.name_scope("linear"):
         output = slim.fully_connected(
             inputs,
             num_outputs,
             weights_initializer=normc_initializer(0.01),
             activation_fn=None,
         )
         return output, inputs
Example #21
 def _init(self, inputs, num_outputs, options):
     with tf.name_scope("linear"):
         label = "linear_out"
         output = slim.fully_connected(
             inputs,
             num_outputs,
             weights_initializer=normc_initializer(0.01),
             activation_fn=None,
             scope=label)
         return output, inputs
Example #22
 def _init(self, inputs, num_outputs, options):
     x = inputs
     with tf.name_scope("convnet"):
         for i in range(4):
             x = tf.nn.elu(conv2d(x, 32, "l{}".format(i+1), [3, 3], [2, 2]))
         r, c = x.shape[1].value, x.shape[2].value
         x = tf.reshape(x, [-1, r*c*32])
         fc1 = linear(x, 256, "fc1")
         fc2 = linear(fc1, num_outputs, "fc2", normc_initializer(0.01))
         return fc2, fc1
Example #23
    def value_function(self):
        """Builds the value function output.

        This method can be overridden to customize the implementation of the
        value function (e.g., not sharing hidden layers).

        Returns:
            Tensor of size [BATCH_SIZE] for the value function.
        """
        return tf.reshape(
            linear(self.last_layer, 1, "value", normc_initializer(1.0)), [-1])
Example #24
 def _build_layers_v2(self, input_dict, num_outputs, options):
     last_layer = input_dict["obs"]
     hiddens = [256, 256]
     for i, size in enumerate(hiddens):
         label = "fc{}".format(i)
         last_layer = tf.layers.dense(
             last_layer,
             size,
             kernel_initializer=normc_initializer(1.0),
             activation=tf.nn.tanh,
             name=label)
         # Add a batch norm layer
         last_layer = tf.layers.batch_normalization(
             last_layer, training=input_dict["is_training"])
     output = tf.layers.dense(last_layer,
                              num_outputs,
                              kernel_initializer=normc_initializer(0.01),
                              activation=None,
                              name="fc_out")
     return output, last_layer
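Note: one caveat with tf.layers.batch_normalization in TF1 (not shown above): the moving mean/variance are only refreshed if the ops collected under tf.GraphKeys.UPDATE_OPS actually run. A minimal sketch of the usual training-op pattern, assuming a loss tensor and optimizer already exist:

    # Sketch: run the batch-norm statistic updates alongside the train step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss)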
Example #25
    def value_function(self):
        """Builds the value function output.

        This method can be overridden to customize the implementation of the
        value function (e.g., not sharing hidden layers).

        Returns:
            Tensor of size [BATCH_SIZE] for the value function.
        """
        return tf.reshape(
            linear(self.last_layer, 1, "value", normc_initializer(1.0)), [-1])
Example #26
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kw):
        super(MyKerasQModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name, **kw)

        # Define the core model layers which will be used by the other
        # output heads of DistributionalQModel
        self.inputs = tf.keras.layers.Input(
            shape=obs_space.shape, name="observations")
        layer_1 = tf.keras.layers.Dense(
            128,
            name="my_layer1",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0))(self.inputs)
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="my_out",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0))(layer_1)
        self.base_model = tf.keras.Model(self.inputs, layer_out)
        self.register_variables(self.base_model.variables)
Example #27
File: lstm.py Project: zdpau/ray-1
    def _build_layers_v2(self, input_dict, num_outputs, options):
        cell_size = options.get("lstm_cell_size")
        if options.get("lstm_use_prev_action_reward"):
            action_dim = int(
                np.product(
                    input_dict["prev_actions"].get_shape().as_list()[1:]))
            features = tf.concat(
                [
                    input_dict["obs"],
                    tf.reshape(
                        tf.cast(input_dict["prev_actions"], tf.float32),
                        [-1, action_dim]),
                    tf.reshape(input_dict["prev_rewards"], [-1, 1]),
                ],
                axis=1)
        else:
            features = input_dict["obs"]
        last_layer = add_time_dimension(features, self.seq_lens)

        # Setup the LSTM cell
        lstm = rnn.BasicLSTMCell(cell_size, state_is_tuple=True)
        self.state_init = [
            np.zeros(lstm.state_size.c, np.float32),
            np.zeros(lstm.state_size.h, np.float32)
        ]

        # Setup LSTM inputs
        if self.state_in:
            c_in, h_in = self.state_in
        else:
            c_in = tf.placeholder(
                tf.float32, [None, lstm.state_size.c], name="c")
            h_in = tf.placeholder(
                tf.float32, [None, lstm.state_size.h], name="h")
            self.state_in = [c_in, h_in]

        # Setup LSTM outputs
        state_in = rnn.LSTMStateTuple(c_in, h_in)
        lstm_out, lstm_state = tf.nn.dynamic_rnn(
            lstm,
            last_layer,
            initial_state=state_in,
            sequence_length=self.seq_lens,
            time_major=False,
            dtype=tf.float32)

        self.state_out = list(lstm_state)

        # Compute outputs
        last_layer = tf.reshape(lstm_out, [-1, cell_size])
        logits = linear(last_layer, num_outputs, "action",
                        normc_initializer(0.01))
        return logits, last_layer
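Note: add_time_dimension folds the flat [batch * time, features] input back into [batch, time, features] so dynamic_rnn can iterate over the time axis, and the LSTM output is flattened again before the linear head. A shape-only NumPy illustration (not the actual RLlib helper):

    # Shape illustration only; the real helper derives shapes from seq_lens.
    import numpy as np

    batch, max_seq_len, feat, cell_size = 4, 5, 16, 256
    flat = np.zeros((batch * max_seq_len, feat))         # as fed to the model
    timed = flat.reshape(batch, max_seq_len, feat)       # [B, T, D] for the RNN
    lstm_out = np.zeros((batch, max_seq_len, cell_size))
    flat_out = lstm_out.reshape(-1, cell_size)           # [B*T, cell] for logits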
Example #28
 def _build_layers_v2(self, input_dict, num_outputs, options):
     last_layer = input_dict["obs"]
     hiddens = [256, 256]
     for i, size in enumerate(hiddens):
         label = "fc{}".format(i)
         last_layer = slim.fully_connected(
             last_layer,
             size,
             weights_initializer=normc_initializer(1.0),
             activation_fn=tf.nn.tanh,
             scope=label)
         # Add a batch norm layer
         last_layer = tf.layers.batch_normalization(
             last_layer, training=input_dict["is_training"])
     output = slim.fully_connected(
         last_layer,
         num_outputs,
         weights_initializer=normc_initializer(0.01),
         activation_fn=None,
         scope="fc_out")
     return output, last_layer
Example #29
    def _build_layers_v2(self, input_dict, num_outputs, options):
        cell_size = options.get("lstm_cell_size")
        if options.get("lstm_use_prev_action_reward"):
            action_dim = int(
                np.product(
                    input_dict["prev_actions"].get_shape().as_list()[1:]))
            features = tf.concat(
                [
                    input_dict["obs"],
                    tf.reshape(
                        tf.cast(input_dict["prev_actions"], tf.float32),
                        [-1, action_dim]),
                    tf.reshape(input_dict["prev_rewards"], [-1, 1]),
                ],
                axis=1)
        else:
            features = input_dict["obs"]
        last_layer = add_time_dimension(features, self.seq_lens)

        # Setup the LSTM cell
        lstm = rnn.BasicLSTMCell(cell_size, state_is_tuple=True)
        self.state_init = [
            np.zeros(lstm.state_size.c, np.float32),
            np.zeros(lstm.state_size.h, np.float32)
        ]

        # Setup LSTM inputs
        if self.state_in:
            c_in, h_in = self.state_in
        else:
            c_in = tf.placeholder(
                tf.float32, [None, lstm.state_size.c], name="c")
            h_in = tf.placeholder(
                tf.float32, [None, lstm.state_size.h], name="h")
            self.state_in = [c_in, h_in]

        # Setup LSTM outputs
        state_in = rnn.LSTMStateTuple(c_in, h_in)
        lstm_out, lstm_state = tf.nn.dynamic_rnn(
            lstm,
            last_layer,
            initial_state=state_in,
            sequence_length=self.seq_lens,
            time_major=False,
            dtype=tf.float32)

        self.state_out = list(lstm_state)

        # Compute outputs
        last_layer = tf.reshape(lstm_out, [-1, cell_size])
        logits = linear(last_layer, num_outputs, "action",
                        normc_initializer(0.01))
        return logits, last_layer
Example #30
    def _build_layers_v2(self, input_dict, num_outputs, options):

        inputs = input_dict["obs"]

        smoothed_rews = None
        if isinstance(inputs, list):
            smoothed_rews = inputs[1]
            inputs = inputs[0]

        hiddens = [32, 32]
        with tf.name_scope("custom_net"):

            inputs = slim.conv2d(inputs,
                                 6, [3, 3],
                                 1,
                                 activation_fn=tf.nn.relu,
                                 scope="conv")
            last_layer = flatten(inputs)

            i = 1
            for size in hiddens:
                label = "fc{}".format(i)
                last_layer = slim.fully_connected(
                    last_layer,
                    size,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=tf.nn.relu,
                    scope=label)
                i += 1
            output = slim.fully_connected(
                last_layer,
                num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope="fc_out")

            if smoothed_rews is not None:
                output = tf.concat([output, smoothed_rews], axis=-1)

            return output, last_layer
Example #31
    def _build_layers_v2(self, input_dict, num_outputs, options):
        # Extract the available actions tensor from the observation.
        avail_actions = input_dict["obs"]["avail_actions"]
        action_mask = input_dict["obs"]["action_mask"]
        action_embed_size = avail_actions.shape[2].value
        if num_outputs != avail_actions.shape[1].value:
            raise ValueError(
                "This model assumes num outputs is equal to max avail actions",
                num_outputs, avail_actions)

        # Standard FC net component.
        last_layer = input_dict["obs"]["cart"]
        hiddens = [256, 256]
        for i, size in enumerate(hiddens):
            label = "fc{}".format(i)
            last_layer = slim.fully_connected(
                last_layer,
                size,
                weights_initializer=normc_initializer(1.0),
                activation_fn=tf.nn.tanh,
                scope=label)
        output = slim.fully_connected(
            last_layer,
            action_embed_size,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")

        # Expand the model output to [BATCH, 1, EMBED_SIZE]. Note that the
        # avail actions tensor is of shape [BATCH, MAX_ACTIONS, EMBED_SIZE].
        intent_vector = tf.expand_dims(output, 1)

        # Batch dot product => shape of logits is [BATCH, MAX_ACTIONS].
        action_logits = tf.reduce_sum(avail_actions * intent_vector, axis=2)

        # Mask out invalid actions (use tf.float32.min for stability)
        inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min)
        masked_logits = inf_mask + action_logits

        return masked_logits, last_layer
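Note: the expand_dims/reduce_sum pair above is a batched dot product: each available action's embedding is scored against a single intent vector per batch entry. An illustrative NumPy shape check (made-up sizes):

    # Illustrative shape check for the embedding dot product.
    import numpy as np

    batch, max_actions, embed = 2, 4, 8
    avail_actions = np.random.randn(batch, max_actions, embed)
    intent = np.random.randn(batch, embed)

    intent_vector = intent[:, np.newaxis, :]              # [B, 1, E]
    logits = (avail_actions * intent_vector).sum(axis=2)  # [B, MAX_ACTIONS]
    assert logits.shape == (batch, max_actions)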
Example #32
    def _build_layers_v2(self, input_dict, num_outputs, options):
        # Extract the available actions tensor from the observation.
        avail_actions = input_dict["obs"]["avail_actions"]
        action_mask = input_dict["obs"]["action_mask"]
        action_embed_size = avail_actions.shape[2].value
        if num_outputs != avail_actions.shape[1].value:
            raise ValueError(
                "This model assumes num outputs is equal to max avail actions",
                num_outputs, avail_actions)

        # Standard FC net component.
        last_layer = input_dict["obs"]["cart"]
        hiddens = [256, 256]
        for i, size in enumerate(hiddens):
            label = "fc{}".format(i)
            last_layer = slim.fully_connected(
                last_layer,
                size,
                weights_initializer=normc_initializer(1.0),
                activation_fn=tf.nn.tanh,
                scope=label)
        output = slim.fully_connected(
            last_layer,
            action_embed_size,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")

        # Expand the model output to [BATCH, 1, EMBED_SIZE]. Note that the
        # avail actions tensor is of shape [BATCH, MAX_ACTIONS, EMBED_SIZE].
        intent_vector = tf.expand_dims(output, 1)

        # Batch dot product => shape of logits is [BATCH, MAX_ACTIONS].
        action_logits = tf.reduce_sum(avail_actions * intent_vector, axis=2)

        # Mask out invalid actions (use tf.float32.min for stability)
        inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min)
        masked_logits = inf_mask + action_logits

        return masked_logits, last_layer
Example #33
    def __init__(self,
                 num_outputs: int,
                 fcnet_hiddens: Sequence[int],
                 fcnet_activation: str,
                 conv_filters: Optional[Sequence[ConvLayerSpec]] = None,
                 conv_activation: str = 'relu',
                 **options):
        super().__init__()

        self._num_outputs = num_outputs
        self._fcnet_hiddens = fcnet_hiddens
        self._fcnet_activation = fcnet_activation
        self._use_conv = conv_filters is not None
        self._conv_filters = conv_filters
        self._conv_activation = conv_activation
        self._recurrent = False
        self._options = options

        if conv_filters is not None:
            filters, kernel_size, strides = list(zip(*conv_filters))
            self.conv_layer = Conv2DStack(
                filters, kernel_size, strides,
                padding='valid',
                activation=conv_activation,
                flatten_output=True)

        self.dense_layer = DenseStack(
            fcnet_hiddens,
            kernel_initializer=normc_initializer(1.0),
            activation=fcnet_activation,
            output_activation=fcnet_activation)

        # WARNING: DO NOT CHANGE KERNEL INITIALIZER!!!
        # PPO/gradient-based methods are extremely sensitive to this and will
        # break. Don't alter this unless you're sure you know what you're doing.
        self.output_layer = Dense(
            num_outputs,
            kernel_initializer=normc_initializer(0.01))
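Note: normc_initializer is the column-normalized Gaussian initializer used throughout these examples; a sketch of the common TF1 formulation is below (the exact signature in a given RLlib version may differ):

    # Sketch: Gaussian samples rescaled so each output column has L2 norm std.
    import numpy as np

    def normc_initializer(std=1.0):
        def _initializer(shape, dtype=None, partition_info=None):
            out = np.random.randn(*shape).astype(np.float32)
            out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
            return tf.constant(out)
        return _initializer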
Example #34
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name):
     super(MyKerasModel, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)
     self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                         name="observations")
     layer_1 = tf.keras.layers.Dense(
         128,
         name="my_layer1",
         activation=tf.nn.relu,
         kernel_initializer=normc_initializer(1.0))(self.inputs)
     layer_out = tf.keras.layers.Dense(
         num_outputs,
         name="my_out",
         activation=None,
         kernel_initializer=normc_initializer(0.01))(layer_1)
     value_out = tf.keras.layers.Dense(
         1,
         name="value_out",
         activation=None,
         kernel_initializer=normc_initializer(0.01))(layer_1)
     self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
     self.register_variables(self.base_model.variables)
Example #35
 def _build_layers_v2(self, input_dict, num_outputs, options):
     action_mask = input_dict["obs"]["action_mask"]
     obs = input_dict["obs"]["real_obs"]
     # Standard FC net component.
     last_layer = obs
     hiddens = [20, 20, 15, 15, 10, 10, 9, 9]
     # hiddens = [256, 256]
     for i, size in enumerate(hiddens):
         label = "fc{}".format(i)
         last_layer = slim.fully_connected(
             last_layer,
             size,
             weights_initializer=normc_initializer(1.0),
             activation_fn=tf.nn.tanh,
             scope=label)
     output = slim.fully_connected(
         last_layer,
         ACTIONS,
         weights_initializer=normc_initializer(0.01),
         activation_fn=None,
         scope="fc_out")
     inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min)
     output_mask = output + inf_mask
     return output_mask, last_layer
Example #36
    def _setup_graph(self, ob_space, ac_space):
        self.x = tf.placeholder(tf.float32, [None] + list(ob_space))
        dist_class, self.logit_dim = ModelCatalog.get_action_dist(ac_space)
        self._model = ModelCatalog.get_model(
            self.registry, self.x, self.logit_dim, self.config["model"])
        self.logits = self._model.outputs
        self.curr_dist = dist_class(self.logits)
        self.vf = tf.reshape(linear(self._model.last_layer, 1, "value",
                                    normc_initializer(1.0)), [-1])

        self.sample = self.curr_dist.sample()
        self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          tf.get_variable_scope().name)
        self.global_step = tf.get_variable(
            "global_step", [], tf.int32,
            initializer=tf.constant_initializer(0, dtype=tf.int32),
            trainable=False)
Example #37
File: lstm.py Project: adgirish/ray
    def _init(self, inputs, num_outputs, options):
        use_tf100_api = (distutils.version.LooseVersion(tf.VERSION) >=
                         distutils.version.LooseVersion("1.0.0"))

        self.x = x = inputs
        for i in range(4):
            x = tf.nn.elu(conv2d(x, 32, "l{}".format(i + 1), [3, 3], [2, 2]))
        # Introduce a "fake" batch dimension of 1 after flatten so that we can
        # do LSTM over the time dim.
        x = tf.expand_dims(flatten(x), [0])

        size = 256
        if use_tf100_api:
            lstm = rnn.BasicLSTMCell(size, state_is_tuple=True)
        else:
            lstm = rnn.rnn_cell.BasicLSTMCell(size, state_is_tuple=True)
        step_size = tf.shape(self.x)[:1]

        c_init = np.zeros((1, lstm.state_size.c), np.float32)
        h_init = np.zeros((1, lstm.state_size.h), np.float32)
        self.state_init = [c_init, h_init]
        c_in = tf.placeholder(tf.float32, [1, lstm.state_size.c])
        h_in = tf.placeholder(tf.float32, [1, lstm.state_size.h])
        self.state_in = [c_in, h_in]

        if use_tf100_api:
            state_in = rnn.LSTMStateTuple(c_in, h_in)
        else:
            state_in = rnn.rnn_cell.LSTMStateTuple(c_in, h_in)
        lstm_out, lstm_state = tf.nn.dynamic_rnn(lstm, x,
                                                 initial_state=state_in,
                                                 sequence_length=step_size,
                                                 time_major=False)
        lstm_c, lstm_h = lstm_state
        x = tf.reshape(lstm_out, [-1, size])
        logits = linear(x, num_outputs, "action", normc_initializer(0.01))
        self.state_out = [lstm_c[:1, :], lstm_h[:1, :]]
        return logits, x
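Note: the expand_dims(flatten(x), [0]) trick above treats the whole rollout as one batch entry, so the LSTM's time axis runs over environment steps. A shape sketch (illustrative):

    # Illustrative: [T, D] conv features become [1, T, D] for dynamic_rnn,
    # which then iterates over the T environment steps as "time".
    import numpy as np
    feats = np.zeros((20, 288))        # T = 20 steps of flattened conv output
    batched = feats[np.newaxis, ...]   # shape (1, 20, 288)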
Example #38
    def _build_layers(self, inputs, num_outputs, options):
        # Parse options
        image_shape = options["custom_options"]["image_shape"]
        convs = options.get("conv_filters", [
            [16, [8, 8], 4],
            [32, [5, 5], 3],
            [32, [5, 5], 2],
            [512, [10, 10], 1],
        ])
        hiddens = options.get("fcnet_hiddens", [64])
        fcnet_activation = options.get("fcnet_activation", "tanh")
        if fcnet_activation == "tanh":
            activation = tf.nn.tanh
        elif fcnet_activation == "relu":
            activation = tf.nn.relu
        else:
            raise ValueError(
                "Unsupported fcnet_activation: {}".format(fcnet_activation))

        # Sanity checks
        image_size = np.product(image_shape)
        expected_shape = [image_size + 5 + 2]
        assert inputs.shape.as_list()[1:] == expected_shape, \
            (inputs.shape.as_list()[1:], expected_shape)

        # Reshape the input vector back into its components
        vision_in = tf.reshape(inputs[:, :image_size],
                               [tf.shape(inputs)[0]] + image_shape)
        metrics_in = inputs[:, image_size:]
        print("Vision in shape", vision_in)
        print("Metrics in shape", metrics_in)

        # Setup vision layers
        with tf.name_scope("carla_vision"):
            for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
                vision_in = slim.conv2d(
                    vision_in,
                    out_size,
                    kernel,
                    stride,
                    scope="conv{}".format(i))
            out_size, kernel, stride = convs[-1]
            vision_in = slim.conv2d(
                vision_in,
                out_size,
                kernel,
                stride,
                padding="VALID",
                scope="conv_out")
            vision_in = tf.squeeze(vision_in, [1, 2])

        # Setup metrics layer
        with tf.name_scope("carla_metrics"):
            metrics_in = slim.fully_connected(
                metrics_in,
                64,
                weights_initializer=xavier_initializer(),
                activation_fn=activation,
                scope="metrics_out")

        print("Shape of vision out is", vision_in.shape)
        print("Shape of metric out is", metrics_in.shape)

        # Combine the metrics and vision inputs
        with tf.name_scope("carla_out"):
            i = 1
            last_layer = tf.concat([vision_in, metrics_in], axis=1)
            print("Shape of concatenated out is", last_layer.shape)
            for size in hiddens:
                last_layer = slim.fully_connected(
                    last_layer,
                    size,
                    weights_initializer=xavier_initializer(),
                    activation_fn=activation,
                    scope="fc{}".format(i))
                i += 1
            output = slim.fully_connected(
                last_layer,
                num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope="fc_out")

        return output, last_layer
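Note: the slicing above inverts RLlib's flattening of a composite observation: the first image_size entries are pixels and the tail carries the 5 + 2 scalar metrics. An illustrative NumPy version of the split, assuming the same layout:

    # Illustrative: splitting a flattened observation back into components.
    import numpy as np

    image_shape = [80, 80, 3]  # hypothetical; comes from custom_options
    image_size = int(np.product(image_shape))
    obs = np.zeros((32, image_size + 5 + 2), dtype=np.float32)

    vision_in = obs[:, :image_size].reshape([-1] + image_shape)  # [B, H, W, C]
    metrics_in = obs[:, image_size:]                             # [B, 7]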
Example #39
    def _build_layers_v2(self, input_dict, num_outputs, options):
        def spy(sequences, state_in, state_out, seq_lens):
            if len(sequences) == 1:
                return 0  # don't capture inference inputs
            # TF runs this function in an isolated context, so we have to use
            # redis to communicate back to our suite
            ray.experimental.internal_kv._internal_kv_put(
                "rnn_spy_in_{}".format(RNNSpyModel.capture_index),
                pickle.dumps({
                    "sequences": sequences,
                    "state_in": state_in,
                    "state_out": state_out,
                    "seq_lens": seq_lens
                }),
                overwrite=True)
            RNNSpyModel.capture_index += 1
            return 0

        features = input_dict["obs"]
        cell_size = 3
        last_layer = add_time_dimension(features, self.seq_lens)

        # Setup the LSTM cell
        lstm = rnn.BasicLSTMCell(cell_size, state_is_tuple=True)
        self.state_init = [
            np.zeros(lstm.state_size.c, np.float32),
            np.zeros(lstm.state_size.h, np.float32)
        ]

        # Setup LSTM inputs
        if self.state_in:
            c_in, h_in = self.state_in
        else:
            c_in = tf.placeholder(
                tf.float32, [None, lstm.state_size.c], name="c")
            h_in = tf.placeholder(
                tf.float32, [None, lstm.state_size.h], name="h")
        self.state_in = [c_in, h_in]

        # Setup LSTM outputs
        state_in = rnn.LSTMStateTuple(c_in, h_in)
        lstm_out, lstm_state = tf.nn.dynamic_rnn(
            lstm,
            last_layer,
            initial_state=state_in,
            sequence_length=self.seq_lens,
            time_major=False,
            dtype=tf.float32)

        self.state_out = list(lstm_state)
        spy_fn = tf.py_func(
            spy, [
                last_layer,
                self.state_in,
                self.state_out,
                self.seq_lens,
            ],
            tf.int64,
            stateful=True)

        # Compute outputs
        with tf.control_dependencies([spy_fn]):
            last_layer = tf.reshape(lstm_out, [-1, cell_size])
            logits = linear(last_layer, num_outputs, "action",
                            normc_initializer(0.01))
        return logits, last_layer