Пример #1
0
 def __init__(self, obs_space, action_space, num_outputs,
              model_config, name):
     """Build a small CNN policy/value network.

     Three Conv2D layers feed a Flatten, then a shared Dense hidden layer
     from which the action-logits head and the value head branch.

     Args:
         obs_space: Observation space; its ``shape`` sizes the input layer.
         action_space: Action space (forwarded to the base class).
         num_outputs: Width of the action-logits output layer.
         model_config: Model config dict; reads ``conv_activation``,
             ``fcnet_activation`` and ``custom_options['hidden_units']``.
         name: Model name (forwarded to the base class).
     """
     super(KerasCnnModel, self).__init__(
         obs_space, action_space, num_outputs, model_config, name)
     # Resolve the activation functions once instead of once per layer.
     conv_activation = get_activation_fn(
         model_config.get("conv_activation"))
     fc_activation = get_activation_fn(
         model_config.get("fcnet_activation"))
     self.inputs = tf.keras.layers.Input(
         shape=obs_space.shape, name="observations")
     conv1 = tf.keras.layers.Conv2D(filters=6, kernel_size=3, strides=2,
                                    activation=conv_activation)(self.inputs)
     conv2 = tf.keras.layers.Conv2D(filters=16, kernel_size=3, strides=2,
                                    activation=conv_activation)(conv1)
     conv3 = tf.keras.layers.Conv2D(filters=32, kernel_size=3, strides=1,
                                    activation=conv_activation)(conv2)
     conv_flatten = tf.keras.layers.Flatten()(conv3)
     # Shared hidden layer feeding both output heads.
     state = tf.keras.layers.Dense(
         model_config['custom_options']['hidden_units'],
         activation=fc_activation)(conv_flatten)
     layer_out = tf.keras.layers.Dense(
         num_outputs, name="act_output")(state)
     value_out = tf.keras.layers.Dense(1, name="value_output")(state)
     self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
     self.register_variables(self.base_model.variables)
Пример #2
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kwargs):
        """Build a board model: an optional conv stack over the 'board'
        observation, then fully-connected layers with separate logits and
        value heads."""
        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name, **kwargs)

        conv_filters = model_config['conv_filters']
        self.is_conv = bool(conv_filters)
        board_space = obs_space.original_space['board']
        # Conv mode keeps the 2D board plus a channel axis; FC mode
        # flattens it into a single feature vector.
        if self.is_conv:
            new_shape = board_space.shape + (1, )
        else:
            new_shape = (np.prod(board_space.shape), )

        self.inputs = tf.keras.layers.Input(shape=new_shape,
                                            name='observations')
        last = self.inputs

        if self.is_conv:
            conv_act = get_activation_fn(model_config['conv_activation'])
            for idx, (n_filters, kernel, stride) in enumerate(conv_filters, 1):
                last = tf.keras.layers.Conv2D(
                    n_filters,
                    kernel,
                    stride,
                    name="conv{}".format(idx),
                    activation=conv_act,
                    padding='same')(last)
            last = tf.keras.layers.Flatten()(last)

        fc_act = get_activation_fn(model_config['fcnet_activation'])
        for idx, width in enumerate(model_config['fcnet_hiddens'], 1):
            last = tf.keras.layers.Dense(
                width,
                name='fc{}'.format(idx),
                activation=fc_act,
                kernel_initializer=normc_initializer(1.0))(last)

        # Linear output heads.
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="my_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last)

        self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
        self.register_variables(self.base_model.variables)
        self._value_out = None
    def __init__(
        self,
        obs_space,
        action_space,
        num_outputs,
        model_config,
        name,
    ):
        """Feed-forward policy/value model.

        Stacks Dense layers over the observation (optionally concatenated
        with the previous action), with a linear ``logits`` head and a
        scalar ``values`` head.

        Args:
            obs_space: Observation space; Dict spaces are unwrapped via
                ``original_space.spaces["obs"]``.
            action_space: Action space; its first dim sizes the optional
                previous-action input.
            num_outputs: Width of the logits head (read via
                ``self.num_outputs`` set by the base class).
            model_config: Reads ``fcnet_activation``, ``fcnet_hiddens`` and
                ``custom_options['use_prev_action']``.
            name: Model name (forwarded to the base class).
        """
        super(FeedForward, self).__init__(obs_space, action_space, num_outputs,
                                          model_config, name)

        # Define input layers. Preprocessed Dict spaces expose the raw
        # space via `original_space`; plain spaces are used directly.
        if 'original_space' in dir(obs_space):
            curr_obs_space = obs_space.original_space.spaces["obs"]
        else:
            curr_obs_space = obs_space
        self.use_prev_action = model_config["custom_options"].get(
            "use_prev_action")
        if self.use_prev_action:
            # Input carries obs and previous action concatenated.
            obs_shape = curr_obs_space.shape[0]
            action_shape = action_space.shape[0]
            input_layer = tf.keras.layers.Input(shape=(obs_shape +
                                                       action_shape),
                                                name="inputs")
        else:
            input_layer = tf.keras.layers.Input(
                shape=(curr_obs_space.shape[0]), name="inputs")
        # Preprocess observations with the configured hidden layers.
        last_layer = input_layer
        activation = get_activation_fn(model_config.get("fcnet_activation"))
        hiddens = model_config.get("fcnet_hiddens")
        for i, size in enumerate(hiddens, 1):
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)

        logits = tf.keras.layers.Dense(self.num_outputs,
                                       activation=tf.keras.activations.linear,
                                       name="logits")(last_layer)
        values = tf.keras.layers.Dense(1, activation=None,
                                       name="values")(last_layer)

        inputs = [input_layer]

        # Create the feed-forward model.
        self.model = tf.keras.Model(inputs=inputs, outputs=[logits, values])
        self.register_variables(self.model.variables)
        self.model.summary()
Пример #4
0
    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Vision network over ``input_dict["obs"]``.

        All but the last filter spec become 'same'-padded conv layers; the
        last spec is applied with 'valid' padding as the (pre-)final layer.
        Returns (flattened output, flattened last hidden layer).
        """
        last = input_dict["obs"]
        filters = options.get("conv_filters")
        if not filters:
            filters = _get_filter_config(last.shape.as_list()[1:])

        activation = get_activation_fn(options.get("conv_activation"))

        with tf.name_scope("vision_net"):
            # Split off the final filter spec; the rest form the hidden stack.
            *hidden_filters, (out_size, kernel, stride) = filters
            for i, (size, k, s) in enumerate(hidden_filters, 1):
                last = tf.layers.conv2d(
                    last,
                    size,
                    k,
                    s,
                    activation=activation,
                    padding="same",
                    name="conv{}".format(i))

            if options.get("no_final_linear"):
                # The activated conv output serves directly as the output;
                # no trailing linear projection.
                fc_out = tf.layers.conv2d(
                    last,
                    num_outputs,
                    kernel,
                    stride,
                    activation=activation,
                    padding="valid",
                    name="fc_out")
                return flatten(fc_out), flatten(fc_out)

            fc1 = tf.layers.conv2d(
                last,
                out_size,
                kernel,
                stride,
                activation=activation,
                padding="valid",
                name="fc1")
            fc2 = tf.layers.conv2d(
                fc1,
                num_outputs, [1, 1],
                activation=None,
                padding="same",
                name="fc2")
            return flatten(fc2), flatten(fc1)
Пример #5
0
    def _build_layers(self, inputs, num_outputs, options):
        """Process the flattened inputs.

        Note that dict inputs will be flattened into a vector. To define a
        model that processes the components separately, use _build_layers_v2().

        Returns:
            Tuple ``(output, last_hidden)`` of tensors; with
            ``no_final_linear`` set, both elements are the activated final
            layer.
        """

        hiddens = options.get("fcnet_hiddens")
        activation = get_activation_fn(options.get("fcnet_activation"))

        # Flatten any higher-rank input down to (batch, features).
        if len(inputs.shape) > 2:
            inputs = tf.layers.flatten(inputs)

        with tf.name_scope("fc_net"):
            last_layer = inputs
            for i, size in enumerate(hiddens, 1):
                # skip final linear layer: the last hidden layer becomes
                # the (activated) output itself
                if options.get("no_final_linear") and i == len(hiddens):
                    output = tf.layers.dense(
                        last_layer,
                        num_outputs,
                        kernel_initializer=normc_initializer(1.0),
                        activation=activation,
                        name="fc_out")
                    return output, output

                last_layer = tf.layers.dense(
                    last_layer,
                    size,
                    kernel_initializer=normc_initializer(1.0),
                    activation=activation,
                    name="fc{}".format(i))

            # Final linear projection (small init keeps initial logits near 0).
            output = tf.layers.dense(
                last_layer,
                num_outputs,
                kernel_initializer=normc_initializer(0.01),
                activation=None,
                name="fc_out")
            return output, last_layer
Пример #6
0
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name):
     """Single-hidden-layer FC network with logits and value heads."""
     super(KerasFcModel, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)
     # Only the first configured hidden size is used by this model.
     hidden_units = model_config.get("fcnet_hiddens")[0]
     hidden_activation = get_activation_fn(
         model_config.get("fcnet_activation"))
     self.inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                         name="observations")
     hidden = tf.keras.layers.Dense(
         hidden_units,
         name="my_layer1",
         activation=hidden_activation,
         kernel_initializer=normc_initializer(1.0))(self.inputs)
     layer_out = tf.keras.layers.Dense(
         num_outputs,
         name="my_out",
         activation=None,
         kernel_initializer=normc_initializer(0.01))(hidden)
     value_out = tf.keras.layers.Dense(
         1,
         name="value_out",
         activation=None,
         kernel_initializer=normc_initializer(0.01))(hidden)
     self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
     self.register_variables(self.base_model.variables)
Пример #7
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Recurrent communication model: CNN + FC encoder, LSTM core, and
        logits/value heads, with optional CPC auxiliary objective and an
        optional "inference policy" message head.

        All layers live inside a per-name variable scope with AUTO_REUSE so
        the graph can be rebuilt under the same name without collisions.
        """
        with tf.variable_scope(f"{name}_model", reuse=tf.AUTO_REUSE):
            super().__init__(obs_space, action_space, num_outputs,
                             model_config, name)
            custom_opts = model_config.get("custom_options", {})
            self.use_comm = custom_opts.get("use_comm", True)

            self.message_coeff = custom_opts.get("message_entropy_coeff", 0.0)
            obs_space_shape = custom_opts.get("obs_shape", obs_space.shape)
            # A flat (1-D) obs vector may carry extra features beyond the
            # image; anything past prod(obs_shape) is treated as "extra".
            if len(obs_space.shape) == 1:
                n_extra_obs = obs_space.shape[0] - np.prod(obs_space_shape)
            else:
                n_extra_obs = 0

            # Conv
            activation = get_activation_fn(model_config.get("conv_activation"))
            filters = model_config.get("conv_filters")
            if filters is None:
                filters = _get_filter_config(obs_space_shape)

            # Leading None axis is the time dimension (RNN input).
            inputs = tf.keras.layers.Input(shape=(None, *obs_space_shape),
                                           name=f"{name}_observations_time")
            model_inputs = [inputs]

            # Fold the time axis into the batch axis for the CNN.
            cnn_in = tf.reshape(inputs, [-1, *obs_space_shape])
            conv_out = build_cnn(cnn_in,
                                 filters,
                                 activation,
                                 name=f"{name}_conv")

            # FC
            activation = get_activation_fn(
                model_config.get("fcnet_activation"))
            hiddens = model_config.get("fcnet_hiddens")

            if n_extra_obs > 0:
                # Concatenate the flattened conv features with the extra
                # (non-image) observation features.
                extra_inputs = tf.keras.layers.Input(
                    shape=(n_extra_obs, ), name=f"{name}_extra_observations")
                model_inputs.append(extra_inputs)
                fc_in = tf.keras.layers.Concatenate(name=f"{name}_fc_in")(
                    [tf.keras.layers.Flatten()(conv_out), extra_inputs])
            else:
                fc_in = tf.keras.layers.Flatten(name=f"{name}_fc_in")(conv_out)
            fc_out = build_fc(fc_in, hiddens, activation, name=f"{name}_fc")

            # LSTM
            self.cell_size = model_config.get("lstm_cell_size", 256)

            state_in_h = tf.keras.layers.Input(shape=(self.cell_size, ),
                                               name=f"{name}_h")
            state_in_c = tf.keras.layers.Input(shape=(self.cell_size, ),
                                               name=f"{name}_c")
            seq_in = tf.keras.layers.Input(shape=(),
                                           name=f"{name}_seq_in",
                                           dtype=tf.int32)

            prev_actions = tf.keras.layers.Input(shape=(),
                                                 name=f"{name}_prev_actions",
                                                 dtype=tf.int32)
            prev_rewards = tf.keras.layers.Input(shape=(),
                                                 name=f"{name}_prev_rewards")

            # prev_actions/prev_rewards are always model inputs, even when
            # lstm_use_prev_action_reward is off (they are simply unused).
            model_inputs.extend(
                [prev_actions, prev_rewards, seq_in, state_in_h, state_in_c])

            if model_config.get("lstm_use_prev_action_reward"):
                # NOTE(review): action_space[0].n implies a Tuple action
                # space whose first component is Discrete — confirm.
                prev_actions_onehot = tf.one_hot(prev_actions,
                                                 action_space[0].n)
                in_tensors = [fc_out, prev_actions_onehot, prev_rewards]
            else:
                in_tensors = [fc_out]

            # CPC objective
            self.use_cpc = custom_opts.get("use_cpc", False)
            if self.use_cpc:
                cpc_params = custom_opts["cpc_opts"]
                self.cpc_in_shape = [cpc_params["cpc_code_size"]]
                self.cpc_out_shape = [
                    cpc_params["cpc_len"], cpc_params["cpc_code_size"]
                ]
                cpc_params["name"] = f"{name}_cpc"
                # The actual CPC encodings
                self._cpc_ins = None
                self._cpc_preds = None
            else:
                cpc_params = {}

            lstm_out, model_outputs = build_lstm(
                in_tensors,
                state_in_h=state_in_h,
                state_in_c=state_in_c,
                seq_in=seq_in,
                cell_size=self.cell_size,
                add_cpc=self.use_cpc,
                cpc_params=cpc_params,
                name=f"{name}_lstm",
            )

            # Final layer, logits has both actions and messages
            self.use_inference_policy = custom_opts.get(
                "use_inference_policy", False)
            if self.use_inference_policy:
                inference_policy_opts = custom_opts["inference_policy_opts"]
                self.pm_type = inference_policy_opts["type"]
                self.ewma_momentum = inference_policy_opts.get("ewma_momentum")
                self.pm_hidden = inference_policy_opts.get(
                    "pm_hidden", [64, 64])
                self.message_size = action_space[1].n

                # Separate heads for actions and (unscaled) messages.
                action_logits = tf.keras.layers.Dense(
                    action_space[0].n,
                    activation=tf.keras.activations.linear,
                    name=f"{name}_action_logits",
                )(lstm_out)
                unscaled_message_logits = tf.keras.layers.Dense(
                    self.message_size,
                    activation=tf.keras.activations.linear,
                    name=f"{name}_unscaled_message_logits",
                )(lstm_out)
                unscaled_message_p = tf.nn.softmax(unscaled_message_logits)
                model_outputs.append(unscaled_message_p)

                if self.pm_type == "moving_avg":
                    # Non-trainable running average of message probabilities,
                    # initialized uniform over message_size.
                    self._avg_message_p = tf.Variable(
                        name=f"{name}_avg",
                        initial_value=tf.ones(
                            (self.message_size, )) / self.message_size,
                        trainable=False,
                    )
                    avg_message_vars = [self._avg_message_p]
                    if self.ewma_momentum is None:
                        # Step counter used when no EWMA momentum is given.
                        self._avg_message_t = tf.Variable(
                            name=f"{name}_t",
                            initial_value=tf.zeros(()),
                            trainable=False,
                        )
                        avg_message_vars.append(self._avg_message_t)

                    self.register_variables(avg_message_vars)

                logits = tf.keras.layers.Concatenate(name=f"{name}_logits")(
                    [action_logits, unscaled_message_logits])
            else:
                logits = tf.keras.layers.Dense(
                    num_outputs,
                    activation=tf.keras.activations.linear,
                    name=f"{name}_logits",
                )(lstm_out)

            values = tf.keras.layers.Dense(1,
                                           activation=None,
                                           name=f"{name}_values")(lstm_out)
            self._value_out = None  # The actual value
            model_outputs = [logits, values] + model_outputs

            # Create the RNN model
            self.rnn_model = tf.keras.Model(inputs=model_inputs,
                                            outputs=model_outputs)
            self.register_variables(self.rnn_model.variables)
            self._model_out = None  # Actual logits
            self.rnn_model.summary()

            if self.use_inference_policy and self.pm_type == "hyper_nn":
                # Hyper-network variant: predict message logits from the
                # (stop-gradient) flattened weights of the message head model.
                flattened_vars = []
                message_model = tf.keras.Model(inputs=model_inputs,
                                               outputs=unscaled_message_logits)
                for e in message_model.variables:
                    flattened_vars.append(
                        tf.reshape(tf.stop_gradient(e), shape=[1, -1]))

                concat_vars = tf.keras.layers.Concatenate()(flattened_vars)
                pm_fc_out, pm_fc_vars = build_fc(concat_vars,
                                                 self.pm_hidden,
                                                 "relu",
                                                 name="pm_fc",
                                                 return_vars=True)
                pm_logits_layer = tf.keras.layers.Dense(
                    self.message_size,
                    activation=tf.keras.activations.linear,
                    name=f"{name}_pm_logits",
                )
                self._pm_logits = pm_logits_layer(pm_fc_out)
                self.register_variables(pm_fc_vars)
                self.register_variables(pm_logits_layer.variables)

            # Extra variable definitions
            self.use_receiver_bias = custom_opts.get("use_receiver_bias",
                                                     False)
            self.no_message_outputs = None
            self._unscaled_message_p = None
Пример #8
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Stratego model: separate (or shared) conv towers for policy and
        value, optional ConvLSTM recurrence, and conv-shaped logits whose
        channel count is derived from the action space and board size.

        Observation keys are selected by ``custom_options['observation_mode']``
        (partial/full/both). With ``use_lstm`` the inputs gain a time axis and
        per-tower LSTM h/c states; ``fake_lstm`` keeps dummy (1,)-shaped
        states without building the ConvLSTM.
        """

        model_config = with_base_config(
            base_config=DEFAULT_STRATEGO_MODEL_CONFIG,
            extra_config=model_config)
        TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)

        print(model_config)

        # Pick which observation key feeds the policy (pi) and value (vf)
        # towers; BOTH_OBSERVATIONS requires separate towers.
        observation_mode = model_config['custom_options']['observation_mode']
        if observation_mode == PARTIALLY_OBSERVABLE:
            self.pi_obs_key = 'partial_observation'
            self.vf_obs_key = 'partial_observation'
        elif observation_mode == FULLY_OBSERVABLE:
            self.pi_obs_key = 'full_observation'
            self.vf_obs_key = 'full_observation'
        elif observation_mode == BOTH_OBSERVATIONS:
            self.pi_obs_key = 'partial_observation'
            self.vf_obs_key = 'full_observation'
            assert not model_config['vf_share_layers']
        else:
            assert False, "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"

        if model_config["custom_preprocessor"]:
            print(obs_space)

            self.preprocessor = ModelCatalog.get_preprocessor_for_space(
                observation_space=self.obs_space.original_space,
                options=model_config)
        else:
            self.preprocessor = None
            # NOTE(review): logger.warn is a deprecated alias of
            # logger.warning.
            logger.warn(
                "No custom preprocessor for StrategoModel was specified.\n"
                "Some tree search policies may not initialize their placeholders correctly without this."
            )

        self.use_lstm = model_config['use_lstm']
        self.fake_lstm = model_config['custom_options'].get('fake_lstm')
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.mask_invalid_actions = model_config['custom_options'][
            'mask_invalid_actions']

        conv_activation = get_activation_fn(
            model_config.get("conv_activation"))
        lstm_filters = model_config["custom_options"]['lstm_filters']
        cnn_filters = model_config.get("conv_filters")
        final_pi_filter_amt = model_config["custom_options"][
            "final_pi_filter_amt"]

        rows = obs_space.original_space[self.pi_obs_key].shape[0]
        colums = obs_space.original_space[self.pi_obs_key].shape[1]

        # LSTM state shape: dummy (1,) when faking, otherwise one channel
        # plane per first lstm_filters spec.
        if self.use_lstm:
            if self.fake_lstm:
                self._lstm_state_shape = (1, )
            else:
                self._lstm_state_shape = (rows, colums, lstm_filters[0][0])

        if self.use_lstm:

            # Four states: h/c for the pi tower, h/c for the vf tower.
            state_in = [
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="pi_lstm_h"),
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="pi_lstm_c"),
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="vf_lstm_h"),
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="vf_lstm_c")
            ]

            seq_lens_in = tf.keras.layers.Input(shape=(), name="lstm_seq_in")

            # Recurrent mode: observations carry a leading time axis.
            self.pi_obs_inputs = tf.keras.layers.Input(
                shape=(None, *obs_space.original_space[self.pi_obs_key].shape),
                name="pi_observation")

            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=(None, *obs_space.original_space[self.vf_obs_key].shape),
                name="vf_observation")

        else:
            state_in, seq_lens_in = None, None

            self.pi_obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self.pi_obs_key].shape,
                name="pi_observation")

            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self.vf_obs_key].shape,
                name="vf_observation")

        # if pi_cnn_filters is None:
        #     assert False
        #     # assuming board size will always remain the same for both pi and vf networks
        #     if self.use_lstm:
        #         single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[2:]
        #     else:
        #         single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[1:]
        #     pi_cnn_filters = _get_filter_config(single_obs_input_shape)
        #
        # if v_cnn_filters is None:
        #     assert False
        #     # assuming board size will always remain the same for both pi and vf networks
        #     if self.use_lstm:
        #         single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[2:]
        #     else:
        #         single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[1:]
        #     v_cnn_filters = _get_filter_config(single_obs_input_shape)

        def maybe_td(layer):
            """Wrap `layer` in TimeDistributed when running in LSTM mode."""
            if self.use_lstm:
                return tf.keras.layers.TimeDistributed(layer=layer,
                                                       name=f"td_{layer.name}")
            else:
                return layer

        def build_primary_layers(prefix: str, obs_in: tf.Tensor,
                                 state_in: tf.Tensor):
            # encapsulated in a function to either be called once for shared policy/vf or twice for separate policy/vf

            _last_layer = obs_in

            for i, (out_size, kernel, stride) in enumerate(cnn_filters):
                _last_layer = maybe_td(
                    tf.keras.layers.Conv2D(filters=out_size,
                                           kernel_size=kernel,
                                           strides=stride,
                                           activation=conv_activation,
                                           padding="same",
                                           name="{}_conv_{}".format(
                                               prefix, i)))(_last_layer)

            state_out = state_in
            if self.use_lstm and not self.fake_lstm:
                for i, (out_size, kernel, stride) in enumerate(lstm_filters):
                    if i > 0:
                        raise NotImplementedError(
                            "Only single lstm layers are implemented right now"
                        )

                    # ConvLSTM2D returns (sequence output, h, c); mask by
                    # sequence length so padded steps don't update state.
                    _last_layer, *state_out = tf.keras.layers.ConvLSTM2D(
                        filters=out_size,
                        kernel_size=kernel,
                        strides=stride,
                        activation=conv_activation,
                        padding="same",
                        return_sequences=True,
                        return_state=True,
                        name="{}_convlstm".format(prefix))(
                            inputs=_last_layer,
                            mask=tf.sequence_mask(seq_lens_in),
                            initial_state=state_in)

            # state_out = state_in
            # if self.use_lstm:
            #     _last_layer = maybe_td(tf.keras.layers.Flatten())(_last_layer)
            #     _last_layer, *state_out = tf.keras.layers.LSTM(
            #         units=64,
            #         return_sequences=True,
            #         return_state=True,
            #         name="{}_lstm".format(prefix))(
            #         inputs=_last_layer,
            #         mask=tf.sequence_mask(seq_lens_in),
            #         initial_state=state_in)

            return _last_layer, state_out

        if self.use_lstm:
            # First two states belong to the pi tower, last two to vf.
            pi_state_in = state_in[:2]
            vf_state_in = state_in[2:]
        else:
            pi_state_in, vf_state_in = None, None

        pi_last_layer, pi_state_out = build_primary_layers(
            prefix="pi", obs_in=self.pi_obs_inputs, state_in=pi_state_in)

        vf_last_layer, vf_state_out = build_primary_layers(
            prefix="vf", obs_in=self.vf_obs_inputs, state_in=vf_state_in)

        if self.use_lstm:
            state_out = [*pi_state_out, *vf_state_out]
        else:
            state_out = None

        pi_last_layer = maybe_td(
            tf.keras.layers.Conv2D(filters=final_pi_filter_amt,
                                   kernel_size=[3, 3],
                                   strides=1,
                                   activation=conv_activation,
                                   padding="same",
                                   name="{}_conv_{}".format(
                                       'pi', "last")))(pi_last_layer)

        print(
            f"action space n: {action_space.n}, rows: {rows}, columns: {colums}, filters: {int(action_space.n / (rows * colums))}"
        )

        # Logits stay conv-shaped: one channel per action "plane", i.e.
        # action_space.n / (rows * colums) channels over the board.
        unmasked_logits_out = maybe_td(
            tf.keras.layers.Conv2D(
                filters=int(action_space.n / (rows * colums)),
                kernel_size=[3, 3],
                strides=1,
                activation=None,
                padding="same",
                name="{}_conv_{}".format('pi',
                                         "unmasked_logits")))(pi_last_layer)

        # pi_last_layer = maybe_td(tf.keras.layers.Flatten(name="pi_flatten"))(pi_last_layer)
        # unmasked_logits_out = maybe_td(tf.keras.layers.Dense(
        #     units=9,
        #     name="pi_unmasked_logits_out",
        #     activation=None,
        #     kernel_initializer=normc_initializer(0.01)))(pi_last_layer)
        # unmasked_logits_out = maybe_td(tf.keras.layers.Reshape(target_shape=[3,3,1]))(unmasked_logits_out)

        self._use_q_fn = model_config['custom_options']['q_fn']

        if self._use_q_fn:
            # Q-function head: same conv-plane layout as the logits.
            vf_last_layer = maybe_td(
                tf.keras.layers.Conv2D(filters=final_pi_filter_amt,
                                       kernel_size=[3, 3],
                                       strides=1,
                                       activation=conv_activation,
                                       padding="same",
                                       name="{}_conv_{}".format(
                                           'vf', "last")))(vf_last_layer)

            value_out = maybe_td(
                tf.keras.layers.Conv2D(
                    filters=int(action_space.n / (rows * colums)),
                    kernel_size=[3, 3],
                    strides=1,
                    activation=None,
                    padding="same",
                    name="{}_conv_{}".format('vf', "q_out")))(vf_last_layer)
        else:
            # State-value head: 1x1 conv -> flatten -> scalar Dense.

            vf_last_layer = maybe_td(
                tf.keras.layers.Conv2D(filters=1,
                                       kernel_size=[1, 1],
                                       strides=1,
                                       activation=conv_activation,
                                       padding="same",
                                       name="{}_conv_{}".format(
                                           'vf', "last")))(vf_last_layer)

            vf_last_layer = maybe_td(
                tf.keras.layers.Flatten(name="vf_flatten"))(vf_last_layer)

            value_out = maybe_td(
                tf.keras.layers.Dense(
                    units=1,
                    name="vf_out",
                    activation=None,
                    kernel_initializer=normc_initializer(0.01)))(vf_last_layer)

        model_inputs = [self.pi_obs_inputs, self.vf_obs_inputs]
        model_outputs = [unmasked_logits_out, value_out]

        if self.use_lstm:
            model_inputs += [seq_lens_in, *state_in]
            model_outputs += state_out

        self.base_model = tf.keras.Model(inputs=model_inputs,
                                         outputs=model_outputs)

        print(self.base_model.summary())

        self.register_variables(self.base_model.variables)
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Recurrent (GRU) policy/value model.

        A stack of Dense layers feeds a single GRU; linear heads produce
        logits and a scalar value from the GRU's sequence output.
        """
        super(GRU, self).__init__(obs_space, action_space, num_outputs,
                                  model_config, name)

        # Preprocessed Dict spaces expose the raw space via
        # `original_space`; plain spaces are used as-is.
        if 'original_space' in dir(obs_space):
            curr_obs_space = obs_space.original_space.spaces["obs"]
        else:
            curr_obs_space = obs_space
        self.use_prev_action = model_config["custom_options"].get(
            "use_prev_action")
        # Per-timestep feature width: obs alone, or obs + previous action.
        if self.use_prev_action:
            feature_size = curr_obs_space.shape[0] + action_space.shape[0]
        else:
            feature_size = curr_obs_space.shape[0]
        input_layer = tf.keras.layers.Input(shape=(None, feature_size),
                                            name="inputs")

        # Pre-GRU fully connected stack.
        last_layer = input_layer
        activation = get_activation_fn(model_config.get("fcnet_activation"))
        for idx, width in enumerate(model_config.get("fcnet_hiddens"), 1):
            last_layer = tf.keras.layers.Dense(
                width,
                name="fc_{}".format(idx),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)

        self.cell_size = model_config["custom_options"].get("cell_size")
        state_in_h = tf.keras.layers.Input(shape=(self.cell_size, ), name="h")
        seq_in = tf.keras.layers.Input(shape=(), name="seq_in")

        # GRU over the sequence, masked by the true sequence lengths.
        gru_layer = tf.keras.layers.GRU(self.cell_size,
                                        return_sequences=True,
                                        return_state=True,
                                        name="gru")
        gru_out, state_h = gru_layer(inputs=last_layer,
                                     mask=tf.sequence_mask(seq_in),
                                     initial_state=[state_in_h])

        # Linear heads over the GRU output.
        logits = tf.keras.layers.Dense(self.num_outputs,
                                       activation=tf.keras.activations.linear,
                                       name="logits")(gru_out)
        values = tf.keras.layers.Dense(1, activation=None,
                                       name="values")(gru_out)

        # Create the RNN model.
        self.rnn_model = tf.keras.Model(
            inputs=[input_layer, seq_in, state_in_h],
            outputs=[logits, values, state_h])
        self.register_variables(self.rnn_model.variables)
        self.rnn_model.summary()
Пример #10
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, twin_q):
        """Stratego policy/Q model with an optional twin Q head (SAC-style).

        Builds three separate keras models (policy head, main Q head and,
        when ``twin_q`` is set, a second Q head) plus a trainable SAC
        temperature variable ``log_alpha``.
        """
        model_config = with_base_config(
            base_config=DEFAULT_STRATEGO_MODEL_CONFIG,
            extra_config=model_config)
        TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)

        print(model_config)

        # Select which observation keys feed the policy and value networks.
        observation_mode = model_config['custom_options']['observation_mode']
        if observation_mode == PARTIALLY_OBSERVABLE:
            self.pi_obs_key = 'partial_observation'
            self.vf_obs_key = 'partial_observation'
        elif observation_mode == FULLY_OBSERVABLE:
            self.pi_obs_key = 'full_observation'
            self.vf_obs_key = 'full_observation'
        elif observation_mode == BOTH_OBSERVATIONS:
            self.pi_obs_key = 'partial_observation'
            self.vf_obs_key = 'full_observation'
            # Different observations per tower are incompatible with sharing.
            assert not model_config['vf_share_layers']
        else:
            assert False, "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"

        if model_config["custom_preprocessor"]:
            print(obs_space)

            self.preprocessor = ModelCatalog.get_preprocessor_for_space(
                observation_space=self.obs_space.original_space,
                options=model_config)
        else:
            self.preprocessor = None
            # logging.Logger.warn is a deprecated alias of warning().
            logger.warning(
                "No custom preprocessor for StrategoModel was specified.\n"
                "Some tree search policies may not initialize their placeholders correctly without this."
            )

        self.use_lstm = model_config['use_lstm']
        if self.use_lstm:
            raise NotImplementedError

        self.fake_lstm = model_config['custom_options'].get('fake_lstm', False)
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.mask_invalid_actions = model_config['custom_options'][
            'mask_invalid_actions']
        self._use_q_fn = model_config['custom_options']['q_fn']

        self.twin_q = twin_q
        # A twin head only makes sense when the value head is a Q-function.
        assert not (not self._use_q_fn and self.twin_q)
        if self.twin_q and self.use_lstm:
            raise NotImplementedError
        self._sac_alpha = model_config.get("sac_alpha", False)

        conv_activation = get_activation_fn(
            model_config.get("conv_activation"))

        if self.use_lstm:
            raise NotImplementedError
        else:
            state_in, seq_lens_in = None, None

            self.pi_obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self.pi_obs_key].shape,
                name="pi_observation")

            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self.vf_obs_key].shape,
                name="vf_observation")

        def maybe_td(layer):
            # Wrap a layer in TimeDistributed when inputs carry a time axis.
            if self.use_lstm:
                return tf.keras.layers.TimeDistributed(layer=layer,
                                                       name=f"td_{layer.name}")
            else:
                return layer

        def build_primary_layers(prefix: str, obs_in: tf.Tensor,
                                 state_in: tf.Tensor):
            # encapsulated in a function to either be called once for shared policy/vf or twice for separate policy/vf

            _last_layer = obs_in
            state_out = state_in
            # NOTE(review): dense layers use conv_activation rather than
            # fcnet_activation here — confirm this is intentional.
            for i, size in enumerate(model_config['fcnet_hiddens']):
                _last_layer = maybe_td(
                    tf.keras.layers.Dense(size,
                                          name="{}_fc_{}".format(prefix, i),
                                          activation=conv_activation,
                                          kernel_initializer=normc_initializer(
                                              1.0)))(_last_layer)

            return _last_layer, state_out

        if self.use_lstm:
            pi_state_in = state_in[:2]
            vf_state_in = state_in[2:]
        else:
            pi_state_in, vf_state_in = None, None

        self.main_vf_prefix = "main_vf" if self.twin_q else "vf"
        pi_last_layer, pi_state_out = build_primary_layers(
            prefix="pi", obs_in=self.pi_obs_inputs, state_in=pi_state_in)

        vf_last_layer, vf_state_out = build_primary_layers(
            prefix=self.main_vf_prefix,
            obs_in=self.vf_obs_inputs,
            state_in=vf_state_in)
        if self.twin_q:
            twin_vf_last_layer, twin_vf_state_out = build_primary_layers(
                prefix="twin_vf", obs_in=self.vf_obs_inputs, state_in=None)
        else:
            twin_vf_last_layer, twin_vf_state_out = None, None

        if self.use_lstm:
            raise NotImplementedError
        else:
            state_out = None

        # BUGFIX: maybe_td must wrap the *layer* (TimeDistributed requires a
        # Layer instance) and the wrapped layer is then applied to the input
        # tensor. The original wrapped the already-applied output tensor.
        # No behavior change while use_lstm is unsupported (maybe_td is the
        # identity then), but this matches the pattern used elsewhere.
        unmasked_logits_out = maybe_td(
            tf.keras.layers.Dense(
                action_space.n,
                name="{}_fc_{}".format('pi', 'unmasked_logits'),
                activation=None,
                kernel_initializer=normc_initializer(1.0)))(pi_last_layer)

        # NOTE(review): the value head always has action_space.n units even
        # when q_fn is False — confirm a scalar V-head is never needed here.
        value_out = maybe_td(
            tf.keras.layers.Dense(
                action_space.n,
                name="{}_fc_{}".format(self.main_vf_prefix, 'q_out'),
                activation=None,
                kernel_initializer=normc_initializer(1.0)))(vf_last_layer)

        if self.twin_q:
            twin_value_out = maybe_td(
                tf.keras.layers.Dense(action_space.n,
                                      name="{}_fc_{}".format(
                                          'twin_vf', 'q_out'),
                                      activation=None,
                                      kernel_initializer=normc_initializer(
                                          1.0)))(twin_vf_last_layer)

        self.pi_model = tf.keras.Model(inputs=[self.pi_obs_inputs],
                                       outputs=[unmasked_logits_out])
        self.main_q_model = tf.keras.Model(inputs=[self.vf_obs_inputs],
                                           outputs=[value_out])

        if self.twin_q:
            self.twin_q_model = tf.keras.Model(inputs=[self.vf_obs_inputs],
                                               outputs=[twin_value_out])
            # summary() prints itself and returns None; wrapping it in
            # print() would emit a stray "None" line.
            self.twin_q_model.summary()
            self.register_variables(self.twin_q_model.variables)

        self.pi_model.summary()
        self.main_q_model.summary()

        self.register_variables(self.pi_model.variables)
        self.register_variables(self.main_q_model.variables)

        # Trainable SAC entropy temperature (alpha = exp(log_alpha)).
        self.log_alpha = tf.Variable(0.0, dtype=tf.float32, name="log_alpha")
        self.alpha = tf.exp(self.log_alpha)
        self.register_variables([self.log_alpha])
Пример #11
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Fully-connected net whose hidden activations pass through
        trainable mask layers; exposes each masked activation as an extra
        model output."""
        super(FullyConnectedNetworkWithMask,
              self).__init__(obs_space, action_space, num_outputs,
                             model_config, name)

        # Per-layer post-mask activations, appended as extra model outputs.
        activation_list = []
        self.activation_value = None

        mask_mode = model_config.get("custom_options")["mask_mode"]
        assert mask_mode in ['multiply', 'add']

        activation = get_activation_fn(model_config.get("fcnet_activation"))
        hiddens = model_config.get("fcnet_hiddens")
        no_final_linear = model_config.get("no_final_linear")
        vf_share_layers = model_config.get("vf_share_layers")

        inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                       name="observations")

        mask_placeholder_dict = OrderedDict()
        self.mask_layer_dict = OrderedDict()
        self.default_mask = OrderedDict()

        # When the output layer itself is a hidden (activated) layer of size
        # num_outputs, the last configured hidden size is dropped from the
        # masked stack.
        masked_sizes = hiddens[:-1] if no_final_linear else hiddens

        last_layer = inputs
        for idx, size in enumerate(masked_sizes, start=1):
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_{}".format(idx),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)

            # Apply the trainable mask right after each dense layer and keep
            # handles to the mask kernels/layers for later manipulation.
            mask_name = "fc_{}_mask".format(idx)
            mask_layer = MultiplyMaskLayer(size,
                                           name=mask_name,
                                           mask_mode=mask_mode)
            last_layer = mask_layer(last_layer)
            mask_placeholder_dict[mask_name] = mask_layer.get_kernel()
            self.mask_layer_dict[mask_name] = mask_layer
            activation_list.append(last_layer)

        if no_final_linear:
            # Output layer doubles as the final (activated) hidden layer.
            layer_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
        else:
            # Plain linear projection to num_outputs.
            layer_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)

        if not vf_share_layers:
            # Separate (unmasked) tower for the value function.
            last_layer = inputs
            for idx, size in enumerate(hiddens, start=1):
                last_layer = tf.keras.layers.Dense(
                    size,
                    name="fc_value_{}".format(idx),
                    activation=activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)

        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)

        self.mask_placeholder_dict = mask_placeholder_dict

        self.base_model = tf.keras.Model(inputs=inputs,
                                         outputs=[layer_out, value_out] +
                                         activation_list)
        # TODO we can add a flag to determine whether to return activation.

        self.register_variables(self.base_model.variables)
        self.register_variables(list(self.mask_placeholder_dict.values()))

        # Remember the initial mask weights so they can be restored later.
        for mask_name, mask_layer in self.mask_layer_dict.items():
            self.default_mask[mask_name] = mask_layer.get_weights()
Пример #12
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Stratego policy + value model, optionally recurrent (LSTM).

        Builds a single keras model mapping (pi obs, vf obs [, seq lens,
        LSTM states]) to (unmasked action logits, value/Q output [, new
        LSTM states]). The policy tower can instead be loaded from a saved
        keras model file.
        """
        model_config = with_base_config(base_config=DEFAULT_STRATEGO_MODEL_CONFIG, extra_config=model_config)
        TFModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)

        print(model_config)

        # Select which observation keys feed the policy and value networks.
        observation_mode = model_config['custom_options']['observation_mode']
        if observation_mode == PARTIALLY_OBSERVABLE:
            self.pi_obs_key = 'partial_observation'
            self.vf_obs_key = 'partial_observation'
        elif observation_mode == FULLY_OBSERVABLE:
            self.pi_obs_key = 'full_observation'
            self.vf_obs_key = 'full_observation'
        elif observation_mode == BOTH_OBSERVATIONS:
            self.pi_obs_key = 'partial_observation'
            self.vf_obs_key = 'full_observation'
            # Different observations per tower are incompatible with sharing.
            assert not model_config['vf_share_layers']
        else:
            assert False, "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"

        if model_config["custom_preprocessor"]:
            print(obs_space)

            self.preprocessor = ModelCatalog.get_preprocessor_for_space(observation_space=self.obs_space.original_space,
                                                                        options=model_config)
        else:
            self.preprocessor = None
            # logging.Logger.warn is a deprecated alias of warning().
            logger.warning("No custom preprocessor for StrategoModel was specified.\n"
                           "Some tree search policies may not initialize their placeholders correctly without this.")

        self.use_lstm = model_config['use_lstm']
        self.lstm_cell_size = model_config['lstm_cell_size']
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.mask_invalid_actions = model_config['custom_options']['mask_invalid_actions']

        conv_activation = get_activation_fn(model_config.get("conv_activation"))
        cnn_filters = model_config.get("conv_filters")
        fc_activation = get_activation_fn(model_config.get("fcnet_activation"))
        hiddens = model_config.get("fcnet_hiddens")

        if self.use_lstm:
            # Separate (h, c) LSTM state inputs for the pi and vf towers.
            state_in = [tf.keras.layers.Input(shape=(self.lstm_cell_size,), name="pi_lstm_h"),
                        tf.keras.layers.Input(shape=(self.lstm_cell_size,), name="pi_lstm_c"),
                        tf.keras.layers.Input(shape=(self.lstm_cell_size,), name="vf_lstm_h"),
                        tf.keras.layers.Input(shape=(self.lstm_cell_size,), name="vf_lstm_c")]

            seq_lens_in = tf.keras.layers.Input(shape=(), name="lstm_seq_in")

            # Recurrent mode adds a leading time axis to the observations.
            self.pi_obs_inputs = tf.keras.layers.Input(
                shape=(None, *obs_space.original_space[self.pi_obs_key].shape), name="pi_observation")

            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=(None, *obs_space.original_space[self.vf_obs_key].shape), name="vf_observation")

        else:
            state_in, seq_lens_in = None, None

            self.pi_obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self.pi_obs_key].shape, name="pi_observation")

            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self.vf_obs_key].shape, name="vf_observation")

        if cnn_filters is None:
            # No explicit filters configured: derive defaults from the
            # per-timestep observation shape.
            # assuming board size will always remain the same for both pi and vf networks
            if self.use_lstm:
                single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[2:]
            else:
                single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[1:]
            cnn_filters = _get_filter_config(single_obs_input_shape)

        def maybe_td(layer):
            # Wrap a layer in TimeDistributed when inputs carry a time axis.
            if self.use_lstm:
                return tf.keras.layers.TimeDistributed(layer=layer)
            else:
                return layer

        def build_primary_layers(prefix: str, obs_in: tf.Tensor, state_in: tf.Tensor):
            # encapsulated in a function to either be called once for shared policy/vf or twice for separate policy/vf

            _last_layer = obs_in

            for i, (out_size, kernel, stride) in enumerate(cnn_filters):
                _last_layer = maybe_td(tf.keras.layers.Conv2D(
                    filters=out_size,
                    kernel_size=kernel,
                    strides=stride,
                    activation=conv_activation,
                    padding="same",
                    name="{}_conv_{}".format(prefix, i)))(_last_layer)

            _last_layer = maybe_td(tf.keras.layers.Flatten())(_last_layer)

            for i, size in enumerate(hiddens):
                _last_layer = maybe_td(tf.keras.layers.Dense(
                    size,
                    name="{}_fc_{}".format(prefix, i),
                    activation=fc_activation,
                    kernel_initializer=normc_initializer(1.0)))(_last_layer)

            if self.use_lstm:
                # Padded timesteps are masked out via the sequence lengths.
                _last_layer, *state_out = tf.keras.layers.LSTM(
                    units=self.lstm_cell_size,
                    return_sequences=True,
                    return_state=True,
                    name="{}_lstm".format(prefix))(
                    inputs=_last_layer,
                    mask=tf.sequence_mask(seq_lens_in),
                    initial_state=state_in)
            else:
                state_out = None

            return _last_layer, state_out


        if self.use_lstm:
            pi_state_in = state_in[:2]
            vf_state_in = state_in[2:]
        else:
            pi_state_in, vf_state_in = None, None

        # Optionally load a pre-trained policy tower from disk instead of
        # building one from scratch.
        policy_file_path = None
        if 'policy_keras_model_file_path' in model_config['custom_options']:
            policy_file_path = model_config['custom_options']['policy_keras_model_file_path']
        if policy_file_path is not None:
            if self.use_lstm:
                raise NotImplementedError

            pi_state_out = None
            self._pi_model = load_model(filepath=policy_file_path, compile=False)
            # remove loaded input layer
            # pi_model.layers.pop(0)
            # self.pi_obs_inputs = pi_model.layers[0]

            # Rename the loaded layers to avoid name collisions with the vf
            # tower. NOTE(review): pokes at the private Layer._name attribute.
            for layer in self._pi_model.layers:
                layer._name = "pi_" + layer.name
            self._pi_model.layers[-1]._name = 'pi_unmasked_logits'

            self.unmasked_logits_out = self._pi_model(self.pi_obs_inputs)

        else:
            self._pi_model = None
            pi_last_layer, pi_state_out = build_primary_layers(prefix="pi", obs_in=self.pi_obs_inputs,
                                                               state_in=pi_state_in)

            self.unmasked_logits_out = maybe_td(tf.keras.layers.Dense(
                num_outputs,
                name="pi_unmasked_logits",
                activation=None,
                kernel_initializer=normc_initializer(0.01)))(pi_last_layer)

        vf_last_layer, vf_state_out = build_primary_layers(prefix="vf", obs_in=self.vf_obs_inputs,
                                                           state_in=vf_state_in)

        if self.use_lstm:
            state_out = [*pi_state_out, *vf_state_out]
        else:
            state_out = None

        self._use_q_fn = model_config['custom_options']['q_fn']

        # Q-function mode outputs one value per action; otherwise a scalar V.
        if self._use_q_fn:
            value_out_size = num_outputs
        else:
            value_out_size = 1

        value_out = maybe_td(tf.keras.layers.Dense(
            value_out_size,
            name="vf_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01)))(vf_last_layer)

        model_inputs = [self.pi_obs_inputs, self.vf_obs_inputs]
        model_outputs = [self.unmasked_logits_out, value_out]
        if self.use_lstm:
            model_inputs += [seq_lens_in, *state_in]
            model_outputs += state_out

        self.base_model = tf.keras.Model(inputs=model_inputs, outputs=model_outputs)

        # summary() prints itself and returns None; wrapping it in print()
        # would emit a stray "None" line.
        self.base_model.summary()

        self.register_variables(self.base_model.variables)
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Spatial SAC-style model: shared conv base plus pi/q1/q2 heads.

        The base tower (optionally containing a ConvLSTM core) is a separate
        keras model; the policy and twin Q heads consume its output via a
        placeholder Input so each can be run independently. Heads are fully
        convolutional and emit (rows, columns, per-cell-action) maps.
        """
        model_config = with_base_config(
            base_config=DEFAULT_STRATEGO_MODEL_CONFIG,
            extra_config=model_config)
        TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)

        print(model_config)

        # Select which observation key feeds the network.
        observation_mode = model_config['custom_options']['observation_mode']
        if observation_mode == PARTIALLY_OBSERVABLE:
            self._obs_key = 'partial_observation'
        elif observation_mode == FULLY_OBSERVABLE:
            self._obs_key = 'full_observation'
        elif observation_mode == BOTH_OBSERVATIONS:
            raise NotImplementedError
        else:
            assert False, "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"

        self._action_dist_class, self._logit_dim = ModelCatalog.get_action_dist(
            self.action_space, model_config)

        self.use_lstm = model_config['use_lstm']
        # fake_lstm keeps the recurrent interface but skips the ConvLSTM core.
        self.fake_lstm = model_config['custom_options'].get('fake_lstm', False)

        self.mask_invalid_actions = model_config['custom_options'][
            'mask_invalid_actions']

        conv_activation = get_activation_fn(
            model_config.get("conv_activation"))
        base_lstm_filters = model_config["custom_options"]['base_lstm_filters']
        base_cnn_filters = model_config["custom_options"]['base_cnn_filters']
        pi_cnn_filters = model_config["custom_options"]['pi_cnn_filters']
        q_cnn_filters = model_config["custom_options"]['q_cnn_filters']

        # Board dimensions (fixed "colums" typo in local names only).
        rows = obs_space.original_space[self._obs_key].shape[0]
        columns = obs_space.original_space[self._obs_key].shape[1]

        if self.use_lstm:
            self._lstm_state_shape = (rows, columns, base_lstm_filters[0][0])

        if self.use_lstm and not self.fake_lstm:
            self._base_model_out_shape = (rows, columns,
                                          base_lstm_filters[0][0])
        else:
            self._base_model_out_shape = (rows, columns,
                                          base_cnn_filters[-1][0])

        if self.use_lstm:
            state_in = [
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="base_lstm_h"),
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="base_lstm_c")
            ]
            seq_lens_in = tf.keras.layers.Input(shape=(), name="lstm_seq_in")

            # Recurrent mode adds a leading time axis to the observations.
            self._obs_inputs = tf.keras.layers.Input(
                shape=(None, *obs_space.original_space[self._obs_key].shape),
                name="observation")
            self._base_model_out = tf.keras.layers.Input(
                shape=self._base_model_out_shape, name="model_out")
        else:
            state_in, seq_lens_in = None, None
            self._obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self._obs_key].shape,
                name="observation")
            self._base_model_out = tf.keras.layers.Input(
                shape=self._base_model_out_shape, name="model_out")

        def maybe_td(layer):
            # Wrap a layer in TimeDistributed when inputs carry a time axis.
            if self.use_lstm:
                return tf.keras.layers.TimeDistributed(layer=layer,
                                                       name=f"td_{layer.name}")
            else:
                return layer

        def build_shared_base_layers(prefix: str, obs_in: tf.Tensor,
                                     state_in: tf.Tensor):
            # Conv stack shared by all heads, optionally followed by a
            # single ConvLSTM core.
            # obs_in = tf.debugging.check_numerics(
            #     obs_in, f"nan found in obs_in", name=None)

            _last_layer = obs_in

            for i, (out_size, kernel, stride) in enumerate(base_cnn_filters):
                _last_layer = maybe_td(
                    tf.keras.layers.Conv2D(filters=out_size,
                                           kernel_size=kernel,
                                           strides=stride,
                                           activation=conv_activation,
                                           padding="same",
                                           name="{}_conv_{}".format(
                                               prefix, i)))(_last_layer)
                # _last_layer = tf.debugging.check_numerics(
                #     _last_layer, f"nan found in _last_layer {i}", name=None)

            base_state_out = state_in
            if self.use_lstm and not self.fake_lstm:
                for i, (out_size, kernel,
                        stride) in enumerate(base_lstm_filters):
                    if i > 0:
                        raise NotImplementedError(
                            "Only single lstm layers are implemented right now"
                        )

                    _last_layer, *base_state_out = tf.keras.layers.ConvLSTM2D(
                        filters=out_size,
                        kernel_size=kernel,
                        strides=stride,
                        activation=conv_activation,
                        padding="same",
                        data_format='channels_last',
                        return_sequences=True,
                        return_state=True,
                        name="{}_convlstm".format(prefix))(
                            inputs=_last_layer,
                            initial_state=state_in,
                            mask=tf.sequence_mask(seq_lens_in))

            return _last_layer, base_state_out

        def build_pi_layers(input_layer):
            # Policy head: conv stack ending in a per-board-cell logit map.
            _last_layer = input_layer
            for i, (out_size, kernel, stride) in enumerate(pi_cnn_filters):
                _last_layer = tf.keras.layers.Conv2D(
                    filters=out_size,
                    kernel_size=kernel,
                    strides=stride,
                    activation=conv_activation,
                    padding="same",
                    name="{}_conv_{}".format('pi', i))(_last_layer)

            print(
                f"action space n: {action_space.n}, rows: {rows}, columns: {columns}, filters: {int(action_space.n / (rows * columns))}"
            )

            # NOTE(review): assumes action_space.n is divisible by
            # rows * columns — confirm upstream guarantees this.
            unmasked_logits = tf.keras.layers.Conv2D(
                filters=int(action_space.n / (rows * columns)),
                kernel_size=[3, 3],
                strides=1,
                activation=None,
                padding="same",
                name="{}_conv_{}".format('pi', "unmasked_logits"))(_last_layer)
            return unmasked_logits

        def build_q_layers(input_layer, prefix):
            # Q head: same spatial output layout as the policy head.
            _last_layer = input_layer
            for i, (out_size, kernel, stride) in enumerate(q_cnn_filters):
                _last_layer = tf.keras.layers.Conv2D(
                    filters=out_size,
                    kernel_size=kernel,
                    strides=stride,
                    activation=conv_activation,
                    padding="same",
                    name="{}_conv_{}".format(prefix, i))(_last_layer)

            q_val = tf.keras.layers.Conv2D(
                filters=int(action_space.n / (rows * columns)),
                kernel_size=[3, 3],
                strides=1,
                activation=None,
                padding="same",
                name="{}_conv_{}".format(prefix, "q_out"))(_last_layer)
            return q_val

        base_model_out, state_out = build_shared_base_layers(
            prefix="shared_base", obs_in=self._obs_inputs, state_in=state_in)
        # Heads are wired to the placeholder Input so they can be invoked
        # separately from the base model.
        pi_unmasked_logits_out = build_pi_layers(
            input_layer=self._base_model_out)
        q1_out = build_q_layers(input_layer=self._base_model_out, prefix="q1")
        q2_out = build_q_layers(input_layer=self._base_model_out, prefix="q2")

        base_inputs = [self._obs_inputs]
        base_outputs = [base_model_out]
        if self.use_lstm:
            base_inputs += [seq_lens_in, *state_in]
            base_outputs += [*state_out]

        self._base_model = tf.keras.Model(name=f"{name}_base",
                                          inputs=base_inputs,
                                          outputs=base_outputs)

        self.pi_model = tf.keras.Model(name=f"{name}_pi_head",
                                       inputs=[self._base_model_out],
                                       outputs=[pi_unmasked_logits_out])
        self.q1_model = tf.keras.Model(name=f"{name}_q1_head",
                                       inputs=[self._base_model_out],
                                       outputs=[q1_out])
        self.q2_model = tf.keras.Model(name=f"{name}_q2_head",
                                       inputs=[self._base_model_out],
                                       outputs=[q2_out])

        # summary() prints itself and returns None; wrapping it in print()
        # would emit a stray "None" line.
        self._base_model.summary()
        self.pi_model.summary()
        self.q1_model.summary()
        self.q2_model.summary()

        self.register_variables(self._base_model.variables)
        self.register_variables(self.pi_model.variables)
        self.register_variables(self.q1_model.variables)
        self.register_variables(self.q2_model.variables)

        # Trainable SAC entropy temperature (alpha = exp(log_alpha)).
        self.log_alpha = tf.Variable(0.0, dtype=tf.float32, name="log_alpha")
        self.alpha = tf.exp(self.log_alpha)
        self.register_variables([self.log_alpha])
Пример #14
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Fully-connected mixture-policy network whose final layer uses tanh.

        The policy head emits, per mixture component, a mean vector plus one
        mixing weight; depending on ``custom_options["std_mode"]`` the log-std
        part of the output is either a learnable free variable ("free") or a
        constant tensor of ones ("zero"), spliced into the network output
        between the means and the mixing weights.

        Args:
            obs_space: Observation space; only its flattened shape is used.
            action_space: Action space (unused directly here).
            num_outputs: Total policy-output size; must be divisible into
                ``k`` components of odd per-component width (mean + log-std
                pairs plus one weight).
            model_config: RLlib model config; reads ``fcnet_activation``,
                ``fcnet_hiddens``, ``no_final_linear``, ``vf_share_layers``
                and ``custom_options`` (``num_components``, ``std_mode``).
            name: Model name.
        """
        super(FullyConnectedNetworkTanh,
              self).__init__(obs_space, action_space, num_outputs,
                             model_config, name)
        activation = get_activation_fn(model_config.get("fcnet_activation"))
        hiddens = model_config.get("fcnet_hiddens")
        no_final_linear = model_config.get("no_final_linear")
        vf_share_layers = model_config.get("vf_share_layers")

        # k mixture components; each contributes (2 * action_length + 1)
        # outputs: action_length means, action_length log-stds, 1 weight.
        k = model_config["custom_options"]["num_components"]
        action_length = (num_outputs // k - 1) // 2
        assert num_outputs % k == 0
        assert (num_outputs // k - 1) % 2 == 0

        # "std_norm" is a legacy option this network does not support.
        assert "std_norm" not in model_config["custom_options"]

        if model_config["custom_options"].get("std_mode") == "free":
            # Learnable log-std parameters, shared across the batch.
            # NOTE(review): graph-mode TF1 API (tf.get_variable) — this model
            # presumably runs with eager execution disabled.
            log_stds = tf.get_variable(name="learnable_log_std",
                                       shape=[k * action_length],
                                       initializer=tf.zeros_initializer)
            # The network itself no longer needs to produce the log-stds.
            num_outputs -= k * action_length
        elif model_config["custom_options"].get("std_mode") == "zero":
            # Fixed log-std of 1.0 for every dimension (constant tensor).
            log_stds = tf.ones(name="log_std", shape=[k * action_length])
            num_outputs -= k * action_length

        # we are using obs_flat, so take the flattened shape as input
        inputs = tf.keras.layers.Input(shape=(np.product(obs_space.shape), ),
                                       name="observations")
        last_layer = inputs
        i = 1

        if no_final_linear:
            raise NotImplementedError(
                "no_final_linear should be set to False.")
            # # the last layer is adjusted to be of size num_outputs
            # for size in hiddens[:-1]:
            #     last_layer = tf.keras.layers.Dense(
            #         size,
            #         name="fc_{}".format(i),
            #         activation=activation,
            #         kernel_initializer=normc_initializer(1.0))(last_layer)
            #     i += 1
            # layer_out = tf.keras.layers.Dense(
            #     num_outputs,
            #     name="fc_out",
            #     activation=activation,
            #     kernel_initializer=normc_initializer(1.0))(last_layer)
        else:
            # the last layer is a linear to size num_outputs
            for size in hiddens:
                last_layer = tf.keras.layers.Dense(
                    size,
                    name="fc_{}".format(i),
                    activation=activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)
                i += 1
            layer_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                # tanh (rather than linear) output is the defining feature of
                # this network variant.
                activation="tanh",  # <<== Here!
                kernel_initializer=normc_initializer(0.01))(last_layer)
            if model_config["custom_options"].get("std_mode") == "free":
                # Output layout is [means (k*action_length), weights (k)];
                # re-insert the learnable log-stds between them, broadcast to
                # the batch dimension.
                splits = tf.split(layer_out, [action_length * k, k], 1)
                # Ensure the free variable is initialized before it is read
                # in the graph.
                with tf.control_dependencies(
                    [tf.variables_initializer([log_stds])]):
                    layer_out = tf.concat([
                        splits[0],
                        tf.broadcast_to(log_stds, tf.shape(splits[0])),
                        splits[1]
                    ],
                                          axis=1)
            elif model_config["custom_options"].get("std_mode") == "zero":
                # Same splice as above but with the constant log-std tensor.
                splits = tf.split(layer_out, [action_length * k, k], 1)
                layer_out = tf.concat([
                    splits[0],
                    tf.broadcast_to(log_stds, tf.shape(splits[0])), splits[1]
                ],
                                      axis=1)
        if not vf_share_layers:
            # build a parallel set of hidden layers for the value net
            last_layer = inputs
            i = 1
            for size in hiddens:
                last_layer = tf.keras.layers.Dense(
                    size,
                    name="fc_value_{}".format(i),
                    activation=activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)
                i += 1

        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)

        self.base_model = tf.keras.Model(inputs, [layer_out, value_out])
        self.register_variables(self.base_model.variables)

        if model_config["custom_options"].get("std_mode") == "free":
            # The free log-std variable lives outside the Keras model, so it
            # must be registered separately.
            self.register_variables([log_stds])
Пример #15
0
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name):
        """CNN + LSTM network over (vision, messages) tuple observations.

        Vision frames pass through time-distributed conv layers and dense
        layers, are concatenated with the message input, and feed an LSTM
        whose output drives three heads: action logits, a value estimate, and
        message logits.

        Fix: ``channels_last=True`` is not a ``Conv2D`` keyword argument and
        raises at layer construction; the correct spelling is
        ``data_format="channels_last"``.

        Args:
            obs_space: Tuple observation space; assumes
                ``original_space.spaces[0]`` is vision and ``spaces[1]`` is
                the message component.
            action_space: MultiDiscrete action space; ``nvec[0]`` is the
                number of environment actions.
            num_outputs: Total logits size (actions + messages).
            model_config: Reads ``conv_activation``, ``conv_filters``,
                ``fcnet_activation`` and ``custom_options``
                (``num_agents``, ``num_symbols``, ``cell_size``,
                ``fcnet_hiddens``).
            name: Model name (also used in layer names).
        """
        super(ObedienceLSTM, self).__init__(obs_space, action_space, num_outputs,
                                            model_config, name)

        self._value_out = -1
        self.obs_space = obs_space
        vision_space = self.obs_space.original_space.spaces[0]
        message_space = self.obs_space.original_space.spaces[1]

        # The inputs of the shared trunk. We will concatenate the observation space with shared info about the
        # visibility of agents. Currently we assume all the agents have equally sized action spaces.
        self.num_outputs = num_outputs
        self.num_agents = model_config["custom_options"]["num_agents"]
        self.num_symbols = model_config["custom_options"]["num_symbols"]
        self.cell_size = model_config["custom_options"].get("cell_size")

        # an extra none for the time dimension
        inputs = tf.keras.layers.Input(
            shape=(None,) + vision_space.shape, name="observations")

        # Build the CNN layers; all but the last use "same" padding.
        last_layer = inputs
        activation = get_activation_fn(model_config.get("conv_activation"))
        filters = model_config.get("conv_filters")
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            # TimeDistributed applies the conv to each frame independently.
            last_layer = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i)))(last_layer)
        out_size, kernel, stride = filters[-1]
        if len(filters) == 1:
            # The loop above never ran, so give "i" a value such that the
            # final conv below is named "conv0".
            i = -1

        # should be batch x time x height x width x channel
        conv_out = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            name="conv{}".format(i + 1)))(last_layer)

        flat_layer = tf.keras.layers.TimeDistributed(tf.keras.layers.Flatten())(conv_out)

        # Add the fully connected layers
        hiddens = model_config["custom_options"].get("fcnet_hiddens")
        activation = get_activation_fn(model_config.get("fcnet_activation"))
        last_layer = flat_layer
        i = 1
        for size in hiddens:
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_{}_{}".format(i, name),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
            i += 1

        # Messages bypass the CNN and join right before the LSTM.
        messages_layer = tf.keras.layers.Input(shape=((None,) + message_space.shape), name="messages")
        last_layer = tf.keras.layers.concatenate([last_layer, messages_layer])

        # Recurrent state inputs plus the sequence-length input used to mask
        # padded timesteps.
        state_in_h = tf.keras.layers.Input(shape=(self.cell_size,), name="h")
        state_in_c = tf.keras.layers.Input(shape=(self.cell_size,), name="c")
        seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)

        lstm_out, state_h, state_c = tf.keras.layers.LSTM(
            self.cell_size, return_sequences=True, return_state=True, name="lstm")(
            inputs=last_layer,
            mask=tf.sequence_mask(seq_in),
            initial_state=[state_in_h, state_in_c])

        # Split the total output budget between env actions and messages.
        num_actions_out = action_space.nvec[0]
        num_messages_out = self.num_outputs - num_actions_out

        # Postprocess LSTM output with another hidden layer and compute values
        logits = tf.keras.layers.Dense(
            num_actions_out,
            activation=tf.keras.activations.linear,
            name=name)(lstm_out)

        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(lstm_out)

        message_logits = tf.keras.layers.Dense(
            num_messages_out,
            activation=tf.keras.activations.linear,
            name=f'message_{name}')(lstm_out)

        inputs = [inputs, messages_layer, seq_in, state_in_h, state_in_c]
        self.rnn_model = tf.keras.Model(
            inputs=inputs,
            outputs=[logits, value_out, message_logits, state_h, state_c])

        self.register_variables(self.rnn_model.variables)
Пример #16
0
    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Conv + LSTM network with optional previous-action conditioning.

        Frames go through time-distributed conv layers, a first dense stack,
        an LSTM, then a second dense stack before the action/value heads.

        Fix: the previous-actions input was declared with
        ``shape=(None, action_space.shape)``, nesting a tuple inside the
        shape (invalid); it must be ``(None,) + action_space.shape``, as in
        the sibling LSTM model.

        Args:
            obs_space: Observation space; frames are Batch x Time x H x W x C.
            action_space: Action space; its shape sizes the prev-action input.
            num_outputs: Size of the action-logits output.
            model_config: Reads ``conv_activation``, ``conv_filters``,
                ``fcnet_activation``, ``lstm_cell_size`` and
                ``custom_options`` (``use_prev_action``, ``fcnet_hiddens`` —
                a list of two lists: pre- and post-LSTM hidden sizes).
            name: Model name.
        """
        super(ConvLSTM, self).__init__(obs_space, action_space, num_outputs,
                                    model_config, name)
        self.obs_space = obs_space
        self.num_outputs = num_outputs

        ## Batch x Time x H x W x C
        input_layer = tf.keras.layers.Input(shape=(None,) + obs_space.shape, name="inputs")

        conv_activation = get_activation_fn(model_config.get("conv_activation"))
        filters = model_config.get("conv_filters")
        last_layer = input_layer
        for i, (out_size, kernel, stride) in enumerate(filters):
            ## Batch x Time x H x W x C
            # Time distributed ensures that the conv operates on each image independently
            last_layer = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=conv_activation,
                padding="same",
                name="conv{}".format(i)))(last_layer)

        last_layer = tf.keras.layers.TimeDistributed(tf.keras.layers.Flatten())(last_layer)

        # If true we append the actions into the layer after the conv
        self.use_prev_action = model_config["custom_options"].get("use_prev_action")
        if self.use_prev_action:
            actions_layer = tf.keras.layers.Input(
                shape=(None,) + action_space.shape, name="agent_actions")
            last_layer = tf.keras.layers.concatenate([last_layer, actions_layer])

        hiddens = model_config["custom_options"].get("fcnet_hiddens") # should be list of lists
        assert type(hiddens) == list
        assert type(hiddens[0]) == list
        assert type(hiddens[1]) == list
        i = 1
        fc_activation = get_activation_fn(model_config.get("fcnet_activation"))
        # Pre-LSTM dense stack.
        for size in hiddens[0]:
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_{}".format(i),
                activation=fc_activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
            i += 1

        self.cell_size = model_config['lstm_cell_size']

        state_in_h = tf.keras.layers.Input(shape=(self.cell_size,), name="h")
        state_in_c = tf.keras.layers.Input(shape=(self.cell_size,), name="c")
        seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)

        # expects B x T x (H*W*C)
        lstm_out, state_h, state_c = tf.keras.layers.LSTM(
            self.cell_size, return_sequences=True, return_state=True, name="lstm")(
            inputs=last_layer,
            mask=tf.sequence_mask(seq_in),
            initial_state=[state_in_h, state_in_c])
        # output: B x cell_size

        last_layer = lstm_out
        # Post-LSTM dense stack; "i" continues so layer names stay unique.
        for size in hiddens[1]:
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_{}".format(i),
                activation=fc_activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
            i += 1

        action = tf.keras.layers.Dense(
            self.num_outputs,
            activation=tf.keras.activations.linear,
            name="action_logits")(last_layer)

        values = tf.keras.layers.Dense(
            1, activation=None, name="values")(last_layer)

        inputs = [input_layer, seq_in, state_in_h, state_in_c]
        if self.use_prev_action:
            # Keep the input ordering [obs, actions, seq, h, c].
            inputs.insert(1, actions_layer)
        outputs = [action, values, state_h, state_c]

        self.rnn_model = tf.keras.Model(
                inputs=inputs,
                outputs=outputs)
        self.register_variables(self.rnn_model.variables)
        self.rnn_model.summary()
Пример #17
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Actor network with one or two independent critic towers.

        Builds a policy tower ("fc_*" / "fc_out"), a value tower
        ("fc_value_*" / "value_out"), and — when
        ``custom_options["use_diversity_value_network"]`` is set — a third
        tower ("fc_value_novel_*" / "value_out_novel") estimating the
        diversity value. Shared policy/value layers are not supported.

        Args:
            obs_space: Observation space; only its flattened shape is used.
            action_space: Action space (unused directly here).
            num_outputs: Size of the policy output.
            model_config: Reads ``fcnet_activation``, ``fcnet_hiddens``,
                ``no_final_linear``, ``vf_share_layers`` (must be falsy) and
                ``custom_options["use_diversity_value_network"]``.
            name: Model name.
        """
        super(ActorDoubleCriticNetwork,
              self).__init__(obs_space, action_space, num_outputs,
                             model_config, name)

        act_fn = get_activation_fn(model_config.get("fcnet_activation"))
        layer_sizes = model_config.get("fcnet_hiddens")
        no_final_linear = model_config.get("no_final_linear")
        vf_share_layers = model_config.get("vf_share_layers")
        self.use_diversity_value_network = model_config['custom_options'][
            "use_diversity_value_network"]

        # The model consumes obs_flat, so the input is the flattened obs.
        inputs = tf.keras.layers.Input(shape=(np.product(obs_space.shape), ),
                                       name="observations")

        def _tower(sizes, prefix):
            # Dense chain from the observation input, layers named
            # "<prefix>1", "<prefix>2", ...
            tensor = inputs
            for idx, width in enumerate(sizes, 1):
                tensor = tf.keras.layers.Dense(
                    width,
                    name="{}{}".format(prefix, idx),
                    activation=act_fn,
                    kernel_initializer=normc_initializer(1.0))(tensor)
            return tensor

        if no_final_linear:
            # Final layer doubles as the output and keeps the activation.
            layer_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=act_fn,
                kernel_initializer=normc_initializer(1.0))(
                    _tower(layer_sizes[:-1], "fc_"))
        else:
            # Linear projection to num_outputs after all hidden layers.
            layer_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(
                    _tower(layer_sizes, "fc_"))

        # pengzh: we use three different NN with same size.
        assert not vf_share_layers
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(
                _tower(layer_sizes, "fc_value_"))

        if self.use_diversity_value_network:
            # Extra critic head for the diversity ("novelty") value.
            value_out_novel = tf.keras.layers.Dense(
                1,
                name="value_out_novel",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(
                    _tower(layer_sizes, "fc_value_novel_"))
            self.base_model = tf.keras.Model(
                inputs, [layer_out, value_out, value_out_novel])
        else:
            self.base_model = tf.keras.Model(inputs, [layer_out, value_out])
        self.register_variables(self.base_model.variables)
Пример #18
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Standard fully-connected policy/value network.

        Builds a policy tower ("fc_*" / "fc_out") over the flattened
        observation and either shares it with the value head
        (``vf_share_layers``) or builds a parallel value tower
        ("fc_value_*"). With ``no_final_linear`` the last hidden layer is
        sized to ``num_outputs`` and keeps the hidden activation.

        Args:
            obs_space: Observation space; only its flattened shape is used.
            action_space: Action space (unused directly here).
            num_outputs: Size of the policy output.
            model_config: Reads ``fcnet_activation``, ``fcnet_hiddens``,
                ``no_final_linear`` and ``vf_share_layers``.
            name: Model name.
        """
        super(FullyConnectedNetwork,
              self).__init__(obs_space, action_space, num_outputs,
                             model_config, name)

        act_fn = get_activation_fn(model_config.get("fcnet_activation"))
        layer_sizes = model_config.get("fcnet_hiddens")
        no_final_linear = model_config.get("no_final_linear")
        vf_share_layers = model_config.get("vf_share_layers")

        # The model consumes obs_flat, so the input is the flattened obs.
        inputs = tf.keras.layers.Input(
            shape=(np.product(obs_space.shape), ), name="observations")

        def _hidden_stack(tensor, sizes, prefix):
            # Dense chain with layers named "<prefix>1", "<prefix>2", ...
            for idx, width in enumerate(sizes, 1):
                tensor = tf.keras.layers.Dense(
                    width,
                    name="{}{}".format(prefix, idx),
                    activation=act_fn,
                    kernel_initializer=normc_initializer(1.0))(tensor)
            return tensor

        if no_final_linear:
            # Final layer doubles as the output and keeps the activation.
            last_layer = _hidden_stack(inputs, layer_sizes[:-1], "fc_")
            layer_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=act_fn,
                kernel_initializer=normc_initializer(1.0))(last_layer)
        else:
            # Linear projection to num_outputs after all hidden layers.
            last_layer = _hidden_stack(inputs, layer_sizes, "fc_")
            layer_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)

        if not vf_share_layers:
            # Separate tower of hidden layers for the value function.
            last_layer = _hidden_stack(inputs, layer_sizes, "fc_value_")

        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)

        self.base_model = tf.keras.Model(inputs, [layer_out, value_out])
        self.register_variables(self.base_model.variables)
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Dense + LSTM network, optionally conditioned on previous actions.

        Observations pass through a dense stack, then an LSTM, then linear
        action-logits and value heads.

        Fix: the original reassigned ``input_layer`` to the output of the
        ``Concatenate`` layer and then placed that tensor in the
        ``tf.keras.Model(inputs=...)`` list; Keras model inputs must be
        ``Input`` placeholders. The concatenation result now goes into
        ``last_layer`` while ``input_layer`` stays the actual placeholder.

        Args:
            obs_space: Observation space; the feature axis follows the
                (None) time axis.
            action_space: Action space; its shape sizes the prev-action input.
            num_outputs: Size of the action-logits output.
            model_config: Reads ``fcnet_hiddens``, ``fcnet_activation``,
                ``lstm_cell_size`` and ``lstm_use_prev_action_reward``.
            name: Model name.
        """
        super(LSTM, self).__init__(obs_space, action_space, num_outputs,
                                   model_config, name)
        self.obs_space = obs_space
        self.num_outputs = num_outputs

        # Batch x Time x (obs features); extra None for the time dimension.
        # NOTE(review): the original comment said "H x W x C", but the tensor
        # is fed straight into Dense layers — presumably obs are flat here.
        input_layer = tf.keras.layers.Input((None, ) + obs_space.shape,
                                            name="inputs")

        last_layer = input_layer

        # If true we append the previous actions after the observation input.
        self.lstm_use_prev_action_reward = model_config.get(
            "lstm_use_prev_action_reward")
        if self.lstm_use_prev_action_reward:
            actions_layer = tf.keras.layers.Input(shape=(None, ) +
                                                  action_space.shape,
                                                  name="agent_actions")
            last_layer = tf.keras.layers.Concatenate()(
                [input_layer, actions_layer])

        hiddens = model_config.get("fcnet_hiddens")
        i = 1
        fc_activation = get_activation_fn(model_config.get("fcnet_activation"))
        for size in hiddens:
            last_layer = tf.keras.layers.Dense(
                size,
                name="fc_{}".format(i),
                activation=fc_activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
            i += 1

        self.cell_size = model_config['lstm_cell_size']

        # Recurrent state inputs plus the sequence-length input used to mask
        # padded timesteps.
        state_in_h = tf.keras.layers.Input(shape=(self.cell_size, ), name="h")
        state_in_c = tf.keras.layers.Input(shape=(self.cell_size, ), name="c")
        seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)

        # expects B x T x (Features)
        lstm_out, state_h, state_c = tf.keras.layers.LSTM(
            self.cell_size,
            return_sequences=True,
            return_state=True,
            name="lstm")(inputs=last_layer,
                         mask=tf.sequence_mask(seq_in),
                         initial_state=[state_in_h, state_in_c])
        # output: B x cell_size

        last_layer = lstm_out

        action = tf.keras.layers.Dense(self.num_outputs,
                                       activation=tf.keras.activations.linear,
                                       name="action_logits")(last_layer)

        values = tf.keras.layers.Dense(1, activation=None,
                                       name="values")(last_layer)

        inputs = [input_layer, seq_in, state_in_h, state_in_c]
        if self.lstm_use_prev_action_reward:
            # Keep the input ordering [obs, actions, seq, h, c].
            inputs.insert(1, actions_layer)
        outputs = [action, values, state_h, state_c]

        self.rnn_model = tf.keras.Model(inputs=inputs, outputs=outputs)
        self.register_variables(self.rnn_model.variables)
        self.rnn_model.summary()
Пример #20
0
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Convolutional policy/value network (RLlib vision-net style).

        Policy output is produced fully convolutionally: a stack of
        "same"-padded convs, one "valid"-padded conv, and (unless
        ``no_final_linear``) a final 1x1 conv to ``num_outputs`` channels.
        The value head either shares those layers (``vf_share_layers``) or
        mirrors them in a parallel "conv_value_*" stack.

        Fix: with a single-entry filter list, ``enumerate(filters[:-1], 1)``
        never runs and ``i`` was unbound, so ``"conv{}".format(i + 1)``
        raised ``NameError``; ``i`` is now initialized to 0 before each loop
        (matching the guard the ObedienceLSTM model uses).

        Args:
            obs_space: Image observation space.
            action_space: Action space (unused directly here).
            num_outputs: Channel count of the policy output.
            model_config: Reads ``conv_activation``, ``conv_filters``
                (falls back to ``_get_filter_config``), ``no_final_linear``
                and ``vf_share_layers``.
            name: Model name.
        """
        super(VisionNetwork, self).__init__(obs_space, action_space,
                                            num_outputs, model_config, name)

        activation = get_activation_fn(model_config.get("conv_activation"))
        filters = model_config.get("conv_filters")
        if not filters:
            filters = _get_filter_config(obs_space.shape)
        no_final_linear = model_config.get("no_final_linear")
        vf_share_layers = model_config.get("vf_share_layers")

        inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                       name="observations")
        last_layer = inputs

        # Build the action layers
        i = 0  # keeps "conv{i+1}" naming valid when len(filters) == 1
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="same",
                name="conv{}".format(i))(last_layer)
        out_size, kernel, stride = filters[-1]
        if no_final_linear:
            # the last layer is adjusted to be of size num_outputs
            last_layer = tf.keras.layers.Conv2D(num_outputs,
                                                kernel,
                                                strides=(stride, stride),
                                                activation=activation,
                                                padding="valid",
                                                name="conv_out")(last_layer)
            conv_out = last_layer
        else:
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="valid",
                name="conv{}".format(i + 1))(last_layer)
            # 1x1 conv to the requested number of output channels.
            conv_out = tf.keras.layers.Conv2D(num_outputs, [1, 1],
                                              activation=None,
                                              padding="same",
                                              name="conv_out")(last_layer)

        # Build the value layers
        if vf_share_layers:
            # Assumes the conv stack has reduced spatial dims to 1x1.
            last_layer = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
            value_out = tf.keras.layers.Dense(
                1,
                name="value_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)
        else:
            # build a parallel set of hidden layers for the value net
            last_layer = inputs
            i = 0  # same single-filter guard as the policy stack above
            for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
                last_layer = tf.keras.layers.Conv2D(
                    out_size,
                    kernel,
                    strides=(stride, stride),
                    activation=activation,
                    padding="same",
                    name="conv_value_{}".format(i))(last_layer)
            out_size, kernel, stride = filters[-1]
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="valid",
                name="conv_value_{}".format(i + 1))(last_layer)
            last_layer = tf.keras.layers.Conv2D(
                1, [1, 1],
                activation=None,
                padding="same",
                name="conv_value_out")(last_layer)
            value_out = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

        self.base_model = tf.keras.Model(inputs, [conv_out, value_out])
        self.register_variables(self.base_model.variables)
Пример #21
0
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 q_hiddens=None,
                 dueling=False,
                 num_atoms=1,
                 use_noisy=False,
                 v_min=-10.0,
                 v_max=10.0,
                 sigma0=0.5,
                 parameter_noise=False):

        if q_hiddens or dueling or num_atoms != 1 or use_noisy:
            raise NotImplementedError

        model_config = with_base_config(
            base_config=DEFAULT_STRATEGO_MODEL_CONFIG,
            extra_config=model_config)
        TFModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)

        print(model_config)

        observation_mode = model_config['custom_options']['observation_mode']
        if observation_mode == PARTIALLY_OBSERVABLE:
            self.vf_obs_key = 'partial_observation'
        elif observation_mode == FULLY_OBSERVABLE:
            self.vf_obs_key = 'full_observation'
        elif observation_mode == BOTH_OBSERVATIONS:
            raise ValueError(
                f"Using {BOTH_OBSERVATIONS} format doesn't make sense for a Q-network, there's no policy, just a Q-function"
            )

        else:
            assert False, "policy observation_mode must be in [PARTIALLY_OBSERVABLE, FULLY_OBSERVABLE, BOTH_OBSERVATIONS]"

        if model_config["custom_preprocessor"]:
            print(obs_space)

            self.preprocessor = ModelCatalog.get_preprocessor_for_space(
                observation_space=self.obs_space.original_space,
                options=model_config)
        else:
            self.preprocessor = None
            logger.warn(
                "No custom preprocessor for StrategoModel was specified.\n"
                "Some tree search policies may not initialize their placeholders correctly without this."
            )

        self.use_lstm = model_config['use_lstm']
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.mask_invalid_actions = model_config['custom_options'][
            'mask_invalid_actions']

        conv_activation = get_activation_fn(
            model_config.get("conv_activation"))
        lstm_filters = model_config["custom_options"]['lstm_filters']
        cnn_filters = model_config.get("conv_filters")
        final_pi_filter_amt = model_config["custom_options"][
            "final_pi_filter_amt"]

        rows = obs_space.original_space[self.vf_obs_key].shape[0]
        colums = obs_space.original_space[self.vf_obs_key].shape[1]

        if self.use_lstm:
            self._lstm_state_shape = (rows, colums, lstm_filters[0][0])
            # self._lstm_state_shape = (64,)

        if self.use_lstm:
            state_in = [
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="vf_lstm_h"),
                tf.keras.layers.Input(shape=self._lstm_state_shape,
                                      name="vf_lstm_c")
            ]

            seq_lens_in = tf.keras.layers.Input(shape=(), name="lstm_seq_in")

            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=(None, *obs_space.original_space[self.vf_obs_key].shape),
                name="vf_observation")

        else:
            state_in, seq_lens_in = None, None

            self.vf_obs_inputs = tf.keras.layers.Input(
                shape=obs_space.original_space[self.vf_obs_key].shape,
                name="vf_observation")

        # if pi_cnn_filters is None:
        #     assert False
        #     # assuming board size will always remain the same for both pi and vf networks
        #     if self.use_lstm:
        #         single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[2:]
        #     else:
        #         single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[1:]
        #     pi_cnn_filters = _get_filter_config(single_obs_input_shape)
        #
        # if v_cnn_filters is None:
        #     assert False
        #     # assuming board size will always remain the same for both pi and vf networks
        #     if self.use_lstm:
        #         single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[2:]
        #     else:
        #         single_obs_input_shape = self.pi_obs_inputs.shape.as_list()[1:]
        #     v_cnn_filters = _get_filter_config(single_obs_input_shape)

        def maybe_td(layer):
            if self.use_lstm:
                return tf.keras.layers.TimeDistributed(layer=layer,
                                                       name=f"td_{layer.name}")
            else:
                return layer

        def build_primary_layers(prefix: str, obs_in: tf.Tensor,
                                 state_in: tf.Tensor):
            # encapsulated in a function to either be called once for shared policy/vf or twice for separate policy/vf

            _last_layer = obs_in

            for i, (out_size, kernel, stride) in enumerate(cnn_filters):
                _last_layer = maybe_td(
                    tf.keras.layers.Conv2D(filters=out_size,
                                           kernel_size=kernel,
                                           strides=stride,
                                           activation=conv_activation,
                                           padding="same",
                                           name="{}_conv_{}".format(
                                               prefix, i)))(_last_layer)

                if parameter_noise:
                    # assuming inputs shape (batch_size x w x h x channel)
                    _last_layer = maybe_td(
                        tf.keras.layers.LayerNormalization(
                            axis=(1, 2),
                            name=f"{prefix}_LayerNorm_{i}"))(_last_layer)

            state_out = state_in
            if self.use_lstm:
                for i, (out_size, kernel, stride) in enumerate(lstm_filters):
                    if i > 0:
                        raise NotImplementedError(
                            "Only single lstm layers are implemented right now"
                        )

                    _last_layer, *state_out = tf.keras.layers.ConvLSTM2D(
                        filters=out_size,
                        kernel_size=kernel,
                        strides=stride,
                        activation=conv_activation,
                        padding="same",
                        return_sequences=True,
                        return_state=True,
                        name="{}_convlstm".format(prefix))(
                            inputs=_last_layer,
                            mask=tf.sequence_mask(seq_lens_in),
                            initial_state=state_in)
                    raise NotImplementedError(
                        "havent checked lstms for q model"
                        "")
            return _last_layer, state_out

        if self.use_lstm:
            vf_state_in = state_in[2:]
        else:
            pi_state_in, vf_state_in = None, None

        vf_last_layer, vf_state_out = build_primary_layers(
            prefix="vf", obs_in=self.vf_obs_inputs, state_in=vf_state_in)

        if self.use_lstm:
            state_out = vf_state_out
        else:
            state_out = None

        vf_last_layer = maybe_td(
            tf.keras.layers.Conv2D(filters=final_pi_filter_amt,
                                   kernel_size=[3, 3],
                                   strides=1,
                                   activation=conv_activation,
                                   padding="same",
                                   name="{}_conv_{}".format(
                                       'vf', "last")))(vf_last_layer)

        if parameter_noise:
            # assuming inputs shape (batch_size x w x h x channel)
            vf_last_layer = maybe_td(
                tf.keras.layers.LayerNormalization(
                    axis=(1, 2), name=f"vf_LayerNorm_last"))(vf_last_layer)

        print(
            f"action space n: {action_space.n}, rows: {rows}, columns: {colums}, filters: {int(action_space.n / (rows * colums))}"
        )

        unmasked_logits_out = maybe_td(
            tf.keras.layers.Conv2D(
                filters=int(action_space.n / (rows * colums)),
                kernel_size=[3, 3],
                strides=1,
                activation=None,
                padding="same",
                name="{}_conv_{}".format('vf',
                                         "unmasked_logits")))(vf_last_layer)

        # vf_last_layer = maybe_td(tf.keras.layers.Conv2D(
        #     filters=1,
        #     kernel_size=[1, 1],
        #     strides=1,
        #     activation=conv_activation,
        #     padding="same",
        #     name="{}_conv_{}".format('vf', "last")))(vf_last_layer)
        #
        # vf_last_layer = maybe_td(tf.keras.layers.Flatten(name="vf_flatten"))(vf_last_layer)
        #
        # value_out = maybe_td(tf.keras.layers.Dense(
        #     units=1,
        #     name="vf_out",
        #     activation=None,
        #     kernel_initializer=normc_initializer(0.01)))(vf_last_layer)

        model_inputs = [self.vf_obs_inputs]
        model_outputs = [unmasked_logits_out]

        if self.use_lstm:
            model_inputs += [seq_lens_in, *state_in]
            model_outputs += state_out

        self.base_model = tf.keras.Model(inputs=model_inputs,
                                         outputs=model_outputs)

        print(self.base_model.summary())

        self.register_variables(self.base_model.variables)