Example #1
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = model_config.get("fcnet_activation")
        hiddens = model_config.get("fcnet_hiddens", [])
        no_final_linear = model_config.get("no_final_linear")
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.free_log_std = model_config.get("free_log_std")

        # Generate free-floating bias variables for the second half of
        # the outputs.
        if self.free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2

        layers = []
        prev_layer_size = int(np.product(obs_space.shape))
        self._logits = None

        # Create layers 0 to second-last.
        for size in hiddens[:-1]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = size

        # The last layer is adjusted to be of size num_outputs, but it
        # remains an activation layer (not a plain linear output).
        if no_final_linear and num_outputs:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=num_outputs,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = num_outputs
        # Finish the layers with the provided sizes (`hiddens`), plus
        # (iff num_outputs > 0) a final linear layer of size num_outputs.
        else:
            if len(hiddens) > 0:
                layers.append(
                    SlimFC(in_size=prev_layer_size,
                           out_size=hiddens[-1],
                           initializer=normc_initializer(1.0),
                           activation_fn=activation))
                prev_layer_size = hiddens[-1]
            if num_outputs:
                self._logits = SlimFC(in_size=prev_layer_size,
                                      out_size=num_outputs,
                                      initializer=normc_initializer(0.01),
                                      activation_fn=None)
            else:
                self.num_outputs = ([int(np.product(obs_space.shape))] +
                                    hiddens[-1:])[-1]

        # Layer to add the log std vars to the state-dependent means.
        if self.free_log_std and self._logits:
            self._append_free_log_std = AppendBiasLayer(num_outputs)

        self._hidden_layers = nn.Sequential(*layers)

        self._value_branch_separate = None
        if not self.vf_share_layers:
            # Build a parallel set of hidden layers for the value net.
            prev_vf_layer_size = int(np.product(obs_space.shape))
            vf_layers = []
            for size in hiddens:
                vf_layers.append(
                    SlimFC(in_size=prev_vf_layer_size,
                           out_size=size,
                           activation_fn=activation,
                           initializer=normc_initializer(1.0)))
                prev_vf_layer_size = size
            self._value_branch_separate = nn.Sequential(*vf_layers)

        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        # Holds the current "base" output (before logits layer).
        self._features = None
        # Holds the last input, in case value branch is separate.
        self._last_flat_in = None
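
For context, the constructor above only builds the layers. Below is a minimal sketch of the forward()/value_function() pair it is designed to feed, modeled on RLlib's FullyConnectedNetwork; treat it as illustrative rather than part of the example:

    def forward(self, input_dict, state, seq_lens):
        obs = input_dict["obs_flat"].float()
        self._last_flat_in = obs.reshape(obs.shape[0], -1)
        self._features = self._hidden_layers(self._last_flat_in)
        # Apply the final logits layer if one was built, else pass features.
        logits = self._logits(self._features) if self._logits \
            else self._features
        if self.free_log_std:
            # Append the free-floating log-std bias vars to the means.
            logits = self._append_free_log_std(logits)
        return logits, state

    def value_function(self):
        assert self._features is not None, "must call forward() first"
        if self._value_branch_separate:
            # The separate value net consumes the raw flattened observation.
            return self._value_branch(
                self._value_branch_separate(self._last_flat_in)).squeeze(1)
        return self._value_branch(self._features).squeeze(1)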
Example #2
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 num_decompose=2):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = get_activation_fn(model_config.get("fcnet_activation"),
                                       framework="torch")
        hiddens = model_config.get("fcnet_hiddens")
        no_final_linear = model_config.get("no_final_linear")
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.free_log_std = model_config.get("free_log_std")

        self.num_decompose = num_decompose

        # Generate free-floating bias variables for the second half of
        # the outputs.
        if self.free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2

        layers = []
        prev_layer_size = int(np.product(obs_space.shape))
        self._logits = None

        # Create layers 0 to second-last.
        for size in hiddens[:-1]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = size

        # The last layer is adjusted to be of size num_outputs, but it
        # remains an activation layer (not a plain linear output).
        if no_final_linear and num_outputs:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=num_outputs,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = num_outputs
        # Finish the layers with the provided sizes (`hiddens`), plus
        # (iff num_outputs > 0) a final linear layer of size num_outputs.
        else:
            if len(hiddens) > 0:
                layers.append(
                    SlimFC(in_size=prev_layer_size,
                           out_size=hiddens[-1],
                           initializer=normc_initializer(1.0),
                           activation_fn=activation))
                prev_layer_size = hiddens[-1]
            if num_outputs:
                # NOTE: the original kept two alternative decompositions
                # commented out here (one small MLP head per decompose
                # factor, built from SlimFC or plain torch.nn.Linear
                # layers); a single wide linear head is used instead.
                self._logits = SlimFC(
                    in_size=prev_layer_size,
                    out_size=num_outputs * self.num_decompose,
                    initializer=normc_initializer(0.01),
                    activation_fn=None)
            else:
                raise ValueError("No num_outputs")

        # Layer to add the log std vars to the state-dependent means.
        if self.free_log_std and self._logits:
            self._append_free_log_std = AppendBiasLayer(num_outputs)

        self._hidden_layers = nn.Sequential(*layers)

        self._value_branch_separate = None
        if not self.vf_share_layers:
            # Build a parallel set of hidden layers for the value net.
            prev_vf_layer_size = int(np.product(obs_space.shape))
            vf_layers = []
            for size in hiddens:
                vf_layers.append(
                    SlimFC(in_size=prev_vf_layer_size,
                           out_size=size,
                           activation_fn=activation,
                           initializer=normc_initializer(1.0)))
                prev_vf_layer_size = size
            self._value_branch_separate = nn.Sequential(*vf_layers)

        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=self.num_decompose,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        # Holds the current "base" output (before logits layer).
        self._features = None
        # Holds the last input, in case value branch is separate.
        self._last_flat_in = None
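
The single head above emits num_outputs * num_decompose values, so a forward() pass has to split them back into per-factor slices; the value branch likewise emits num_decompose values, one per factor. A hypothetical helper for the logits side (the reshape order and the sum-aggregation are assumptions, since the example's forward() is not shown):

import torch

def split_decomposed_logits(flat_logits: torch.Tensor,
                            num_decompose: int,
                            num_outputs: int) -> torch.Tensor:
    # (batch, num_decompose * num_outputs) ->
    # (batch, num_decompose, num_outputs)
    per_factor = flat_logits.view(-1, num_decompose, num_outputs)
    # Aggregate the decomposed factors, e.g. by summation, into action logits.
    return per_factor.sum(dim=1)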
Example #3
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = get_activation_fn(model_config.get("fcnet_activation"),
                                       framework="torch")
        hiddens = model_config.get("fcnet_hiddens")
        no_final_linear = model_config.get("no_final_linear")
        self.free_log_std = model_config.get("free_log_std")

        # TODO(sven): implement case: vf_shared_layers = False.
        # vf_share_layers = model_config.get("vf_share_layers")

        logger.debug("Constructing fcnet {} {}".format(hiddens, activation))
        layers = []
        prev_layer_size = int(np.product(obs_space.shape))
        self._logits = None

        # Maybe generate free-floating bias variables for the second half of
        # the outputs.
        if self.free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2

        # Create layers 0 to second-last.
        for size in hiddens[:-1]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = size

        # The last layer is adjusted to be of size num_outputs, but it
        # remains an activation layer (not a plain linear output).
        if no_final_linear and num_outputs:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=num_outputs,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = num_outputs
        # Finish the layers with the provided sizes (`hiddens`), plus
        # (iff num_outputs > 0) a final linear layer of size num_outputs.
        else:
            if len(hiddens) > 0:
                layers.append(
                    SlimFC(in_size=prev_layer_size,
                           out_size=hiddens[-1],
                           initializer=normc_initializer(1.0),
                           activation_fn=activation))
                prev_layer_size = hiddens[-1]
            if num_outputs:
                self._logits = SlimFC(in_size=prev_layer_size,
                                      out_size=num_outputs,
                                      initializer=normc_initializer(0.01),
                                      activation_fn=None)
            else:
                self.num_outputs = ([int(np.product(obs_space.shape))] +
                                    hiddens[-1:])[-1]

        # Layer to add the log std vars to the state-dependent means.
        if self.free_log_std:
            self._append_free_log_std = AppendBiasLayer(num_outputs)

        self._hidden_layers = nn.Sequential(*layers)

        # TODO(sven): Implement non-shared value branch.
        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        # Holds the current "base" output (before logits layer).
        self._features = None
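
Whichever variant is used, such a model is wired into RLlib the same way: register it with the ModelCatalog and reference it via the "custom_model" key. A usage sketch (the class name FullyConnectedNetwork and the registration key "my_fcnet" are assumptions):

from ray.rllib.models import ModelCatalog

# Hypothetical: assumes the class defined above is named
# FullyConnectedNetwork in your module.
ModelCatalog.register_custom_model("my_fcnet", FullyConnectedNetwork)

config = {
    "model": {
        "custom_model": "my_fcnet",
        "fcnet_hiddens": [256, 256],
        "fcnet_activation": "tanh",
        "free_log_std": True,  # halves num_outputs, appends free bias vars
    },
}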
Example #4
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, control_input_size, control_hidden_size,
                 interaction_hidden_size):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        # TODO: handle no_final_linear.
        no_final_linear = model_config.get("no_final_linear")
        assert not no_final_linear, "no_final_linear is not implemented yet"

        self.vf_share_layers = model_config.get("vf_share_layers")
        self.vf_hiddens = model_config.get("vf_hiddens", [10, 10])
        self.free_log_std = model_config.get("free_log_std")
        self.control_input_size = control_input_size
        self.interaction_input_size = 2
        assert (np.product(obs_space.shape) ==
                self.control_input_size + self.interaction_input_size), \
            "Wrong size of obs space"
        activation = get_activation_fn(model_config.get("fcnet_activation"),
                                       framework="torch")

        # Is a separate std output required for each action dimension?
        self.std = ((num_outputs / 2) == np.product(action_space.shape))

        # Whether the log stds are free-floating (state-independent).
        if self.free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2

        # Output of the network; called "logits" for consistency with the
        # rest of RLlib.
        self._logits = None

        # Build the Negotiate model
        self.linear_1 = SlimFC(self.control_input_size,
                               control_hidden_size,
                               initializer=normc_initializer(1.0),
                               activation_fn=activation)
        self.linear_2_mean = SlimFC(control_hidden_size,
                                    2,
                                    initializer=normc_initializer(0.01),
                                    activation_fn=None)
        self.linear_accept_1 = SlimFC(self.interaction_input_size,
                                      interaction_hidden_size,
                                      initializer=normc_initializer(1.0),
                                      activation_fn=activation)
        self.linear_accept_2_mean = SlimFC(interaction_hidden_size,
                                           1,
                                           initializer=normc_initializer(0.01),
                                           activation_fn=None)
        self.control = nn.Sequential(self.linear_1, self.linear_2_mean)
        self.interaction = nn.Sequential(self.linear_accept_1,
                                         self.linear_accept_2_mean)
        self.linear_coop_mean = AppendBiasLayer(1)

        if self.std:
            if not self.free_log_std:
                self.linear_2_std = SlimFC(control_hidden_size,
                                           2,
                                           initializer=normc_initializer(0.01),
                                           activation_fn=None)
                self.linear_accept_2_std = SlimFC(
                    interaction_hidden_size,
                    1,
                    initializer=normc_initializer(0.01),
                    activation_fn=None)
                self.linear_coop_std = AppendBiasLayer(1)
                self.control_std = nn.Sequential(self.linear_1,
                                                 self.linear_2_std)
                self.interaction_std = nn.Sequential(self.linear_accept_1,
                                                     self.linear_accept_2_std)
                self.coop_std = AppendBiasLayer(1)
            else:
                self._append_free_log_std = AppendBiasLayer(num_outputs)

        # value function
        self._value_branch_separate = None
        if not self.vf_share_layers:
            # Build a parallel set of hidden layers for the value net.
            prev_vf_layer_size = int(np.product(obs_space.shape))
            vf_layers = []
            for size in self.vf_hiddens:
                vf_layers.append(
                    SlimFC(in_size=prev_vf_layer_size,
                           out_size=size,
                           activation_fn=activation,
                           initializer=normc_initializer(1.0)))
                prev_vf_layer_size = size
                prev_layer_size = prev_vf_layer_size
            self._value_branch_separate = nn.Sequential(*vf_layers)
        else:
            raise NotImplementedError("vf_share_layers=True is not supported")
        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        self._value_module = nn.Sequential(self._value_branch_separate,
                                           self._value_branch)
        # Holds the current "base" output (before logits layer).
        self._features = None
        # Holds the last input, in case value branch is separate.
        self._last_flat_in = None
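
The snippet does not show how the branches are combined. One speculative reading, given the output sizes (two control means, one accept mean, one appended coop bias), is a forward() along these lines; the slicing and concatenation order are assumptions:

    def forward(self, input_dict, state, seq_lens):
        obs = input_dict["obs_flat"].float()
        self._last_flat_in = obs.reshape(obs.shape[0], -1)
        # Assumption: the first control_input_size features feed the control
        # branch, the remaining interaction features feed the accept branch.
        control_in = self._last_flat_in[:, :self.control_input_size]
        interaction_in = self._last_flat_in[:, self.control_input_size:]
        means = torch.cat([self.control(control_in),
                           self.interaction(interaction_in)], dim=1)
        means = self.linear_coop_mean(means)  # appends the free coop bias
        if self.std and self.free_log_std:
            means = self._append_free_log_std(means)
        return means, state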
Example #5
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = model_config.get("fcnet_activation")
        hiddens = model_config.get("fcnet_hiddens")
        no_final_linear = model_config.get("no_final_linear")
        self.vf_share_layers = model_config.get("vf_share_layers")
        self.free_log_std = model_config.get("free_log_std")

        # Generate free-floating bias variables for the second half of
        # the outputs.
        if self.free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2

        layers = []
        prev_layer_size = int(np.product(obs_space.shape))
        self._logits = None

        # Create all hidden layers (note: unlike the variants above, this
        # loop runs over every entry in `hiddens`).
        for size in hiddens:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = size

        if num_outputs:
            self._logits = MultiActionFC(in_size=prev_layer_size,
                                         out_size=num_outputs,
                                         out_lens=[3, 10],
                                         at_hiddens=[32, 3],
                                         ap_hiddens=[32, 10],
                                         initializer=normc_initializer(0.01),
                                         activation=activation)
        else:
            self.num_outputs = ([int(np.product(obs_space.shape))] +
                                hiddens[-1:])[-1]

        # Layer to add the log std vars to the state-dependent means.
        if self.free_log_std and self._logits:
            self._append_free_log_std = AppendBiasLayer(num_outputs)

        self._hidden_layers = nn.Sequential(*layers)

        self._value_branch_separate = None
        if not self.vf_share_layers:
            # Build a parallel set of hidden layers for the value net.
            prev_vf_layer_size = int(np.product(obs_space.shape))
            vf_layers = []
            for size in hiddens:
                vf_layers.append(
                    SlimFC(in_size=prev_vf_layer_size,
                           out_size=size,
                           activation_fn=activation,
                           initializer=normc_initializer(1.0)))
                prev_vf_layer_size = size
            self._value_branch_separate = nn.Sequential(*vf_layers)

        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        # Holds the current "base" output (before logits layer).
        self._features = None
        # Holds the last input, in case value branch is separate.
        self._last_flat_in = None
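
MultiActionFC is project-specific and not shown in the snippet. A plausible minimal stand-in, inferred from its parameters (one small MLP head per action component, concatenated into a single logits vector); the class name, head structure, and fixed ReLU are all assumptions:

import torch
import torch.nn as nn

class MultiActionFCSketch(nn.Module):
    def __init__(self, in_size, out_size, out_lens, at_hiddens, ap_hiddens,
                 initializer=None, activation=None):
        super().__init__()
        # out_lens gives the per-component logit sizes, e.g. [3, 10].
        assert sum(out_lens) == out_size
        # at_hiddens/ap_hiddens = [hidden_size, head_out_size] for the two
        # heads; initializer/activation are accepted for signature parity
        # but this sketch hard-codes ReLU and default init.
        self.heads = nn.ModuleList([
            nn.Sequential(nn.Linear(in_size, hidden),
                          nn.ReLU(),
                          nn.Linear(hidden, out_len))
            for hidden, out_len in (at_hiddens, ap_hiddens)
        ])

    def forward(self, x):
        # Concatenate per-component logits into one flat vector.
        return torch.cat([head(x) for head in self.heads], dim=1)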