Example #1
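The snippets on this page appear to come from Unity's ML-Agents toolkit and assume import torch together with its trainer-side helpers (NetworkBody, ActionModel, the distribution classes, and so on). This first example is an actor constructor: model metadata such as the version number, action type, and action sizes is wrapped in torch.nn.Parameter tensors so it travels with the model weights, and the action distribution is chosen according to ActionType.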
    def __init__(
        self,
        observation_shapes: List[Tuple[int, ...]],
        network_settings: NetworkSettings,
        act_type: ActionType,
        act_size: List[int],
        conditional_sigma: bool = False,
        tanh_squash: bool = False,
    ):
        super().__init__()
        self.act_type = act_type
        self.act_size = act_size
        self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
        self.is_continuous_int = torch.nn.Parameter(
            torch.Tensor([int(act_type == ActionType.CONTINUOUS)]))
        self.act_size_vector = torch.nn.Parameter(torch.Tensor(act_size))
        self.network_body = NetworkBody(observation_shapes, network_settings)
        if network_settings.memory is not None:
            self.encoding_size = network_settings.memory.memory_size // 2
        else:
            self.encoding_size = network_settings.hidden_units

        if self.act_type == ActionType.CONTINUOUS:
            self.distribution = GaussianDistribution(
                self.encoding_size,
                act_size[0],
                conditional_sigma=conditional_sigma,
                tanh_squash=tanh_squash,
            )
        else:
            self.distribution = MultiCategoricalDistribution(
                self.encoding_size, act_size)
Example #2
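An ONNX-export forward() method: for continuous actions it samples and, when _clip_action_on_export is set, clips the result into [-1, 1]; for discrete actions it returns all branch log-probabilities. The constant metadata tensors from the constructor are returned alongside the action.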
    def forward(
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, int, int, int, int]:
        """
        Note: This forward() method is required for exporting to ONNX. Don't modify the inputs and outputs.
        """
        dists, _ = self.get_dists(vec_inputs, vis_inputs, masks, memories, 1)
        if self.action_spec.is_continuous():
            action_list = self.sample_action(dists)
            action_out = torch.stack(action_list, dim=-1)
            if self._clip_action_on_export:
                action_out = torch.clamp(action_out, -3, 3) / 3
        else:
            action_out = torch.cat([dist.all_log_prob() for dist in dists], dim=1)
        return (
            action_out,
            self.version_number,
            torch.Tensor([self.network_body.memory_size]),
            self.is_continuous_int,
            self.act_size_vector,
        )
Example #3
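A later revision of the actor constructor built around ActionSpec: the continuous and discrete action sizes are exported as separate constant tensors, while the pre-hybrid-action constants are kept under *_deprecated names, apparently for backward compatibility with older consumers of the exported model.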
    def __init__(
        self,
        observation_shapes: List[Tuple[int, ...]],
        network_settings: NetworkSettings,
        action_spec: ActionSpec,
        conditional_sigma: bool = False,
        tanh_squash: bool = False,
    ):
        super().__init__()
        self.action_spec = action_spec
        self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
        self.is_continuous_int_deprecated = torch.nn.Parameter(
            torch.Tensor([int(self.action_spec.is_continuous())])
        )
        self.continuous_act_size_vector = torch.nn.Parameter(
            torch.Tensor([int(self.action_spec.continuous_size)]), requires_grad=False
        )
        # TODO: export list of branch sizes instead of sum
        self.discrete_act_size_vector = torch.nn.Parameter(
            torch.Tensor([sum(self.action_spec.discrete_branches)]), requires_grad=False
        )
        self.act_size_vector_deprecated = torch.nn.Parameter(
            torch.Tensor(
                [
                    self.action_spec.continuous_size
                    + sum(self.action_spec.discrete_branches)
                ]
            ),
            requires_grad=False,
        )
        self.network_body = NetworkBody(observation_shapes, network_settings)
        if network_settings.memory is not None:
            self.encoding_size = network_settings.memory.memory_size // 2
        else:
            self.encoding_size = network_settings.hidden_units

        self.action_model = ActionModel(
            self.encoding_size,
            action_spec,
            conditional_sigma=conditional_sigma,
            tanh_squash=tanh_squash,
        )
Example #4
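An ActionSpec-based variant that still selects a single GaussianDistribution or MultiCategoricalDistribution, and records in _clip_action_on_export whether actions must be clipped before ONNX export (during training, clipping is done in TorchPolicy).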
    def __init__(
        self,
        observation_shapes: List[Tuple[int, ...]],
        network_settings: NetworkSettings,
        action_spec: ActionSpec,
        conditional_sigma: bool = False,
        tanh_squash: bool = False,
    ):
        super().__init__()
        self.action_spec = action_spec
        self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
        self.is_continuous_int = torch.nn.Parameter(
            torch.Tensor([int(self.action_spec.is_continuous())]))
        self.act_size_vector = torch.nn.Parameter(
            torch.Tensor([
                self.action_spec.continuous_size +
                sum(self.action_spec.discrete_branches)
            ]),
            requires_grad=False,
        )
        self.network_body = NetworkBody(observation_shapes, network_settings)
        if network_settings.memory is not None:
            self.encoding_size = network_settings.memory.memory_size // 2
        else:
            self.encoding_size = network_settings.hidden_units

        if self.action_spec.is_continuous():
            self.distribution = GaussianDistribution(
                self.encoding_size,
                self.action_spec.continuous_size,
                conditional_sigma=conditional_sigma,
                tanh_squash=tanh_squash,
            )
        else:
            self.distribution = MultiCategoricalDistribution(
                self.encoding_size, self.action_spec.discrete_branches)
        # During training, clipping is done in TorchPolicy, but we need to clip before ONNX
        # export as well.
        self._clip_action_on_export = not tanh_squash
Example #5
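The hybrid-action export forward(): because torch.onnx.export() does not accept None as a tensor to export, the size of the returned tuple varies with the action spec, and the deprecated outputs are only appended for purely continuous or purely discrete specs.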
    def forward(
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
    ) -> Tuple[Union[int, torch.Tensor], ...]:
        """
        Note: This forward() method is required for exporting to ONNX. Don't modify the inputs and outputs.

        At this moment, torch.onnx.export() doesn't accept None as tensor to be exported,
        so the size of return tuple varies with action spec.
        """
        encoding, memories_out = self.network_body(vec_inputs,
                                                   vis_inputs,
                                                   memories=memories,
                                                   sequence_length=1)

        (
            cont_action_out,
            disc_action_out,
            action_out_deprecated,
        ) = self.action_model.get_action_out(encoding, masks)
        export_out = [
            self.version_number,
            torch.Tensor([self.network_body.memory_size]),
        ]
        if self.action_spec.continuous_size > 0:
            export_out += [cont_action_out, self.continuous_act_size_vector]
        if self.action_spec.discrete_size > 0:
            export_out += [disc_action_out, self.discrete_act_size_vector]
        # Only export deprecated nodes with non-hybrid action spec
        if self.action_spec.continuous_size == 0 or self.action_spec.discrete_size == 0:
            export_out += [
                action_out_deprecated,
                self.is_continuous_int_deprecated,
                self.act_size_vector_deprecated,
            ]
        return tuple(export_out)
Example #6
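A newer constructor that takes ObservationSpec objects, reads the version from a MODEL_EXPORT_VERSION class attribute, exports the discrete branch sizes directly (resolving the TODO in Example #3), and additionally exports the recurrent memory size as a constant tensor.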
    def __init__(
        self,
        observation_specs: List[ObservationSpec],
        network_settings: NetworkSettings,
        action_spec: ActionSpec,
        conditional_sigma: bool = False,
        tanh_squash: bool = False,
    ):
        super().__init__()
        self.action_spec = action_spec
        self.version_number = torch.nn.Parameter(
            torch.Tensor([self.MODEL_EXPORT_VERSION]), requires_grad=False
        )
        self.is_continuous_int_deprecated = torch.nn.Parameter(
            torch.Tensor([int(self.action_spec.is_continuous())]), requires_grad=False
        )
        self.continuous_act_size_vector = torch.nn.Parameter(
            torch.Tensor([int(self.action_spec.continuous_size)]), requires_grad=False
        )
        self.discrete_act_size_vector = torch.nn.Parameter(
            torch.Tensor([self.action_spec.discrete_branches]), requires_grad=False
        )
        self.act_size_vector_deprecated = torch.nn.Parameter(
            torch.Tensor(
                [
                    self.action_spec.continuous_size
                    + sum(self.action_spec.discrete_branches)
                ]
            ),
            requires_grad=False,
        )
        self.network_body = NetworkBody(observation_specs, network_settings)
        if network_settings.memory is not None:
            self.encoding_size = network_settings.memory.memory_size // 2
        else:
            self.encoding_size = network_settings.hidden_units
        self.memory_size_vector = torch.nn.Parameter(
            torch.Tensor([int(self.network_body.memory_size)]), requires_grad=False
        )

        self.action_model = ActionModel(
            self.encoding_size,
            action_spec,
            conditional_sigma=conditional_sigma,
            tanh_squash=tanh_squash,
        )
Example #7
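An earlier export forward() that always samples, then returns either the stacked continuous actions or the log-probabilities of the first discrete distribution.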
    def forward(
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, int, int, int, int]:
        """
        Note: This forward() method is required for exporting to ONNX. Don't modify the inputs and outputs.
        """
        dists, _ = self.get_dists(vec_inputs, vis_inputs, masks, memories, 1)
        action_list = self.sample_action(dists)
        sampled_actions = torch.stack(action_list, dim=-1)
        if self.act_type == ActionType.CONTINUOUS:
            action_out = sampled_actions
        else:
            action_out = dists[0].all_log_prob()
        return (
            action_out,
            self.version_number,
            torch.Tensor([self.network_body.memory_size]),
            self.is_continuous_int,
            self.act_size_vector,
        )
Example #8
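A minimal module wrapping the learning rate in a one-element torch.Tensor.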
    def __init__(self, lr):
        # TODO: add learning rate decay
        super().__init__()
        self.learning_rate = torch.Tensor([lr])
Example #9
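A global step counter stored as a non-trainable int64 nn.Parameter so that it is saved and restored with model checkpoints.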
    def __init__(self):
        super().__init__()
        self.__global_step = nn.Parameter(
            torch.Tensor([0]).to(torch.int64), requires_grad=False
        )
Example #10
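A unit test verifying that the Swish activation computes x * sigmoid(x) elementwise.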
def test_swish():
    layer = Swish()
    input_tensor = torch.Tensor([[1, 2, 3], [4, 5, 6]])
    target_tensor = torch.mul(input_tensor, torch.sigmoid(input_tensor))
    assert torch.all(torch.eq(layer(input_tensor), target_tensor))
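The pattern recurring through Examples #1-#7 and #9 is embedding constants in the network as non-trainable parameters so that torch.onnx.export() carries them into the graph as extra named outputs. Below is a minimal, self-contained sketch of that pattern; it is not ML-Agents code, and the MetadataModel name and shapes are made up for illustration.

import torch
import torch.nn as nn


class MetadataModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 2)
        # Constant metadata: requires_grad=False keeps the optimizer away from it,
        # while it still lands in state_dict() and in the exported graph.
        self.version_number = nn.Parameter(torch.Tensor([2.0]), requires_grad=False)

    def forward(self, x: torch.Tensor):
        # Returning the constant makes it a named output of the exported model.
        return self.linear(x), self.version_number


model = MetadataModel()
torch.onnx.export(
    model,
    torch.zeros(1, 4),
    "model.onnx",
    output_names=["output", "version_number"],
)

A consumer of the resulting .onnx file can then read version_number as an ordinary model output, without any of the Python classes on the other side.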