Пример #1
0
    def __init__(self, net, dim_actions):
        """Attach an action head and a critic head to a feature network.

        Args:
            net: Network object exposing an ``output_size`` attribute; that
                value is used as the input size of both heads.
            dim_actions: Forwarded to ``CategoricalNet`` as its second
                argument (presumably the number of discrete actions --
                confirm against ``CategoricalNet``).
        """
        super().__init__()
        self.net, self.dim_actions = net, dim_actions

        # Both heads are sized from the network's reported output width.
        feature_size = self.net.output_size
        self.action_distribution = CategoricalNet(
            feature_size, self.dim_actions
        )
        self.critic = CriticHead(feature_size)
Пример #2
0
    def __init__(self, net, dim_actions, *, supervise_stop=False):
        """Attach action head(s) and a critic head to a feature network.

        Args:
            net: Network object exposing an ``output_size`` attribute; that
                value is used as the input size of every head.
            dim_actions: Size argument for the action head(s). In the
                default mode it is passed unchanged to ``CategoricalNet``.
            supervise_stop: Keyword-only switch, stored on the instance as
                ``self.supervise_stop``. When false (the default, matching
                the previously hard-coded value) a single
                ``action_distribution`` head over ``dim_actions`` outputs is
                created. When true, two heads are created instead:
                ``non_stop_action_distribution`` with ``dim_actions - 1``
                outputs and ``stop_action_distribution`` with 2 outputs.
                NOTE(review): confirm the rest of the class handles the
                two-head layout before enabling it.
        """
        super().__init__()
        self.net = net
        self.dim_actions = dim_actions

        # Previously fixed to False, which made the two-head branch below
        # unreachable; it is now selectable by the caller.
        self.supervise_stop = supervise_stop

        feature_size = self.net.output_size

        if self.supervise_stop:
            # Separate head with one output fewer than ``dim_actions`` ...
            self.non_stop_action_distribution = CategoricalNet(
                feature_size, self.dim_actions - 1)

            # ... plus a dedicated two-output head.
            self.stop_action_distribution = CategoricalNet(feature_size, 2)
        else:
            self.action_distribution = CategoricalNet(feature_size,
                                                      self.dim_actions)

        self.critic = CriticHead(feature_size)
Пример #3
0
    def __init__(
        self,
        observation_space,
        action_space,
        goal_sensor_uuid,
        hidden_size: int = 512,
    ):
        """Build the feature network and the action head.

        Args:
            observation_space: Forwarded unchanged to ``Net``.
            action_space: Object exposing an ``n`` attribute, which is stored
                as ``self.dim_actions``.
            goal_sensor_uuid: Stored on the instance and forwarded to ``Net``.
            hidden_size: Forwarded to ``Net``; defaults to 512.

        NOTE(review): this definition ends at the edge of the visible source
        and may continue beyond it -- confirm before relying on this summary
        being complete.
        """
        super().__init__()
        # ``action_space.n`` sizes the action head created below.
        self.dim_actions = action_space.n
        self.goal_sensor_uuid = goal_sensor_uuid
        self.net = Net(
            observation_space=observation_space,
            hidden_size=hidden_size,
            goal_sensor_uuid=goal_sensor_uuid,
        )

        # The action head takes the network's reported output size as input.
        self.action_distribution = CategoricalNet(self.net.output_size,
                                                  self.dim_actions)