Пример #1
0
    def value(self, obs):
        """Compute Q-values for a batch of image observations.

        Scales pixels to [0, 1], runs four conv stages (the first three
        followed by 2x2 max pooling), flattens, then applies either a
        Dueling head (separate advantage/value streams) or a single FC
        head depending on ``self.algo``.
        """
        x = self.conv1(obs / 255.0)
        x = layers.pool2d(input=x, pool_size=2, pool_stride=2, pool_type='max')
        x = self.conv2(x)
        x = layers.pool2d(input=x, pool_size=2, pool_stride=2, pool_type='max')
        x = self.conv3(x)
        x = layers.pool2d(input=x, pool_size=2, pool_stride=2, pool_type='max')
        x = layers.flatten(self.conv4(x), axis=1)

        if self.algo != 'Dueling':
            return self.fc1(x)
        advantage = self.fc2_adv(self.fc1_adv(x))
        state_value = self.fc2_val(self.fc1_val(x))
        # Subtracting the mean advantage keeps the V/A decomposition identifiable.
        return advantage + (
            state_value - layers.reduce_mean(advantage, dim=1, keep_dim=True))
Пример #2
0
 def value(self, obs):
     """Return the scalar state value V(s) for each observation.

     Flattens the input, runs two FC layers plus a value head, and
     squeezes the trailing unit dimension so the output has shape [B].
     """
     flat = layers.flatten(obs, axis=1)
     hidden = self.fc2(self.fc1(flat))
     return layers.squeeze(self.value_fc(hidden), axes=[1])
Пример #3
0
 def value(self, obs):
     """Q-network: three conv blocks, flatten, then one FC head."""
     features = self.cc3(self.cc2(self.cc1(obs)))
     features = layers.flatten(features, axis=1)
     return self.fc1(features)
Пример #4
0
 def value(self, obs):
     """Return Q-values from a normalized image observation."""
     # Normalize raw pixel values to [0, 1].
     x = self.conv1(obs / 255.0)
     x = self.conv3(self.conv2(x))
     x = layers.flatten(x, axis=1)
     return self.fc1(self.fc0(x))
Пример #5
0
    def policy(self, obs):
        """Gaussian policy head: return (means, clipped log-std)."""
        flat = layers.flatten(obs, axis=1)
        hidden = self.fc2(self.fc1(flat))
        means = self.mean_linear(hidden)
        # Clamp log-std so the resulting distribution stays numerically stable.
        log_std = layers.clip(
            self.log_std_linear(hidden), min=LOG_SIG_MIN, max=LOG_SIG_MAX)
        return means, log_std
Пример #6
0
    def obs_ae(self, obs):
        """Autoencode an image observation.

        Conv encoder -> FC bottleneck -> decoder; the first decoder stage's
        output is reshaped back to the encoder's last conv shape before the
        remaining decoder stages run.
        """
        encoded = self.conv3(self.conv2(self.conv1(obs / 255.0)))
        conv_shape = encoded.shape  # remembered so the decoder can restore it
        bottleneck = self.fc(layers.flatten(encoded, axis=1))

        out = layers.reshape(self.decoder[0](bottleneck), conv_shape)
        for stage in self.decoder[1:]:
            out = stage(out)
        return out
Пример #7
0
    def value(self, hidden, act):
        """Critic: map (state features, action) to a scalar Q(s, a).

        Concatenates the flattened state with the action, runs two FC
        layers, and squeezes the result to shape [B].
        """
        state_flat = layers.flatten(hidden, axis=1)
        joined = layers.concat([state_flat, act], axis=1)
        q = self.fc2(self.fc1(joined))
        return layers.squeeze(q, axes=[1])
Пример #8
0
    def policy(self, obs):
        """
        Args:
            obs: A float32 tensor of shape [B, C, H, W]
        Returns:
            policy_logits: B * ACT_DIM
        """
        # Pixel normalization, three conv stages, then a convolutional
        # policy head flattened into per-action logits.
        features = self.conv3(self.conv2(self.conv1(obs / 255.0)))
        head = self.policy_conv(features)
        return layers.flatten(head, axis=1)
Пример #9
0
    def value(self, obs):
        """
        Args:
            obs: A float32 tensor of shape [B, C, H, W]
        Returns:
            value: B
        """
        features = self.conv3(self.conv2(self.conv1(obs / 255.0)))
        flat = layers.flatten(features, axis=1)
        # Squeeze the trailing unit dim so callers get a vector of length B.
        return layers.squeeze(self.value_fc(flat), axes=[1])
Пример #10
0
 def value(self, obs):
     """Dueling Q-network: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))."""
     # Normalize raw pixels to [0, 1].
     x = self.conv3(self.conv2(self.conv1(obs / 255.0)))
     x = layers.flatten(x, axis=1)
     x = self.fc1(self.fc0(x))
     state_value = self.valueFc(x)
     advantage = self.advantageFc(x)
     # Subtract the per-state advantage mean to keep the decomposition identifiable.
     adv_mean = fluid.layers.reduce_mean(advantage, dim=1, keep_dim=True)
     return advantage + (state_value - adv_mean)
Пример #11
0
 def value(self, obs):
     """Q-network for NHWC image input: two conv + max-pool stages, FC head."""
     x = self.conv1(obs / 255.0)
     x = layers.pool2d(input=x, pool_size=2, pool_stride=2,
                       pool_type='max', data_format="NHWC")
     x = self.conv2(x)
     x = layers.pool2d(input=x, pool_size=2, pool_stride=2,
                       pool_type='max', data_format="NHWC")
     return self.fc1(layers.flatten(x, axis=1))
Пример #12
0
    def policy(self, obs):
        """
        Args:
            obs: A float32 tensor of shape [B, C, H, W]

        Returns:
            policy_logits: B * ACT_DIM
        """
        features = self.conv3(self.conv2(self.conv1(obs / 255.0)))
        shared = self.fc(layers.flatten(features, axis=1))
        return self.policy_fc(shared)
Пример #13
0
    def policy(self, obs):
        """
        Args:
            obs: input image, shape [N, C, H, W]

        Returns:
            policy_logits: N * ACTION_DIM
        """
        features = self.conv4(self.conv3(self.conv2(self.conv1(obs))))
        shared = self.fc(layers.flatten(features, axis=1))
        return self.policy_fc(shared)
Пример #14
0
    def value(self, obs):
        """
        Args:
            obs: input image, shape [N, C, H, W]

        Returns:
            values: N
        """
        features = self.conv4(self.conv3(self.conv2(self.conv1(obs))))
        shared = self.fc(layers.flatten(features, axis=1))
        # Drop the trailing unit dimension: [N, 1] -> [N].
        return layers.squeeze(self.value_fc(shared), axes=[1])
Пример #15
0
    def value(self, obs):
        """Q-network: four conv stages (first three max-pooled), FC head."""
        x = self.conv1(obs / 255.0)
        x = layers.pool2d(input=x, pool_size=2, pool_stride=2, pool_type='max')
        x = self.conv2(x)
        x = layers.pool2d(input=x, pool_size=2, pool_stride=2, pool_type='max')
        x = self.conv3(x)
        x = layers.pool2d(input=x, pool_size=2, pool_stride=2, pool_type='max')
        x = layers.flatten(self.conv4(x), axis=1)
        return self.fc1(x)
Пример #16
0
    def value(self, obs):
        """Q-network over raw (unnormalized) observations.

        Four conv stages, the first three followed by 2x2 max pooling,
        then a single FC head over the flattened features.
        """
        x = self.conv1(obs)
        x = layers.pool2d(input=x, pool_size=2, pool_stride=2, pool_type='max')
        x = self.conv2(x)
        x = layers.pool2d(input=x, pool_size=2, pool_stride=2, pool_type='max')
        x = self.conv3(x)
        x = layers.pool2d(input=x, pool_size=2, pool_stride=2, pool_type='max')
        x = layers.flatten(self.conv4(x), axis=1)
        return self.fc1(x)
Пример #17
0
    def policy_and_value(self, obs):
        """
        Args:
            obs: A float32 tensor of shape [B, C, H, W]

        Returns:
            policy_logits: B * ACT_DIM
            values: B
        """
        features = self.conv3(self.conv2(self.conv1(obs / 255.0)))
        shared = self.fc(layers.flatten(features, axis=1))

        logits = self.policy_fc(shared)
        vals = layers.squeeze(self.value_fc(shared), axes=[1])
        return logits, vals
Пример #18
0
 def value(self, obs):
     """Predict concatenated (start, end, num) head outputs.

     One conv + max-pool stage feeds three FC heads over the flattened
     features; the start/end heads are softmax-normalized and all three
     outputs are concatenated along axis 1 into a single vector.
     """
     out = self.conv1(obs)
     out = layers.pool2d(input=out,
                         pool_size=4,
                         pool_stride=2,
                         pool_type='max')
     out = layers.flatten(out, axis=1)
     start = paddle.fluid.layers.softmax(self.fc1(out))
     end = paddle.fluid.layers.softmax(self.fc2(out))
     num = self.fc3(out)
     return layers.concat([start, end, num], axis=1)
Пример #19
0
    def policy_and_value(self, obs):
        """
        Args:
            obs: input image, shape [N, C, H, W]

        Returns:
            policy_logits: N * ACTION_DIM
            values: N
        """
        features = self.conv4(self.conv3(self.conv2(self.conv1(obs))))
        shared = self.fc(layers.flatten(features, axis=1))

        logits = self.policy_fc(shared)
        vals = layers.squeeze(self.value_fc(shared), axes=[1])
        return logits, vals