Example #1
    def forward_grad(self, x):
        # Forward difference along width: pad one column on the right,
        # apply the difference convolution, then zero the last column,
        # which has no forward neighbour.
        x1 = fluid.layers.pad2d(x, paddings=[0, 0, 0, 1])
        grad_x = self.conv2df_grad(x1)

        # Equivalent of the unsupported in-place `grad_x[:, :, :, -1] = 0`:
        # unstack along the width axis, zero the last slice, stack back.
        temp = unstack(grad_x, axis=3)
        temp[-1] = temp[-1] * 0
        grad_x = stack(temp, axis=3)

        # Forward difference along height: pad one row at the bottom,
        # then zero the last row the same way.
        x2 = fluid.layers.pad2d(x, paddings=[0, 1, 0, 0])
        grad_y = self.conv2df_grad2(x2)

        temp = unstack(grad_y, axis=2)
        temp[-1] = temp[-1] * 0
        grad_y = stack(temp, axis=2)

        return grad_x, grad_y
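What this example demonstrates: fluid's dygraph tensors do not support sliced assignment such as `grad_x[:, :, :, -1] = 0`, so the boundary is zeroed by unstacking along the target axis, editing one slice of the resulting Python list, and stacking back. A minimal self-contained sketch of the idiom (PaddlePaddle 1.x fluid assumed; shapes are illustrative):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.layers import stack, unstack

    with fluid.dygraph.guard():
        t = fluid.dygraph.to_variable(
            np.ones([2, 3, 4, 5], dtype='float32'))  # [n, c, h, w]
        slices = unstack(t, axis=3)   # list of 5 tensors, each [n, c, h]
        slices[-1] = slices[-1] * 0   # zero the last column
        t = stack(slices, axis=3)     # back to [n, c, h, w]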
Example #2
    def forward_grad(self, x):
        # Forward difference along width: pad the trailing edge of the
        # last axis, then zero the padded boundary column.
        grad_x = self.conv4u(layers.pad(x, [0, 0, 0, 0, 0, 0, 0, 1]))
        tmp = layers.unstack(grad_x, axis=3)
        tmp[-1] = tmp[-1] - tmp[-1]  # tmp[-1] = 0
        grad_x = layers.stack(tmp, axis=3)

        # Forward difference along height: pad the trailing edge of the
        # height axis, then zero the last row.
        grad_y = self.conv4v(layers.pad(x, [0, 0, 0, 0, 0, 1, 0, 0]))
        tmp = layers.unstack(grad_y, axis=2)
        tmp[-1] = tmp[-1] - tmp[-1]  # tmp[-1] = 0
        grad_y = layers.stack(tmp, axis=2)
        return grad_x, grad_y
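Note: layers.pad takes a flat list of 2 * rank entries, [dim0_before, dim0_after, dim1_before, dim1_after, ...], so for an NCHW tensor [0, 0, 0, 0, 0, 0, 0, 1] pads only the trailing edge of the width axis and [0, 0, 0, 0, 0, 1, 0, 0] the trailing edge of the height axis. A quick shape check (fluid 1.x dygraph assumed):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(np.zeros([1, 1, 4, 4], dtype='float32'))
        padded = fluid.layers.pad(x, paddings=[0, 0, 0, 0, 0, 0, 0, 1])
        print(padded.shape)  # [1, 1, 4, 5]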
Example #3
    def forward(self, *args, **kwargs):
        """
        Args:
            start_pos (optional, `Variable` of shape [batch_size]): 
                token index of start of answer span in `context`
            end_pos (optional, `Variable` of shape [batch_size]): 
                token index of end of answer span in `context`
        Returns:
            loss (`Variable` of shape []):
                Cross entropy loss mean over batch and time, ignore positions where label == -100
                if labels not set, returns None
            start_logits (`Variable` of shape [batch_size, hidden_size]):
                output logits of start position, use argmax(start_logit) to get start index
            end_logits (`Variable` of shape [batch_size, hidden_size]):
                output logits of end position, use argmax(end_logit) to get end index
        """

        start_pos = kwargs.pop('start_pos', None)
        end_pos = kwargs.pop('end_pos', None)
        pooled, encoded = super(ErnieModelForQuestionAnswering,
                                self).forward(*args, **kwargs)
        encoded = self.dropout(encoded)
        encoded = self.classifier(encoded)
        start_logits, end_logits = L.unstack(encoded, axis=-1)
        if start_pos is not None and end_pos is not None:
            if len(start_pos.shape) == 1:
                start_pos = L.unsqueeze(start_pos, axes=[-1])
            if len(end_pos.shape) == 1:
                end_pos = L.unsqueeze(end_pos, axes=[-1])
            start_loss = L.softmax_with_cross_entropy(start_logits, start_pos)
            end_loss = L.softmax_with_cross_entropy(end_logits, end_pos)
            loss = (L.reduce_mean(start_loss) + L.reduce_mean(end_loss)) / 2.
        else:
            loss = None
        return loss, start_logits, end_logits
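Usage sketch for this question-answering head (hypothetical setup: the from_pretrained constructor, the 'ernie-1.0' weights name, and the input tensors are assumptions, not part of the example above):

    import numpy as np
    import paddle.fluid.dygraph as D

    with D.guard():
        model = ErnieModelForQuestionAnswering.from_pretrained('ernie-1.0')
        ids = D.to_variable(np.random.randint(1, 100, [2, 128]).astype('int64'))
        start = D.to_variable(np.array([5, 7], dtype='int64'))
        end = D.to_variable(np.array([9, 12], dtype='int64'))
        loss, start_logits, end_logits = model(ids, start_pos=start, end_pos=end)
        # At inference time, omit start_pos/end_pos: loss comes back as None
        # and argmax over the logits gives the predicted span.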
Example #4
    def forward(self, x):
        """Forward network"""
        # Mask of non-pad positions; lens counts valid tokens per sequence.
        mask = layers.reduce_any(x != self.pad_index, -1)
        lens = nn.reduce_sum(mask, -1)
        masked_x = nn.masked_select(x, mask)
        char_mask = masked_x != self.pad_index
        emb = self.embed(masked_x)

        # Run the char LSTM and merge its stacked final states into one
        # feature vector per token.
        _, (h, _) = self.lstm(emb, char_mask, self.pad_index)
        h = layers.concat(layers.unstack(h), axis=-1)
        # Split the flat token features back per sequence and re-pad.
        feat_embed = nn.pad_sequence_paddle(
            layers.split(h, lens.numpy().tolist(), dim=0), self.pad_index)
        return feat_embed
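The unstack/concat pair merges stacked LSTM states along the feature axis: layers.unstack(h) with the default axis=0 splits h along its first axis, and concat(..., axis=-1) joins the pieces along the hidden dimension. A quick shape check (fluid 1.x dygraph assumed; sizes are illustrative):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        # e.g. forward and backward final states: [2, batch, hidden]
        h = fluid.dygraph.to_variable(np.zeros([2, 4, 8], dtype='float32'))
        merged = fluid.layers.concat(fluid.layers.unstack(h), axis=-1)
        print(merged.shape)  # [4, 16]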
Example #5
    def forward(self, x):
        x = self.bottleneck(x)
        inp = self.norm_img(x)
        bt, c, w, h = inp.shape
        # Recover the time axis: [batch, time, c, w, h].
        inp = layers.reshape(inp, shape=[self.batch_size, -1, c, w, h])

        # Consecutive frame pairs: x is frame t, y is frame t + 1.
        x = inp[:, :-1]
        y = inp[:, 1:]

        # Flatten the frame pairs back to [batch * (time - 1), c, w, h];
        # the time axis is already next to batch, so a plain reshape suffices.
        x = layers.reshape(x, shape=[-1, c, w, h])
        y = layers.reshape(y, shape=[-1, c, w, h])

        # Flow components u1 (horizontal) and u2 (vertical), zero-initialised.
        u1 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
        u2 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')

        l_t = self.lamda * self.theta
        taut = self.tau / (self.theta + 1e-12)

        # Centred spatial gradients of the second frame y; the boundary
        # columns/rows are replaced with one-sided half differences of x
        # via the unstack/edit/stack idiom.
        grad2_x = self.conv4Ix(layers.pad(y, (0, 0, 0, 0, 0, 0, 1, 1)))

        tmp = layers.unstack(grad2_x, axis=3)
        tmp[-1] = 0.5 * (x[:, :, :, -1] - x[:, :, :, -2])
        tmp[0] = 0.5 * (x[:, :, :, 1] - x[:, :, :, 0])
        grad2_x = layers.stack(tmp, axis=3)

        grad2_y = self.conv4Iy(layers.pad(y, (0, 0, 0, 0, 1, 1, 0, 0)))
        tmp = layers.unstack(grad2_y, axis=2)
        tmp[-1] = 0.5 * (x[:, :, -1, :] - x[:, :, -2, :])
        tmp[0] = 0.5 * (x[:, :, 1, :] - x[:, :, 0, :])
        grad2_y = layers.stack(tmp, axis=2)

        # Dual variables of the TV-L1 scheme, zero-initialised.
        p11 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
        p12 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
        p21 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
        p22 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')

        gsqx = grad2_x**2
        gsqy = grad2_y**2

        grad = gsqx + gsqy + 1e-12

        # Constant part of the brightness-constancy residual rho.
        rho_c = y - grad2_x * u1 - grad2_y * u2 - x
        for i in range(self.n_iter):
            rho = rho_c + grad2_x * u1 + grad2_y * u2 + 1e-12

            # Thresholding step on rho, written with float masks because
            # boolean sliced assignment is unavailable in dygraph.
            mask1 = (rho < -l_t * grad).detach().astype('float32')
            mask1.stop_gradient = True
            tmp1 = l_t * grad2_x
            tmp2 = l_t * grad2_y
            v1 = tmp1 * mask1
            v2 = tmp2 * mask1

            mask2 = (rho > l_t * grad).detach().astype('float32')
            mask2.stop_gradient = True
            v1 = -tmp1 * mask2 + v1
            v2 = -tmp2 * mask2 + v2

            mask3 = fluid.layers.ones(
                x.shape, dtype='float32') - (mask1 + mask2 - mask1 * mask2)
            mask3.stop_gradient = True
            tmp1 = (-rho / grad) * grad2_x
            tmp2 = (-rho / grad) * grad2_y

            v1 = tmp1 * mask3 + v1
            v2 = tmp2 * mask3 + v2

            del rho
            del mask1
            del mask2
            del mask3

            v1 += u1
            v2 += u2

            u1 = v1 + self.theta * self.divergence(p11, p12)
            u2 = v2 + self.theta * self.divergence(p21, p22)

            del v1
            del v2

            u1x, u1y = self.forward_grad(u1)
            u2x, u2y = self.forward_grad(u2)

            p11 = (p11 + taut * u1x) / (
                1. + taut * layers.sqrt(u1x**2 + u1y**2 + 1e-12))
            p12 = (p12 + taut * u1y) / (
                1. + taut * layers.sqrt(u1x**2 + u1y**2 + 1e-12))
            p21 = (p21 + taut * u2x) / (
                1. + taut * layers.sqrt(u2x**2 + u2y**2 + 1e-12))
            p22 = (p22 + taut * u2y) / (
                1. + taut * layers.sqrt(u2x**2 + u2y**2 + 1e-12))
            del u1x
            del u1y
            del u2x
            del u2y

        flow = layers.concat([u1, u2], axis=1)

        flow = self.unbottleneck(flow)
        flow = self.bn(flow) if self.bn else flow
        return flow
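Examples #5 and #6 appear to implement the same TV-L1-style optical-flow iterations as trainable layers: a thresholding step on the brightness-constancy residual rho, a divergence step for the primal flow u, and a normalised ascent step for the dual variables p. Schematically, one iteration is (pseudo-Python, names as in the code above):

    # rho = rho_c + Ix * u1 + Iy * u2                       residual
    # v   = u + step(rho, Ix, Iy, l_t)                      case analysis on rho
    # u   = v + theta * divergence(p)                       primal update
    # p   = (p + taut * grad(u))
    #       / (1 + taut * sqrt(grad(u)_x**2 + grad(u)_y**2))  dual update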
Example #6
    def forward(self, x):  # input layout: [b, c, t, h, w]
        # Residual branch: all frames except the last.
        residual = x[:, :, :-1]
        x = self.bottleneck(x)
        inp = self.norm_img(x)

        # Consecutive frame pairs along the time axis.
        x = inp[:, :, :-1]
        y = inp[:, :, 1:]
        b, c, t, h, w = x.shape

        # [b, c, t, h, w] -> [b, t, c, h, w] -> [b * t, c, h, w]
        x = fluid.layers.transpose(x, perm=[0, 2, 1, 3, 4])
        y = fluid.layers.transpose(y, perm=[0, 2, 1, 3, 4])
        x = fluid.layers.reshape(x, [-1, c, h, w])
        y = fluid.layers.reshape(y, [-1, c, h, w])

        # Flow components, zero-initialised.
        u1 = fluid.layers.zeros_like(x)
        u2 = fluid.layers.zeros_like(x)
        l_t = self.l * self.t
        taut = self.a / self.t

        # Centred spatial gradients of the second frame y (padding is
        # handled inside the conv layers); the boundary columns/rows are
        # replaced with one-sided half differences of x.
        grad2_x = self.conv2dimg_grad(y)

        xs = unstack(x, axis=3)
        temp = unstack(grad2_x, axis=3)
        temp[0] = 0.5 * (xs[1] - xs[0])
        temp[-1] = 0.5 * (xs[-1] - xs[-2])
        grad2_x = stack(temp, axis=3)

        grad2_y = self.conv2dimg_grad2(y)

        xs = unstack(x, axis=2)
        temp = unstack(grad2_y, axis=2)
        temp[0] = 0.5 * (xs[1] - xs[0])
        temp[-1] = 0.5 * (xs[-1] - xs[-2])
        grad2_y = stack(temp, axis=2)

        # Dual variables of the TV-L1 scheme, zero-initialised.
        p11 = fluid.layers.zeros_like(x)
        p12 = fluid.layers.zeros_like(x)
        p21 = fluid.layers.zeros_like(x)
        p22 = fluid.layers.zeros_like(x)

        gsqx = grad2_x**2
        gsqy = grad2_y**2
        grad = gsqx + gsqy + 1e-12

        # Constant part of the brightness-constancy residual rho.
        rho_c = y - grad2_x * u1 - grad2_y * u2 - x

        for i in range(self.n_iter):
            rho = rho_c + grad2_x * u1 + grad2_y * u2 + 1e-12

            v1 = fluid.layers.zeros_like(x)
            v2 = fluid.layers.zeros_like(x)

            # Thresholding step on rho. Boolean-mask assignment such as
            # v1[mask] = (...)[mask] is unavailable in fluid, so each case
            # is applied with where/gather_nd/scatter_nd over index lists.
            # Case |rho| small: v = -(rho / grad) * grad2; the two threshold
            # cases below then overwrite the entries they select.
            mask3 = fluid.layers.where(grad > 1e-12)
            if mask3.shape[0]:
                v1 = fluid.layers.scatter_nd(
                    mask3,
                    fluid.layers.gather_nd((-rho / grad) * grad2_x, mask3),
                    v1.shape)
                v2 = fluid.layers.scatter_nd(
                    mask3,
                    fluid.layers.gather_nd((-rho / grad) * grad2_y, mask3),
                    v2.shape)

            # Case rho < -l_t * grad: v = l_t * grad2. scatter_nd_add with
            # the delta (target - current) overwrites only the selected
            # entries and keeps the rest of v intact.
            mask1 = fluid.layers.where(rho < -l_t * grad)
            if mask1.shape[0]:
                v1 = fluid.layers.scatter_nd_add(
                    v1, mask1,
                    fluid.layers.gather_nd(l_t * grad2_x - v1, mask1))
                v2 = fluid.layers.scatter_nd_add(
                    v2, mask1,
                    fluid.layers.gather_nd(l_t * grad2_y - v2, mask1))

            # Case rho > l_t * grad: v = -l_t * grad2.
            mask2 = fluid.layers.where(rho > l_t * grad)
            if mask2.shape[0]:
                v1 = fluid.layers.scatter_nd_add(
                    v1, mask2,
                    fluid.layers.gather_nd(-l_t * grad2_x - v1, mask2))
                v2 = fluid.layers.scatter_nd_add(
                    v2, mask2,
                    fluid.layers.gather_nd(-l_t * grad2_y - v2, mask2))

            del rho
            del mask1
            del mask2
            del mask3
            """
            del v11
            del v21
            del v11set
            del v21set 
            

            del v12
            del v22
            del v12set
            del v22set

            del v13
            del v23
            del v13set
            del v23set
            """

            v1 += u1
            v2 += u2

            u1 = v1 + self.t * self.divergence(p11, p12)
            u2 = v2 + self.t * self.divergence(p21, p22)
            del v1
            del v2

            u1x, u1y = self.forward_grad(u1)
            u2x, u2y = self.forward_grad(u2)

            p11 = (p11 + taut * u1x) / (
                1. + taut * fluid.layers.sqrt(u1x**2 + u1y**2 + 1e-12))
            p12 = (p12 + taut * u1y) / (
                1. + taut * fluid.layers.sqrt(u1x**2 + u1y**2 + 1e-12))
            p21 = (p21 + taut * u2x) / (
                1. + taut * fluid.layers.sqrt(u2x**2 + u2y**2 + 1e-12))
            p22 = (p22 + taut * u2y) / (
                1. + taut * fluid.layers.sqrt(u2x**2 + u2y**2 + 1e-12))
            del u1x
            del u1y
            del u2x
            del u2y

        flow = fluid.layers.concat(input=[u1, u2], axis=1)
        # [b * t, 2c, h, w] -> [b, 2c, t, h, w]
        flow = fluid.layers.reshape(flow, [b, t, c * 2, h, w])
        flow = fluid.layers.transpose(flow, perm=[0, 2, 1, 3, 4])
        flow = self.unbottleneck(flow)
        flow = self.bn(flow)
        # The flow term is left commented out; only the residual path is
        # returned.
        return self.prelu(residual)  # + flow
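The where/gather_nd/scatter_nd_add combination above emulates boolean-mask assignment (v[mask] = values[mask]), which fluid tensors do not support directly. A minimal sketch of the pattern (fluid 1.x dygraph assumed; values are illustrative):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        v = fluid.dygraph.to_variable(np.zeros([4], dtype='float32'))
        src = fluid.dygraph.to_variable(np.arange(4, dtype='float32'))
        idx = fluid.layers.where(src > 1.5)          # indices of True entries
        # v[idx] = src[idx]: add (target - current) at the selected indices.
        delta = fluid.layers.gather_nd(src - v, idx)
        v = fluid.layers.scatter_nd_add(v, idx, delta)
        print(v.numpy())  # [0. 0. 2. 3.]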