示例#1
0
文件: builder.py 项目: wilxy/mgd
    def sp(self, i, t_feats, s_feats, margins, pooling_kernel='max'):
        """
        Simple Pooling for channels reduction, including max pooling and avg pooling.

        The teacher channels of level ``i`` are reordered through
        ``self.guided_inds[i]``, cut into consecutive groups of ``sc`` channels
        (``sc`` being the channel count of ``s_feats[i]``), stacked on a new
        axis and pooled across that axis, leaving ``sc`` channels.

        Args:
            i: index of the feature level to process.
            t_feats: indexable of teacher features; ``t_feats[i]`` is 4-D.
            s_feats: indexable of student features; ``s_feats[i]`` is 4-D.
            margins: 4-D tensor indexed on dim 1 with the same channel indices
                as the teacher features.
            pooling_kernel: ``'avg'`` selects adaptive average pooling; any
                other value (including the default ``'max'``) keeps adaptive
                max pooling, which is what this method always used before.

        Returns:
            Tuple ``(t, m)``: the pooled teacher features resized to the
            student's ``(h, w)``, and the pooled margins with spatial size
            ``(1, 1)``.
        """
        _, sc, h, w = s_feats[i].shape
        _, tc, _, _ = t_feats[i].shape

        if pooling_kernel == 'avg':
            pool = F.adaptive_avg_pool3d
        else:
            pool = F.adaptive_max_pool3d

        # Start of the trailing, incomplete channel group. It equals tc when
        # sc divides tc, in which case the loop never reaches it.
        tail_start = (tc // sc) * sc
        inds = self.guided_inds[i]

        t = []
        m = []

        for c in range(0, tc, sc):
            # The incomplete tail group is skipped when channels are ignored
            # or shaved.
            if c == tail_start and (len(self.ignore_inds) > 0 or self.shave):
                continue

            group = inds[c:c + sc].detach()
            t.append(t_feats[i][:, group, :, :])
            m.append(margins[:, group, :, :])

        # New axis 2 enumerates the channel groups; pooling collapses it to 1.
        t = torch.stack(t, dim=2)
        m = torch.stack(m, dim=2)

        t = pool(t, (1, h, w)).squeeze(2)
        m = pool(m, (1, 1, 1)).squeeze(2)

        return t, m
示例#2
0
 def forward(self, x):
     """Adaptively pool ``x`` to ``self.output_size`` using ``self.method``.

     ``'avg'`` returns the adaptive average pool, ``'max'`` the adaptive max
     pool, and any other value the sum of both.
     """
     if self.method == 'avg':
         return F.adaptive_avg_pool3d(x, self.output_size)
     # This branch used to test 'avg' a second time, so 'max' fell through
     # to the avg + max sum below.
     if self.method == 'max':
         return F.adaptive_max_pool3d(x, self.output_size)
     avg_pooled = F.adaptive_avg_pool3d(x, self.output_size)
     max_pooled = F.adaptive_max_pool3d(x, self.output_size)
     return avg_pooled + max_pooled
示例#3
0
    def forward_multiframe(self, x, pool=True):
        """Extract per-frame features from a ``(B, T, C, H, W)`` clip and pool them.

        Frames are folded into the batch dimension, passed through
        ``self.feature_extraction`` and rearranged to ``(B, C', T, H', W')``.

        Args:
            x: 5-D input tensor unpacked as ``(B, T, C, H, W)``.
            pool: when false, the rearranged feature volume is returned as is.

        Returns:
            The un-pooled feature volume when ``pool`` is false; otherwise a
            tensor reshaped to ``(B, -1, 1, 1)``. ``self.pool_type`` selects
            global average (``'avgpool'``) or max (``'maxpool'``) pooling, and
            ``self.fc`` is applied to the flattened features when
            ``self.with_fc`` is set.
        """
        (B, T, C, H, W) = x.size()
        x = x.contiguous().view(B * T, C, H, W)
        x = self.feature_extraction(x)

        (_, C, H, W) = x.size()
        x = x.view(B, T, C, H, W).permute(0, 2, 1, 3, 4)

        if not pool:
            return x

        if self.pool_type == 'avgpool':
            x = F.adaptive_avg_pool3d(x, 1)
        elif self.pool_type == 'maxpool':
            x = F.adaptive_max_pool3d(x, 1)

        if self.with_fc:
            x = self.fc(x.view(x.size(0), -1))
        # Both paths end in the same reshape; the old trailing `return x`
        # after the if/else was unreachable.
        return x.view(x.size(0), -1, 1, 1)
示例#4
0
    def forward(self,x):
        """Run the multi-granularity axial branches and add the result to the input.

        The (optionally down-projected) input is sliced along channels into
        ``self.granularity`` chunks of ``self.in_gran_channel`` channels. Chunk
        ``i`` is max-pooled spatially by a factor ``2**i``, passed through three
        modules from ``self.axial_gran`` in the order named by ``self.order``,
        resized back to its original ``(T, H, W)``, and all chunks are
        concatenated, projected by ``conv_up``/``bn2`` and added to the input.

        Raises:
            NotImplementedError: if ``self.order`` is not one of
                ``'hwt'``, ``'wht'``, ``'wth'``, ``'twh'``.
        """
        residual = x
        if self.inter_channel is not None:
            x = self.relu(self.bn1(self.conv_down(x)))

        # Offsets into each granularity's triple of modules, for the orders
        # whose modules are called with a single argument.
        single_arg_orders = {'wht': (1, 0, 2), 'wth': (1, 2, 0), 'twh': (2, 1, 0)}

        branches = []
        for level in range(self.granularity):
            lo = level * (self.in_gran_channel)
            hi = (level + 1) * (self.in_gran_channel)
            chunk = x[:, lo:hi, ...]
            _, _, T, H, W = chunk.shape
            shrink = 2 ** level
            chunk = F.adaptive_max_pool3d(chunk, (T, H // shrink, W // shrink))

            base = level * 3
            if self.order == 'hwt':
                # In this order the modules are called with an extra True flag
                # and return a pair; the second element is discarded.
                for offset in (0, 1, 2):
                    chunk, _ = self.axial_gran[base + offset](chunk, True)
            elif self.order in single_arg_orders:
                for offset in single_arg_orders[self.order]:
                    chunk = self.axial_gran[base + offset](chunk)
            else:
                raise NotImplementedError

            branches.append(F.interpolate(chunk, size=(T, H, W)))

        merged = torch.cat(branches, dim=1)
        merged = self.bn2(self.conv_up(merged))
        return residual + merged
示例#5
0
    def forward(self, x):
        """Scale ``x`` by a sigmoid channel-attention map.

        For each entry of ``self.pool_types`` a pooled descriptor of ``x`` is
        passed through ``self.mlp``; the results are summed, squashed with a
        sigmoid and broadcast over the spatial dimensions of ``x``.

        Supported pool types: ``'avg'`` and ``'max'`` (adaptive 3-D pooling
        over the channel/height/width axes down to ``(self.groups, 1, 1)``),
        ``'lp'`` (L2 pooling over the full spatial extent) and ``'lse'``
        (``logsumexp_2d``).

        Args:
            x: 4-D input tensor.

        Returns:
            ``x`` multiplied by the attention map.

        Raises:
            ValueError: if ``self.pool_types`` contains an unsupported name.
                Previously this surfaced as an unbound-variable error, or
                silently re-added the previous branch's result.
        """
        b, c, h, w = x.size()
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type in ('avg', 'max'):
                if pool_type == 'avg':
                    pool_fn = F.adaptive_avg_pool3d
                else:
                    pool_fn = F.adaptive_max_pool3d
                # Add a leading axis so the 3-D pooling runs over
                # (channel, height, width) with the batch in the channel slot.
                y = x.unsqueeze(0)
                pooled = pool_fn(y, (self.groups, 1, 1)).squeeze(0)
                channel_att_raw = self.mlp(pooled)
            elif pool_type == 'lp':
                lp_pool = F.lp_pool2d(x,
                                      2, (x.size(2), x.size(3)),
                                      stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp(lp_pool)
            elif pool_type == 'lse':
                # LSE pool only
                lse_pool = logsumexp_2d(x)
                channel_att_raw = self.mlp(lse_pool)
            else:
                raise ValueError(
                    'Unsupported pool type: {!r}'.format(pool_type))

            if channel_att_sum is None:
                channel_att_sum = channel_att_raw
            else:
                channel_att_sum = channel_att_sum + channel_att_raw

        scale = F.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(
            x)
        return x * scale
示例#6
0
    def forward_multiframe_feat_emb(self, x, pool=True):
        """Compute per-frame feature maps for a clip and, optionally, a pooled embedding.

        The ``(B, C, T, H, W)`` input is unfolded into ``B * T`` frames, run
        through ``self.backbone`` and ``self.classifier`` (called with
        ``pool=False``), and folded back to ``(B, C', T, H', W')``.

        Args:
            x: 5-D input tensor unpacked as ``(B, C, T, H, W)``.
            pool: when false only the feature volume is returned.

        Returns:
            The feature volume when ``pool`` is false; otherwise the pair
            ``(feature volume, global max-pooled embedding of shape (B, C'))``.
        """
        (B, C, T, H, W) = x.size()
        frames = x.permute(0, 2, 1, 3, 4).contiguous().view(B * T, C, H, W)

        features = self.backbone(frames)
        out = self.classifier(features, pool=False)

        (_, C, H, W) = out.size()
        out = out.view(B, T, C, H, W).permute(0, 2, 1, 3, 4)

        if not pool:
            return out

        # Global max over time and space gives one value per channel.
        pooled = F.adaptive_max_pool3d(out, 1)
        channels = pooled.size()[1]
        return out, pooled.view(B, channels)
示例#7
0
    def forward(self, x):
        """Gate ``x`` with an attention map built from coarse avg- and max-pooled copies.

        ``x`` is treated as a single 3-D volume over (channel, height, width):
        it is pooled down to ``(self.group, h // 4, w // 4)`` with both average
        and max pooling, each result goes through its own module
        (``conv2_1`` / ``conv2_2``), the two are summed, resized back to
        ``(c, h, w)``, squashed with a sigmoid and multiplied with ``x``.

        Args:
            x: 4-D input tensor.

        Returns:
            Tensor with the same shape as ``x``.
        """
        _, c, h, w = x.size()
        c2, h2, w2 = self.group, h // 4, w // 4

        # Leading axis lets the 3-D pooling run across channels as well as
        # space. No clone is needed: nothing below writes into x.
        y = x.unsqueeze(0)

        y1 = F.adaptive_avg_pool3d(y, (c2, h2, w2)).squeeze(0)
        y1 = self.conv2_1(y1)

        y2 = F.adaptive_max_pool3d(y, (c2, h2, w2)).squeeze(0)
        y2 = self.conv2_2(y2)

        y3 = (y1 + y2).unsqueeze(0)
        # F.upsample is deprecated; F.interpolate uses the same default
        # (nearest) mode.
        y3 = F.interpolate(y3, size=(c, h, w)).squeeze(0)
        y3 = F.sigmoid(y3)

        return y3 * x
    def apply(features: Tensor, proposal_bboxes: Tensor, proposal_batch_indices: Tensor, mode: Mode) -> Tensor:
        """Crop each proposal from a 5-D feature volume and max-pool it to a fixed size.

        Box coordinates (x0, y0, x1, y1) are scaled by 1/16, rounded and
        clamped to the feature map; the crop keeps the full temporal axis and
        is adaptively max-pooled to ``(feature_map_t, 7, 7)``.

        Args:
            features: 5-D tensor; the last three axes are unpacked as
                (t, height, width).
            proposal_bboxes: iterable of boxes indexed as ``[x0, y0, x1, y1]``.
            proposal_batch_indices: per-box index into dim 0 of ``features``.
            mode: only ``Pooler.Mode.POOLING`` is supported.

        Returns:
            The pooled crops stacked along a new leading dimension.

        Raises:
            ValueError: if ``mode`` is not ``Pooler.Mode.POOLING``.
        """
        _, _, feature_map_t, feature_map_height, feature_map_width = features.shape
        scale = 1 / 16
        output_size = (feature_map_t, 7, 7)

        if not mode == Pooler.Mode.POOLING:
            raise ValueError

        def to_cell(coordinate):
            # Image coordinate -> rounded feature-map cell index.
            return round(coordinate.item() * scale)

        crops = []
        for bbox, batch_index in zip(proposal_bboxes, proposal_batch_indices):
            # Start indices are clamped to [0, size - 1]; end indices are
            # exclusive and clamped to [1, size].
            x0 = max(min(to_cell(bbox[0]), feature_map_width - 1), 0)
            y0 = max(min(to_cell(bbox[1]), feature_map_height - 1), 0)
            x1 = max(min(to_cell(bbox[2]) + 1, feature_map_width), 1)
            y1 = max(min(to_cell(bbox[3]) + 1, feature_map_height), 1)

            roi = features[batch_index, :, :, y0:y1, x0:x1]
            crops.append(F.adaptive_max_pool3d(input=roi, output_size=output_size))

        return torch.stack(crops, dim=0)
示例#9
0
    def forward(self, f, inputs, proposals):
        """Crop every proposal from the feature volume ``f`` and max-pool it to a fixed size.

        Args:
            f: feature tensor indexed as ``f[b, :, z0:z1, y0:y1, x0:x1]``.
            inputs: tensor whose trailing three dimensions give the input
                volume size; they are stored on ``self`` as
                ``DEPTH``/``HEIGHT``/``WIDTH``.
            proposals: iterable of 1-D tensors where ``p[0]`` is the batch
                index, ``p[2:5]`` the box centre and ``p[5:8]`` the side
                lengths, in input-volume coordinates.

        Returns:
            Stacked crops, each adaptively max-pooled to
            ``self.rcnn_crop_size``.
        """
        self.DEPTH, self.HEIGHT, self.WIDTH = inputs.shape[2:]

        # Clipping bounds in feature-map coordinates. They are the same for
        # every proposal, so build them once on the CPU and move them to the
        # proposal's device inside the loop. The previous hard-coded .cuda()
        # failed whenever the proposals lived on another device.
        lower = torch.zeros((1, 3), dtype=torch.long)
        upper = torch.as_tensor(
            np.array([[self.DEPTH, self.HEIGHT, self.WIDTH]]) /
            self.scale).long()

        crops = []
        for p in proposals:
            b = int(p[0])
            center = p[2:5]
            side_length = p[5:8]
            c0 = center - side_length / 2  # left bottom corner
            c1 = c0 + side_length  # right upper corner
            # Map to the down-sampled feature map, rounding outwards.
            c0 = (c0 / self.scale).floor().long()
            c1 = (c1 / self.scale).ceil().long()
            minimum = lower.to(c0.device)
            maximum = upper.to(c1.device)

            # Clip the box to the feature-map extent.
            c0 = torch.cat((c0.unsqueeze(0), minimum), 0)
            c1 = torch.cat((c1.unsqueeze(0), maximum), 0)
            c0, _ = torch.max(c0, 0)
            c1, _ = torch.min(c1, 0)

            # Slice 0 dim, should never happen
            if np.any((c1 - c0).cpu().data.numpy() < 1):
                print(p)
                print('c0:', c0, ', c1:', c1)
            crop = f[b, :, c0[0]:c1[0], c0[1]:c1[1], c0[2]:c1[2]]
            crop = F.adaptive_max_pool3d(crop, self.rcnn_crop_size)
            crops.append(crop)

        crops = torch.stack(crops)

        return crops
示例#10
0
    def forward(self, x):
        """Gate ``x`` with an attention map computed from concatenated avg/max pooled copies.

        ``x`` is treated as one 3-D volume over (channel, height, width) and
        pooled to a quarter of each extent with average and max pooling. The
        two results are concatenated on dim 1, passed through ``self.conv2``,
        resized back to ``(c, h, w)``, squashed with a sigmoid and multiplied
        with ``x``.

        Args:
            x: 4-D input tensor.

        Returns:
            Tensor with the same shape as ``x``.
        """
        _, c, h, w = x.size()
        c2, h2, w2 = c // 4, h // 4, w // 4

        # Leading axis lets the 3-D pooling span channels as well as space.
        y = x.unsqueeze(0)
        y1 = F.adaptive_avg_pool3d(y, (c2, h2, w2)).squeeze(0)
        y2 = F.adaptive_max_pool3d(y, (c2, h2, w2)).squeeze(0)
        y3 = torch.cat((y1, y2), 1)
        # A leftover debug print of y3.size() was removed from here.

        y3 = self.conv2(y3)
        y3 = y3.unsqueeze(0)
        # F.upsample is deprecated; F.interpolate keeps the same default
        # (nearest) mode.
        y3 = F.interpolate(y3, size=(c, h, w))
        y3 = y3.squeeze(0)
        y3 = F.sigmoid(y3)

        return y3 * x
 def forward(self, x):
     """Apply three ReLU-activated convolutions, global max pooling and the final ``fc``.

     The pooled volume is flattened to ``(batch, -1)`` before ``self.fc``.
     """
     feats = x
     for conv in (self.conv1, self.conv2, self.conv3):
         feats = F.relu(conv(feats))
     pooled = F.adaptive_max_pool3d(feats, (1, 1, 1))
     flat = pooled.view(pooled.shape[0], -1)
     return self.fc(flat)
示例#12
0
 def forward(self, x):
     """Encode ``x``, max-pool away the last two axes and gate the result with ``self.ln``.

     The third-from-last axis keeps its size (``None`` in the pooling target);
     the pooled tensor is flattened to ``(batch, -1)`` and multiplied
     element-wise by ``self.ln`` of itself.
     """
     n = x.shape[0]
     encoded = self.encoder(x)
     pooled = F.adaptive_max_pool3d(encoded, (None, 1, 1)).view((n, -1))
     gate = self.ln(pooled)
     return pooled * gate
示例#13
0
    def forward(self, x):
        """Process a (short, long) pair of volumes and fuse the short path into the long one.

        Each volume goes through its own convolution and normalisation. Unless
        ``self.no_lateral`` is set, the convolved short volume is additionally
        pooled, resized to the long volume's ``(t, h, w)`` if needed, passed
        through ``conv_short2long`` and ``norm_long``, and added to the long
        output.

        Args:
            x: pair ``(x_short, x_long)`` of 5-D tensors.

        Returns:
            Tuple ``(x_short, x_long)`` of the processed volumes.
        """
        # Split volume
        x_short, x_long = x

        # Short
        x_short = self.conv_short(x_short)

        # Long
        x_long = self.conv_long(x_long)

        if not self.no_lateral:
            if self.pool == 'soft':
                # An earlier soft_pool3d-based variant is disabled; an
                # adaptive max pool to the tensor's own size stands in for it.
                x_short2long = F.adaptive_max_pool3d(
                    x_short, tuple(x_short.size()[-3:]))
            else:
                x_short2long = F.avg_pool3d(x_short,
                                            kernel_size=(1, 2, 2),
                                            stride=(1, 2, 2))

            if x_short2long.shape[2] > 2:
                x_short2long = temporal_cossim_pool(x_short2long)

            # Compare the full (t, h, w) extents. The previous check indexed
            # x_long[0], which drops the batch axis, so it compared (t, h, w)
            # with (h, w) and interpolated on every call.
            if list(x_short2long.size())[2:] != list(x_long.size())[2:]:
                t, h, w = list(x_long.size())[2:]
                x_short2long = F.interpolate(x_short2long,
                                             size=(t, h, w),
                                             mode='trilinear')

            x_short2long = self.conv_short2long(x_short2long)

        x_long = self.norm_long(x_long)
        x_short = self.norm_short(x_short)

        if not self.no_lateral:
            x_short2long = self.norm_long(x_short2long)

            x_long = torch.add(x_long, x_short2long)

        return (x_short, x_long)
示例#14
0
 def pool(self, inputs: Tensor, target_shape: List[int]) -> Tensor:
     """Adaptive max pooling whose dimensionality follows ``len(target_shape)``.

     Raises:
         RuntimeError: if ``target_shape`` does not have 1, 2 or 3 entries.
     """
     pool_by_rank = {
         1: F.adaptive_max_pool1d,
         2: F.adaptive_max_pool2d,
         3: F.adaptive_max_pool3d,
     }
     pool_fn = pool_by_rank.get(len(target_shape))
     if pool_fn is None:
         raise RuntimeError(f"Invalid target_shape: {target_shape}")
     return pool_fn(inputs, target_shape)
示例#15
0
    def forward(self, x):
        """Compute ReLU-activated features, optionally pool them globally, then flatten.

        ``self.ddata_pool`` selects global average (``'avg'``) or max
        (``'max'``) pooling; any other value skips pooling. The result is
        flattened from dim 1 onwards.
        """
        out = F.relu(self.features(x), inplace=True)

        mode = self.ddata_pool
        if mode == 'avg':
            out = F.adaptive_avg_pool3d(out, (1, 1, 1))
        elif mode == 'max':
            out = F.adaptive_max_pool3d(out, (1, 1, 1))

        return torch.flatten(out, 1)
示例#16
0
    def forward(self, x):
        """Gate ``x`` with an attention volume assembled from three per-axis profiles.

        Three branches (``dconv1``/``hconv1``/``wconv1``) are each pooled so
        that only one of the last three axes of ``x`` keeps its length, passed
        through a second convolution and normalised (sigmoid when
        ``self.middle_norm == 'sig'``, otherwise softmax over dim 1). Their
        broadcast product is fused, squashed with ``self.sig`` and multiplied
        with ``x``.
        """
        branch_d = self.dconv1(x)
        branch_h = self.hconv1(x)
        branch_w = self.wconv1(x)

        # Max pooling only for 'max'; everything else falls back to average.
        if self.pooling == 'max':
            collapse = F.adaptive_max_pool3d
        else:
            collapse = F.adaptive_avg_pool3d
        branch_d = collapse(branch_d, (x.shape[2], 1, 1))
        branch_h = collapse(branch_h, (1, x.shape[3], 1))
        branch_w = collapse(branch_w, (1, 1, x.shape[4]))

        if self.middle_norm == 'sig':
            branch_d = self.sig(self.dconv2(branch_d))
            branch_h = self.sig(self.hconv2(branch_h))
            branch_w = self.sig(self.wconv2(branch_w))
        else:
            branch_d = F.softmax(self.dconv2(branch_d), 1)
            branch_h = F.softmax(self.hconv2(branch_h), 1)
            branch_w = F.softmax(self.wconv2(branch_w), 1)

        # Broadcasting the three profiles yields a full-size attention volume.
        attention = self.sig(self.fuse(branch_d * branch_h * branch_w))
        return x * attention
示例#17
0
 def forward(self, X):
     """Pool pairwise point features within ``boost_factor`` groups into one vector.

     The ``n`` points of each batch element are split into
     ``d = self.boost_factor`` groups of ``m = n // d`` points. Every ordered
     pair of points inside a group is concatenated and fed to ``self.L``; the
     pair responses are pooled over both points (avg-then-max when
     ``self.sym_pool_max`` is set, max-then-avg otherwise), passed through
     ``self.F`` and finally reduced across groups by ``self.BoostPool``.

     Args:
         X: tensor unpacked as ``(b, n, din)``; ``n`` must be divisible by
             ``self.boost_factor``.

     Returns:
         The output of ``self.BoostPool`` with its second-to-last axis
         squeezed.

     Raises:
         AssertionError: if ``n`` is not a multiple of ``self.boost_factor``.
     """
     b, n, din = X.size()
     d = self.boost_factor
     # Floor division: under Python 3 a plain `/` yields a float, which
     # view()/expand() reject.
     m = n // d
     assert(m*d==n)
     Xr = X.view(b,d,1,m,din).expand(b,d,m,m,din)
     Xrc= torch.cat((Xr,Xr.transpose(2,3)),dim=-1) #bxdxmxmx6
     G = self.L.forward(Xrc) #bxdxmxmxK
     if self.sym_pool_max: #average each point, then max across all points
         Pr= Functional.adaptive_avg_pool3d(G, (m,1,self.dims[-1])).squeeze(-2) #bxdxmxK
         P = Functional.adaptive_max_pool2d(Pr,(1,self.dims[-1])).squeeze(-2) #bxdxK
     else: #max each point, then average over all points
         Pr= Functional.adaptive_max_pool3d(G, (m,1,self.dims[-1])).squeeze(-2) #bxdxmxK
         P = Functional.adaptive_avg_pool2d(Pr,(1,self.dims[-1])).squeeze(-2) #bxdxK
     Y = self.F.forward(P)   #bxdxC
     Y = self.BoostPool.forward(Y).squeeze(-2) #bxC
     return Y
示例#18
0
    def forward_multiframe(self, x, pool=True):
        """Extract per-frame features from a ``(B, C, T, H, W)`` clip and pool them globally.

        Args:
            x: 5-D input tensor unpacked as ``(B, C, T, H, W)``.
            pool: when false the ``(B, C', T, H', W')`` feature volume is
                returned without pooling.

        Returns:
            The feature volume, or a ``(B, C')`` tensor after global average
            (``'avgpool'``) or max (``'maxpool'``) pooling as selected by
            ``self.pool_type``.
        """
        (B, C, T, H, W) = x.size()
        # Fold time into the batch so the 2-D feature extractor sees frames.
        frames = x.permute(0, 2, 1, 3, 4).contiguous().view(B * T, C, H, W)

        feats = self.features(frames)

        (_, C, H, W) = feats.size()
        volume = feats.view(B, T, C, H, W).permute(0, 2, 1, 3, 4)

        if not pool:
            return volume

        if self.pool_type == 'avgpool':
            volume = F.adaptive_avg_pool3d(volume, 1)
        elif self.pool_type == 'maxpool':
            volume = F.adaptive_max_pool3d(volume, 1)

        return volume.view(B, C)
示例#19
0
    def forward_multiframe_feat_emb(self, x, pool=True):
        """Compute per-frame feature maps for a clip and, optionally, a pooled embedding.

        The ``(B, C, T, H, W)`` input is unfolded into ``B * T`` frames, run
        through ``self.features`` and ``self.fc``, and folded back to
        ``(B, C', T, H', W')``.

        Args:
            x: 5-D input tensor unpacked as ``(B, C, T, H, W)``.
            pool: when false only the feature volume is returned.

        Returns:
            The feature volume when ``pool`` is false; otherwise the pair
            ``(feature volume, pooled embedding of shape (B, C'))``.

        Raises:
            ValueError: if pooling is requested and ``self.pool_type`` is
                neither ``'avgpool'`` nor ``'maxpool'``. Previously this
                surfaced as an unbound-variable error on ``img``.
        """
        (B, C, T, H, W) = x.size()
        x = x.permute(0, 2, 1, 3, 4).contiguous()
        x = x.view(B * T, C, H, W)

        x = self.features(x)
        x = self.fc(x)
        (_, C, H, W) = x.size()
        x = x.view(B, T, C, H, W)
        x = x.permute(0, 2, 1, 3, 4)

        if not pool:
            return x

        # for evaluation (sound source separation)
        if self.pool_type == 'avgpool':
            img = F.adaptive_avg_pool3d(x, 1)
        elif self.pool_type == 'maxpool':
            img = F.adaptive_max_pool3d(x, 1)
        else:
            raise ValueError(
                'Unsupported pool_type: {!r}'.format(self.pool_type))
        img = img.view(B, C)

        return x, img
示例#20
0
    def forward(self, f, inputs, proposals):
        """Crop every proposal from the feature volume ``f`` and max-pool it to a fixed size.

        Args:
            f: feature tensor indexed as ``f[b, :, z0:z1, y0:y1, x0:x1]``.
            inputs: tensor whose trailing three dimensions give the input
                volume size; they are stored on ``self`` as
                ``DEPTH``/``HEIGHT``/``WIDTH``.
            proposals: iterable of 1-D tensors where ``p[0]`` is the batch
                index, ``p[2:5]`` the box centre and ``p[5:8]`` the side
                lengths, in input-volume coordinates.

        Returns:
            Stacked crops, each adaptively max-pooled to
            ``self.rcnn_crop_size``.
        """
        self.DEPTH, self.HEIGHT, self.WIDTH = inputs.shape[2:]

        # Clipping bounds on the down-sampled feature map. They do not depend
        # on the proposal, so they are built once on the CPU and moved to the
        # proposal's device in the loop. The previous hard-coded .cuda()
        # failed whenever the proposals lived on another device.
        lower = torch.zeros((1, 3), dtype=torch.long)
        upper = torch.as_tensor(
            np.array([[self.DEPTH, self.HEIGHT, self.WIDTH]]) /
            self.scale).long()

        crops = []
        for p in proposals:
            b = int(p[0])
            center = p[2:5]
            side_length = p[5:8]

            # left bottom corner
            c0 = center - side_length / 2
            # right upper corner
            c1 = c0 + side_length

            # corresponding point on the downsampled feature map
            c0 = (c0 / self.scale).floor().long()
            c1 = (c1 / self.scale).ceil().long()
            minimum = lower.to(c0.device)
            maximum = upper.to(c1.device)

            # clip the boxes so that (0, 0, 0) <= (z0, y0, x0) and
            # (z1, y1, x1) <= (D, H, W) on the feature map
            c0 = torch.cat((c0.unsqueeze(0), minimum), 0)
            c1 = torch.cat((c1.unsqueeze(0), maximum), 0)
            c0, _ = torch.max(c0, 0)
            c1, _ = torch.min(c1, 0)

            # This should never happen
            if np.any((c1 - c0).cpu().data.numpy() < 1):
                print(p)
                print('c0:', c0, ', c1:', c1)
            crop = f[b, :, c0[0]:c1[0], c0[1]:c1[1], c0[2]:c1[2]]
            crop = F.adaptive_max_pool3d(crop, self.rcnn_crop_size)
            crops.append(crop)

        crops = torch.stack(crops)

        return crops
示例#21
0
    def forward(self, x):
        """Gate ``x`` with the sum of an avg-pooled and a max-pooled attention map.

        ``x`` is treated as one 3-D volume over (channel, height, width). For
        each pooling type the volume is pooled to a quarter of every extent,
        passed through the shared ``self.conv2``, resized back to
        ``(c, h, w)`` and squashed with a sigmoid. The two maps are summed and
        multiplied with ``x``.

        Args:
            x: 4-D input tensor.

        Returns:
            Tensor with the same shape as ``x``.
        """
        _, c, h, w = x.size()
        c2, h2, w2 = c // 4, h // 4, w // 4
        # Leading axis lets the 3-D pooling span channels as well as space.
        y = x.unsqueeze(0)

        def attention(pool_fn):
            # One pool -> conv -> resize -> sigmoid branch.
            pooled = pool_fn(y, (c2, h2, w2)).squeeze(0)
            pooled = self.conv2(pooled).unsqueeze(0)
            # F.upsample is deprecated; F.interpolate keeps the same default
            # (nearest) mode.
            resized = F.interpolate(pooled, size=(c, h, w)).squeeze(0)
            return F.sigmoid(resized)

        y1 = attention(F.adaptive_avg_pool3d)
        y2 = attention(F.adaptive_max_pool3d)

        return (y1 + y2) * x
示例#22
0
 def test_adaptive_max_pool3d(self):
     """Run 3-D adaptive max pooling with index output on a random CUDA tensor."""
     sample = torch.randn(1, 16, 16, 32, 32, device='cuda', dtype=self.dtype)
     # Only checks that the call runs; the result is not inspected.
     F.adaptive_max_pool3d(sample, output_size=5, return_indices=True)
示例#23
0
def conv_soft_argmax3d(input: torch.Tensor,
                       kernel_size: Tuple[int, int, int] = (3, 3, 3),
                       stride: Tuple[int, int, int] = (1, 1, 1),
                       padding: Tuple[int, int, int] = (1, 1, 1),
                       temperature: Union[torch.Tensor, float] = torch.tensor(1.0),
                       normalized_coordinates: bool = False,
                       eps: float = 1e-8,
                       output_value: bool = True,
                       strict_maxima_bonus: float = 0.0) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    r"""Function that computes the convolutional spatial Soft-Argmax 3D over the windows
    of a given input heatmap. Function has two outputs: argmax coordinates and the softmaxpooled heatmap values
    themselves.
    On each window, the function computed is:

    .. math::
             ijk(X) = \frac{\sum{(i,j,k)} * exp(x / T)  \in X} {\sum{exp(x / T)  \in X}}

    .. math::
             val(X) = \frac{\sum{x * exp(x / T)  \in X}} {\sum{exp(x / T)  \in X}}

    where T is temperature.

    Args:
        input (torch.Tensor): the input heatmap, a 5-D tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`.
        kernel_size (Tuple[int,int,int]):  size of the window
        stride (Tuple[int,int,int]): stride of the window.
        padding (Tuple[int,int,int]): input zero padding
        temperature (torch.Tensor): factor to apply to input. Default is 1.
        normalized_coordinates (bool): whether to return the coordinates normalized in the range of [-1, 1]. Otherwise,
                                       it will return the coordinates in the range of the input shape. Default is False.
        eps (float): small value to avoid zero division. Default is 1e-8.
        output_value (bool): if True, both the coordinates and val are returned; if False, only the coordinates
        strict_maxima_bonus (float): pixels, which are strict maxima will score (1 + strict_maxima_bonus) * value.
                                     This is needed for mimic behavior of strict NMS in classic local features

    Raises:
        TypeError: if ``input`` is not a tensor.
        ValueError: if ``input`` is not 5-D, or if ``temperature`` is not positive.

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C, 3, D_{out}, H_{out}, W_{out})`, :math:`(N, C, D_{out}, H_{out}, W_{out})`, where

         .. math::
             D_{out} = \left\lfloor\frac{D_{in}  + 2 \times \text{padding}[0] -
             (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor

         .. math::
             H_{out} = \left\lfloor\frac{H_{in}  + 2 \times \text{padding}[1] -
             (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor

         .. math::
             W_{out} = \left\lfloor\frac{W_{in}  + 2 \times \text{padding}[2] -
             (\text{kernel\_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor

    Examples:
        >>> input = torch.randn(20, 16, 3, 50, 32)
        >>> nms_coords, nms_val = conv_soft_argmax3d(input, (3, 3, 3), (1, 2, 2), (0, 1, 1))
    """
    if not torch.is_tensor(input):
        raise TypeError("Input type is not a torch.Tensor. Got {}"
                        .format(type(input)))

    if not len(input.shape) == 5:
        raise ValueError("Invalid input shape, we expect BxCxDxHxW. Got: {}"
                         .format(input.shape))

    if temperature <= 0:
        raise ValueError("Temperature should be positive float or tensor. Got: {}"
                         .format(temperature))

    b, c, d, h, w = input.shape
    # NOTE(review): the three kernel extents are named kx, ky, kz in the order
    # given by kernel_size; confirm the helpers below expect that order.
    kx, ky, kz = kernel_size
    device: torch.device = input.device
    dtype: torch.dtype = input.dtype
    # Fold channels into the batch so every map is handled as a single-channel volume.
    input = input.view(b * c, 1, d, h, w)

    center_kernel: torch.Tensor = _get_center_kernel3d(kx, ky, kz, device).to(dtype)
    window_kernel: torch.Tensor = _get_window_grid_kernel3d(kx, ky, kz, device).to(dtype)

    # applies exponential normalization trick
    # https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/
    # https://github.com/pytorch/pytorch/blob/bcb0bb7e0e03b386ad837015faba6b4b16e3bfb9/aten/src/ATen/native/SoftMax.cpp#L44
    # The maximum is taken over each whole (d, h, w) volume, not per window.
    x_max = F.adaptive_max_pool3d(input, (1, 1, 1))

    # max is detached to prevent undesired backprop loops in the graph
    x_exp = ((input - x_max.detach()) / temperature).exp()

    # avg_pool3d returns window means; multiplying by the window size turns them into window sums.
    pool_coef: float = float(kx * ky * kz)

    # softmax denominator
    den = pool_coef * F.avg_pool3d(x_exp.view_as(input),
                                   kernel_size,
                                   stride=stride,
                                   padding=padding) + eps

    # We need to output also coordinates
    # Pooled window center coordinates
    grid_global: torch.Tensor = create_meshgrid3d(
        d, h, w, False, device=device).to(dtype).permute(0, 4, 1, 2, 3)

    grid_global_pooled = F.conv3d(grid_global,
                                  center_kernel,
                                  stride=stride,
                                  padding=padding)

    # Coordinates of maxima residual to window center
    # prepare kernel
    coords_max: torch.Tensor = F.conv3d(x_exp,
                                        window_kernel,
                                        stride=stride,
                                        padding=padding)

    coords_max = coords_max / den.expand_as(coords_max)
    coords_max = coords_max + grid_global_pooled.expand_as(coords_max)
    # [:,:, 0, ...] is depth (scale)
    # [:,:, 1, ...] is x
    # [:,:, 2, ...] is y

    if normalized_coordinates:
        coords_max = normalize_pixel_coordinates3d(coords_max.permute(0, 2, 3, 4, 1), d, h, w)
        coords_max = coords_max.permute(0, 4, 1, 2, 3)

    # Back B*C -> (b, c)
    coords_max = coords_max.view(b, c, 3, coords_max.size(2), coords_max.size(3), coords_max.size(4))

    if not output_value:
        return coords_max
    # Softmax-weighted mean of the input values inside each window.
    x_softmaxpool = pool_coef * F.avg_pool3d(x_exp.view(input.size()) * input,
                                             kernel_size,
                                             stride=stride,
                                             padding=padding) / den
    if strict_maxima_bonus > 0:
        in_levels: int = input.size(2)
        out_levels: int = x_softmaxpool.size(2)
        skip_levels: int = (in_levels - out_levels) // 2
        # A kernel-size-1 average pool only applies the stride to the NMS output.
        strict_maxima: torch.Tensor = F.avg_pool3d(kornia.feature.nms3d(input, kernel_size), 1, stride, 0)
        strict_maxima = strict_maxima[:, :, skip_levels:out_levels - skip_levels]
        x_softmaxpool *= 1.0 + strict_maxima_bonus * strict_maxima
    x_softmaxpool = x_softmaxpool.view(b,
                                       c,
                                       x_softmaxpool.size(2),
                                       x_softmaxpool.size(3),
                                       x_softmaxpool.size(4))
    return coords_max, x_softmaxpool
示例#24
0
 def forward(self, x):
     """Max-pool ``x`` adaptively to ``(7, 6, 5)`` and then down to a single cell."""
     for target in ((7, 6, 5), 1):
         x = F.adaptive_max_pool3d(x, output_size=target)
     return x
示例#25
0
 def forward(self, x):
     """Concatenate the global average pool and global max pool of ``x`` along dim 1."""
     mean_feat = F.adaptive_avg_pool3d(x, 1)
     peak_feat = F.adaptive_max_pool3d(x, 1)
     return torch.cat((mean_feat, peak_feat), dim=1)
示例#26
0
 def forward(self, input: Tensor) -> Tensor:
     """Pass ``input`` through ``self.quant_handle`` and adaptively max-pool it in 3-D.

     ``self.output_size`` and ``self.return_indices`` are forwarded to
     ``F.adaptive_max_pool3d`` unchanged.
     """
     quantized = self.quant_handle(input)
     pooled = F.adaptive_max_pool3d(quantized, self.output_size,
                                    self.return_indices)
     return pooled
    def forward(self, x):
        """Multiply ``x`` by predicted left/right matrices and, optionally, a temporal one.

        Outline of what the code does:

        1. ``conv1`` produces a feature volume that is max-pooled into a
           row profile ``rp`` (last axis collapsed to 1) and a column profile
           ``cp`` (second-to-last axis collapsed to 1).
        2. ``conv_p`` / ``conv_q`` turn these profiles into ``self.k`` square
           matrices per sample. When ``matrix_size`` differs from ``self.s``
           the ``s x s`` outputs are accumulated along the diagonal of a
           ``matrix_size x matrix_size`` buffer and averaged where they overlap.
        3. The matrices are softmax-normalised, expanded so that each of the
           ``k`` matrices serves ``x.size(1) // k`` channels, resized with
           ``self.resize_mat`` and applied as ``p @ x @ q``.
        4. When ``self.tk > 0`` an analogous matrix built from an average-pooled
           temporal profile is applied along the temporal axis.
        5. ``conv2`` produces the output.

        NOTE(review): axis meanings (which of the last two axes is height or
        width) are inferred from the unpacked names ``t, h, w`` — confirm
        against the caller.
        """
        # How many times the base size s fits in x.size(3), and the resulting matrix edge.
        scale_times = x.size(3) // self.s
        matrix_size = x.size(3) // scale_times
        out = self.conv1(x)
        n, _, t, h, w = out.size()
        # Row / column profiles: keep t, collapse one of the two trailing axes.
        rp = F.adaptive_max_pool3d(out, (t, matrix_size, 1))
        cp = F.adaptive_max_pool3d(out, (t, 1, matrix_size))
        if matrix_size == self.s:
            p = self.conv_p(rp).view(n, self.k, self.s, self.s, t)
            q = self.conv_q(cp).view(n, self.k, self.s, self.s, t)
        else:
            # Used as the divisor wherever no s x s block was accumulated.
            ones = x.new_ones((1, 1, matrix_size, matrix_size, 1),
                              requires_grad=False)
            p = x.new_zeros(n, self.k, matrix_size, matrix_size, t)
            p_out = self.conv_p(rp).view(n, self.k, self.s, self.s, t, -1)
            count = x.new_zeros((1, 1, matrix_size, matrix_size, 1),
                                requires_grad=False)
            # Place each s x s block at diagonal offset i and count the overlaps.
            for i in range(p_out.size(5)):
                p[:, :, i:self.s + i, i:self.s + i, :] += p_out[:, :, :, :, :,
                                                                i]
                count[:, :, i:self.s + i, i:self.s + i, :] += 1
            count = torch.where(count > 0, count, ones)
            p /= count

            q = x.new_zeros(n, self.k, matrix_size, matrix_size, t)
            # NOTE(review): the trailing dimension is hard-coded to 2 here while
            # p_out above infers it with -1 — confirm whether this is intended.
            q_out = self.conv_q(cp).view(n, self.k, self.s, self.s, t, 2)
            count = x.new_zeros((1, 1, matrix_size, matrix_size, 1),
                                requires_grad=False)
            for i in range(q_out.size(5)):
                q[:, :, i:self.s + i, i:self.s + i, :] += q_out[:, :, :, :, :,
                                                                i]
                count[:, :, i:self.s + i, i:self.s + i, :] += 1
            count = torch.where(count > 0, count, ones)
            q /= count
        # p is normalised along dim 3 and q along dim 2.
        p = F.softmax(p, dim=3)
        q = F.softmax(q, dim=2)
        # Share each of the k matrices across x.size(1) // k channels, then move t ahead of the matrix axes.
        p = p.view(n, self.k, 1, matrix_size, matrix_size,
                   t).expand(n, self.k,
                             x.size(1) // self.k, matrix_size, matrix_size,
                             t).contiguous()
        p = p.view(n, x.size(1), matrix_size, matrix_size,
                   t).permute(0, 1, 4, 2, 3).contiguous()
        q = q.view(n, self.k, 1, matrix_size, matrix_size,
                   t).expand(n, self.k,
                             x.size(1) // self.k, matrix_size, matrix_size,
                             t).contiguous()
        q = q.view(n, x.size(1), matrix_size, matrix_size,
                   t).permute(0, 1, 4, 2, 3).contiguous()
        p = self.resize_mat(p, h // matrix_size)
        q = self.resize_mat(q, w // matrix_size)
        # Left- and right-multiply the input by the predicted matrices.
        y = p.matmul(x)
        y = y.matmul(q)
        if self.tk > 0:
            # Temporal profile: average-pool to ts steps and collapse both trailing axes.
            tp = F.adaptive_avg_pool3d(out, (self.ts, 1, 1))
            tm = self.conv_t(tp).view(n, self.tk, self.ts, self.ts)
            tm = F.softmax(tm, dim=3)
            tm = tm.view(n, self.tk, 1, 1, 1, self.ts,
                         self.ts).expand(n, self.tk,
                                         x.size(1) // self.tk, h, w, self.ts,
                                         self.ts).contiguous()
            tm = tm.view(n, x.size(1), h * w, self.ts, self.ts)
            tm = self.resize_mat(tm, t // self.ts)
            tm = tm.view(n, x.size(1), h, w, t, t)
            # Move t last so each position holds a length-t column vector for the matmul.
            y = y.permute(0, 1, 3, 4,
                          2).contiguous().view(n, x.size(1), h, w, t, 1)
            y = tm.matmul(y).squeeze(-1).permute(0, 1, 4, 2, 3).contiguous()

        y = self.conv2(y)

        return y
示例#28
0
 def pool(self, input):
     """Reduce ``input`` to a single cell per channel with 3-D adaptive max pooling."""
     global_max = F.adaptive_max_pool3d(input, 1)
     return global_max
示例#29
0
 def forward(self, x):
     """Run ``self.layers`` in order, pool globally and return log-softmax scores.

     The pooled features are flattened to ``(batch, -1)`` and passed through
     ``self.out``; the log-softmax is taken over the last dimension.
     """
     feats = x
     for layer in self.layers:
         feats = layer(feats)
     pooled = F.adaptive_max_pool3d(feats, 1)
     flat = pooled.view(pooled.size(0), -1)
     logits = self.out(flat)
     return F.log_softmax(logits, dim=-1)
示例#30
0
文件: model.py 项目: hyzcn/MotionNet
    def forward(self, x):
        """Encoder/decoder pass over a 5-D input whose second axis is a frame sequence.

        The sequence axis is folded into the batch for all 2-D layers. Encoder
        blocks 1 and 2 additionally unfold it again to apply ``conv3d_1`` /
        ``conv3d_2``. On the decoder side each stage upsamples the previous
        result by 2, concatenates it on dim 1 with the matching encoder
        feature (max-pooled over the sequence axis where that axis still
        exists) and applies two conv/bn/ReLU layers.

        Args:
            x: 5-D input tensor unpacked as ``(batch, seq, z, h, w)``.

        Returns:
            The output of the last decoder stage.
        """
        batch, _, _, _, _ = x.size()

        def conv_bn_relu(conv, bn, tensor):
            # conv -> batch norm -> ReLU
            return F.relu(bn(conv(tensor)))

        def unfold_seq(tensor):
            # (batch * seq, c, h, w) -> (batch, seq, c, h, w)
            return tensor.view(batch, -1, tensor.size(1), tensor.size(2),
                               tensor.size(3))

        def fold_seq(tensor):
            # (batch, seq, c, h, w) -> (batch * seq, c, h, w)
            return tensor.view(-1, tensor.size(2), tensor.size(3),
                               tensor.size(4)).contiguous()

        def max_over_seq(tensor):
            # Max-pool the sequence axis down to length 1, keeping h and w.
            clip = unfold_seq(tensor).permute(0, 2, 1, 3, 4).contiguous()
            clip = F.adaptive_max_pool3d(clip, (1, None, None))
            clip = clip.permute(0, 2, 1, 3, 4).contiguous()
            return fold_seq(clip)

        def upsample_and_merge(coarse, skip):
            # Double the spatial size of `coarse` and stack it with `skip`.
            return torch.cat((F.interpolate(coarse, scale_factor=(2, 2)), skip),
                             dim=1)

        x = x.view(-1, x.size(-3), x.size(-2), x.size(-1))
        x = conv_bn_relu(self.conv_pre_1, self.bn_pre_1, x)
        x = conv_bn_relu(self.conv_pre_2, self.bn_pre_2, x)

        # -------------------------------- Encoder Path --------------------------------
        # -- STC block 1
        x_1 = conv_bn_relu(self.conv1_1, self.bn1_1, x)
        x_1 = conv_bn_relu(self.conv1_2, self.bn1_2, x_1)
        x_1 = self.conv3d_1(unfold_seq(x_1).contiguous())
        x_1 = fold_seq(x_1)  # (batch * seq, c, h, w)

        # -- STC block 2
        x_2 = conv_bn_relu(self.conv2_1, self.bn2_1, x_1)
        x_2 = conv_bn_relu(self.conv2_2, self.bn2_2, x_2)
        x_2 = self.conv3d_2(unfold_seq(x_2).contiguous())
        x_2 = fold_seq(x_2)  # (batch * seq, c, h, w), seq = 1

        # -- STC block 3
        x_3 = conv_bn_relu(self.conv3_1, self.bn3_1, x_2)
        x_3 = conv_bn_relu(self.conv3_2, self.bn3_2, x_3)

        # -- STC block 4
        x_4 = conv_bn_relu(self.conv4_1, self.bn4_1, x_3)
        x_4 = conv_bn_relu(self.conv4_2, self.bn4_2, x_4)

        # -------------------------------- Decoder Path --------------------------------
        x_5 = conv_bn_relu(self.conv5_1, self.bn5_1,
                           upsample_and_merge(x_4, x_3))
        x_5 = conv_bn_relu(self.conv5_2, self.bn5_2, x_5)

        x_2 = max_over_seq(x_2)
        x_6 = conv_bn_relu(self.conv6_1, self.bn6_1,
                           upsample_and_merge(x_5, x_2))
        x_6 = conv_bn_relu(self.conv6_2, self.bn6_2, x_6)

        x_1 = max_over_seq(x_1)
        x_7 = conv_bn_relu(self.conv7_1, self.bn7_1,
                           upsample_and_merge(x_6, x_1))
        x_7 = conv_bn_relu(self.conv7_2, self.bn7_2, x_7)

        x = max_over_seq(x)
        x_8 = conv_bn_relu(self.conv8_1, self.bn8_1,
                           upsample_and_merge(x_7, x))
        res_x = conv_bn_relu(self.conv8_2, self.bn8_2, x_8)

        return res_x