Example #1
    def forward(self, x):
        res = self.feature_extractor(x)
        res = F.adaptive_avg_pool2d(res, (1, 1))
        res = res.view(-1, 32)
        res = self.fc_final(res)

        return res
Example #2
 def forward(self, x):
     out = F.relu(self.bn1(self.conv1(x)))
     out = self.layer1(out)
     out = self.layer2(out)
     out = self.layer3(out)
     out = F.adaptive_avg_pool2d(out, 1)
     out = out.view(out.size(0), -1)
     return F.log_softmax(self.linear(out), dim=1)
Example #3
 def forward(self, x):
     out = self.conv1(x)
     out = self.block1(out)
     out = self.block2(out)
     out = self.block3(out)
     out = self.relu(self.bn1(out))
     out = F.adaptive_avg_pool2d(out, 1)
     out = out.view(-1, self.nChannels)
     return self.fc(out)
Example #4
def get_activations(images, model, batch_size=64, dims=2048,
                    cuda=False, verbose=False):
    """Calculates the activations of the pool_3 layer for all images.

    Params:
    -- images      : Numpy array of dimension (n_images, 3, hi, wi). The values
                     must lie between 0 and 1.
    -- model       : Instance of inception model
    -- batch_size  : the images numpy array is split into batches with
                     batch size batch_size. A reasonable batch size depends
                     on the hardware.
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU
    -- verbose     : If set to True and parameter out_step is given, the number
                     of calculated batches is reported.
    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.
    """
    model.eval()

    d0 = images.shape[0]
    if batch_size > d0:
        print(('Warning: batch size is bigger than the data size. '
               'Setting batch size to data size'))
        batch_size = d0

    n_batches = d0 // batch_size
    n_used_imgs = n_batches * batch_size

    pred_arr = np.empty((n_used_imgs, dims))
    for i in range(n_batches):
        if verbose:
            print('\rPropagating batch %d/%d' % (i + 1, n_batches),
                  end='', flush=True)
        start = i * batch_size
        end = start + batch_size

        batch = torch.from_numpy(images[start:end]).type(torch.FloatTensor)
        if cuda:
            batch = batch.cuda()

        # Inference only: torch.no_grad() disables autograd
        # (the old Variable(volatile=True) idiom is deprecated).
        with torch.no_grad():
            pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.shape[2] != 1 or pred.shape[3] != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred_arr[start:end] = pred.cpu().data.numpy().reshape(batch_size, -1)

    if verbose:
        print(' done')

    return pred_arr
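A minimal usage sketch of the function above, with a stand-in feature extractor (DummyFeatureModel below is an illustration, not the Inception wrapper used for FID; image values follow the [0, 1] convention from the docstring):

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class DummyFeatureModel(nn.Module):
    # Stand-in for the Inception wrapper: forward returns a list whose first
    # element is an (N, dims, 1, 1) feature map, which is what get_activations expects.
    def __init__(self, dims=2048):
        super().__init__()
        self.conv = nn.Conv2d(3, dims, kernel_size=1)

    def forward(self, x):
        return [F.adaptive_avg_pool2d(self.conv(x), (1, 1))]

images = np.random.rand(8, 3, 64, 64).astype(np.float32)  # values in [0, 1]
acts = get_activations(images, DummyFeatureModel(), batch_size=4, dims=2048)
print(acts.shape)  # (8, 2048)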
Example #5
 def _pyramid_pooling(self, input_x, output_sizes):
     pyramid_level_tensors = []
     for tsize in output_sizes:
         if self.pool_type == 'max_pool':
             pyramid_level_tensor = F.adaptive_max_pool2d(input_x, tsize)
         if self.pool_type == 'avg_pool':
             pyramid_level_tensor = F.adaptive_avg_pool2d(input_x, tsize)
         pyramid_level_tensor = pyramid_level_tensor.view(input_x.size(0), -1)
         pyramid_level_tensors.append(pyramid_level_tensor)
     return torch.cat(pyramid_level_tensors, dim=1)
Example #6
 def forward(self, x):
     x = self.conv_layers(x)
     
     mean_pool = F.adaptive_avg_pool2d(x, 1).squeeze()
     max_pool = F.adaptive_max_pool2d(x, 1).squeeze()
     x = torch.cat([mean_pool, max_pool], dim=1)
     
     x = self.fc(x)
     
     return x
Example #7
    def forward(self, img, att_size=14):
        x = img.unsqueeze(0)

        x = self.resnet.conv1(x)
        x = self.resnet.bn1(x)
        x = self.resnet.relu(x)
        x = self.resnet.maxpool(x)

        x = self.resnet.layer1(x)
        x = self.resnet.layer2(x)
        x = self.resnet.layer3(x)
        x = self.resnet.layer4(x)

        fc = x.mean(3).mean(2).squeeze()
        att = F.adaptive_avg_pool2d(x,[att_size,att_size]).squeeze().permute(1, 2, 0)
        
        return fc, att
Example #8
    def forward(self, img, att_size=14):
        x = img  # (26, 3, w, d)

        x = self.resnet.conv1(x)
        x = self.resnet.bn1(x)
        x = self.resnet.relu(x)
        x = self.resnet.maxpool(x)

        x = self.resnet.layer1(x)
        x = self.resnet.layer2(x)
        x = self.resnet.layer3(x)
        x = self.resnet.layer4(x)

        fc = x.mean(3).mean(2).squeeze()
        if att_size > 0:
            att = F.adaptive_avg_pool2d(x,[att_size,att_size]).squeeze().permute(1, 2, 0)
            return fc, att
        else:
            return fc.unsqueeze(0), fc.mean(0)  # (1, 26, 2048), (2048,)
Example #9
 def forward(self, x):
     features = self.features(x)
     out = F.leaky_relu(features, 0.02, inplace=True)
     out = F.adaptive_avg_pool2d(out, (1, 1)).view(features.size(0), -1)
     out = self.classifier(out)
     return out
Example #10
 def forward(self, x):
     bs, _, _, _ = x.shape
     x = self.model.features(x)
     x = F.adaptive_avg_pool2d(x, 1).reshape(bs, -1)
     l0 = self.l0(x)
     return l0
Example #11
    def forward(self, x: Tensor, y: Tensor) -> Tensor:
        # !!! MODIFIED BY USER !!!
        #       features = self.features(x)

        # Initial layers
        x = self.features.conv0(x)
        x = self.features.norm0(x)
        x = self.features.relu0(x)
        x = self.features.pool0(x)

        # denseblock1
        x = self.features.denseblock1(x)

        # transition1
        x = self.features.transition1(x)
        l1 = x

        # denseblock2
        x = self.features.denseblock2(x)

        # transition2
        x = self.features.transition2(x)
        l2 = x

        # denseblock3
        x = self.features.denseblock3(x)

        # transition3
        x = self.features.transition3(x)
        l3 = x

        # denseblock4
        x = self.features.denseblock4(x)

        # norm5
        features = self.features.norm5(x)

        out = F.relu(features, inplace=True)
        out = F.adaptive_avg_pool2d(out, (1, 1))
        g = out

        l1 = self.att_proj1(l1)
        l2 = self.att_proj2(l2)
        l3 = self.att_proj3(l3)

        ga1 = self.calculate_ga(l1, g, self.att_est1)
        ga2 = self.calculate_ga(l2, g, self.att_est2)
        ga3 = self.calculate_ga(l3, g, self.att_est3)

        # Classifier layer has been replaced
        #out = torch.flatten(out, 1)
        #out = self.classifier(out)

        # Non-image model
        non_image_out = F.relu(self.non_image_fc(y))

        #print(ga1.shape, ga2.shape, ga3.shape, g.shape)
        g_all = torch.cat((ga1, ga2, ga3, non_image_out), dim=1)

        out = self.classifier(g_all)

        return out
Example #12
 def _scale(self, input: Tensor, inplace: bool) -> Tensor:
     scale = F.adaptive_avg_pool2d(input, 1)
     scale = self.fc1(scale)
     scale = self.relu(scale)
     scale = self.fc2(scale)
     return F.hardsigmoid(scale, inplace=inplace)
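This _scale helper follows the squeeze-and-excitation pattern: global average pooling produces one descriptor per channel, the fc1/fc2 1x1 convolutions form a bottleneck, and hardsigmoid maps the result into (0, 1). A minimal sketch of a full block that applies the scale (the class below is illustrative; the constructor and channel sizes are assumptions):

import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor

class SqueezeExcitation(nn.Module):
    # Illustrative SE block; squeeze_channels is typically channels // 4.
    def __init__(self, channels: int, squeeze_channels: int):
        super().__init__()
        self.fc1 = nn.Conv2d(channels, squeeze_channels, kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(squeeze_channels, channels, kernel_size=1)

    def _scale(self, input: Tensor, inplace: bool) -> Tensor:
        scale = F.adaptive_avg_pool2d(input, 1)  # (N, C, 1, 1) channel descriptor
        scale = self.fc1(scale)
        scale = self.relu(scale)
        scale = self.fc2(scale)
        return F.hardsigmoid(scale, inplace=inplace)

    def forward(self, input: Tensor) -> Tensor:
        # Re-weight each channel of the input by its learned scale.
        return input * self._scale(input, inplace=False)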
Example #13
 def forward(self, x):
     if self.transform_input:
         x_ch0 = torch.unsqueeze(x[:, 0],
                                 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
         x_ch1 = torch.unsqueeze(x[:, 1],
                                 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
         x_ch2 = torch.unsqueeze(x[:, 2],
                                 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
         x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
     # 299 x 299 x 3
     x = self.Conv2d_1a_3x3(x)
     # 149 x 149 x 32
     x = self.Conv2d_2a_3x3(x)
     # 147 x 147 x 32
     x = self.Conv2d_2b_3x3(x)
     # 147 x 147 x 64
     x = F.max_pool2d(x, kernel_size=3, stride=2)
     # 73 x 73 x 64
     x = self.Conv2d_3b_1x1(x)
     # 73 x 73 x 80
     x = self.Conv2d_4a_3x3(x)
     # 71 x 71 x 192
     x = F.max_pool2d(x, kernel_size=3, stride=2)
     # 35 x 35 x 192
     x = self.Mixed_5b(x)
     # 35 x 35 x 256
     x = self.Mixed_5c(x)
     # 35 x 35 x 288
     x = self.Mixed_5d(x)
     # 35 x 35 x 288
     x = self.Mixed_6a(x)
     # 17 x 17 x 768
     x = self.Mixed_6b(x)
     # 17 x 17 x 768
     x = self.Mixed_6c(x)
     # 17 x 17 x 768
     x = self.Mixed_6d(x)
     # 17 x 17 x 768
     x = self.Mixed_6e(x)
     # 17 x 17 x 768
     if self.training and self.aux_logits:
         aux = self.AuxLogits(x)
     # 17 x 17 x 768
     x = self.Mixed_7a(x)
     # 8 x 8 x 1280
     x = self.Mixed_7b(x)
     # 8 x 8 x 2048
     x = self.Mixed_7c(x)
     # 8 x 8 x 2048
     # x = F.avg_pool2d(x, kernel_size=8)
     x = F.adaptive_avg_pool2d(x, output_size=(1, 1))
     # 1 x 1 x 2048
     x = F.dropout(x, training=self.training)
     # 1 x 1 x 2048
     x = x.view(x.size(0), -1)
     # 2048
     x = self.fc(x)
     # 1000 (num_classes)
     if self.training and self.aux_logits:
         return x, aux
     return x
Example #14
    def build_module(self):
        """
        Builds network whilst automatically inferring shapes of layers.
        """
        print("Building basic block of ConvolutionalNetwork using input shape",
              self.input_shape)
        x = torch.zeros(
            (self.input_shape
             ))  # create dummy inputs to be used to infer shapes of layers

        out = x
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
        for i in range(self.num_layers):  # for number of layers times
            self.layer_dict['conv_{}'.format(i)] = nn.Conv2d(
                in_channels=out.shape[1],
                # add a conv layer in the module dict
                kernel_size=3,
                out_channels=self.num_filters,
                padding=1,
                bias=self.use_bias)

            out = self.layer_dict['conv_{}'.format(i)](
                out)  # use layer on inputs to get an output
            out = F.relu(out)  # apply relu
            print(out.shape)
            if self.dim_reduction_type == 'strided_convolution':  # if dim reduction is strided conv, then add a strided conv
                self.layer_dict['dim_reduction_strided_conv_{}'.format(
                    i)] = nn.Conv2d(in_channels=out.shape[1],
                                    kernel_size=3,
                                    out_channels=out.shape[1],
                                    padding=1,
                                    bias=self.use_bias,
                                    stride=2,
                                    dilation=1)

                out = self.layer_dict['dim_reduction_strided_conv_{}'.format(
                    i)](out)  # use strided conv to get an output
                out = F.relu(out)  # apply relu to the output
            elif self.dim_reduction_type == 'dilated_convolution':  # if dim reduction is dilated conv, then add a dilated conv, using an arbitrary dilation rate of i + 2 (so it gets smaller as we go, you can choose other dilation rates should you wish to do it.)
                self.layer_dict['dim_reduction_dilated_conv_{}'.format(
                    i)] = nn.Conv2d(in_channels=out.shape[1],
                                    kernel_size=3,
                                    out_channels=out.shape[1],
                                    padding=1,
                                    bias=self.use_bias,
                                    stride=1,
                                    dilation=i + 2)
                out = self.layer_dict['dim_reduction_dilated_conv_{}'.format(
                    i)](out)  # run dilated conv on input to get output
                out = F.relu(out)  # apply relu on output

            elif self.dim_reduction_type == 'max_pooling':
                self.layer_dict['dim_reduction_max_pool_{}'.format(
                    i)] = nn.MaxPool2d(2, padding=1)
                out = self.layer_dict['dim_reduction_max_pool_{}'.format(i)](
                    out)

            elif self.dim_reduction_type == 'avg_pooling':
                self.layer_dict['dim_reduction_avg_pool_{}'.format(
                    i)] = nn.AvgPool2d(2, padding=1)
                out = self.layer_dict['dim_reduction_avg_pool_{}'.format(i)](
                    out)

            print(out.shape)
        if out.shape[-1] != 2:
            out = F.adaptive_avg_pool2d(
                out, 2
            )  # apply adaptive pooling to make sure output of conv layers is always (2, 2) spacially (helps with comparisons).
        print('shape before final linear layer', out.shape)
        out = out.view(out.shape[0], -1)
        self.logit_linear_layer = nn.Linear(
            in_features=out.shape[1],  # add a linear layer
            out_features=self.num_output_classes,
            bias=self.use_bias)
        out = self.logit_linear_layer(
            out)  # apply linear layer on flattened inputs
        print("Block is built, output volume is", out.shape)
        return out
Example #15
 def forward(self, x):
     features = self.backbone.features(x)
     features = F.adaptive_avg_pool2d(features, output_size=(1, 1)).view((features.size()[0], -1))
     logits = self.head_linear(features)
     return logits
Example #16
    def forward(self, inputs):
        assert len(inputs) == len(self.in_channels)

        laterals = [
            lateral_conv(inputs[i + self.start_level])
            for i, lateral_conv in enumerate(self.lateral_convs)
        ]

        h, w = inputs[-1].size(2), inputs[-1].size(3)
        #size = [1,2,3]

        AdapPool_Features = [
            self.high_lateral_conv[j](F.adaptive_avg_pool2d(
                inputs[-1],
                output_size=(max(1,
                                 int(h * self.adaptive_pool_output_ratio[j])),
                             max(1, int(w *
                                        self.adaptive_pool_output_ratio[j])))))
            for j in range(len(self.adaptive_pool_output_ratio))
        ]
        AdapPool_Features = [
            F.upsample(feat, size=(h, w), mode='bilinear', align_corners=True)
            for feat in AdapPool_Features
        ]

        Concat_AdapPool_Features = torch.cat(AdapPool_Features, dim=1)
        fusion_weights = self.high_lateral_conv_attention(
            Concat_AdapPool_Features)
        fusion_weights = F.sigmoid(fusion_weights)
        high_pool_fusion = 0
        for i in range(3):
            high_pool_fusion += torch.unsqueeze(fusion_weights[:, i, :, :],
                                                dim=1) * AdapPool_Features[i]
        raw_laternals = [laterals[i].clone() for i in range(len(laterals))]
        # build top-down path

        #high_pool_fusion += global_pool
        laterals[-1] += high_pool_fusion

        used_backbone_levels = len(laterals)
        for i in range(used_backbone_levels - 1, 0, -1):
            laterals[i - 1] += F.interpolate(laterals[i],
                                             scale_factor=2,
                                             mode='nearest')
        # build outputs
        # part 1: from original levels
        outs = [
            self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
        ]
        # part 2: add extra levels
        if self.num_outs > len(outs):
            # use max pool to get more levels on top of outputs
            # (e.g., Faster R-CNN, Mask R-CNN)
            if not self.add_extra_convs:
                for i in range(self.num_outs - used_backbone_levels):
                    outs.append(F.max_pool2d(outs[-1], 1, stride=2))
            # add conv layers on top of original feature maps (RetinaNet)
            else:
                if self.extra_convs_on_inputs:
                    orig = inputs[self.backbone_end_level - 1]
                    outs.append(self.fpn_convs[used_backbone_levels](orig))
                else:
                    outs.append(self.fpn_convs[used_backbone_levels](outs[-1]))

                pool_noupsample_fusion = F.adaptive_avg_pool2d(
                    high_pool_fusion, (1, 1))
                outs[-1] += pool_noupsample_fusion

                for i in range(used_backbone_levels + 1, self.num_outs):
                    if self.relu_before_extra_convs:
                        outs.append(self.fpn_convs[i](F.relu(outs[-1])))
                    else:
                        outs.append(self.fpn_convs[i](outs[-1]))
        if self.train_with_auxiliary:
            return tuple(outs), tuple(raw_laternals)
        else:
            return tuple(outs)
Example #17
def downsample2d_as(inputs, target_as):
    _, _, h, w = target_as.size()
    return tf.adaptive_avg_pool2d(inputs, [h, w])
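Here tf is presumably torch.nn.functional imported under that alias. A minimal usage sketch (tensor shapes are illustrative):

import torch
import torch.nn.functional as tf  # assumed alias matching the snippet above

def downsample2d_as(inputs, target_as):
    _, _, h, w = target_as.size()
    return tf.adaptive_avg_pool2d(inputs, [h, w])

x = torch.randn(2, 2, 64, 64)
ref = torch.randn(2, 32, 16, 16)
print(downsample2d_as(x, ref).shape)  # torch.Size([2, 2, 16, 16])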
Example #18
 def forward(self, ft, score, x=None):
     h = self.reduce(ft)
     hpool = F.adaptive_avg_pool2d(h, (1, 1)) if x is None else x
     h = adaptive_cat((h, score), dim=1, ref_tensor=0)
     h = self.transform(h)
     return h, hpool
Example #19
 def forward(self, x):
     return self.a * F.adaptive_max_pool2d(
         x, 1) + (1 - self.a) * F.adaptive_avg_pool2d(x, 1)
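This forward blends global max pooling and global average pooling with a weight self.a. A minimal module sketch (the constructor is an assumption; a could equally be a fixed hyperparameter rather than learnable):

import torch
import torch.nn as nn
import torch.nn.functional as F

class BlendPool2d(nn.Module):
    # Illustrative global pooling that mixes max and average pooling.
    def __init__(self, a: float = 0.5):
        super().__init__()
        self.a = nn.Parameter(torch.tensor(a))  # mixing weight

    def forward(self, x):
        return self.a * F.adaptive_max_pool2d(
            x, 1) + (1 - self.a) * F.adaptive_avg_pool2d(x, 1)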
Example #20
def global_pool(x1, x2):
    ctx = F.adaptive_avg_pool2d(x1, 1).sigmoid()
    x = upsample_add(x1, x2 * ctx)
    return x
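Here ctx is a per-channel sigmoid gate computed from the global average of x1, and upsample_add is defined elsewhere in the source. A common formulation of such a helper (an assumption, not necessarily the source's implementation) upsamples its first argument to the second's spatial size and adds them:

import torch.nn.functional as F

def upsample_add(x, y):
    # Hypothetical helper: bring x to y's spatial size, then add element-wise.
    return F.interpolate(x, size=y.shape[2:], mode='nearest') + y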
Example #21
 def forward(self, inputs):
     return F.adaptive_avg_pool2d(inputs, 1).view(inputs.size(0), -1)
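This module is a global-average-pool-and-flatten head. An equivalent composition with built-in layers (illustrative, not the source class):

import torch.nn as nn

# Global average pool to 1x1, then flatten to (N, C).
global_pool = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten())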
Example #22
 def forward(self, x):
     return F.adaptive_avg_pool2d(x, (1, 1))
Example #23
 def forward(self, x):
     return F.adaptive_avg_pool2d(
         x.clamp(min=self.eps).pow(self.p), self.size).pow(1. / self.p)
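This is generalized-mean (GeM) pooling: the feature map is clamped, raised to the power p, average-pooled, and the result raised to 1/p, so p = 1 recovers average pooling and large p approaches max pooling. A minimal sketch of such a module (the constructor and defaults are assumptions, not the source class):

import torch
import torch.nn as nn
import torch.nn.functional as F

class GeM(nn.Module):
    # Illustrative generalized-mean pooling layer with a learnable exponent.
    def __init__(self, p: float = 3.0, eps: float = 1e-6, size: int = 1):
        super().__init__()
        self.p = nn.Parameter(torch.ones(1) * p)
        self.eps = eps
        self.size = size

    def forward(self, x):
        return F.adaptive_avg_pool2d(
            x.clamp(min=self.eps).pow(self.p), self.size).pow(1. / self.p)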
Example #24
 def forward(self, feature):
     # class_feature = self.class_c2(self.class_c1(feature))  # (512, 10)
     class_feature = self.class_c1(feature)  # (512, 10)
     class_1x1 = F.adaptive_avg_pool2d(class_feature, output_size=(1, 1)).view((class_feature.size()[0], -1))
     class_logits = self.class_l1(class_1x1)
     return class_feature, class_logits
Example #25
 def forward(self, x):
     out = F.relu(x)
     out = F.adaptive_avg_pool2d(out, (1, 1))
     out = out.view(out.size(0), -1)
     out = self.fc(out)
     return out
Example #26
def get_activations(images,
                    model,
                    batch_size=64,
                    dims=2048,
                    cuda=False,
                    verbose=False):
    """Calculates the activations of the pool_3 layer for all images.
    Params:
    -- images      : Numpy array of dimension (n_images, 3, hi, wi). The values
                     must lie between 0 and 1.
    -- model       : Instance of inception model
    -- batch_size  : the images numpy array is split into batches with
                     batch size batch_size. A reasonable batch size depends
                     on the hardware.
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU
    -- verbose     : If set to True and parameter out_step is given, the number
                     of calculated batches is reported.
    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.
    """
    model.eval()

    d0 = images.shape[0]
    if batch_size > d0:
        print(('Warning: batch size is bigger than the data size. '
               'Setting batch size to data size'))
        batch_size = d0

    n_batches = d0 // batch_size
    n_used_imgs = n_batches * batch_size

    pred_arr = np.empty((n_used_imgs, dims))
    for i in range(n_batches):
        if verbose:
            print('\rPropagating batch %d/%d' % (i + 1, n_batches),
                  end='',
                  flush=True)
        start = i * batch_size
        end = start + batch_size

        batch = torch.from_numpy(images[start:end]).type(torch.FloatTensor)
        if cuda:
            batch = batch.cuda()

        # Inference only: torch.no_grad() disables autograd
        # (the old Variable(volatile=True) idiom is deprecated).
        with torch.no_grad():
            pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.shape[2] != 1 or pred.shape[3] != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred_arr[start:end] = pred.cpu().data.numpy().reshape(batch_size, -1)

    if verbose:
        print(' done')

    return pred_arr
Example #27
 def _compute_grad_weights(self, grads):
     grads = self._normalize(grads)
     return F.adaptive_avg_pool2d(grads, 1)
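Averaging the gradients over the spatial dimensions yields one importance weight per channel, as in Grad-CAM. A hedged sketch of how such weights are typically combined with the forward feature maps (the function name and shapes are assumptions):

import torch
import torch.nn.functional as F

def gradcam_map(fmaps: torch.Tensor, grads: torch.Tensor) -> torch.Tensor:
    # fmaps, grads: (N, C, H, W) activations and gradients of the target layer.
    weights = F.adaptive_avg_pool2d(grads, 1)         # (N, C, 1, 1) channel weights
    cam = (weights * fmaps).sum(dim=1, keepdim=True)  # weighted sum over channels
    return F.relu(cam)                                # keep positive evidence only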
Example #28
    def aug_test(self, img_feats, proposal_list, img_metas, rescale=False):
        """Test with augmentations.

        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """
        if self.with_semantic:
            semantic_feats = [
                self.semantic_head(feat)[1] for feat in img_feats
            ]
        else:
            semantic_feats = [None] * len(img_metas)

        rcnn_test_cfg = self.test_cfg
        aug_bboxes = []
        aug_scores = []
        for x, img_meta, semantic in zip(img_feats, img_metas, semantic_feats):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            flip_direction = img_meta[0]['flip_direction']

            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip, flip_direction)
            # "ms" in variable names means multi-stage
            ms_scores = []

            rois = bbox2roi([proposals])
            for i in range(self.num_stages):
                bbox_head = self.bbox_head[i]
                bbox_results = self._bbox_forward(i,
                                                  x,
                                                  rois,
                                                  semantic_feat=semantic)
                ms_scores.append(bbox_results['cls_score'])

                if i < self.num_stages - 1:
                    bbox_label = bbox_results['cls_score'].argmax(dim=1)
                    rois = bbox_head.regress_by_class(
                        rois, bbox_label, bbox_results['bbox_pred'],
                        img_meta[0])

            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = self.bbox_head[-1].get_bboxes(
                rois,
                cls_score,
                bbox_results['bbox_pred'],
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)

        if self.with_mask:
            if det_bboxes.shape[0] == 0:
                segm_result = [[]
                               for _ in range(self.mask_head[-1].num_classes -
                                              1)]
            else:
                aug_masks = []
                aug_img_metas = []
                for x, img_meta, semantic in zip(img_feats, img_metas,
                                                 semantic_feats):
                    img_shape = img_meta[0]['img_shape']
                    scale_factor = img_meta[0]['scale_factor']
                    flip = img_meta[0]['flip']
                    flip_direction = img_meta[0]['flip_direction']
                    _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                           scale_factor, flip, flip_direction)
                    mask_rois = bbox2roi([_bboxes])
                    mask_feats = self.mask_roi_extractor[-1](
                        x[:len(self.mask_roi_extractor[-1].featmap_strides)],
                        mask_rois)
                    if self.with_semantic:
                        semantic_feat = semantic
                        mask_semantic_feat = self.semantic_roi_extractor(
                            [semantic_feat], mask_rois)
                        if mask_semantic_feat.shape[-2:] != mask_feats.shape[
                                -2:]:
                            mask_semantic_feat = F.adaptive_avg_pool2d(
                                mask_semantic_feat, mask_feats.shape[-2:])
                        mask_feats += mask_semantic_feat
                    last_feat = None
                    for i in range(self.num_stages):
                        mask_head = self.mask_head[i]
                        if self.mask_info_flow:
                            mask_pred, last_feat = mask_head(
                                mask_feats, last_feat)
                        else:
                            mask_pred = mask_head(mask_feats)
                        aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                        aug_img_metas.append(img_meta)
                merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                               self.test_cfg)

                ori_shape = img_metas[0][0]['ori_shape']
                segm_result = self.mask_head[-1].get_seg_masks(
                    merged_masks,
                    det_bboxes,
                    det_labels,
                    rcnn_test_cfg,
                    ori_shape,
                    scale_factor=1.0,
                    rescale=False)
            return bbox_result, segm_result
        else:
            return bbox_result
Example #29
def get_activations(files,
                    model,
                    batch_size=1,
                    dims=2048,
                    cuda=False,
                    verbose=False):
    """Calculates the activations of the pool_3 layer for all images.
    Params:
    -- files       : List of image file paths
    -- model       : Instance of inception model
    -- batch_size  : Batch size of images for the model to process at once.
                     Make sure that the number of samples is a multiple of
                     the batch size, otherwise some samples are ignored. This
                     behavior is retained to match the original FID score
                     implementation.
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU
    -- verbose     : If set to True and parameter out_step is given, the number
                     of calculated batches is reported.
    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.
    """
    model.eval()

    if len(files) % batch_size != 0:
        print(('Warning: number of images is not a multiple of the '
               'batch size. Some samples are going to be ignored.'))
    if batch_size > len(files):
        print(('Warning: batch size is bigger than the data size. '
               'Setting batch size to data size'))
        batch_size = len(files)

    n_batches = len(files) // batch_size
    n_used_imgs = n_batches * batch_size

    pred_arr = np.empty((n_used_imgs, dims))

    for i in tqdm(range(n_batches)):
        if verbose:
            print('\rPropagating batch %d/%d' % (i + 1, n_batches),
                  end='',
                  flush=True)
        start = i * batch_size
        end = start + batch_size

        images = np.array(
            [imread(str(f)).astype(np.float32) for f in files[start:end]])
        # images = np.concatenate([imread(str(f)).astype(np.float32)
        #                    for f in files[start:end]], axis=0)

        # Reshape to (n_images, 3, height, width)
        images = images.transpose((0, 3, 1, 2))
        images /= 255

        batch = torch.from_numpy(images).type(torch.FloatTensor)
        if cuda:
            batch = batch.cuda()

        pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.shape[2] != 1 or pred.shape[3] != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred_arr[start:end] = pred.cpu().data.numpy().reshape(batch_size, -1)

    if verbose:
        print(' done')

    return pred_arr
Example #30
def make_avg_flat(out):
    flat = F.adaptive_avg_pool2d(out, output_size=1)
    flat = flat.view(flat.size(0), -1)
    return flat
Example #31
def resize2d(img, size):
    return F.adaptive_avg_pool2d(img, size[2:])
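Because the call uses size[2:], size is expected to be a full NCHW shape (for example other.size()), so the helper resizes img to another tensor's spatial dimensions. A minimal usage sketch:

import torch
import torch.nn.functional as F

def resize2d(img, size):
    return F.adaptive_avg_pool2d(img, size[2:])

a = torch.randn(1, 3, 64, 64)
b = torch.randn(1, 3, 32, 32)
print(resize2d(a, b.size()).shape)  # torch.Size([1, 3, 32, 32])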
Example #32
    def forward(self, x):

        bts,_,_,_ = x.size()

        mean = [0.485,0.456,0.406]
        std = [0.229,0.224,0.225]

        x = torch.cat([
            (x-mean[2])/std[2],
            (x-mean[1])/std[1],
            (x-mean[0])/std[0]
        ],1)

        e1 = self.conv1(x)  #; print('x',x.size())    #64,64,64
        e1 = self.scse1(e1)
        e2 = self.encoder2(e1) #; print('e2',e2.size()) #128,32,32
        e2 = self.scse2(e2)
        e3 = self.encoder3(e2) #; print('e3',e3.size())#256,16,16
        e3 = self.scse3(e3)
        e4 = self.encoder4(e3)#; print('e4',e4.size()) #512,8,8
        e4 = self.scse4(e4)
        # e5 = self.encoder5(e4)# ; print('e5',e5.size())#1024,8,8
        # e5 = self.scse5(e5)

        f = self.center(e4)# ; print('f',f.size()) #256,8,8
        #f = self.scse_center(f)

        f_gap = F.adaptive_avg_pool2d(f,output_size=1)#256,1,1
        f_gap = F.dropout(f_gap,p=0.5)
        f_gap_fuse = self.class_fuse_conv(f_gap)#64,1,1

        class_logit = self.class_out(f_gap_fuse.view(bts,64)).view(bts)# 1



        d5 = self.decoder5(f,e3)# ; print('d5',d5.size())#64,16,16
        #d5 = self.drop_1(d5)
        d4 = self.decoder4(d5,e2)#; print('d4',d4.size())#32,32
        #d4 = self.drop_1(d4)
        d3 = self.decoder3(d4,e1)#; print('d3',d3.size())#64,64
        #d3 = self.drop_1(d3)
        d2 = self.decoder2(d3)#; print('d2',d2.size()) #128,128


        hyper = torch.cat((
            F.upsample(d2, scale_factor=2, mode='bilinear', align_corners=False),
            F.upsample(d3,scale_factor=4,mode='bilinear',align_corners=False),
            F.upsample(d4,scale_factor=8,mode='bilinear',align_corners=False),
            F.upsample(d5,scale_factor=16,mode='bilinear',align_corners=False)
        ),1) #256,128,128

        hyper= F.dropout2d(hyper,p=0.5)#256,128,128

        seg_base_fuse = self.seg_basefuse_conv(hyper)#64,128,128

        seg_logit = self.seg_single_logit(seg_base_fuse)#1,128,128

        fuse_feature = torch.cat((
            seg_base_fuse,
            F.upsample(f_gap_fuse,scale_factor=seg_logit.size()[-1],mode='nearest')
        ),1)#128,128,128

        fuse_logit = self.fuse_logit(fuse_feature)#1,128,128

        return fuse_logit,seg_logit,class_logit
Example #33
 def get_yolo_feature_vec(self, coords):
     fmap_cropped = self.crop_feature_map(coords)
     fmap_cropped = F.adaptive_avg_pool2d(fmap_cropped, (1, 1))
     return np.squeeze(fmap_cropped.cpu().numpy())
Example #34
 def adaptive_avg_pool2d(a):
     return F.adaptive_avg_pool2d(a, 1)
Example #35
 def forward(self, input):
     x = self.features(input)
     x = F.relu(x, inplace=True)
     x = F.adaptive_avg_pool2d(x, (4,4)).view(x.size(0), -1)
     x = self.fc(x)
     return x
Example #36
 def forward(self, x):
     out = self.extractor(x)
     out = F.adaptive_avg_pool2d(out, 1)
     out = out.view(out.size(0), -1)
     out = self.fc(out)
     return out
Example #37
 def forward(self, x):
     if self.transform_input:
         x_ch0 = torch.unsqueeze(x[:, 0],
                                 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
         x_ch1 = torch.unsqueeze(x[:, 1],
                                 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
         x_ch2 = torch.unsqueeze(x[:, 2],
                                 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
         x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
     # N x 3 x 299 x 299
     x = self.Conv2d_1a_3x3(x)
     # N x 32 x 149 x 149
     x = self.Conv2d_2a_3x3(x)
     # N x 32 x 147 x 147
     x = self.Conv2d_2b_3x3(x)
     # N x 64 x 147 x 147
     x = F.max_pool2d(x, kernel_size=3, stride=2)
     # N x 64 x 73 x 73
     x = self.Conv2d_3b_1x1(x)
     # N x 80 x 73 x 73
     x = self.Conv2d_4a_3x3(x)
     # N x 192 x 71 x 71
     x = F.max_pool2d(x, kernel_size=3, stride=2)
     # N x 192 x 35 x 35
     x = self.Mixed_5b(x)
     # N x 256 x 35 x 35
     x = self.Mixed_5c(x)
     # N x 288 x 35 x 35
     x = self.Mixed_5d(x)
     # N x 288 x 35 x 35
     x = self.Mixed_6a(x)
     # N x 768 x 17 x 17
     x = self.Mixed_6b(x)
     # N x 768 x 17 x 17
     x = self.Mixed_6c(x)
     # N x 768 x 17 x 17
     x = self.Mixed_6d(x)
     # N x 768 x 17 x 17
     x = self.Mixed_6e(x)
     # N x 768 x 17 x 17
     if self.training and self.aux_logits:
         aux = self.AuxLogits(x)
     # N x 768 x 17 x 17
     x = self.Mixed_7a(x)
     # N x 1280 x 8 x 8
     x = self.Mixed_7b(x)
     # N x 2048 x 8 x 8
     x = self.Mixed_7c(x)
     # N x 2048 x 8 x 8
     # Adaptive average pooling
     x = F.adaptive_avg_pool2d(x, (1, 1))
     # N x 2048 x 1 x 1
     x = F.dropout(x, training=self.training)
     # N x 2048 x 1 x 1
     x = torch.flatten(x, 1)
     # N x 2048
     x = self.fc(x)
     # N x 1000 (num_classes)
     if self.training and self.aux_logits:
         return _InceptionOutputs(x, aux)
     return x
Example #38
    def forward(self, x):
        batchsize = x.shape[0]
        x = self.mobilenet0_conv0(x)
        x = self.mobilenet0_conv1(x)
        x = self.mobilenet0_conv2(x)
        x = self.mobilenet0_conv3(x)
        x = self.mobilenet0_conv4(x)
        x = self.mobilenet0_conv5(x)
        x = self.mobilenet0_conv6(x)
        x = self.mobilenet0_conv7(x)
        x = self.mobilenet0_conv8(x)
        x = self.mobilenet0_conv9(x)
        x10 = self.mobilenet0_conv10(x)
        x = self.mobilenet0_conv11(x10)
        x = self.mobilenet0_conv12(x)
        x = self.mobilenet0_conv13(x)
        x = self.mobilenet0_conv14(x)
        x = self.mobilenet0_conv15(x)
        x = self.mobilenet0_conv16(x)
        x = self.mobilenet0_conv17(x)
        x = self.mobilenet0_conv18(x)
        x = self.mobilenet0_conv19(x)
        x = self.mobilenet0_conv20(x)
        x = self.mobilenet0_conv21(x)
        x22 = self.mobilenet0_conv22(x)
        x = self.mobilenet0_conv23(x22)
        x = self.mobilenet0_conv24(x)
        x = self.mobilenet0_conv25(x)
        x = self.mobilenet0_conv26(x)
        o1 = self.rf_c3_lateral(x)
        o2 = self.rf_c3_det_conv1(o1)
        o3 = self.rf_c3_det_context_conv1(o1)
        o4 = self.rf_c3_det_context_conv2(o3)
        o5 = self.rf_c3_det_context_conv3_1(o3)
        o6 = self.rf_c3_det_context_conv3_2(o5)
        o7 = torch.cat((o2, o4, o6), 1)
        o8 = self.rf_c3_det_concat_relu(o7)
        cls32 = self.face_rpn_cls_score_stride32(o8)
        cls32_shape = cls32.shape
        cls32 = cls32.view(
            batchsize, 2, -1, cls32_shape[3]
        )  # torch.reshape(cls32, (batchsize, 2, -1, cls32_shape[3]))
        cls32 = self.face_rpn_cls_score_stride32_softmax(cls32)
        cls32 = cls32.view(
            batchsize, 4, -1, cls32_shape[3]
        )  # torch.reshape(cls32, (batchsize, 4, -1, cls32_shape[3]))
        bbox32 = self.face_rpn_bbox_pred_stride32(o8)
        landmark32 = self.face_rpn_landmark_pred_stride32(o8)
        p1 = self.rf_c2_lateral(x22)
        p2 = self.rf_c3_upsampling(o1)
        p2 = F.adaptive_avg_pool2d(p2, (p1.shape[2], p1.shape[3]))

        p3 = p1 + p2
        p4 = self.rf_c2_aggr(p3)
        p5 = self.rf_c2_det_conv1(p4)
        p6 = self.rf_c2_det_context_conv1(p4)
        p7 = self.rf_c2_det_context_conv2(p6)
        p8 = self.rf_c2_det_context_conv3_1(p6)
        p9 = self.rf_c2_det_context_conv3_2(p8)
        p10 = torch.cat((p5, p7, p9), 1)
        p10 = self.rf_c2_det_concat_relu(p10)
        cls16 = self.face_rpn_cls_score_stride16(p10)
        cls16_shape = cls16.shape
        cls16 = cls16.view(
            batchsize, 2, -1, cls16_shape[3]
        )  # torch.reshape(cls16, (batchsize, 2, -1, cls16_shape[3]))
        cls16 = self.face_rpn_cls_score_stride16_softmax(cls16)
        cls16 = cls16.view(
            batchsize, 4, -1, cls16_shape[3]
        )  #cls16 = torch.reshape(cls16, (batchsize, 4, -1, cls16_shape[3]))
        bbox16 = self.face_rpn_bbox_pred_stride16(p10)
        landmark16 = self.face_rpn_landmark_pred_stride16(p10)
        q1 = self.rf_c1_red_conv(x10)
        q2 = self.rf_c2_upsampling(p4)
        q2 = F.adaptive_avg_pool2d(q2, (q1.shape[2], q1.shape[3]))

        q3 = q1 + q2
        q4 = self.rf_c1_aggr(q3)
        q5 = self.rf_c1_det_conv1(q4)
        q6 = self.rf_c1_det_context_conv1(q4)
        q7 = self.rf_c1_det_context_conv2(q6)
        q8 = self.rf_c1_det_context_conv3_1(q6)
        q9 = self.rf_c1_det_context_conv3_2(q8)
        q10 = torch.cat((q5, q7, q9), 1)
        q10 = self.rf_c2_det_concat_relu(q10)
        cls8 = self.face_rpn_cls_score_stride8(q10)
        cls8_shape = cls8.shape
        cls8 = cls8.view(
            batchsize, 2, -1, cls8_shape[3]
        )  # torch.reshape(cls8, (batchsize, 2, -1, cls8_shape[3]))
        cls8 = self.face_rpn_cls_score_stride8_softmax(cls8)
        cls8 = cls8.view(
            batchsize, 4, -1, cls8_shape[3]
        )  #cls8 = torch.reshape(cls8, (batchsize, 4, -1, cls8_shape[3]))
        bbox8 = self.face_rpn_bbox_pred_stride8(q10)
        landmark8 = self.face_rpn_landmark_pred_stride8(q10)

        detections = []
        detections.append(cls32)
        detections.append(bbox32)
        detections.append(landmark32)

        detections.append(cls16)
        detections.append(bbox16)
        detections.append(landmark16)

        detections.append(cls8)
        detections.append(bbox8)
        detections.append(landmark8)
        return detections
Example #39
 def _compute_grad_weights(self, grads):
     return F.adaptive_avg_pool2d(grads, 1)
Example #40
    def forward(self, x):
        batch_size, _, _, _ = x.shape
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        x[:, 0, :, :] -= mean[0]
        x[:, 0, :, :] /= std[0]
        x[:, 1, :, :] -= mean[1]
        x[:, 1, :, :] /= std[1]
        x[:, 2, :, :] -= mean[2]
        x[:, 2, :, :] /= std[2]

        # depth encoding / CoordConv
        # coord_scale = 1 / std[0]
        # coord_x = (torch.abs(torch.linspace(-1, 1, steps=x.size(3))) - 0.5) * coord_scale
        # x[:, 1] = coord_x.unsqueeze(0).expand_as(x[:, 1])
        # coord_y = (torch.linspace(-1, 1, steps=x.size(2))) * coord_scale
        # x[:, 2] = coord_y.unsqueeze(-1).expand_as(x[:, 2])

        e1 = self.encoder1(x)  # ; print('e1', e1.size())
        e2 = self.encoder2(e1)  # ; print('e2', e2.size())
        e3 = self.encoder3(e2)  # ; print('e3', e3.size())
        e4 = self.encoder4(e3)  # ; print('e4', e4.size())
        e5 = self.encoder5(e4)  # ; print('e5', e5.size())

        c = self.center(self.pool(e5))  # ; print('c', c.size())

        d5 = self.decoder5(c, e5)  # ; print('d5', d5.size())
        d4 = self.decoder4(d5, e4)  # ; print('d4', d4.size())
        d3 = self.decoder3(d4, e3)  # ; print('d3', d3.size())
        d2 = self.decoder2(torch.cat((d3, e2), 1))  # ; print('d2', d2.size())
        d1 = self.decoder1(d2, e1)  # ; print('d1', d1.size())

        d1_size = d1.size()[2:]
        upsampler = upsample(size=d1_size)
        u5 = upsampler(d5)
        u4 = upsampler(d4)
        u3 = upsampler(d3)
        u2 = upsampler(d2)

        d = torch.cat((d1, u2, u3, u4, u5), 1)
        # logit = self.logit(d)#;print(logit.size())

        fuse_pixel = self.fuse_pixel(d)

        logit_pixel = (
            self.logit_pixel1(d1), self.logit_pixel2(u2), self.logit_pixel3(u3), self.logit_pixel4(u4),
            self.logit_pixel5(u5),
        )

        e = F.adaptive_avg_pool2d(e5, output_size=1).view(batch_size, -1)  # image pool
        e = F.dropout(e, p=0.50, training=self.training)
        fuse_image = self.fuse_image(e)
        logit_image = self.logit_image(fuse_image).view(-1)

        # print(fuse_pixel.size())
        # print(fuse_image.size())
        logit = self.logit(torch.cat([  # fuse
            fuse_pixel,
            F.upsample(fuse_image.view(batch_size, -1, 1, 1, ), scale_factor=128, mode='nearest')
        ], 1))

        return logit, logit_pixel, logit_image