Exemplo n.º 1
0
    def __init__(self):
        """Assemble the network from the module-level ``cfg`` object.

        Built in this order: backbone, prototype net, FPN, a one-element
        ``ModuleList`` holding the shared prediction head, the optional
        semantic-segmentation conv, and finally the anchor tensor.

        Side effects: ``cfg.coef_dim`` is overwritten with the second value
        returned by ``make_net``, and the anchor tensor is moved to CUDA
        unconditionally.
        """
        super().__init__()
        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()

        # Channel width handed to the prototype net and the prediction head
        # (annotated as 256 by the original author).
        in_channels = cfg.fpn.num_features

        proto_net, coef_dim = make_net(in_channels,
                                       cfg.mask_proto_net,
                                       include_last_relu=False)
        self.proto_net = proto_net
        cfg.coef_dim = coef_dim
        # Reference printout kept from the original source (not verifiable
        # from this block alone):
        #   self.proto_net:
        #   Sequential((0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #              (1): ReLU(inplace)
        #              (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #              (3): ReLU(inplace)
        #              (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #              (5): ReLU(inplace)
        #              (6): InterpolateModule()
        #              (7): ReLU(inplace)
        #              (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #              (9): ReLU(inplace)
        #              (10): Conv2d(256, 32, kernel_size=(1, 1), stride=(1, 1)))
        #   cfg.coef_dim: 32

        self.fpn = FPN([512, 1024, 2048])
        self.selected_layers = list(range(5))

        # The head lives in a ModuleList so that parameter names line up with
        # the original pre-trained weights (original model state_dict).
        self.prediction_layers = nn.ModuleList([PredictionModule(in_channels)])
        # Reference printout kept from the original source (not verifiable
        # from this block alone):
        #   self.prediction_layers:
        #   ModuleList(
        #     (0): PredictionModule((upfeature): Sequential((0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #                                                   (1): ReLU(inplace))
        #                           (bbox_layer): Conv2d(256, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #                           (conf_layer): Conv2d(256, 243, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #                           (mask_layer): Conv2d(256, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))))

        if cfg.train_semantic:  # noted as True by the original author
            # NOTE(review): input width is the literal 256, not in_channels.
            self.semantic_seg_conv = nn.Conv2d(256,
                                               cfg.num_classes - 1,
                                               kernel_size=1)

        # Collect the anchors of every (h, w) entry in cfg.hws; make_anchors
        # receives hw[1] first and hw[0] second, plus that level's scales.
        anchor_values = []
        for level, hw in enumerate(cfg.hws):
            anchor_values.extend(
                make_anchors(hw[1], hw[0], cfg.backbone.scales[level]))
        self.anchors = torch.Tensor(anchor_values).view(-1, 4).cuda()
Exemplo n.º 2
0
    def forward(self, x):
        """Run backbone, FPN, prototype net and the shared prediction head.

        Returns:
            dict with keys 'box', 'class', 'coef' (per-level head outputs
            concatenated along dim -2), 'proto' (channels-last prototype
            map) and 'anchors'. In training mode 'segm' is added when
            ``cfg.train_semantic`` is truthy; in eval mode 'class' is
            softmax-normalised over its last dimension.
        """
        features = self.backbone(x)

        # Backbone outputs 1..3 are handed to the FPN as three positional
        # arguments.
        outs = self.fpn(features[1], features[2], features[3])
        # Shape walk-through recorded by the original author for a 550x550
        # input (not verified here):
        #   (n, 3, 550, 550) -> backbone -> (n, 256, 138, 138) -> fpn -> (n, 256, 69, 69) P3
        #                                   (n, 512, 69, 69)             (n, 256, 35, 35) P4
        #                                   (n, 1024, 35, 35)            (n, 256, 18, 18) P5
        #                                   (n, 2048, 18, 18)            (n, 256, 9, 9)   P6
        #                                                                (n, 256, 5, 5)   P7

        # Anchors are built lazily: only while self.anchors is still a list.
        if isinstance(self.anchors, list):
            for level, fmap in enumerate(outs):
                dims = list(fmap.shape)
                # The two spatial sizes are wrapped in 0-d tensors before
                # being passed to make_anchors.
                self.anchors.extend(
                    make_anchors(torch.tensor(dims[2]),
                                 torch.tensor(dims[3]),
                                 cfg.scales[level]))
            self.anchors = torch.Tensor(self.anchors).view(-1, 4)

        # Prototypes come from the first FPN level; the result is moved to
        # channels-last layout.
        proto_out = F.relu(self.proto_net(outs[0]), inplace=True)
        proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

        predictions = {'box': [], 'class': [], 'coef': []}

        # One shared head (index 0) is applied to every selected FPN level.
        head = self.prediction_layers[0]
        for level in self.selected_layers:
            for key, value in head(outs[level]).items():
                predictions[key].append(value)

        for key in predictions:
            predictions[key] = torch.cat(predictions[key], -2)

        predictions['proto'] = proto_out
        predictions['anchors'] = self.anchors

        if not self.training:
            predictions['class'] = F.softmax(predictions['class'], -1)
        elif cfg.train_semantic:
            predictions['segm'] = self.semantic_seg_conv(outs[0])

        return predictions
Exemplo n.º 3
0
    def forward(self, x):
        """Run the timed backbone/FPN/proto/head pipeline and gather outputs.

        Returns:
            dict with keys 'box', 'class', 'coef' (per-level head outputs
            concatenated along dim -2), 'proto' (channels-last prototype
            map) and 'anchors'. In training mode 'segm' is added when
            ``cfg.train_semantic`` is truthy; in eval mode 'class' is
            softmax-normalised over its last dimension.
        """
        with timer.env('backbone'):
            outs = self.backbone(x)

        with timer.env('fpn'):
            # Only the backbone stages listed in the config feed the FPN.
            outs = [outs[i] for i in cfg.backbone.selected_layers]
            outs = self.fpn(outs)
            # Shape walk-through recorded by the original author for a
            # 550x550 input (not verified here):
            #   (n, 3, 550, 550) -> backbone -> (n, 256, 138, 138) -> fpn -> [n, 256, 69, 69] P3
            #                                   (n, 512, 69, 69)             [n, 256, 35, 35] P4
            #                                   (n, 1024, 35, 35)            [n, 256, 18, 18] P5
            #                                   (n, 2048, 18, 18)            [n, 256, 9, 9]   P6
            #                                                                [n, 256, 5, 5]   P7

        # Anchors are built lazily: only while self.anchors is still a list.
        if isinstance(self.anchors, list):
            for i, shape in enumerate([list(aa.shape) for aa in outs]):
                self.anchors += make_anchors(shape[2], shape[3],
                                             cfg.backbone.scales[i])
            # Place the anchors on the device the feature maps live on,
            # instead of forcing the default CUDA device: this keeps
            # CPU-only runs and non-default GPUs working.
            self.anchors = torch.Tensor(self.anchors).view(-1, 4).to(
                outs[0].device)

        with timer.env('proto'):
            # Prototypes come from the first FPN level; the result is moved
            # to channels-last layout.
            proto_out = self.proto_net(outs[0])
            proto_out = F.relu(proto_out, inplace=True)
            proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

        with timer.env('pred_heads'):
            predictions = {'box': [], 'class': [], 'coef': []}

            # One shared head (index 0) is applied to every selected level.
            for i in self.selected_layers:
                p = self.prediction_layers[0](outs[i])

                for k, v in p.items():
                    predictions[k].append(v)

        for k, v in predictions.items():
            predictions[k] = torch.cat(v, -2)

        predictions['proto'] = proto_out
        predictions['anchors'] = self.anchors

        if self.training:
            if cfg.train_semantic:
                predictions['segm'] = self.semantic_seg_conv(outs[0])
            return predictions

        predictions['class'] = F.softmax(predictions['class'], -1)
        return predictions
Exemplo n.º 4
0
    def forward(self, img, box_classes=None, masks_gt=None):
        """Run the detector; return the loss in training, predictions in eval.

        Args:
            img: input passed straight to ``self.backbone``.
            box_classes: forwarded unchanged to ``self.compute_loss``
                (training only).
            masks_gt: forwarded unchanged to ``self.compute_loss``
                (training only).

        Returns:
            Training: whatever ``self.compute_loss`` returns.
            Eval: ``(class_pred, box_pred, coef_pred, proto_out, anchors)``
            with ``class_pred`` softmax-normalised over its last dimension.
        """
        features = self.backbone(img)
        # Backbone outputs 1..3 are the FPN inputs.
        outs = self.fpn(features[1:4])
        # Shape walk-through recorded by the original author for a 550x550
        # input (not verified here):
        #   (n, 3, 550, 550) -> backbone -> (n, 256, 138, 138) -> fpn -> (n, 256, 69, 69) P3
        #                                   (n, 512, 69, 69)             (n, 256, 35, 35) P4
        #                                   (n, 1024, 35, 35)            (n, 256, 18, 18) P5
        #                                   (n, 2048, 18, 18)            (n, 256, 9, 9)   P6
        #                                                                (n, 256, 5, 5)   P7

        # Anchors are built lazily: only while self.anchors is still a list.
        if isinstance(self.anchors, list):
            for level, fmap in enumerate(outs):
                dims = list(fmap.shape)
                self.anchors.extend(
                    make_anchors(self.cfg, dims[2], dims[3],
                                 self.cfg.scales[level]))

            # The anchor tensor is created on the feature maps' device.
            self.anchors = torch.tensor(
                self.anchors, device=outs[0].device).reshape(-1, 4)

        # Prototypes come from the first FPN level; the result is moved to
        # channels-last layout.
        proto_out = F.relu(self.proto_net(outs[0]), inplace=True)
        proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

        # The shared head (index 0) yields a (class, box, coef) triple for
        # each level; each kind is concatenated along dim 1.
        level_outputs = [self.prediction_layers[0](fmap) for fmap in outs]
        class_pred = torch.cat([cls_p for cls_p, _, _ in level_outputs], dim=1)
        box_pred = torch.cat([box_p for _, box_p, _ in level_outputs], dim=1)
        coef_pred = torch.cat([coef_p for _, _, coef_p in level_outputs], dim=1)

        if not self.training:
            return (F.softmax(class_pred, -1), box_pred, coef_pred, proto_out,
                    self.anchors)

        seg_pred = self.semantic_seg_conv(outs[0])
        return self.compute_loss(class_pred, box_pred, coef_pred, proto_out,
                                 seg_pred, box_classes, masks_gt)
Exemplo n.º 5
0
    def __init__(self, cfg):
        """Build backbone, FPN, prototype net, prediction head and anchors.

        Args:
            cfg: configuration object. Its class name prefix selects the
                backbone ('res101', 'res50' or 'swin_tiny'); ``img_size``,
                ``scales``, ``mode`` and ``num_classes`` are read as well.
        """
        super().__init__()
        self.cfg = cfg
        self.coef_dim = 32

        # The backbone/FPN pair is selected by the config class name prefix.
        # NOTE(review): a name matching none of the prefixes leaves
        # self.backbone and self.fpn unset — confirm that is intended.
        config_name = cfg.__class__.__name__
        if config_name.startswith(('res101', 'res50')):
            if config_name.startswith('res101'):
                stage_depths = (3, 4, 23, 3)
            else:
                stage_depths = (3, 4, 6, 3)
            self.backbone = ResNet(layers=stage_depths)
            self.fpn = FPN(in_channels=(512, 1024, 2048))
        elif config_name.startswith('swin_tiny'):
            self.backbone = SwinTransformer()
            self.fpn = FPN(in_channels=(192, 384, 768))

        self.proto_net = ProtoNet(coef_dim=self.coef_dim)
        self.prediction_layers = PredictionModule(cfg, coef_dim=self.coef_dim)

        # One square feature-map size per stride, ceil(img_size / stride);
        # the anchors of all five levels are gathered into one flat list.
        anchor_list = []
        for level, stride in enumerate((8, 16, 32, 64, 128)):
            side = math.ceil(cfg.img_size / stride)
            anchor_list.extend(
                make_anchors(self.cfg, side, side, self.cfg.scales[level]))
        self.anchors = anchor_list

        # The semantic-segmentation conv exists only in training mode.
        if cfg.mode == 'train':
            self.semantic_seg_conv = nn.Conv2d(256,
                                               cfg.num_classes - 1,
                                               kernel_size=1)

        # Xavier-initialise every Conv2d and zero its bias. Per the original
        # note, backbone weights are expected to be overwritten later.
        for module in self.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            nn.init.xavier_uniform_(module.weight.data)
            if module.bias is not None:
                module.bias.data.zero_()
Exemplo n.º 6
0
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        """Return a four-line summary of the host and device attributes."""
        pieces = ("Host:", str(self.host), "Device:", str(self.device))
        return "\n".join(pieces)

    def __repr__(self):
        """Return the same text that ``__str__`` produces."""
        text = self.__str__()
        return text


# Side length of each FPN feature map: ceil(img_size / stride) per stride.
fpn_fm_shape = [
    math.ceil(cfg.img_size / stride) for stride in (8, 16, 32, 64, 128)
]

# Flat anchor list covering every level; each level is square (size x size)
# and uses the matching entry of cfg.scales.
anchors = []
for i, size in enumerate(fpn_fm_shape):
    anchors.extend(make_anchors(cfg, size, size, cfg.scales[i]))

# prepare engine
with open(cfg.weight,
          'rb') as f, trt.Runtime(trt.Logger(trt.Logger.WARNING)) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()

    for binding in engine:
        size = trt.volume(
            engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))

        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)