def maskrcnn_resnet50_fpn(pretrained=False,
                          progress=True,
                          num_classes=91,
                          pretrained_backbone=True,
                          config=None,
                          **kwargs):
    """Build a Mask R-CNN on top of a ResNet-50 FPN backbone.

    Args:
        pretrained: when truthy, load the ``maskrcnn_resnet50_fpn_coco``
            weights from ``model_urls`` into the assembled model.
        progress: forwarded to ``load_state_dict_from_url``.
        num_classes: number of output classes handed to ``MaskRCNN``.
        pretrained_backbone: forwarded to ``resnet_fpn_backbone``; forced to
            ``False`` when ``pretrained`` is set.
        config: forwarded as the third argument of ``resnet_fpn_backbone``.
        **kwargs: forwarded unchanged to ``MaskRCNN``.

    Returns:
        The constructed ``MaskRCNN`` instance.
    """
    # The full-model checkpoint is loaded below, so a separate backbone
    # download would be wasted work.
    backbone_weights = False if pretrained else pretrained_backbone
    backbone = resnet_fpn_backbone('resnet50', backbone_weights, config)

    # Both arguments are flat tuples, exactly as in the original call.
    anchor_sizes = (32, 64, 128, 256, 512)
    anchor_ratios = (0.5, 1.0, 2.0)
    anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                       aspect_ratios=anchor_ratios)

    model = MaskRCNN(backbone,
                     num_classes,
                     rpn_anchor_generator=anchor_generator,
                     **kwargs)

    if not pretrained:
        return model

    checkpoint = load_state_dict_from_url(
        model_urls['maskrcnn_resnet50_fpn_coco'], progress=progress)
    model.load_state_dict(checkpoint)
    return model
Exemplo n.º 2
0
    def __init__(self):
        """Assemble a 5-class Mask R-CNN on frozen MobileNetV2 features.

        Also creates the SGD optimiser (over the non-frozen parameters only)
        and a StepLR scheduler for it.
        """
        backbone = torchvision.models.mobilenet_v2(pretrained=True).features
        # MaskRCNN reads ``out_channels`` from the backbone module.
        backbone.out_channels = 1280
        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256), ),
                                           aspect_ratios=((0.5, 1.0, 2.0), ))

        # A backbone returning a single tensor is exposed by torchvision under
        # the *string* key '0'; an integer 0 never matches that key.
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                        output_size=7,
                                                        sampling_ratio=2)
        mask_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                         output_size=14,
                                                         sampling_ratio=2)

        self.net = MaskRCNN(backbone,
                            5,
                            rpn_anchor_generator=anchor_generator,
                            box_roi_pool=roi_pooler,
                            mask_roi_pool=mask_pooler)

        # Freeze the backbone before collecting the trainable parameters.
        for p in self.net.backbone.parameters():
            p.requires_grad = False

        params = [p for p in self.net.parameters() if p.requires_grad]
        self.optim = torch.optim.SGD(params,
                                     lr=0.001,
                                     momentum=0.9,
                                     weight_decay=0.0005)
        # The misspelled attribute is kept for existing callers; the correctly
        # spelled alias refers to the same scheduler object.
        self.lr_schuduler = torch.optim.lr_scheduler.StepLR(self.optim,
                                                            step_size=3,
                                                            gamma=0.1)
        self.lr_scheduler = self.lr_schuduler
Exemplo n.º 3
0
    def __init__(self,
                 backbone=None,
                 architecture=None,
                 detector=None,
                 num_classes=None,
                 device='cpu',
                 *args,
                 **kwargs):
        """Build the detection model for the requested backbone family.

        :param backbone: ``'efficientnet'``, ``'fishnet'`` or ``'resnet'``;
            any non-string value is passed to ``MaskRCNN`` unchanged.
        :param architecture: concrete variant name within the family
            (e.g. ``'fishnet99'``; any other fishnet name selects fishnet201).
        :param detector: must be non-None; not otherwise used here.
        :param num_classes: number of classes handed to ``MaskRCNN``.
        :param device: device the model is moved to.
        :raises ValueError: if ``backbone`` is an unrecognised string.
        """
        assert backbone is not None, ValueError('backbone can not None')
        assert architecture is not None, ValueError(
            'architecture can not None')
        assert detector is not None, ValueError('detector can not None')
        assert num_classes is not None, ValueError('num_classes can not None')
        assert device is not None, ValueError('device can not None.')
        self.device = device

        # ``super.__init__()`` (no call parentheses on ``super``) raises
        # TypeError; the zero-argument form is what was intended.
        super().__init__()
        if backbone == 'efficientnet':
            backbone = EfficientNet.from_pretrained(architecture)
            backbone.out_channels = 1280
        elif backbone == 'fishnet':
            if architecture == 'fishnet99':
                backbone = fishnet99()
            elif architecture == 'fishnet150':
                backbone = fishnet150()
            else:
                backbone = fishnet201()

            backbone.out_channels = 1000
        elif backbone == 'resnet':
            backbone = resnet_fpn_backbone(architecture, pretrained=True)
        elif isinstance(backbone, str):
            # Fail clearly instead of handing a bare string to MaskRCNN.
            raise ValueError('unsupported backbone: %s' % backbone)

        # NOTE(review): if the enclosing class is itself named ``MaskRCNN``
        # this call resolves to that class rather than torchvision's - confirm.
        self.model = MaskRCNN(backbone, num_classes=num_classes)
        self.model.to(device)
def get_model(num_classes):
    """Return a Mask R-CNN built on a pretrained timm ``cspresnet50``.

    The timm network is created in ``features_only`` mode, wrapped by
    ``TimmToVisionFPN`` and handed to ``MaskRCNN`` with default settings.
    """
    feature_net = timm.create_model('cspresnet50',
                                    features_only=True,
                                    pretrained=True)
    fpn_backbone = TimmToVisionFPN(feature_net)
    return MaskRCNN(fpn_backbone, num_classes)
def get_model(
    backbone_name="resnet50",
    detector_name="fasterrcnn",
    trainable_layers=3,
    model_ckpt=None,
):
    """Construct a fasterrcnn or maskrcnn detector with the given backbone.

    Args:
        backbone_name: name forwarded to ``timm_resnet_fpn_backbone``.
        detector_name: ``"fasterrcnn"`` or ``"maskrcnn"``.
        trainable_layers: forwarded to ``timm_resnet_fpn_backbone``.
        model_ckpt: optional checkpoint path. The file must hold a dict with
            a ``"model_state_dict"`` entry. When given, the backbone is
            created without pretrained weights because the checkpoint
            overwrites them anyway.

    Returns:
        The assembled detector, with checkpoint weights loaded if requested.

    Raises:
        ValueError: if ``detector_name`` is not supported.
    """
    # Validate first so a typo does not cost a backbone build or download.
    if detector_name not in ("fasterrcnn", "maskrcnn"):
        raise ValueError(f"{detector_name} is not supported")

    num_classes = 2  # 1 class (wheat) + background
    has_ckpt = bool(model_ckpt)

    backbone = timm_resnet_fpn_backbone(backbone_name, not has_ckpt,
                                        trainable_layers)

    if detector_name == "fasterrcnn":
        model = FasterRCNN(backbone, num_classes)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)
    else:
        model = MaskRCNN(backbone, num_classes)
        in_features_mask = (
            model.roi_heads.mask_predictor.conv5_mask.in_channels)
        hidden_layer = 256
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)

    if has_ckpt:
        # Map to CPU so a checkpoint saved on GPU also loads on CPU-only
        # hosts; load_state_dict copies into the model's own parameters.
        checkpoint = torch.load(model_ckpt, map_location="cpu")
        model.load_state_dict(checkpoint["model_state_dict"])
        print("loaded ckpt")
    return model
def get_mask_rcnn(num_classes, max_instances, backbone="resnet101"):
    """Create a Mask R-CNN whose box and mask heads predict ``num_classes``.

    For ``backbone == "resnet50"`` the COCO-pretrained torchvision model is
    used; any other name builds a ResNet-FPN backbone without pretrained
    weights. ``max_instances`` is passed as ``box_detections_per_img``.
    """
    if backbone != "resnet50":
        fpn = resnet_fpn_backbone(backbone, False)
        model = MaskRCNN(fpn,
                         num_classes=91,
                         box_detections_per_img=max_instances)
    else:
        print("**************Adding Resnet 50 backbone***************")
        model = torchvision.models.detection.maskrcnn_resnet50_fpn(
            pretrained=True, box_detections_per_img=max_instances)

    heads = model.roi_heads

    # Swap the box classifier for one sized to ``num_classes``.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Likewise for the mask predictor, with a 256-channel hidden layer.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    heads.mask_predictor = MaskRCNNPredictor(mask_in_channels, 256,
                                             num_classes)
    return model
def get_model_instance_segmentation_v2(num_classes,
                                       architecture: str = 'resnet18'):
    """Build a Mask R-CNN on an ImageNet-pretrained ``pretrainedmodels`` net.

    By modifying this function we will be able to use a large variety of
    pretrained backbones but besides the backbones nothing else will be
    trained.

    A better solution seems to be to load a pre-trained model and then to
    change the mask and box predictors.

    Args:
        num_classes: number of classes handed to ``MaskRCNN``.
        architecture: key looked up in ``pretrainedmodels.__dict__``.

    Returns:
        The assembled ``MaskRCNN``.
    """

    # Pretrained model for num_classes=1000, but we will not use the final
    # layers anyway.
    classifier = pretrainedmodels.__dict__[architecture](num_classes=1000,
                                                         pretrained='imagenet')

    my_backbone = MyBackbone(classifier.features, 512)

    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))

    # torchvision exposes a single-tensor backbone output under the string
    # key '0'; an integer 0 does not match it.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)

    mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                         output_size=14,
                                                         sampling_ratio=2)

    model = MaskRCNN(my_backbone,
                     num_classes=num_classes,
                     rpn_anchor_generator=anchor_generator,
                     box_roi_pool=roi_pooler,
                     mask_roi_pool=mask_roi_pooler)

    return model
Exemplo n.º 8
0
def get_instance_segmentation_model(num_classes,
                                    model_name='maskrcnn_resnet50_fpn'):
    """Return a detection model whose heads are resized to ``num_classes``.

    Names starting with ``'efficientnet'`` build a ``MaskRCNN`` around a
    pretrained EfficientNet; every other name is looked up in
    ``torchvision.models.detection`` and created with ``pretrained=True``.
    """
    if model_name.startswith('efficientnet'):
        backbone = EfficientNet.from_pretrained(model_name,
                                                num_classes=num_classes,
                                                include_top=False)
        # MaskRCNN needs the backbone's channel count.
        scaled_width = round_filters(1280, backbone._global_params)
        backbone.out_channels = int(scaled_width)
        model = MaskRCNN(backbone, num_classes)
    else:
        factory = torchvision.models.detection.__dict__[model_name]
        model = factory(pretrained=True)

    heads = model.roi_heads

    # New box classifier with the requested number of classes.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Only the mask-capable variants also get a new mask predictor.
    if model_name.startswith(('mask', 'efficientnet')):
        mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
        heads.mask_predictor = MaskRCNNPredictor(mask_in_channels, 256,
                                                 num_classes)
    return model
Exemplo n.º 9
0
class MaskNet():
    """Training wrapper around a 5-class Mask R-CNN on MobileNetV2 features.

    Holds the network (``net``), an SGD optimiser (``optim``) and a StepLR
    scheduler (``lr_schuduler`` / ``lr_scheduler``).
    """

    def __init__(self):
        """Build the network, freeze its backbone, create optimiser/scheduler."""
        backbone = torchvision.models.mobilenet_v2(pretrained=True).features
        # MaskRCNN reads ``out_channels`` from the backbone module.
        backbone.out_channels = 1280
        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256), ),
                                           aspect_ratios=((0.5, 1.0, 2.0), ))

        # A single-tensor backbone output is exposed by torchvision under the
        # *string* key '0'; an integer 0 never matches that key.
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                        output_size=7,
                                                        sampling_ratio=2)
        mask_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                         output_size=14,
                                                         sampling_ratio=2)

        self.net = MaskRCNN(backbone,
                            5,
                            rpn_anchor_generator=anchor_generator,
                            box_roi_pool=roi_pooler,
                            mask_roi_pool=mask_pooler)

        # Freeze the backbone before collecting the trainable parameters.
        for p in self.net.backbone.parameters():
            p.requires_grad = False

        params = [p for p in self.net.parameters() if p.requires_grad]
        self.optim = torch.optim.SGD(params,
                                     lr=0.001,
                                     momentum=0.9,
                                     weight_decay=0.0005)
        # The misspelled attribute is kept for existing callers; the correctly
        # spelled alias refers to the same scheduler object.
        self.lr_schuduler = torch.optim.lr_scheduler.StepLR(self.optim,
                                                            step_size=3,
                                                            gamma=0.1)
        self.lr_scheduler = self.lr_schuduler

    def cuda(self):
        """Move the network to the GPU."""
        self.net = self.net.cuda()

    def train(self, images, targets):
        """Run one optimisation step on a batch and print the summed loss."""
        loss_dict = self.net(images, targets)
        losses = sum(loss_dict.values())
        print(losses.item())
        self.optim.zero_grad()
        losses.backward()
        self.optim.step()

    def save(self):
        """Serialise the whole network to ``mymodel.pth`` in the working dir."""
        torch.save(self.net, os.path.join(os.getcwd(), 'mymodel.pth'))

    def load(self):
        """Replace the network with the one stored in ``mymodel.pth``.

        NOTE: this unpickles a full model object, so only load files from a
        trusted source.
        """
        self.net = torch.load(os.path.join(os.getcwd(), 'mymodel.pth'))
Exemplo n.º 10
0
def get_backbone(num_classes):
    """Return a ``MaskRCNN`` whose backbone is a headless pretrained ResNet-50.

    Despite the name, the return value is the full detection model, not just
    the backbone.
    """
    resnet = torchvision.models.resnet50(pretrained=True)

    # Keep every child module except the last two.
    trunk_layers = list(resnet.children())[:-2]
    trunk = torch.nn.Sequential(*trunk_layers)
    trunk.out_channels = 2048

    return MaskRCNN(trunk, num_classes)
Exemplo n.º 11
0
def get_model_instance_segmentation4(num_classes):
    """Build a Mask R-CNN on (non-pretrained) SqueezeNet 1.1 features.

    Args:
        num_classes: number of classes handed to ``MaskRCNN``.

    Returns:
        The assembled ``MaskRCNN``. Three informational lines are printed
        as a side effect.
    """
    backbone = torchvision.models.squeezenet1_1(pretrained=False).features
    # MaskRCNN reads the channel count from the backbone module.
    backbone.out_channels = 512

    # One anchor set (5 sizes x 3 ratios) for the single feature map.
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))

    # Both poolers must name the feature map identically; torchvision exposes
    # a single-tensor backbone output under the string key '0'.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)

    mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                         output_size=14,
                                                         sampling_ratio=2)

    # The anchor generator has to be passed explicitly: the default one
    # assumes five FPN levels, while this backbone yields one feature map.
    model = MaskRCNN(backbone,
                     num_classes=num_classes,
                     rpn_anchor_generator=anchor_generator,
                     box_roi_pool=roi_pooler,
                     mask_roi_pool=mask_roi_pooler)

    print(
        "squeezenet1_1 call2 - out_channels :256, 2,757,369 / 11M (15,000,000 / 15,000,000)"
    )
    print(
        "squeezenet1_1 call2 - out_channels :512, 4,808,441 / 19.2M (15,000,000)"
    )
    print(
        "squeezenet1_1 call2 - out_channels :512, 33,192,463 33,161,683 / 172M (15,000,000)"
    )

    return model
Exemplo n.º 12
0
def get_model(num_classes):
    """Return a Mask R-CNN on a timm ``cspresnet50`` with custom box heads.

    The box head and box predictor of the default ``MaskRCNN`` are replaced
    by ``RoIFeatureExtractor`` and ``RoIBoxPredictor``.
    """
    feature_net = timm.create_model('cspresnet50',
                                    features_only=True,
                                    pretrained=True,
                                    pretrained_strict=False)
    backbone = TimmToVisionFPN(feature_net)

    # One anchor size per level, each with the same three aspect ratios.
    anchor_sizes = tuple((size, ) for size in (32, 64, 128, 256, 512))
    aspect_ratios = tuple((0.5, 1.0, 2.0) for _ in anchor_sizes)
    anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                       aspect_ratios=aspect_ratios)

    channels = backbone.out_channels
    anchors_per_location = anchor_generator.num_anchors_per_location()[0]

    # NOTE(review): this cascade RPN head is constructed but never handed to
    # the model below - confirm whether ``rpn_head=`` was meant to be passed.
    cascade_rpn_head = CascadeRPNHead(channels,
                                      feat_channels=channels,
                                      num_anchors=anchors_per_location,
                                      stage=2)

    model = MaskRCNN(backbone, num_classes=num_classes)

    # Replace the box branch.
    heads = model.roi_heads
    heads.box_head = RoIFeatureExtractor(num_inputs=256, resolution=7)
    heads.box_predictor = RoIBoxPredictor(num_classes)

    return model
Exemplo n.º 13
0
def get_model_instance_segmentation_mn2(num_classes):
    """Assemble a Mask R-CNN on pretrained MobileNetV2 features.

    Uses one feature map, 5 x 3 anchors per location, a 7x7 RoI pooler for
    boxes and a 14x14 RoI pooler for masks.
    """
    # Feature extractor only; the classifier part of MobileNetV2 is dropped.
    features = torchvision.models.mobilenet_v2(pretrained=True).features
    # MaskRCNN needs the backbone's channel count (1280 for mobilenet_v2).
    features.out_channels = 1280

    # Tuple[Tuple[...]]: one inner tuple per feature map, here a single map
    # with five sizes and three aspect ratios.
    sizes = ((32, 64, 128, 256, 512), )
    ratios = ((0.5, 1.0, 2.0), )
    rpn_anchors = AnchorGenerator(sizes=sizes, aspect_ratios=ratios)

    # Both poolers crop from the feature map named '0'.
    pooled_map_names = ['0']
    box_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=pooled_map_names, output_size=7, sampling_ratio=2)
    mask_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'], output_size=14, sampling_ratio=2)

    return MaskRCNN(features,
                    num_classes=num_classes,
                    rpn_anchor_generator=rpn_anchors,
                    box_roi_pool=box_pooler,
                    mask_roi_pool=mask_pooler)
Exemplo n.º 14
0
class MaskRCNN(Model, metaclass=ModelType):
    """
    MaskRCNN model for Document Layout Analysis with different backbone:
        - ResNet
        - EfficientNet
        - FishNet
    """
    def __init__(self,
                 backbone=None,
                 architecture=None,
                 detector=None,
                 num_classes=None,
                 device='cpu',
                 *args,
                 **kwargs):
        """Build the detection model for the requested backbone family.

        :param backbone: ``'efficientnet'``, ``'fishnet'`` or ``'resnet'``;
            any non-string value is used as the backbone module unchanged.
        :param architecture: concrete variant name within the family
            (e.g. ``'fishnet99'``; any other fishnet name selects fishnet201).
        :param detector: must be non-None; not otherwise used here.
        :param num_classes: number of classes handed to the detector.
        :param device: device the model is moved to.
        :raises ValueError: if ``backbone`` is an unrecognised string.
        """
        # Within this module the bare name ``MaskRCNN`` is this wrapper class,
        # so torchvision's detector is imported under a distinct alias.
        from torchvision.models.detection import (
            MaskRCNN as TorchvisionMaskRCNN)

        assert backbone is not None, ValueError('backbone can not None')
        assert architecture is not None, ValueError(
            'architecture can not None')
        assert detector is not None, ValueError('detector can not None')
        assert num_classes is not None, ValueError('num_classes can not None')
        assert device is not None, ValueError('device can not None.')
        self.device = device

        # ``super.__init__()`` (no call parentheses on ``super``) raises
        # TypeError; the zero-argument form is what was intended.
        super().__init__()
        if backbone == 'efficientnet':
            backbone = EfficientNet.from_pretrained(architecture)
            backbone.out_channels = 1280
        elif backbone == 'fishnet':
            if architecture == 'fishnet99':
                backbone = fishnet99()
            elif architecture == 'fishnet150':
                backbone = fishnet150()
            else:
                backbone = fishnet201()

            backbone.out_channels = 1000
        elif backbone == 'resnet':
            backbone = resnet_fpn_backbone(architecture, pretrained=True)
        elif isinstance(backbone, str):
            # Fail clearly instead of handing a bare string to the detector.
            raise ValueError('unsupported backbone: %s' % backbone)

        self.model = TorchvisionMaskRCNN(backbone, num_classes=num_classes)
        self.model.to(device)

    def load(self, path=None, *args, **kwargs):
        """Load model weights from ``path``.

        :param path: file readable by ``torch.load`` that holds a state dict.
        :param kwargs: if the key ``nn_parallel_to_cpu`` is present (CPU
            branch only), a leading ``'module.'`` is stripped from every
            parameter name before loading.
        """
        assert path is not None, ValueError('path can not None.')

        if self.device == 'cuda':
            self.model.load_state_dict(torch.load(path))
            return

        # Keep every tensor on the CPU regardless of where it was saved.
        state_dict = torch.load(path,
                                map_location=lambda storage, loc: storage)

        if 'nn_parallel_to_cpu' in kwargs:
            # Strip the prefix *before* loading: a strict load of the
            # prefixed names would fail on key mismatch.
            prefix = 'module.'
            state_dict = OrderedDict(
                (key[len(prefix):] if key.startswith(prefix) else key, item)
                for key, item in state_dict.items())

        self.model.load_state_dict(state_dict)

    def _analyze(self, img=None, *args, **kwargs):
        """Run the model on one image and filter its detections.

        :param img: image accepted by ``F.to_tensor``.
        :return: the model's output dict with every value converted to a
            NumPy array; ``'boxes'``, ``'labels'`` and ``'scores'`` are
            reduced to lists of the entries that survive NMS (threshold 0.25)
            and have a score above 0.5.
        """
        assert img is not None, ValueError('img can not be None')

        # The input has to live on the same device as the model.
        img = F.to_tensor(img).to(self.device)

        output = self.model([img])[0]
        for key, item in output.items():
            # ``.cpu()`` is a no-op for tensors that are already on the CPU.
            output[key] = item.detach().cpu().numpy()

        scores = output['scores']
        # nms.boxes is given [x, y, width, height] rectangles.
        boxes = [[x1, y1, x2 - x1, y2 - y1]
                 for x1, y1, x2, y2 in output['boxes']]
        rects = nms.boxes(rects=boxes, scores=scores, nms_threshold=0.25)

        # Decide once which detections to keep, then apply it to every field.
        keep = [idx for idx in rects if scores[idx] > 0.5]
        for key in ('boxes', 'labels', 'scores'):
            output[key] = [output[key][idx] for idx in keep]

        return output

    def batch_analyze(self, images=None, *args, **kwargs):
        """
        Analyze a batch of images.

        :param images: iterable of images accepted by ``F.to_tensor``.
        :param kwargs: if the key ``use_listmemmap`` is present the results
            are collected in a ``ListMemMap``, otherwise in a ``List``.
        :return: the collection, holding one ``[boxes, labels, scores]``
            triple per image; each triple contains only detections that
            survive NMS (threshold 0.25) and have a score above 0.5.
        """
        assert images is not None, ValueError('images can not be None')

        with torch.no_grad():
            if self.device == 'cuda':
                torch.cuda.synchronize()

            batch = [F.to_tensor(image).to(self.device) for image in images]
            output = self.model(batch)
            # Drop the input tensors before post-processing.
            del batch

            if 'use_listmemmap' in kwargs:
                f_out = ListMemMap()
            else:
                f_out = List()

            for prediction in output:
                # ``.cpu()`` is a no-op for tensors already on the CPU.
                arrays = {
                    key: item.detach().cpu().numpy()
                    for key, item in prediction.items()
                }

                scores = arrays['scores']
                # nms.boxes is given [x, y, width, height] rectangles.
                boxes = [[x1, y1, x2 - x1, y2 - y1]
                         for x1, y1, x2, y2 in arrays['boxes']]
                rects = nms.boxes(rects=boxes,
                                  scores=scores,
                                  nms_threshold=0.25)

                keep = [idx for idx in rects if scores[idx] > 0.5]
                f_out.append([
                    [arrays['boxes'][idx] for idx in keep],
                    [arrays['labels'][idx] for idx in keep],
                    [arrays['scores'][idx] for idx in keep],
                ])

            # Only names that are always bound are deleted here; the loop
            # locals do not exist when the model returns no predictions.
            del output
            if self.device == 'cuda':
                torch.cuda.empty_cache()

            gc.collect()
            return f_out

    def analyze(self, img=None, *args, **kwargs):
        """
        Run the model on one image, without gradient tracking, and filter
        its detections.

        :param img: PIL.Image
        :return: the model's output dict with every value converted to a
            NumPy array; ``'boxes'``, ``'labels'`` and ``'scores'`` are
            reduced to lists of the entries that survive NMS (threshold 0.25)
            and have a score above 0.5.
        """
        assert img is not None, ValueError('img can not be None')

        with torch.no_grad():
            # The input has to live on the same device as the model.
            img = F.to_tensor(img).to(self.device)
            output = self.model([img])[0]
            for key, item in output.items():
                # ``.cpu()`` is a no-op for tensors already on the CPU.
                output[key] = item.detach().cpu().numpy()

            scores = output['scores']
            # nms.boxes is given [x, y, width, height] rectangles.
            boxes = [[x1, y1, x2 - x1, y2 - y1]
                     for x1, y1, x2, y2 in output['boxes']]
            rects = nms.boxes(rects=boxes, scores=scores, nms_threshold=0.25)

            # Decide once which detections to keep, then apply it per field.
            keep = [idx for idx in rects if scores[idx] > 0.5]
            for key in ('boxes', 'labels', 'scores'):
                output[key] = [output[key][idx] for idx in keep]

        return output

    def analyze_pdf(self, pdf_file=None, *args, **kwargs):
        """Analyze every page of a PDF and write annotated PNGs.

        Each page is rendered with ``pdf2image``, passed through ``analyze``
        and saved as ``test/show/<pdf name>_<page index>.png``.

        :param pdf_file: path of the PDF to process.
        """
        assert pdf_file is not None, ValueError('pdf_file can not be None')

        out_dir = os.path.join('test', 'show')
        # cv2.imwrite does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)
        pdf_name = pdf_file.split('/')[-1]

        images = pdf2image.convert_from_path(pdf_file)
        for idx, image in enumerate(images):
            out = self.analyze(image)
            # ``analyze`` returns a dict, while ``box_display`` indexes its
            # argument positionally as [boxes, labels, scores].
            detections = [out['boxes'], out['labels'], out['scores']]
            img = self.box_display(image, detections)
            cv2.imwrite(
                os.path.join(out_dir, pdf_name + '_' + str(idx) + '.png'),
                img)
    def box_display(self, image, output, *args, **kwargs):
        """Draw detection boxes and ``label: score`` captions on an image.

        :param image: image convertible with ``np.array``.
        :param output: either a ``[boxes, labels, scores]`` sequence or a
            dict with the keys ``'boxes'``, ``'labels'`` and ``'scores'``.
        :return: the annotated image as a NumPy array.
        """
        assert image is not None, ValueError('image can not None')
        assert output is not None, ValueError('output can not None ')

        image = np.array(image)

        # Accept both result layouts produced by this class.
        if isinstance(output, dict):
            boxes, labels, scores = (output['boxes'], output['labels'],
                                     output['scores'])
        else:
            boxes, labels, scores = output[0], output[1], output[2]

        font = cv2.FONT_HERSHEY_DUPLEX
        for idx, box in enumerate(boxes):
            # OpenCV drawing functions require integer pixel coordinates.
            x1, y1, x2, y2 = (int(value) for value in box)
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 20, 200), 10)
            cv2.putText(
                image, '%s: %.2f' %
                (self.class_names[labels[idx]], scores[idx]),
                (x1 + 10, y1 + 35), font, 1, (0, 0, 255), 2,
                cv2.LINE_AA)
        return image

    def mask_display(self, image, output):
        """Colour the pixels of ``image`` according to ``output['masks']``.

        The masks are reduced with ``argmax`` over axis 0; every pixel whose
        resulting value lies strictly between 0 and 8 is painted with the
        matching entry of ``self.colours``.

        :return: the painted image as a NumPy array.
        """
        assert image is not None, ValueError('image can not None')
        assert output is not None, ValueError('output can not None')

        canvas = np.array(image)

        label_map = output['masks'].argmax(axis=0)
        height, width = label_map.shape[1], label_map.shape[2]
        label_map = np.reshape(label_map, (height, width))

        for row, row_labels in enumerate(label_map):
            for col, label in enumerate(row_labels):
                if 0 < label < 8:
                    canvas[row][col] = self.colours[label]

        return canvas

    def train(self,
              train_set=None,
              valid_set=None,
              epoch=None,
              optimizer=None,
              lr_scheduler=None,
              *args,
              **kwargs):
        """
        Default training loop: one pass over ``train_set`` followed by a
        COCO-style evaluation on ``valid_set`` for every epoch.

        :param train_set: iterable yielding ``(images, targets)`` batches.
        :param valid_set: iterable yielding ``(images, targets)`` batches;
            its ``.dataset`` attribute is handed to
            ``get_coco_api_from_dataset`` (presumably a DataLoader - confirm).
        :param epoch: number of epochs to run.
        :param optimizer: optimiser to use; defaults to SGD (lr 0.005,
            momentum 0.9, weight decay 0.0005) over the trainable parameters.
        :param lr_scheduler: scheduler stepped after every training batch;
            defaults to ``StepLR(step_size=1000, gamma=0.978)``.
        """
        assert train_set is not None, ValueError('train_set can not None')
        assert valid_set is not None, ValueError('valid_set can not None')
        assert epoch is not None, ValueError('epoch can not None')

        # The loop below works on the wrapped model and the configured device.
        model = self.model
        device = self.device
        cpu_device = torch.device('cpu')

        if optimizer is None:
            params = [p for p in model.parameters() if p.requires_grad]
            optimizer = torch.optim.SGD(params,
                                        lr=0.005,
                                        momentum=0.9,
                                        weight_decay=0.0005)

        if lr_scheduler is None:
            lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                           step_size=1000,
                                                           gamma=0.978)

        for ep in range(epoch):
            # Evaluation at the end of the previous epoch switched to eval
            # mode, so training mode is re-enabled every epoch.
            model.train()

            metric_logger = MetricLogger(delimiter='  ')
            metric_logger.add_meter(
                'lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
            header = 'Epoch: [{}]'.format(ep)

            for images, targets in metric_logger.log_every(
                    train_set, 10, header):
                images = [image.to(device) for image in images]
                targets = [{k: v.to(device)
                            for k, v in t.items()} for t in targets]

                loss_dict = model(images, targets)
                losses = sum(loss_dict.values())

                # reduce losses over all GPUs for logging purposes
                loss_dict_reduced = reduce_dict(loss_dict)
                losses_reduced = sum(loss_dict_reduced.values())

                loss_value = losses_reduced.item()

                if not math.isfinite(loss_value):
                    print("Loss is {}, stopping training".format(loss_value))
                    print(loss_dict_reduced)
                    sys.exit(1)

                optimizer.zero_grad()
                losses.backward()
                optimizer.step()
                lr_scheduler.step()

                metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
                metric_logger.update(lr=optimizer.param_groups[0]["lr"])

            # ---- evaluation on the validation set ----
            n_threads = torch.get_num_threads()
            model.eval()

            coco = get_coco_api_from_dataset(valid_set.dataset)
            iou_types = _get_iou_types(model)
            coco_evaluator = CocoEvaluator(coco, iou_types)

            with torch.no_grad():
                for image, targets in metric_logger.log_every(
                        valid_set, 100, header):
                    image = [img.to(device) for img in image]
                    targets = [{k: v.to(device)
                                for k, v in t.items()} for t in targets]

                    if device == 'cuda':
                        torch.cuda.synchronize()

                    model_time = time.time()
                    outputs = model(image)

                    outputs = [{k: v.to(cpu_device)
                                for k, v in t.items()} for t in outputs]
                    model_time = time.time() - model_time

                    res = {
                        target["image_id"].item(): output
                        for target, output in zip(targets, outputs)
                    }
                    evaluator_time = time.time()
                    coco_evaluator.update(res)
                    evaluator_time = time.time() - evaluator_time
                    metric_logger.update(model_time=model_time,
                                         evaluator_time=evaluator_time)

            # gather the stats from all processes
            metric_logger.synchronize_between_processes()
            print("Averaged stats: ", metric_logger)
            coco_evaluator.synchronize_between_processes()

            # accumulate predictions from all images
            coco_evaluator.accumulate()
            coco_evaluator.summarize()
            # Restore the thread count recorded before evaluation.
            torch.set_num_threads(n_threads)
Exemplo n.º 15
0
import torch
import torchvision
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.anchor_utils import AnchorGenerator

# Build a Mask R-CNN on top of MobileNetV2's convolutional trunk.
# Only the `.features` sub-module is used as the feature extractor; the
# detection model needs the backbone to expose `out_channels`, which the bare
# trunk does not carry, so it is set by hand.
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280

# The backbone yields a single feature map, hence one inner tuple for sizes
# and one for aspect ratios (5 sizes x 3 ratios at every spatial location).
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)

# RoI pooling for the box head (7x7) and the mask head (14x14). Both read the
# only feature map, registered under the string key '0'.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'],
    output_size=7,
    sampling_ratio=2,
)
mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'],
    output_size=14,
    sampling_ratio=2,
)

# num_classes=2 is passed straight to MaskRCNN.
# NOTE(review): presumably background + one object class -- confirm.
MaskRCNN_mobile_model = MaskRCNN(
    backbone,
    num_classes=2,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,
    mask_roi_pool=mask_roi_pooler,
)

if __name__ == '__main__':
    # Smoke test: push two random images of different sizes through the
    # model in inference mode.
    model = MaskRCNN_mobile_model
    model.eval()
    x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    # Inference only: disable autograd so no graph is recorded for the
    # forward pass.
    with torch.no_grad():
        predictions = model(x)
# Mask R-CNN over a single-feature-map backbone, followed by the data pipeline
# for the BrainTumor dataset.
# NOTE(review): `backbone` is not created in this fragment -- presumably it is
# defined just above; confirm.
backbone.out_channels = 1280

# One feature map -> one inner tuple of sizes and one of aspect ratios.
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)

# Box head pools to 7x7, mask head to 14x14; both read feature map '0'.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'], output_size=7, sampling_ratio=2)
mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'], output_size=14, sampling_ratio=2)

model = MaskRCNN(
    backbone,
    num_classes=4,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,
    mask_roi_pool=mask_roi_pooler,
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

dataSet = BrainTumor(
    '/content/drive/My Drive/Colab Notebooks/BrainTumor/',
    transforms=None,
)
data_loader = torch.utils.data.DataLoader(
    dataSet,
    batch_size=4,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

model.to(device)
# Only parameters that still require gradients are collected here.
params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(params,
from torch.autograd import Variable
from torchvision import datasets, transforms

# Mask R-CNN with a MobileNetV2 feature extractor, an SGD optimizer over all
# of its parameters, and a COCO train2014 detection dataset.

# Only the convolutional trunk is used; the detection model needs the backbone
# to expose `out_channels`, so it is set by hand.
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280

# One feature map -> one inner tuple of sizes and one of aspect ratios.
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                   aspect_ratios=((0.5, 1.0, 2.0), ))

# Feature maps are looked up by *string* key: a backbone that returns a plain
# tensor has its output registered under '0'. The integer key 0 used
# previously does not match that entry, so the poolers would find no feature
# map to pool from.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)
mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                     output_size=14,
                                                     sampling_ratio=2)
model = MaskRCNN(backbone,
                 num_classes=2,
                 rpn_anchor_generator=anchor_generator,
                 box_roi_pool=roi_pooler,
                 mask_roi_pool=mask_roi_pooler)

optimizer = optim.SGD(model.parameters(),
                      lr=0.005,
                      momentum=0.9,
                      weight_decay=0.0005)

# NOTE(review): `dataDir` is only referenced by the commented-out
# ChemScapeDataset line below in this fragment; later code may still use it.
dataDir = "../ChemLabScapeDataset/TrainAnnotations"
#dataset = ChemScapeDataset(dataDir, None, "Vessel", False)
# COCO train2014 images with their instance annotations; ToTensor is applied
# to each image as it is loaded.
d = datasets.CocoDetection(
    root="../coco/train2014",
    annFile="../coco/annotations/instances_train2014.json",
    transform=transforms.ToTensor())
dataLoader = torch.utils.data.DataLoader(d,