def test_instantiate_lst(self):
    lst = [1, 2, L(TestClass)(int_arg=1)]
    x = L(TestClass)(int_arg=lst)  # list as an argument should be recursively instantiated
    x = instantiate(x).int_arg
    self.assertEqual(x[:2], [1, 2])
    self.assertIsInstance(x[2], TestClass)
    self.assertEqual(x[2].int_arg, 1)
Example #2
def default_X_scheduler(num_X):
    """
    Returns the config for a default multi-step LR scheduler such as "1x", "3x",
    commonly referred to in papers, where one "1x" spans a total of 1440k training
    images (~12 COCO epochs). The LR is decayed twice near the end of training,
    following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4.

    Args:
        num_X: a positive real number

    Returns:
        DictConfig: configs that define the multiplier for LR during training
    """
    # Total number of iterations, assuming a batch size of 16: 1440000 / 16 = 90000 per "1x"
    total_steps_16bs = num_X * 90000

    if num_X <= 2:
        scheduler = L(MultiStepParamScheduler)(
            values=[1.0, 0.1, 0.01],
            # note that the scheduler is scale-invariant; this is equivalent to
            # milestones=[6, 8, 9]
            milestones=[60000, 80000, 90000],
        )
    else:
        scheduler = L(MultiStepParamScheduler)(
            values=[1.0, 0.1, 0.01],
            milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs],
        )
    return L(WarmupParamScheduler)(
        scheduler=scheduler,
        warmup_length=1000 / total_steps_16bs,
        warmup_method="linear",
        warmup_factor=0.001,
    )
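For instance, under the convention above a "3x" schedule runs for 3 * 90000 = 270000 iterations, and the milestones resolve to [210000, 250000, 270000]. A minimal usage sketch, assuming detectron2's instantiate as used elsewhere in these examples:

from detectron2.config import instantiate

lr_multiplier = default_X_scheduler(3)  # "3x": 270000 total steps at batch size 16
scheduler = instantiate(lr_multiplier)  # WarmupParamScheduler wrapping the multi-step schedule
# The LR multiplier warms up linearly over the first 1000 steps, then drops
# from 1.0 to 0.1 at step 210000 and to 0.01 at step 250000.

Example #3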
    def test_to_py(self):
        cfg = LazyConfig.load(self.root_filename)
        cfg.lazyobj.x = {
            "a": 1,
            "b": 2,
            "c": L(count)(x={
                "r": "a",
                "s": 2.4,
                "t": [1, 2, 3, "z"]
            })
        }
        cfg.list = ["a", 1, "b", 3.2]
        py_str = LazyConfig.to_py(cfg)
        expected = """cfg.dir1a_dict.a = "modified"
cfg.dir1a_dict.b = 2
cfg.dir1b_dict.a = 1
cfg.dir1b_dict.b = 2
cfg.lazyobj = itertools.count(
    x={
        "a": 1,
        "b": 2,
        "c": itertools.count(x={"r": "a", "s": 2.4, "t": [1, 2, 3, "z"]}),
    },
    y="base_a_1_from_b",
)
cfg.list = ["a", 1, "b", 3.2]
"""
        self.assertEqual(py_str, expected)
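Note that LazyConfig.to_py serializes a config as executable Python: LazyCall nodes are rendered as direct calls to their targets (here itertools.count), while plain dicts and lists become literals, as the expected string above shows.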
Example #4
def test_instantiate_dataclass_as_subconfig(self):
    cfg = L(TestClass)(int_arg=1, extra_arg=ShapeSpec(channels=1, width=3))
    # Test original cfg as well as serialization
    for x in [cfg, reload_lazy_config(cfg)]:
        obj = instantiate(x)
        self.assertIsInstance(obj.extra_arg, ShapeSpec)
        self.assertEqual(obj.extra_arg.channels, 1)
        self.assertEqual(obj.extra_arg.height, None)
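The reload_lazy_config helper used by these tests is not shown in the excerpts; a plausible sketch, assuming it simply round-trips the config through LazyConfig's file serialization:

import os
import tempfile

from detectron2.config import LazyConfig


def reload_lazy_config(cfg):
    # Save the config to a temporary YAML file and load it back, so tests
    # can check that LazyCall objects survive serialization.
    with tempfile.TemporaryDirectory(prefix="detectron2") as d:
        fname = os.path.join(d, "test_config.yaml")
        LazyConfig.save(cfg, fname)
        return LazyConfig.load(fname)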
Example #5
def test_instantiate_dataclass(self):
    cfg = L(ShapeSpec)(channels=1, width=3)
    # Test original cfg as well as serialization
    for x in [cfg, reload_lazy_config(cfg)]:
        obj = instantiate(x)
        self.assertIsInstance(obj, ShapeSpec)
        self.assertEqual(obj.channels, 1)
        self.assertEqual(obj.height, None)
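Example #6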
    def test_basic_construct(self):
        objconf = L(TestClass)(
            int_arg=3,
            list_arg=[10],
            dict_arg={},
            extra_arg=L(TestClass)(int_arg=4, list_arg="${..list_arg}"),
        )

        obj = instantiate(objconf)
        self.assertIsInstance(obj, TestClass)
        self.assertEqual(obj.int_arg, 3)
        self.assertEqual(obj.extra_arg.int_arg, 4)
        self.assertEqual(obj.extra_arg.list_arg, obj.list_arg)

        objconf.extra_arg.list_arg = [5]
        obj = instantiate(objconf)
        self.assertIsInstance(obj, TestClass)
        self.assertEqual(obj.extra_arg.list_arg, [5])
Example #7
    def test_basic_construct(self):
        cfg = L(TestClass)(
            int_arg=3,
            list_arg=[10],
            dict_arg={},
            extra_arg=L(TestClass)(int_arg=4, list_arg="${..list_arg}"),
        )

        for x in [cfg, reload_lazy_config(cfg)]:
            obj = instantiate(x)
            self.assertIsInstance(obj, TestClass)
            self.assertEqual(obj.int_arg, 3)
            self.assertEqual(obj.extra_arg.int_arg, 4)
            self.assertEqual(obj.extra_arg.list_arg, obj.list_arg)

            # Test interpolation
            x.extra_arg.list_arg = [5]
            obj = instantiate(x)
            self.assertIsInstance(obj, TestClass)
            self.assertEqual(obj.extra_arg.list_arg, [5])
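The "${..list_arg}" string above is an OmegaConf relative interpolation: the first dot refers to the container holding the reference, and each additional dot climbs one parent. A standalone sketch of the mechanism with plain OmegaConf:

from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {"list_arg": [10], "extra_arg": {"int_arg": 4, "list_arg": "${..list_arg}"}}
)
# From inside extra_arg, ".." climbs to the root, so the reference
# resolves to the top-level list_arg.
assert cfg.extra_arg.list_arg == [10]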
Example #8
    def test_interpolation(self):
        cfg = L(TestClass)(int_arg=3, extra_arg="${int_arg}")

        cfg.int_arg = 4
        obj = instantiate(cfg)
        self.assertEqual(obj.extra_arg, 4)

        # Test that interpolation still works after serialization
        cfg = reload_lazy_config(cfg)
        cfg.int_arg = 5
        obj = instantiate(cfg)
        self.assertEqual(obj.extra_arg, 5)
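Example #9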
    def test_instantiate_namedtuple(self):
        x = L(TestClass)(int_arg=ShapeSpec(channels=1, width=3))
        # test serialization
        with tempfile.TemporaryDirectory() as d:
            fname = os.path.join(d, "d2_test.yaml")
            OmegaConf.save(x, fname)
            with open(fname) as f:
                x = yaml.unsafe_load(f)

        x = instantiate(x)
        self.assertIsInstance(x.int_arg, ShapeSpec)
        self.assertEqual(x.int_arg.channels, 1)
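Example #10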
from .mask_rcnn_R_50_FPN_100ep_LSJ import (
    dataloader,
    lr_multiplier,
    model,
    optimizer,
    train,
)
from detectron2.config import LazyCall as L
from detectron2.modeling.backbone import RegNet
from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock

# Config source:
# https://github.com/facebookresearch/detectron2/blob/main/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py  # noqa
model.backbone.bottom_up = L(RegNet)(
    stem_class=SimpleStem,
    stem_width=32,
    block_class=ResBottleneckBlock,
    depth=22,
    w_a=31.41,
    w_0=96,
    w_m=2.24,
    group_width=64,
    se_ratio=0.25,
    norm="SyncBN",
    out_features=["s1", "s2", "s3", "s4"],
)
model.pixel_std = [57.375, 57.120, 58.395]

# RegNets benefit from enabling cudnn benchmark mode
train.cudnn_benchmark = True
Example #11
model = L(RetinaNet)(
    backbone=L(FPN)(
        bottom_up=L(ResNet)(
            stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
            stages=L(ResNet.make_default_stages)(
                depth=50,
                stride_in_1x1=True,
                norm="FrozenBN",
            ),
            out_features=["res3", "res4", "res5"],
        ),
        in_features=["res3", "res4", "res5"],
        out_channels=256,
        top_block=L(LastLevelP6P7)(in_channels=2048,
                                   out_channels="${..out_channels}"),
    ),
    head=L(RetinaNetHead)(
        input_shape=[ShapeSpec(channels=256)],
        num_classes="${..num_classes}",
        conv_dims=[256, 256, 256, 256],
        prior_prob=0.01,
        num_anchors=9,
    ),
    anchor_generator=L(DefaultAnchorGenerator)(
        sizes=[[x, x * 2**(1.0 / 3), x * 2**(2.0 / 3)]
               for x in [32, 64, 128, 256, 512]],
        aspect_ratios=[0.5, 1.0, 2.0],
        strides=[8, 16, 32, 64, 128],
        offset=0.0,
    ),
    box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
    anchor_matcher=L(Matcher)(thresholds=[0.4, 0.5],
                              labels=[0, -1, 1],
                              allow_low_quality_matches=True),
    num_classes=80,
    head_in_features=["p3", "p4", "p5", "p6", "p7"],
    focal_loss_alpha=0.25,
    focal_loss_gamma=2.0,
    pixel_mean=[103.530, 116.280, 123.675],
    pixel_std=[1.0, 1.0, 1.0],
    input_format="BGR",
)
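Nothing above is constructed until the config is resolved, so fields can still be overridden, and the "${..num_classes}"-style references pick up the overrides. A short sketch:

from detectron2.config import instantiate

model.num_classes = 20          # propagates through "${..num_classes}"
retinanet = instantiate(model)  # recursively builds backbone, head, anchors, ...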
Example #12
from ..common.models.mask_rcnn_fpn import model
from ..common.train import train

from detectron2.config import LazyCall as L
from detectron2.modeling.backbone import RegNet
from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock

# Replace default ResNet with RegNetX-4GF from the DDS paper. Config source:
# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml#L4-L9  # noqa
model.backbone.bottom_up = L(RegNet)(
    stem_class=SimpleStem,
    stem_width=32,
    block_class=ResBottleneckBlock,
    depth=23,
    w_a=38.65,
    w_0=96,
    w_m=2.43,
    group_width=40,
    freeze_at=2,
    norm="FrozenBN",
    out_features=["s1", "s2", "s3", "s4"],
)
model.pixel_std = [57.375, 57.120, 58.395]

optimizer.weight_decay = 5e-5
train.init_checkpoint = (
    "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth"
)
# RegNets benefit from enabling cudnn benchmark mode
train.cudnn_benchmark = True
Example #13
model = L(GeneralizedRCNN)(
    backbone=L(FPN)(
        bottom_up=L(ResNet)(
            stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
            stages=L(ResNet.make_default_stages)(
                depth=50,
                stride_in_1x1=True,
                norm="FrozenBN",
            ),
            out_features=["res2", "res3", "res4", "res5"],
        ),
        in_features="${.bottom_up.out_features}",
        out_channels=256,
        top_block=L(LastLevelMaxPool)(),
    ),
    proposal_generator=L(RPN)(
        in_features=["p2", "p3", "p4", "p5", "p6"],
        head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
        anchor_generator=L(DefaultAnchorGenerator)(
            sizes=[[32], [64], [128], [256], [512]],
            aspect_ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64],
            offset=0.0,
        ),
        anchor_matcher=L(Matcher)(thresholds=[0.3, 0.7],
                                  labels=[0, -1, 1],
                                  allow_low_quality_matches=True),
        box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
        batch_size_per_image=256,
        positive_fraction=0.5,
        pre_nms_topk=(2000, 1000),
        post_nms_topk=(1000, 1000),
        nms_thresh=0.7,
    ),
    roi_heads=L(StandardROIHeads)(
        num_classes=80,
        batch_size_per_image=512,
        positive_fraction=0.25,
        proposal_matcher=L(Matcher)(thresholds=[0.5],
                                    labels=[0, 1],
                                    allow_low_quality_matches=False),
        box_in_features=["p2", "p3", "p4", "p5"],
        box_pooler=L(ROIPooler)(
            output_size=7,
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        box_head=L(FastRCNNConvFCHead)(
            input_shape=ShapeSpec(channels=256, height=7, width=7),
            conv_dims=[],
            fc_dims=[1024, 1024],
        ),
        box_predictor=L(FastRCNNOutputLayers)(
            input_shape=ShapeSpec(channels=1024),
            test_score_thresh=0.05,
            box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
            num_classes="${..num_classes}",
        ),
        mask_in_features=["p2", "p3", "p4", "p5"],
        mask_pooler=L(ROIPooler)(
            output_size=14,
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        mask_head=L(MaskRCNNConvUpsampleHead)(
            input_shape=ShapeSpec(channels=256, width=14, height=14),
            num_classes="${..num_classes}",
            conv_dims=[256, 256, 256, 256, 256],
        ),
    ),
    pixel_mean=[103.530, 116.280, 123.675],
    pixel_std=[1.0, 1.0, 1.0],
    input_format="BGR",
)
Example #14
from detectron2.config import LazyCall as L
from detectron2.evaluation import (
    COCOEvaluator,
    COCOPanopticEvaluator,
    DatasetEvaluators,
    SemSegEvaluator,
)

from .coco import dataloader

dataloader.train.dataset.names = "coco_2017_train_panoptic_separated"
dataloader.train.dataset.filter_empty = False
dataloader.test.dataset.names = "coco_2017_val_panoptic_separated"

dataloader.evaluator = L(DatasetEvaluators)(evaluators=[
    L(COCOEvaluator)(dataset_name="${....test.dataset.names}"),
    L(SemSegEvaluator)(dataset_name="${....test.dataset.names}"),
    L(COCOPanopticEvaluator)(dataset_name="${....test.dataset.names}"),
])
Example #15
def test_instantiate_dataclass(self):
    a = L(TestDataClass)(x=1, y="s")
    a = instantiate(a)
    self.assertEqual(a.x, 1)
    self.assertEqual(a.y, "s")
Example #16
model = L(MMDetDetector)(
    detector=dict(
        type="MaskRCNN",
        pretrained="torchvision://resnet50",
        backbone=dict(
            type="ResNet",
            depth=50,
            num_stages=4,
            out_indices=(0, 1, 2, 3),
            frozen_stages=1,
            norm_cfg=dict(type="BN", requires_grad=True),
            norm_eval=True,
            style="pytorch",
        ),
        neck=dict(type="FPN",
                  in_channels=[256, 512, 1024, 2048],
                  out_channels=256,
                  num_outs=5),
        rpn_head=dict(
            type="RPNHead",
            in_channels=256,
            feat_channels=256,
            anchor_generator=dict(
                type="AnchorGenerator",
                scales=[8],
                ratios=[0.5, 1.0, 2.0],
                strides=[4, 8, 16, 32, 64],
            ),
            bbox_coder=dict(
                type="DeltaXYWHBBoxCoder",
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[1.0, 1.0, 1.0, 1.0],
            ),
            loss_cls=dict(type="CrossEntropyLoss",
                          use_sigmoid=True,
                          loss_weight=1.0),
            loss_bbox=dict(type="L1Loss", loss_weight=1.0),
        ),
        roi_head=dict(
            type="StandardRoIHead",
            bbox_roi_extractor=dict(
                type="SingleRoIExtractor",
                roi_layer=dict(type="RoIAlign",
                               output_size=7,
                               sampling_ratio=0),
                out_channels=256,
                featmap_strides=[4, 8, 16, 32],
            ),
            bbox_head=dict(
                type="Shared2FCBBoxHead",
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type="DeltaXYWHBBoxCoder",
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=[0.1, 0.1, 0.2, 0.2],
                ),
                reg_class_agnostic=False,
                loss_cls=dict(type="CrossEntropyLoss",
                              use_sigmoid=False,
                              loss_weight=1.0),
                loss_bbox=dict(type="L1Loss", loss_weight=1.0),
            ),
            mask_roi_extractor=dict(
                type="SingleRoIExtractor",
                roi_layer=dict(type="RoIAlign",
                               output_size=14,
                               sampling_ratio=0),
                out_channels=256,
                featmap_strides=[4, 8, 16, 32],
            ),
            mask_head=dict(
                type="FCNMaskHead",
                num_convs=4,
                in_channels=256,
                conv_out_channels=256,
                num_classes=80,
                loss_mask=dict(type="CrossEntropyLoss",
                               use_mask=True,
                               loss_weight=1.0),
            ),
        ),
        # model training and testing settings
        train_cfg=dict(
            rpn=dict(
                assigner=dict(
                    type="MaxIoUAssigner",
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.3,
                    min_pos_iou=0.3,
                    match_low_quality=True,
                    ignore_iof_thr=-1,
                ),
                sampler=dict(
                    type="RandomSampler",
                    num=256,
                    pos_fraction=0.5,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=False,
                ),
                allowed_border=-1,
                pos_weight=-1,
                debug=False,
            ),
            rpn_proposal=dict(
                nms_pre=2000,
                max_per_img=1000,
                nms=dict(type="nms", iou_threshold=0.7),
                min_bbox_size=0,
            ),
            rcnn=dict(
                assigner=dict(
                    type="MaxIoUAssigner",
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.5,
                    min_pos_iou=0.5,
                    match_low_quality=True,
                    ignore_iof_thr=-1,
                ),
                sampler=dict(
                    type="RandomSampler",
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True,
                ),
                mask_size=28,
                pos_weight=-1,
                debug=False,
            ),
        ),
        test_cfg=dict(
            rpn=dict(
                nms_pre=1000,
                max_per_img=1000,
                nms=dict(type="nms", iou_threshold=0.7),
                min_bbox_size=0,
            ),
            rcnn=dict(
                score_thr=0.05,
                nms=dict(type="nms", iou_threshold=0.5),
                max_per_img=100,
                mask_thr_binary=0.5,
            ),
        ),
    ),
    pixel_mean=[123.675, 116.280, 103.530],
    pixel_std=[58.395, 57.120, 57.375],
)
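Unlike the other examples, the inner detector config is a plain mmdet-style dict with no _target_ keys, so instantiate constructs only the outer MMDetDetector wrapper and passes the dict through for mmdet to interpret.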
Example #17
def test_bad_lazycall(self):
    with self.assertRaises(Exception):
        L(3)
Example #18
model = L(GeneralizedRCNN)(
    backbone=L(ResNet)(
        stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
        stages=L(ResNet.make_default_stages)(
            depth=50,
            stride_in_1x1=True,
            norm="FrozenBN",
        ),
        out_features=["res4"],
    ),
    proposal_generator=L(RPN)(
        in_features=["res4"],
        head=L(StandardRPNHead)(in_channels=1024, num_anchors=15),
        anchor_generator=L(DefaultAnchorGenerator)(
            sizes=[[32, 64, 128, 256, 512]],
            aspect_ratios=[0.5, 1.0, 2.0],
            strides=[16],
            offset=0.0,
        ),
        anchor_matcher=L(Matcher)(thresholds=[0.3, 0.7],
                                  labels=[0, -1, 1],
                                  allow_low_quality_matches=True),
        box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
        batch_size_per_image=256,
        positive_fraction=0.5,
        pre_nms_topk=(12000, 6000),
        post_nms_topk=(2000, 1000),
        nms_thresh=0.7,
    ),
    roi_heads=L(Res5ROIHeads)(
        num_classes=80,
        batch_size_per_image=512,
        positive_fraction=0.25,
        proposal_matcher=L(Matcher)(thresholds=[0.5],
                                    labels=[0, 1],
                                    allow_low_quality_matches=False),
        in_features=["res4"],
        pooler=L(ROIPooler)(
            output_size=14,
            scales=(1.0 / 16, ),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        res5=L(ResNet.make_stage)(
            block_class=BottleneckBlock,
            num_blocks=3,
            stride_per_block=[2, 1, 1],
            in_channels=1024,
            bottleneck_channels=512,
            out_channels=2048,
            norm="FrozenBN",
            stride_in_1x1=True,
        ),
        box_predictor=L(FastRCNNOutputLayers)(
            input_shape=L(ShapeSpec)(channels="${...res5.out_channels}",
                                     height=1,
                                     width=1),
            test_score_thresh=0.05,
            box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
            num_classes="${..num_classes}",
        ),
        mask_head=L(MaskRCNNConvUpsampleHead)(
            input_shape=L(ShapeSpec)(
                channels="${...res5.out_channels}",
                width="${...pooler.output_size}",
                height="${...pooler.output_size}",
            ),
            num_classes="${..num_classes}",
            conv_dims=[256],
        ),
    ),
    pixel_mean=[103.530, 116.280, 123.675],
    pixel_std=[1.0, 1.0, 1.0],
    input_format="BGR",
)
Example #19
import torch

from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params

SGD = L(torch.optim.SGD)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object before
        # the optimizer is instantiated.
        weight_decay_norm=0.0),
    lr=0.02,
    momentum=0.9,
    weight_decay=1e-4,
)
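A sketch of the intended usage, per the comment above (the toy model is a hypothetical stand-in for a real one):

import torch.nn as nn

from detectron2.config import instantiate

model = nn.Linear(16, 4)      # hypothetical stand-in for a real model
SGD.params.model = model      # attach the model before building the optimizer
optimizer = instantiate(SGD)  # torch.optim.SGD over the model's parameters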
Example #20
# Copyright (c) Facebook, Inc. and its affiliates.
from itertools import count

from detectron2.config import LazyCall as L

from .dir1.dir1_a import dir1a_dict, dir1a_str

dir1a_dict.a = "modified"

# the modification above won't affect future imports
from .dir1.dir1_b import dir1b_dict, dir1b_str


lazyobj = L(count)(x=dir1a_str, y=dir1b_str)
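Example #21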
from detectron2.config import LazyCall as L
from detectron2.evaluation import (
    COCOEvaluator,
    COCOPanopticEvaluator,
    DatasetEvaluators,
    SemSegEvaluator,
)

from .coco import dataloader

dataloader.train.dataset.names = "coco_2017_train_panoptic_separated"
dataloader.train.dataset.filter_empty = False
dataloader.test.dataset.names = "coco_2017_val_panoptic_separated"

dataloader.evaluator = [
    L(COCOEvaluator)(dataset_name="${...test.dataset.names}"),
    L(SemSegEvaluator)(dataset_name="${...test.dataset.names}"),
    L(COCOPanopticEvaluator)(dataset_name="${...test.dataset.names}"),
]
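Note the interpolation depth compared with Example #14: there each evaluator config sits one level deeper (inside the evaluators list of a DatasetEvaluators call), so reaching dataloader.test takes four leading dots, while the plain list here needs only three.
Example #22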
        corr = sum(x[0] for x in all_corr_total)
        total = sum(x[1] for x in all_corr_total)
        return {"accuracy": corr / total}


# --- End of code that could be in a project and be imported

dataloader = OmegaConf.create()
dataloader.train = L(build_data_loader)(
    dataset=L(torchvision.datasets.ImageNet)(
        root="/path/to/imagenet",
        split="train",
        transform=L(T.Compose)(transforms=[
            L(T.RandomResizedCrop)(size=224),
            L(T.RandomHorizontalFlip)(),
            T.ToTensor(),
            L(T.Normalize)(mean=(0.485, 0.456, 0.406),
                           std=(0.229, 0.224, 0.225)),
        ]),
    ),
    batch_size=256 // 8,
    num_workers=4,
    training=True,
)
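Note that T.ToTensor() is constructed eagerly while the surrounding transforms are LazyCall placeholders; instantiate leaves already-built objects untouched, so the two styles can be mixed in one config.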

dataloader.test = L(build_data_loader)(
    dataset=L(torchvision.datasets.ImageNet)(
        root="${...train.dataset.root}",
        split="val",
        transform=L(T.Compose)(transforms=[
            L(T.Resize)(size=256),
            L(T.CenterCrop)(size=224),
Example #23
def test_instantiate_lazy_target(self):
    # _target_ is itself the result of instantiate: the inner config is
    # instantiated first (building TestClass(int_arg=3)), and the resulting
    # object is then called with call_arg=4; the assertion implies that
    # TestClass.__call__ returns call_arg + self.int_arg.
    objconf = L(L(len)(int_arg=3))(call_arg=4)
    objconf._target_._target_ = TestClass
    self.assertEqual(instantiate(objconf), 7)
Example #24
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling import PanopticFPN
from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead

from .mask_rcnn_fpn import model

model._target_ = PanopticFPN
model.sem_seg_head = L(SemSegFPNHead)(
    input_shape={
        f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}")
        for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32])
    },
    ignore_value=255,
    num_classes=54,  # COCO panoptic
    conv_dims=128,
    common_stride=4,
    loss_weight=0.5,
    norm="GN",
)
Example #25
from detectron2.layers import ShapeSpec
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads

from .mask_rcnn_fpn import model

# Remove arguments that don't exist for Cascade R-CNN
for k in ["box_head", "box_predictor", "proposal_matcher"]:
    model.roi_heads.pop(k)

model.roi_heads.update(
    _target_=CascadeROIHeads,
    box_heads=[
        L(FastRCNNConvFCHead)(
            input_shape=ShapeSpec(channels=256, height=7, width=7),
            conv_dims=[],
            fc_dims=[1024, 1024],
        )
        for k in range(3)
    ],
    box_predictors=[
        L(FastRCNNOutputLayers)(
            input_shape=ShapeSpec(channels=1024),
            test_score_thresh=0.05,
            box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
            cls_agnostic_bbox_reg=True,
            num_classes="${...num_classes}",
        )
        for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
    ],
    proposal_matchers=[