def __init__(self, pretrained=True, average_pool=True, semantic=True, final_dim=1024): """ :param average_pool: whether or not to average pool the representations :param pretrained: Whether we need to load from scratch :param semantic: Whether or not we want to introduce the mask and the class label early on (default Yes) """ super(SimpleDetector, self).__init__() # huge thx to https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/lib/nets/resnet_v1.py backbone = _load_resnet_imagenet( pretrained=pretrained ) if USE_IMAGENET_PRETRAINED else _load_resnet(pretrained=pretrained) self.backbone = nn.Sequential( backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool, backbone.layer1, backbone.layer2, backbone.layer3, # backbone.layer4 ) self.roi_align = ROIAlign((7, 7) if USE_IMAGENET_PRETRAINED else (14, 14), spatial_scale=1 / 16, sampling_ratio=0) print("semantic") print(semantic) if semantic: self.mask_dims = 32 self.object_embed = torch.nn.Embedding(num_embeddings=81, embedding_dim=128) self.mask_upsample = torch.nn.Conv2d( 1, self.mask_dims, kernel_size=3, stride=2 if USE_IMAGENET_PRETRAINED else 1, padding=1, bias=True) else: self.object_embed = None self.mask_upsample = None after_roi_align = [backbone.layer4] self.final_dim = final_dim if average_pool: after_roi_align += [nn.AvgPool2d(4, stride=1), Flattener()] self.after_roi_align = torch.nn.Sequential(*after_roi_align) self.obj_downsample = torch.nn.Sequential( torch.nn.Dropout(p=0.1), torch.nn.Linear(2048 + (128 if semantic else 0), final_dim), torch.nn.ReLU(inplace=True), ) self.regularizing_predictor = torch.nn.Linear(2048, 81)
def __init__(self, pretrained=True, average_pool=True, semantic=True, final_dim=1024, layer_fix=True): """ :param average_pool: whether or not to average pool the representations :param pretrained: Whether we need to load from scratch :param semantic: Whether or not we want to introduce the mask and the class label early on (default Yes) """ super(SimpleDetector, self).__init__() # huge thx to https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/lib/nets/resnet_v1.py backbone = _load_resnet_imagenet( pretrained=pretrained ) if USE_IMAGENET_PRETRAINED else _load_resnet(pretrained=pretrained) self.pre_backbone = nn.Sequential( backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool, backbone.layer1, ) self.layer2 = backbone.layer2 self.cvm_2 = RegionCVM(in_channels=128 * 4, grid=[6, 6]) self.layer3 = backbone.layer3 self.cvm_3 = RegionCVM(in_channels=256 * 4, grid=[4, 4]) self.roi_align = ROIAlign((7, 7) if USE_IMAGENET_PRETRAINED else (14, 14), spatial_scale=1 / 16, sampling_ratio=0) if semantic: self.mask_dims = 32 self.object_embed = torch.nn.Embedding(num_embeddings=81, embedding_dim=128) self.mask_upsample = torch.nn.Conv2d( 1, self.mask_dims, kernel_size=3, stride=2 if USE_IMAGENET_PRETRAINED else 1, padding=1, bias=True) else: self.object_embed = None self.mask_upsample = None self.layer4 = backbone.layer4 self.cvm_4 = RegionCVM(in_channels=512 * 4, grid=[1, 1]) after_roi_align = [] self.final_dim = final_dim if average_pool: after_roi_align += [nn.AvgPool2d(7, stride=1), Flattener()] self.after_roi_align = torch.nn.Sequential(*after_roi_align) self.obj_downsample = torch.nn.Sequential( torch.nn.Dropout(p=0.1), torch.nn.Linear(2048 + (128 if semantic else 0), final_dim), torch.nn.ReLU(inplace=True), ) self.regularizing_predictor = torch.nn.Linear(2048, 81) for m in self.pre_backbone.modules(): for p in m.parameters(): p.requires_grad = False def set_bn_fix(m): classname = m.__class__.__name__ if classname.find('BatchNorm') != -1: for p in m.parameters(): p.requires_grad = False self.layer2.apply(set_bn_fix) self.layer3.apply(set_bn_fix) self.layer4.apply(set_bn_fix) if layer_fix: for m in self.layer2.modules(): for p in m.parameters(): p.requires_grad = False for m in self.layer3.modules(): for p in m.parameters(): p.requires_grad = False for m in self.layer4.modules(): for p in m.parameters(): p.requires_grad = False