class ViTBackboneInt(nn.Module):
    """ViT backbone from ``timm`` wrapped in an ``IntermediateLayerGetter``.

    The wrapped model is stored in ``self.body`` and is asked to return the
    ``'blocks'`` layer under the key ``'0'``.

    Args:
        train_backbone: When true every parameter of ``self.body`` is marked
            trainable; otherwise every parameter is frozen.
        channel_768: When true ``self.num_channels`` is ``[768]``, otherwise
            ``[2048]``.
        tiny: When true ``'vit_small_patch16_224'`` is created, otherwise
            ``'vit_base_patch16_384'``. When left as ``None`` a module-level
            ``tiny`` is used if one exists, else ``False``.
    """

    def __init__(self, train_backbone: bool, channel_768: bool, tiny=None):
        super().__init__()

        if tiny is None:
            # The constructor used to read a bare ``tiny`` name, which only
            # works if the module defines such a global. Keep honouring that
            # global when present, but fall back to False instead of raising
            # NameError.
            tiny = globals().get('tiny', False)

        model_name = 'vit_small_patch16_224' if tiny else 'vit_base_patch16_384'
        self.body = IntermediateLayerGetter(
            timm.create_model(model_name, pretrained=True),
            return_layers={'blocks': '0'})
        self.tiny = bool(tiny)

        # One pass covers both the "train" and the "freeze" case.
        trainable = bool(train_backbone)
        for parameter in self.body.parameters():
            parameter.requires_grad_(trainable)

        self.num_channels = [768] if channel_768 else [2048]
        self.strides = [32]
def test_feature_extraction_methods_equivalence(self):
    """Check that two feature-extraction wrappers agree on a ResNet-18.

    Builds an ``IntermediateLayerGetter`` and the extractor returned by
    ``self._create_feature_extractor`` from the same model and the same
    ``return_layers``, then asserts that their named parameters and their
    outputs on ``self.inp`` are identical.
    """
    model = models.resnet18(**self.model_defaults).eval()
    return_layers = {
        name: name for name in ('layer1', 'layer2', 'layer3', 'layer4')
    }

    ilg_model = IntermediateLayerGetter(model, return_layers).eval()
    fx_model = self._create_feature_extractor(model, return_layers)

    # Check that we have the same parameters. The lists are materialised so
    # that a difference in length is caught: zip() alone stops at the shorter
    # sequence and would hide extra parameters on either side.
    ilg_params = list(ilg_model.named_parameters())
    fx_params = list(fx_model.named_parameters())
    assert len(ilg_params) == len(fx_params)
    for (n1, p1), (n2, p2) in zip(ilg_params, fx_params):
        assert n1 == n2
        assert p1.equal(p2)

    # And that outputs match
    with torch.no_grad():
        ilg_out = ilg_model(self.inp)
        fgn_out = fx_model(self.inp)

    # Compare the full key lists (names, order and count) rather than a
    # zip(), which would ignore extra keys in either output.
    assert list(ilg_out.keys()) == list(fgn_out.keys())
    for k in ilg_out:
        assert ilg_out[k].equal(fgn_out[k])
class MyGCNNet(nn.Module):
    """ResNet-50 backbone followed by 1x1 adapters and a DeepPoolLayer decoder.

    ``forward`` returns the single-channel score map resized to the input's
    spatial size, together with its sigmoid.
    """

    def __init__(self):
        super().__init__()

        # Backbone: five intermediate outputs are exposed as e0..e4.
        build_resnet50 = resnet.__dict__["resnet50"]
        trunk = build_resnet50(
            pretrained=True,
            replace_stride_with_dilation=[False, False, True],
        )
        taps = {
            'relu': 'e0',
            'layer1': 'e1',
            'layer2': 'e2',
            'layer3': 'e3',
            'layer4': 'e4',
        }
        self.backbone = IntermediateLayerGetter(trunk, return_layers=taps)

        # Parameters whose name contains "bn" are excluded from training.
        for name, param in self.backbone.named_parameters():
            if "bn" in name:
                param.requires_grad = False

        # 1x1 convolutions adapting each backbone output's channel count.
        self.relu = nn.ReLU(inplace=True)
        self.convert5 = nn.Conv2d(2048, 512, kernel_size=1, stride=1, bias=False)
        self.convert4 = nn.Conv2d(1024, 512, kernel_size=1, stride=1, bias=False)
        self.convert3 = nn.Conv2d(512, 256, kernel_size=1, stride=1, bias=False)
        self.convert2 = nn.Conv2d(256, 256, kernel_size=1, stride=1, bias=False)
        self.convert1 = nn.Conv2d(64, 128, kernel_size=1, stride=1, bias=False)

        # Decoder stages, deepest first: (in, out, flag, flag).
        self.deep_pool5 = DeepPoolLayer(512, 512, True, True)
        self.deep_pool4 = DeepPoolLayer(512, 256, True, True)
        self.deep_pool3 = DeepPoolLayer(256, 256, True, True)
        self.deep_pool2 = DeepPoolLayer(256, 128, True, True)
        self.deep_pool1 = DeepPoolLayer(128, 128, False, False)

        # Final 1x1 convolution producing the score map.
        self.score = nn.Conv2d(128, 1, kernel_size=1, stride=1)

    def forward(self, x):
        """Return ``(logits, sigmoid(logits))`` at the spatial size of ``x``."""
        taps = self.backbone(x)

        adapters = (self.convert1, self.convert2, self.convert3,
                    self.convert4, self.convert5)
        keys = ("e0", "e1", "e2", "e3", "e4")
        shallow, low, mid, high, deep = (
            self.relu(adapter(taps[key])) for adapter, key in zip(adapters, keys)
        )

        # Decode from the deepest feature down to the shallowest.
        fused = self.deep_pool5(deep, high)
        fused = self.deep_pool4(fused, mid)
        fused = self.deep_pool3(fused, low)
        fused = self.deep_pool2(fused, shallow)
        fused = self.deep_pool1(fused)

        # Score, then resize to the input's height and width.
        logits = self.score(fused)
        logits = F.interpolate(logits, size=x.shape[2:], mode='bilinear',
                               align_corners=True)
        return logits, torch.sigmoid(logits)
class MyGCNNet(nn.Module):
    """ResNet-50 + two graph networks + a DeepPoolLayer decoder.

    Backbone features are sampled at pixel coordinates, passed through
    ``SAGENet1`` and ``SAGENet2``, scattered back onto a dense grid and handed
    to the decoder as ``x_gcn``.

    Args:
        has_bn, normalize, residual, concat: Forwarded unchanged to both
            ``SAGENet1`` and ``SAGENet2``.
    """

    def __init__(self, has_bn=False, normalize=False, residual=False, concat=True):
        super(MyGCNNet, self).__init__()

        # Backbone: five intermediate outputs are exposed as e0..e4.
        backbone = resnet.__dict__["resnet50"](
            pretrained=True, replace_stride_with_dilation=[False, False, True])
        return_layers = {
            'relu': 'e0',
            'layer1': 'e1',
            'layer2': 'e2',
            'layer3': 'e3',
            'layer4': 'e4',
        }
        self.backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)

        # Parameters whose name contains "bn" are excluded from training.
        for name, param in self.backbone.named_parameters():
            if "bn" in name:
                param.requires_grad = False

        # 1x1 convolutions adapting each backbone output's channel count.
        self.relu = nn.ReLU(inplace=True)
        self.convert5 = nn.Conv2d(2048, 512, 1, 1, bias=False)
        self.convert4 = nn.Conv2d(1024, 512, 1, 1, bias=False)
        self.convert3 = nn.Conv2d(512, 256, 1, 1, bias=False)
        self.convert2 = nn.Conv2d(256, 256, 1, 1, bias=False)
        self.convert1 = nn.Conv2d(64, 128, 1, 1, bias=False)

        # Graph networks: the second consumes the first one's output width.
        self.model_gnn1 = SAGENet1(
            in_dim=512, hidden_dims=[512, 512], has_bn=has_bn,
            normalize=normalize, residual=residual, concat=concat)
        self.model_gnn2 = SAGENet2(
            in_dim=self.model_gnn1.hidden_dims[-1],
            hidden_dims=[512, 512, 512, 512], skip_which=[2, 4], skip_dim=256,
            has_bn=has_bn, normalize=normalize, residual=residual, concat=concat)

        # Decoder stages, deepest first. Row 0 holds input widths, row 1
        # output widths.
        deep_pool = [[512, 512, 256, 256, 128], [512, 256, 256, 128, 128]]
        self.deep_pool5 = DeepPoolLayer(deep_pool[0][0], deep_pool[1][0], True, True, True, 512)
        self.deep_pool4 = DeepPoolLayer(deep_pool[0][1], deep_pool[1][1], True, True, True, 512)
        self.deep_pool3 = DeepPoolLayer(deep_pool[0][2], deep_pool[1][2], True, True, False)
        self.deep_pool2 = DeepPoolLayer(deep_pool[0][3], deep_pool[1][3], True, True, False)
        self.deep_pool1 = DeepPoolLayer(deep_pool[0][4], deep_pool[1][4], False, False, False)

        # Final 1x1 convolution producing the score map.
        score = 128
        self.score = nn.Conv2d(score, 1, 1, 1)

    def forward(self, x, batched_graph, batched_pixel_graph):
        """Run the backbone, both graph networks and the decoder.

        Args:
            x: Input batch; its trailing two dimensions give the output size.
            batched_graph: Graph batch fed to ``model_gnn2``; its ``x``
                attribute is overwritten here.
            batched_pixel_graph: Graph batch fed to ``model_gnn1``. Must
                provide ``data_where`` and ``batch``; its ``x`` attribute is
                overwritten here.

        Returns:
            ``(gcn2_logits, gcn2_logits_sigmoid, sod_gcn2_sigmoid, merge,
            sigmoid(merge))`` where the last three are resized to the spatial
            size of ``x``.
        """
        feature = self.backbone(x)
        feature1 = self.relu(self.convert1(feature["e0"]))
        feature2 = self.relu(self.convert2(feature["e1"]))
        feature3 = self.relu(self.convert3(feature["e2"]))
        feature4 = self.relu(self.convert4(feature["e3"]))
        feature5 = self.relu(self.convert5(feature["e4"]))

        x_size = x.size()[2:]

        # GCN 1: sample the e2 feature map at every listed coordinate.
        # data_where columns index dims 0, 2 and 3 of the feature map.
        data_where = batched_pixel_graph.data_where
        pixel_nodes_feat = feature["e2"][
            data_where[:, 0], :, data_where[:, 1], data_where[:, 2]]
        batched_pixel_graph.x = pixel_nodes_feat
        gcn1_feature = self.model_gnn1.forward(batched_pixel_graph)

        # GCN 2 runs on the first network's output.
        batched_graph.x = gcn1_feature
        gcn2_feature, gcn2_logits, gcn2_logits_sigmoid = self.model_gnn2.forward(
            batched_graph)

        # Scatter graph outputs back onto a dense grid.
        sod_gcn2_feature = self.sod_feature(
            data_where, gcn2_feature, batched_pixel_graph=batched_pixel_graph)
        # For eval: the per-node sigmoid gets a channel axis first.
        sod_gcn2_sigmoid = self.sod_feature(
            data_where, gcn2_logits_sigmoid.unsqueeze(1),
            batched_pixel_graph=batched_pixel_graph)

        # Decode from the deepest feature down to the shallowest.
        merge = self.deep_pool5(feature5, feature4, x_gcn=sod_gcn2_feature)
        merge = self.deep_pool4(merge, feature3, x_gcn=sod_gcn2_feature)
        merge = self.deep_pool3(merge, feature2, x_gcn=sod_gcn2_feature)
        merge = self.deep_pool2(merge, feature1, x_gcn=sod_gcn2_feature)
        merge = self.deep_pool1(merge)

        # Score, then resize both maps to the input's height and width.
        # x_size is a torch.Size slice and therefore never None.
        merge = self.score(merge)
        merge = F.interpolate(merge, x_size, mode='bilinear', align_corners=True)
        sod_gcn2_sigmoid = F.interpolate(
            sod_gcn2_sigmoid, x_size, mode='bilinear', align_corners=True)

        return (gcn2_logits, gcn2_logits_sigmoid, sod_gcn2_sigmoid, merge,
                torch.sigmoid(merge))

    @staticmethod
    def sod_feature(data_where, gcn_feature, batched_pixel_graph):
        """Scatter per-node features onto a dense 4-D grid.

        Args:
            data_where: Integer tensor whose columns 0, 1 and 2 index dims 0,
                2 and 3 of the result.
            gcn_feature: Node features; the last dimension becomes dim 1 of
                the result.
            batched_pixel_graph: Object whose ``batch`` attribute selects, for
                every row of ``data_where``, a row of ``gcn_feature``.

        Returns:
            A tensor on ``gcn_feature``'s device, sized by the per-column
            maximum of ``data_where`` plus one. Positions not listed in
            ``data_where`` are zero.
        """
        # Build the feature map. Extents come from the largest index seen in
        # each coordinate column.
        extent = (torch.max(data_where, dim=0)[0] + 1).tolist()
        size = (extent[0], gcn_feature.shape[-1], extent[1], extent[2])

        per_pixel = gcn_feature[batched_pixel_graph.batch]

        # torch.zeros, not torch.Tensor(size=...): the latter returns
        # uninitialised memory, so unlisted positions would hold garbage.
        # Allocating on the target device also avoids a CPU round trip.
        sod_gcn_feature = torch.zeros(
            size, dtype=gcn_feature.dtype, device=gcn_feature.device)
        sod_gcn_feature[
            data_where[:, 0], :, data_where[:, 1], data_where[:, 2]] = per_pixel
        return sod_gcn_feature
class MyGCNNet(nn.Module):
    """ResNet-50 backbone followed by 1x1 adapters and a DeepPoolLayer decoder.

    The backbone is built without pretrained weights; call
    ``load_pretrained_model`` to load them from a checkpoint file. At the end
    of construction every ``Conv2d`` and ``BatchNorm2d`` reachable from this
    module, backbone included, is re-initialised by ``weight_init``.
    """

    def __init__(self):
        super(MyGCNNet, self).__init__()

        # Backbone: five intermediate outputs are exposed as e0..e4.
        backbone = resnet.__dict__["resnet50"](
            pretrained=False, replace_stride_with_dilation=[False, False, True])
        return_layers = {
            'relu': 'e0',
            'layer1': 'e1',
            'layer2': 'e2',
            'layer3': 'e3',
            'layer4': 'e4',
        }
        self.backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)

        # Parameters whose name contains "bn" are excluded from training.
        for name, param in self.backbone.named_parameters():
            if "bn" in name:
                param.requires_grad = False

        # 1x1 convolutions adapting each backbone output's channel count.
        self.relu = nn.ReLU(inplace=True)
        self.convert5 = nn.Conv2d(2048, 512, 1, 1, bias=False)
        self.convert4 = nn.Conv2d(1024, 512, 1, 1, bias=False)
        self.convert3 = nn.Conv2d(512, 256, 1, 1, bias=False)
        self.convert2 = nn.Conv2d(256, 256, 1, 1, bias=False)
        self.convert1 = nn.Conv2d(64, 128, 1, 1, bias=False)

        # Decoder stages, deepest first. Row 0 holds input widths, row 1
        # output widths.
        deep_pool = [[512, 512, 256, 256, 128], [512, 256, 256, 128, 128]]
        self.deep_pool5 = DeepPoolLayer(deep_pool[0][0], deep_pool[1][0], True, True)
        self.deep_pool4 = DeepPoolLayer(deep_pool[0][1], deep_pool[1][1], True, True)
        self.deep_pool3 = DeepPoolLayer(deep_pool[0][2], deep_pool[1][2], True, True)
        self.deep_pool2 = DeepPoolLayer(deep_pool[0][3], deep_pool[1][3], True, True)
        self.deep_pool1 = DeepPoolLayer(deep_pool[0][4], deep_pool[1][4], False, False)

        # Final 1x1 convolution producing the score map.
        score = 128
        self.score = nn.Conv2d(score, 1, 1, 1)

        self.weight_init(self.modules())

    def forward(self, x):
        """Return ``(logits, sigmoid(logits))`` at the spatial size of ``x``."""
        feature = self.backbone(x)
        feature1 = self.relu(self.convert1(feature["e0"]))
        feature2 = self.relu(self.convert2(feature["e1"]))
        feature3 = self.relu(self.convert3(feature["e2"]))
        feature4 = self.relu(self.convert4(feature["e3"]))
        feature5 = self.relu(self.convert5(feature["e4"]))

        x_size = x.size()[2:]

        # Decode from the deepest feature down to the shallowest.
        merge = self.deep_pool5(feature5, feature4)
        merge = self.deep_pool4(merge, feature3)
        merge = self.deep_pool3(merge, feature2)
        merge = self.deep_pool2(merge, feature1)
        merge = self.deep_pool1(merge)

        # Score, then resize to the input's height and width. x_size is a
        # torch.Size slice and therefore never None.
        merge = self.score(merge)
        merge = F.interpolate(merge, x_size, mode='bilinear', align_corners=True)
        return merge, torch.sigmoid(merge)

    def load_pretrained_model(self, pretrained_model="./pretrained/resnet50-19c8e357.pth"):
        """Load backbone weights from a checkpoint file.

        Loading is non-strict, so keys that do not match the backbone are
        ignored rather than raising.

        Args:
            pretrained_model: Path to a file readable by ``torch.load``.
        """
        # map_location="cpu" lets a checkpoint saved from a GPU be read on a
        # host without one; load_state_dict then copies the values into the
        # parameters wherever they already live.
        # NOTE(review): torch.load unpickles the file - only use trusted
        # checkpoints.
        state_dict = torch.load(pretrained_model, map_location="cpu")
        self.backbone.load_state_dict(state_dict, strict=False)

    @staticmethod
    def weight_init(modules):
        """Initialise convolution and batch-norm layers in place.

        ``Conv2d`` weights are drawn from a normal distribution with mean 0
        and standard deviation 0.01; convolution biases are left untouched.
        ``BatchNorm2d`` weights are set to 1 and biases to 0.

        Args:
            modules: Iterable of modules; other module types are skipped.
        """
        for m in modules:
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, 0, 0.01)
            elif isinstance(m, nn.BatchNorm2d):
                # BatchNorm2d(affine=False) has no weight or bias to set.
                if m.weight is not None:
                    nn.init.ones_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)