def __init__(self):
    """Build the YOLACT network (Jittor variant): backbone, optional FPN,
    prototype net, prediction heads, and the evaluation-time Detect module.

    NOTE(review): this constructor mutates the module-global ``cfg`` in place
    (``cfg.mask_dim``, ``cfg.num_heads``) as a side effect. PredictionModule
    reads those values, so Yolact must be constructed before any standalone
    PredictionModule — hence the "called early" comment below.
    """
    super().__init__()

    self.backbone = construct_backbone(cfg.backbone)

    if cfg.freeze_bn:
        self.freeze_bn()

    # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
    if cfg.mask_type == mask_type.direct:
        cfg.mask_dim = cfg.mask_size**2
    elif cfg.mask_type == mask_type.lincomb:
        if cfg.mask_proto_use_grid:
            # Optional fixed grid features concatenated to the proto input.
            self.grid = jt.Tensor(np.load(cfg.mask_proto_grid_file))
            self.num_grids = self.grid.shape[0]
        else:
            self.num_grids = 0

        self.proto_src = cfg.mask_proto_src

        # Pick the prototype net's input channel count from its source:
        # raw image (None), FPN features, or a raw backbone stage.
        if self.proto_src is None:
            in_channels = 3
        elif cfg.fpn is not None:
            in_channels = cfg.fpn.num_features
        else:
            in_channels = self.backbone.channels[self.proto_src]
        in_channels += self.num_grids

        # The include_last_relu=false here is because we might want to change it to another function
        # make_net also returns the output channel count, which becomes mask_dim.
        self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False)

        if cfg.mask_proto_bias:
            # One extra coefficient acts as a bias term in the linear combination.
            cfg.mask_dim += 1

    self.selected_layers = cfg.backbone.selected_layers
    src_channels = self.backbone.channels

    if cfg.use_maskiou:
        self.maskiou_net = FastMaskIoUNet()

    if cfg.fpn is not None:
        # Some hacky rewiring to accomodate the FPN:
        # after this, selected_layers indexes FPN outputs (including the
        # extra downsample levels), all with the same channel count.
        self.fpn = FPN([src_channels[i] for i in self.selected_layers])
        self.selected_layers = list(
            range(len(self.selected_layers) + cfg.fpn.num_downsample))
        src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

    self.prediction_layers = nn.ModuleList()
    cfg.num_heads = len(self.selected_layers)  # read later by PredictionModule

    for idx, layer_idx in enumerate(self.selected_layers):
        # If we're sharing prediction module weights, have every module's parent be the first one
        parent = None
        if cfg.share_prediction_module and idx > 0:
            parent = self.prediction_layers[0]

        pred = PredictionModule(
            src_channels[layer_idx],
            src_channels[layer_idx],
            aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
            scales=cfg.backbone.pred_scales[idx],
            parent=parent,
            index=idx)
        self.prediction_layers.append(pred)

    # Extra parameters for the extra losses
    if cfg.use_class_existence_loss:
        # This comes from the smallest layer selected
        # Also note that cfg.num_classes includes background
        self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1)

    if cfg.use_semantic_segmentation_loss:
        self.semantic_seg_conv = nn.Conv(src_channels[0], cfg.num_classes - 1,
                                         kernel_size=1)

    # For use in evaluation
    self.detect = Detect(cfg.num_classes,
                         bkg_label=0,
                         top_k=cfg.nms_top_k,
                         conf_thresh=cfg.nms_conf_thresh,
                         nms_thresh=cfg.nms_thresh)
def __init__(self, in_channels, out_channels=1024, aspect_ratios=[[1]], scales=[1], parent=None, index=0):
    """One detection head (Jittor variant): predicts box offsets, class
    confidences, and mask coefficients for every prior at one feature level.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: fallback output width; overridden by cfg.extra_head_net
            when that is set.
        aspect_ratios: list of aspect-ratio lists, one inner list per group.
        scales: prior scales used at this level.
        parent: when prediction modules share weights, the first module whose
            layers this one reuses (None for the first/owning module).
        index: index of this head among all heads.

    NOTE(review): reads cfg.mask_dim and cfg.num_heads, which Yolact's
    constructor writes — Yolact must be constructed first.
    """
    super().__init__()

    self.num_classes = cfg.num_classes
    self.mask_dim = cfg.mask_dim  # Defined by Yolact
    self.num_priors = sum(len(x) * len(scales) for x in aspect_ratios)
    # Wrapped in a list so the shared parent module is hidden from the
    # state dict / parameter registration.
    self.parent = [parent]  # Don't include this in the state dict
    self.index = index
    self.num_heads = cfg.num_heads  # Defined by Yolact

    if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb:
        # Each head predicts only its own slice of the prototype coefficients.
        self.mask_dim = self.mask_dim // self.num_heads

    if cfg.mask_proto_prototypes_as_features:
        in_channels += self.mask_dim

    # Only the parent-less module owns layers; children reuse the parent's.
    if parent is None:
        if cfg.extra_head_net is None:
            out_channels = in_channels
        else:
            self.upfeature, out_channels = make_net(
                in_channels, cfg.extra_head_net)

        if cfg.use_prediction_module:
            self.block = Bottleneck(out_channels, out_channels // 4)
            self.conv = nn.Conv(out_channels, out_channels,
                                kernel_size=1, bias=True)
            self.bn = nn.BatchNorm(out_channels)

        self.bbox_layer = nn.Conv(out_channels, self.num_priors * 4,
                                  **cfg.head_layer_params)
        self.conf_layer = nn.Conv(out_channels, self.num_priors * self.num_classes,
                                  **cfg.head_layer_params)
        self.mask_layer = nn.Conv(out_channels, self.num_priors * self.mask_dim,
                                  **cfg.head_layer_params)

        if cfg.use_mask_scoring:
            self.score_layer = nn.Conv(out_channels, self.num_priors,
                                       **cfg.head_layer_params)

        if cfg.use_instance_coeff:
            self.inst_layer = nn.Conv(
                out_channels, self.num_priors * cfg.num_instance_coeffs,
                **cfg.head_layer_params)

        # What is this ugly lambda doing in the middle of all this clean prediction module code?
        # NOTE: make_extra closes over out_channels as rebound above.
        def make_extra(num_layers):
            if num_layers == 0:
                return lambda x: x
            else:
                # Looks more complicated than it is. This just creates an array of num_layers alternating conv-relu
                return nn.Sequential(*sum([[
                    nn.Conv(out_channels, out_channels, kernel_size=3, padding=1),
                    nn.ReLU()
                ] for _ in range(num_layers)], []))

        # Extra conv stacks before each of the (bbox, conf, mask) layers.
        self.bbox_extra, self.conf_extra, self.mask_extra = [
            make_extra(x) for x in cfg.extra_layers
        ]

        if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_coeff_gate:
            self.gate_layer = nn.Conv(out_channels,
                                      self.num_priors * self.mask_dim,
                                      kernel_size=3, padding=1)

    self.aspect_ratios = aspect_ratios
    self.scales = scales

    # Cached prior boxes, invalidated when conv/image size changes.
    self.priors = None
    self.last_conv_size = None
    self.last_img_size = None
def __init__(self):
    """Build the fast mask-IoU scoring head.

    The network is the configured ``cfg.maskiou_net`` conv stack followed by
    one appended layer spec ``(cfg.num_classes - 1, 1, {})`` — one output
    channel per non-background class. Input is a single-channel mask.
    """
    super().__init__()
    # Append the per-class scoring layer spec to the configured stack.
    net_spec = cfg.maskiou_net + [(cfg.num_classes - 1, 1, {})]
    self.maskiou_net, _ = make_net(1, net_spec, include_last_relu=True)
def __init__(self, in_channels, out_channels=1024, aspect_ratios=[[1]], scales=[1], parent=None, index=0):
    """One detection head (PyTorch variant): box offsets, class confidences,
    and mask coefficients for every prior at one FPN level.

    The comments below record the concrete argument values seen per head
    under the annotated config.
    """
    # 256, 256, [[1, 1/2, 2]], [24*2**0 , 24*2**(1/3) , 24*2**(2/3) ], parent, 0
    # 256, 256, [[1, 1/2, 2]], [48*2**0 , 48*2**(1/3) , 48*2**(2/3) ], parent, 1
    # 256, 256, [[1, 1/2, 2]], [96*2**0 , 96*2**(1/3) , 96*2**(2/3) ], parent, 2
    # 256, 256, [[1, 1/2, 2]], [192*2**0, 192*2**(1/3), 192*2**(2/3)], parent, 3
    # 256, 256, [[1, 1/2, 2]], [384*2**0, 384*2**(1/3), 384*2**(2/3)], parent, 4
    # 2**0 = 1, 2**(1/3) = 1.26, 2**(2/3) = 1.587
    # Looks like a device for producing more evenly spaced scales.
    super().__init__()

    self.num_classes = cfg.num_classes  # len(coco2017_dataset.class_names) + 1,
    self.mask_dim = cfg.mask_dim  # Defined by Yolact # fpn -> 32
    self.num_priors = sum(len(x) * len(scales) for x in aspect_ratios)  # = 9
    self.parent = [parent]  # Don't include this in the state dict
    self.index = index
    self.num_heads = cfg.num_heads  # Defined by Yolact -> 5

    # False
    if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb:
        self.mask_dim = self.mask_dim // self.num_heads

    # False
    if cfg.mask_proto_prototypes_as_features:
        in_channels += self.mask_dim

    # The first prediction layer has parent=None; only it owns the layers.
    if parent is None:
        if cfg.extra_head_net is None:
            out_channels = in_channels
        else:
            # 'extra_head_net': [(256, 3, {'padding': 1})],
            self.upfeature, out_channels = make_net(
                in_channels, cfg.extra_head_net)
            # No change in channel count: conv2d(kernel=3, pad=1).
            # out_channels = 256.

        # False
        if cfg.use_prediction_module:
            self.block = Bottleneck(out_channels, out_channels // 4)
            self.conv = nn.Conv2d(out_channels, out_channels,
                                  kernel_size=1, bias=True)
            self.bn = nn.BatchNorm2d(out_channels)

        # cfg 'head_layer_params': {'kernel_size': 3, 'padding': 1},
        self.bbox_layer = nn.Conv2d(
            out_channels, self.num_priors * 4,
            **cfg.head_layer_params)  # out channel: (9*4)
        self.conf_layer = nn.Conv2d(
            out_channels, self.num_priors * self.num_classes,
            **cfg.head_layer_params)  # out channel: (9*c)
        self.mask_layer = nn.Conv2d(
            out_channels, self.num_priors * self.mask_dim,
            **cfg.head_layer_params)  # out channel: (9*32) - coefficient

        # False
        if cfg.use_mask_scoring:
            self.score_layer = nn.Conv2d(out_channels, self.num_priors,
                                         **cfg.head_layer_params)

        # False // 'num_instance_coeffs': 64, -> But, not used.
        # Used when one wants the loss computed from coefficients rather
        # than from bbox IoU.
        if cfg.use_instance_coeff:
            self.inst_layer = nn.Conv2d(
                out_channels, self.num_priors * cfg.num_instance_coeffs,
                **cfg.head_layer_params)

        # What is this ugly lambda doing in the middle of all this clean prediction module code?
        def make_extra(num_layers):
            if num_layers == 0:
                return lambda x: x
            else:
                # Looks more complicated than it is. This just creates an array of num_layers alternating conv-relu
                return nn.Sequential(*sum([[
                    nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                    nn.ReLU(inplace=True)
                ] for _ in range(num_layers)], []))

        # # Add extra layers between the backbone and the network heads
        # # The order is (bbox, conf, mask)
        # 'extra_layers': (0, 0, 0) -> i.e. with the default settings nothing changes.
        self.bbox_extra, self.conf_extra, self.mask_extra = [
            make_extra(x) for x in cfg.extra_layers
        ]

        if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_coeff_gate:  # True and False.
            self.gate_layer = nn.Conv2d(out_channels,
                                        self.num_priors * self.mask_dim,
                                        kernel_size=3, padding=1)

    self.aspect_ratios = aspect_ratios
    self.scales = scales

    # Prior-box cache state.
    self.priors = None
    self.last_conv_size = None
    self.last_img_size = None
def __init__(self):
    """Build the YOLACT network (PyTorch variant): backbone, FPN, prototype
    net, shared prediction heads, and the evaluation-time Detect module.

    NOTE(review): mutates the global ``cfg`` (``mask_dim``, ``num_heads``),
    which PredictionModule later reads — construction order matters.
    """
    super().__init__()

    self.backbone = construct_backbone(
        cfg.backbone)  # resnet101_dcn_inter3_backbone

    if cfg.freeze_bn:
        self.freeze_bn()

    # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
    if cfg.mask_type == mask_type.direct:
        cfg.mask_dim = cfg.mask_size**2
    elif cfg.mask_type == mask_type.lincomb:
        if cfg.mask_proto_use_grid:  # False
            self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
            self.num_grids = self.grid.size(0)
        else:
            self.num_grids = 0  # yolact_plus default: 0

        self.proto_src = cfg.mask_proto_src

        if self.proto_src is None:
            in_channels = 3  # note: 0 != None
        elif cfg.fpn is not None:
            in_channels = cfg.fpn.num_features  # fpn.num_features -- default 'num_features': 256,
        else:
            in_channels = self.backbone.channels[self.proto_src]
        in_channels += self.num_grids  # (256 + 0)

        # TODO # Fig. 3 PART
        # The include_last_relu=false here is because we might want to change it to another function
        # 'mask_proto_net': [(256, 3, {'padding': 1})] * 3 + [(None, -2, {}), (256, 3, {'padding': 1})] + [(32, 1, {})],
        self.proto_net, cfg.mask_dim = make_net(
            in_channels, cfg.mask_proto_net,
            include_last_relu=False)  # 256 in; six convs plus a bilinear upsample
        # make_net returns, as its second value, the channel count of the
        # final output once in_channels has passed through cfg.mask_proto_net;
        # that final channel count becomes cfg.mask_dim.

        if cfg.mask_proto_bias:  # False
            cfg.mask_dim += 1
        # cfg.mask_dim = 32

    self.selected_layers = cfg.backbone.selected_layers  # yolact_plus -- [1, 2, 3]
    src_channels = self.backbone.channels  # src_channels = [256, 512, 1024, 2048]

    # True
    # TODO#
    if cfg.use_maskiou:
        self.maskiou_net = FastMaskIoUNet()

    # 'fpn': fpn_base.copy({
    #     'use_conv_downsample': True,
    #     'num_downsample': 2,
    # }),
    # TODO#
    if cfg.fpn is not None:
        # Some hacky rewiring to accomodate the FPN
        self.fpn = FPN([src_channels[i] for i in self.selected_layers
                        ])  # passes [512, 1024, 2048]
        self.selected_layers = list(
            range(len(self.selected_layers) + cfg.fpn.num_downsample))  # range(3 + 2)
        src_channels = [cfg.fpn.num_features] * len(self.selected_layers)
        # src_channels = [256, 256, 256, 256, 256]
        # selected_layers : [0, 1, 2, 3, 4]

    self.prediction_layers = nn.ModuleList()
    cfg.num_heads = len(self.selected_layers)  # 5; read by PredictionModule.

    for idx, layer_idx in enumerate(self.selected_layers):
        # If we're sharing prediction module weights, have every module's parent be the first one
        parent = None
        # True
        if cfg.share_prediction_module and idx > 0:
            parent = self.prediction_layers[0]

        # src_channels originally held the channel counts of the resnet
        # stages at layer_idx; i.e. boxes are predicted at the selected
        # layers. Each constructed pred is appended to prediction_layers
        # (one per selected layer).
        pred = PredictionModule(
            src_channels[layer_idx],
            src_channels[layer_idx],
            aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
            scales=cfg.backbone.pred_scales[idx],
            parent=parent,
            index=idx)
        self.prediction_layers.append(pred)

    # False
    # Extra parameters for the extra losses
    if cfg.use_class_existence_loss:
        # This comes from the smallest layer selected
        # Also note that cfg.num_classes includes background
        self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1)

    # True
    if cfg.use_semantic_segmentation_loss:
        self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes - 1,
                                           kernel_size=1)

    # For use in evaluation
    self.detect = Detect(cfg.num_classes,
                         bkg_label=0,
                         top_k=cfg.nms_top_k,
                         conf_thresh=cfg.nms_conf_thresh,
                         nms_thresh=cfg.nms_thresh)
def __init__(self, in_channels, out_channels=1024, aspect_ratios=[[1]], scales=[1], parent=None, index=0):
    """One detection head (PyTorch variant): predicts box offsets, class
    confidences, and mask coefficients for every prior at one feature level.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: fallback width; overridden by cfg.extra_head_net.
        aspect_ratios: list of aspect-ratio lists, one inner list per group.
        scales: prior scales for this level.
        parent: shared-weight parent module, or None for the owning module.
        index: index of this head among all heads.
    """
    super().__init__()

    self.num_classes = cfg.num_classes
    self.mask_dim = cfg.mask_dim  # Defined by Yolact
    # for yolact num_priors = 36 ??
    self.num_priors = sum(len(x)*len(scales) for x in aspect_ratios)
    self.parent = [parent]  # Don't include this in the state dict
    self.index = index
    self.num_heads = cfg.num_heads  # Defined by Yolact

    # 'mask_proto_split_prototypes_by_head': False for coco yolact
    if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb:
        self.mask_dim = self.mask_dim // self.num_heads

    # mask_proto_prototypes_as_features False for coco yolact
    if cfg.mask_proto_prototypes_as_features:
        in_channels += self.mask_dim

    ### ________________________________ making prediction head ____________________________________________________
    if parent is None:
        # yolact 'extra_head_net': [(256, 3, {'padding': 1})]
        if cfg.extra_head_net is None:
            out_channels = in_channels
        else:
            self.upfeature, out_channels = make_net(in_channels, cfg.extra_head_net)

        # 'use_prediction_module': False,
        if cfg.use_prediction_module:
            self.block = Bottleneck(out_channels, out_channels // 4)
            self.conv = nn.Conv2d(out_channels, out_channels,
                                  kernel_size=1, bias=True)
            self.bn = nn.BatchNorm2d(out_channels)

        # 'head_layer_params': {'kernel_size': 3, 'padding': 1}
        # 36 x 4 = 144
        self.bbox_layer = nn.Conv2d(out_channels, self.num_priors * 4,
                                    **cfg.head_layer_params)
        # 36 x 81 (for coco) = 2916
        self.conf_layer = nn.Conv2d(out_channels, self.num_priors * self.num_classes,
                                    **cfg.head_layer_params)
        # mask_dim = 256 ??
        self.mask_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim,
                                    **cfg.head_layer_params)

        # 'use_mask_scoring': False,
        if cfg.use_mask_scoring:
            self.score_layer = nn.Conv2d(out_channels, self.num_priors,
                                         **cfg.head_layer_params)

        # 'use_instance_coeff': False,
        if cfg.use_instance_coeff:
            self.inst_layer = nn.Conv2d(out_channels,
                                        self.num_priors * cfg.num_instance_coeffs,
                                        **cfg.head_layer_params)

        # What is this ugly lambda doing in the middle of all this clean prediction module code?
        def make_extra(num_layers):
            if num_layers == 0:
                return lambda x: x
            else:
                # Looks more complicated than it is. This just creates an array of num_layers alternating conv-relu
                return nn.Sequential(*sum([[
                    nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                    nn.ReLU(inplace=True)
                ] for _ in range(num_layers)], []))

        # 'extra_layers': (0, 0, 0), so these layers basically do nothing
        self.bbox_extra, self.conf_extra, self.mask_extra = [make_extra(x) for x in cfg.extra_layers]

        # 'mask_proto_coeff_gate': False, so ... doing nothing
        if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_coeff_gate:
            self.gate_layer = nn.Conv2d(out_channels,
                                        self.num_priors * self.mask_dim,
                                        kernel_size=3, padding=1)
    ### ____________________________________________________________________________________________________________

    self.aspect_ratios = aspect_ratios
    self.scales = scales

    self.priors = None
    self.last_conv_size = None  ## only used for debugging ????
    self.last_img_size = None
def __init__(self):
    """Build the YOLACT++ network (PyTorch variant): backbone, FPN, prototype
    net, shared prediction heads, and the evaluation-time Detect module.

    NOTE(review): writes ``cfg.mask_dim`` and ``cfg.num_heads`` as side
    effects; PredictionModule reads them, so construction order matters.
    """
    super().__init__()

    # yolact++ cfg.backbone =
    # 'backbone': resnet101_dcn_inter3_backbone.copy({
    #     'selected_layers': list(range(1, 4)),
    #
    #     'pred_aspect_ratios': [[[1, 1 / 2, 2]]] * 5,
    #     'pred_scales': [[i * 2 ** (j / 3.0) for j in range(3)] for i in [24, 48, 96, 192, 384]],
    #     'use_pixel_scales': True,
    #     'preapply_sqrt': False,
    #     'use_square_anchors': False,
    # })
    self.backbone = construct_backbone(cfg.backbone)

    if cfg.freeze_bn:
        self.freeze_bn()

    # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
    if cfg.mask_type == mask_type.direct:
        # 16^2 = 256 ??
        cfg.mask_dim = cfg.mask_size**2
    elif cfg.mask_type == mask_type.lincomb:
        # mask_proto_use_grid ALWAYS false ??
        if cfg.mask_proto_use_grid:
            self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
            self.num_grids = self.grid.size(0)
        else:
            self.num_grids = 0  # yolact uses 0

        self.proto_src = cfg.mask_proto_src

        if self.proto_src is None:
            in_channels = 3
        elif cfg.fpn is not None:
            in_channels = cfg.fpn.num_features
        else:
            in_channels = self.backbone.channels[self.proto_src]
        in_channels += self.num_grids

        # The include_last_relu=false here is because we might want to change it to another function
        # yolact++ proto net:
        # 'mask_proto_net': [(256, 3, {'padding': 1})] * 3
        #                 + [(None, -2, {}), (256, 3, {'padding': 1})]
        #                 + [(32, 1, {})],
        self.proto_net, cfg.mask_dim = make_net(in_channels,
                                                cfg.mask_proto_net,
                                                include_last_relu=False)

        if cfg.mask_proto_bias:
            cfg.mask_dim += 1
    ## end of mask type if/else ______________________________________________]

    self.selected_layers = cfg.backbone.selected_layers
    src_channels = self.backbone.channels

    if cfg.use_maskiou:
        self.maskiou_net = FastMaskIoUNet()

    if cfg.fpn is not None:
        # Some hacky rewiring to accomodate the FPN
        self.fpn = FPN(
            # yolact++ 101 selected layers = 1,2,3
            # 2nd 128x4
            # 3rd 256x4
            # 4th 512x4
            [src_channels[i] for i in self.selected_layers]
        )
        self.selected_layers = list(
            # selected_layers = 0,1,2,3,4
            range(
                # yolact++: len([1, 2, 3]) plus 2 downsample levels
                len(self.selected_layers) + cfg.fpn.num_downsample)
        )
        # num features = 256 x 5
        src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

    self.prediction_layers = nn.ModuleList()
    cfg.num_heads = len(self.selected_layers)  # --> 5 num_heads ??

    # sooo... is this making 5 prediction modules ????
    for idx, layer_idx in enumerate(self.selected_layers):
        # If we're sharing prediction module weights, have every module's parent be the first one
        parent = None
        # yolact++ share_prediction_module always True
        if cfg.share_prediction_module and idx > 0:
            parent = self.prediction_layers[0]

        pred = PredictionModule(
            # in_channels=
            src_channels[layer_idx],
            # out_channels=
            src_channels[layer_idx],
            # 'pred_scales': [[1]] * 6
            # 'pred_aspect_ratios': [[[0.66685089, 1.7073535, 0.87508774, 1.16524493,
            #                          0.49059086]]] * 6
            aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
            scales=cfg.backbone.pred_scales[idx],
            parent=parent,
            index=idx)
        self.prediction_layers.append(pred)

    # Extra parameters for the extra losses
    # always False ??
    if cfg.use_class_existence_loss:
        # This comes from the smallest layer selected
        # Also note that cfg.num_classes includes background
        self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1)

    # yolact always True ??
    if cfg.use_semantic_segmentation_loss:
        self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes-1,
                                           kernel_size=1)

    # For use in evaluation
    self.detect = Detect(cfg.num_classes,
                         bkg_label=0,
                         top_k=cfg.nms_top_k,              # 'nms_top_k': 200,
                         conf_thresh=cfg.nms_conf_thresh,  # 'nms_conf_thresh': 0.05
                         nms_thresh=cfg.nms_thresh         # 'nms_thresh': 0.5
                         )