def __init__(self, config, num_obj, num_rel, in_channels, hidden_dim=512, num_iter=3):
    """Create the layers of the IMP context module.

    Args:
        config: project config node; ``MODEL.ROI_RELATION_HEAD`` supplies
            ``CONTEXT_POOLING_DIM``, ``USE_GT_BOX`` and ``USE_GT_OBJECT_LABEL``.
        num_obj: output width of ``obj_fc``.
        num_rel: output width of ``rel_fc``.
        in_channels: input width of ``obj_unary``.
        hidden_dim: width shared by both GRU cells and all hidden projections.
        num_iter: stored on the instance; not used by this constructor.
    """
    super(IMPContext, self).__init__()
    self.cfg = config
    self.num_obj = num_obj
    self.num_rel = num_rel
    relation_cfg = config.MODEL.ROI_RELATION_HEAD
    self.pooling_dim = relation_cfg.CONTEXT_POOLING_DIM
    self.hidden_dim = hidden_dim
    self.num_iter = num_iter

    # Evaluation mode derived from which ground-truth inputs are enabled.
    if not relation_cfg.USE_GT_BOX:
        self.mode = 'sgdet'
    elif relation_cfg.USE_GT_OBJECT_LABEL:
        self.mode = 'predcls'
    else:
        self.mode = 'sgcls'

    # Output classifiers and the unary input projections.
    self.rel_fc = make_fc(hidden_dim, self.num_rel)
    self.obj_fc = make_fc(hidden_dim, self.num_obj)
    self.obj_unary = make_fc(in_channels, hidden_dim)
    self.edge_unary = make_fc(self.pooling_dim, hidden_dim)

    self.edge_gru = nn.GRUCell(input_size=hidden_dim, hidden_size=hidden_dim)
    self.node_gru = nn.GRUCell(input_size=hidden_dim, hidden_size=hidden_dim)

    def _gate():
        # Linear over a 2*hidden_dim input down to one value, then a sigmoid.
        return nn.Sequential(nn.Linear(hidden_dim * 2, 1), nn.Sigmoid())

    # Four independent gates; creation order is kept so parameter
    # registration order is unchanged.
    self.sub_vert_w_fc = _gate()
    self.obj_vert_w_fc = _gate()
    self.out_edge_w_fc = _gate()
    self.in_edge_w_fc = _gate()
def __init__(self, cfg):
    """Build the re-id projection plus the optional MLP head and pooler.

    Always creates ``self.fc`` (1024 -> ``cfg.REID.OUT_CHANNELS``). When
    ``cfg.REID.USE_DIFF_FEAT`` is set, also creates ``fc6``/``fc7``. When
    ``cfg.MODEL.RETINANET_ON`` is set, also creates ``self.pooler``.

    Args:
        cfg: project config node.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()
    self.cfg = cfg
    self.in_channels = 1024
    self.out_channels = cfg.REID.OUT_CHANNELS
    self.fc = make_fc(self.in_channels, self.out_channels)

    # Read once, before either optional branch: the pooler below needs the
    # resolution too, and it used to be bound only inside the USE_DIFF_FEAT
    # branch, so the RetinaNet path depended on that flag.
    # NOTE(review): the source had lost its indentation; the two branches are
    # treated as independent here -- confirm against the original layout.
    resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION

    if self.cfg.REID.USE_DIFF_FEAT:
        in_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
        input_size = in_channels * resolution ** 2
        representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
        use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
        self.fc6 = make_fc(input_size, representation_size, use_gn)
        self.fc7 = make_fc(representation_size, representation_size, use_gn)

    if self.cfg.MODEL.RETINANET_ON:
        scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        self.pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
def __init__(self, cfg):
    """Build the RoI pooler and the two-layer MLP head (``fc6``, ``fc7``).

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_HEAD`` and
            ``MODEL.BACKBONE.OUT_CHANNELS`` are read.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_head = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head.POOLER_SCALES,
        sampling_ratio=box_head.POOLER_SAMPLING_RATIO,
    )

    # Width of one flattened pooled RoI.
    flat_dim = cfg.MODEL.BACKBONE.OUT_CHANNELS * resolution**2
    hidden_dim = box_head.MLP_HEAD_DIM
    use_gn = box_head.USE_GN

    # Deprecated predecessor: plain nn.Linear layers initialised with
    # kaiming_uniform_(a=1) and zero bias; replaced by make_fc.
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
def __init__(self, cfg, in_channels):
    """Build the (optionally cascade) RoI pooler and the ``fc6``/``fc7`` head.

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_HEAD`` and
            ``MODEL.ROI_HEADS.USE_CASCADE_POOLING`` are read.
        in_channels: channel count of the pooled feature maps.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_head = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head.POOLER_RESOLUTION
    pooler_kwargs = {
        "output_size": (resolution, resolution),
        "scales": box_head.POOLER_SCALES,
        "sampling_ratio": box_head.POOLER_SAMPLING_RATIO,
    }
    # Same constructor arguments either way; only the pooler class differs.
    pooler_cls = CascadePooler if cfg.MODEL.ROI_HEADS.USE_CASCADE_POOLING else Pooler
    built_pooler = pooler_cls(**pooler_kwargs)

    flat_dim = in_channels * resolution**2
    hidden_dim = box_head.MLP_HEAD_DIM
    use_gn = box_head.USE_GN

    self.pooler = built_pooler
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels, half_out=False, cat_all_levels=False):
    """Build the RoI pooler and the ``fc6``/``fc7`` head.

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_HEAD`` is read.
        in_channels: channel count of the pooled feature maps; also passed
            to ``Pooler``.
        half_out: when true, ``fc7`` outputs ``int(MLP_HEAD_DIM / 2)``
            features instead of ``MLP_HEAD_DIM``.
        cat_all_levels: passed through to ``Pooler``.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_head = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head.POOLER_RESOLUTION
    built_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head.POOLER_SCALES,
        sampling_ratio=box_head.POOLER_SAMPLING_RATIO,
        in_channels=in_channels,
        cat_all_levels=cat_all_levels,
    )

    flat_dim = in_channels * resolution**2
    hidden_dim = box_head.MLP_HEAD_DIM
    use_gn = box_head.USE_GN

    self.pooler = built_pooler
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)

    # Final width is halved on request.
    out_dim = int(hidden_dim / 2) if half_out else hidden_dim
    self.fc7 = make_fc(hidden_dim, out_dim, use_gn)
    self.out_channels = out_dim
def __init__(self, cfg, in_channels):
    """Build the box (and optional attribute) extractors and the rect branch.

    Args:
        cfg: project config node; a clone is kept as ``self.cfg``.
        in_channels: channel count handed to the wrapped extractors and
            used as the output width of ``rect_conv``.
    """
    super(RelationFeatureExtractor, self).__init__()
    self.cfg = cfg.clone()

    # Original author's note: should correspond to the obj_feature_map
    # function in neural-motifs.
    resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    pool_all_levels = cfg.MODEL.ROI_RELATION_HEAD.POOLING_ALL_LEVELS

    if cfg.MODEL.ATTRIBUTE_ON:
        # Two half-width extractors; the reported width is their sum.
        self.feature_extractor = make_roi_box_feature_extractor(
            cfg, in_channels, half_out=True, cat_all_levels=pool_all_levels)
        self.att_feature_extractor = make_roi_attribute_feature_extractor(
            cfg, in_channels, half_out=True, cat_all_levels=pool_all_levels)
        self.out_channels = self.feature_extractor.out_channels * 2
    else:
        self.feature_extractor = make_roi_box_feature_extractor(
            cfg, in_channels, cat_all_levels=pool_all_levels)
        self.out_channels = self.feature_extractor.out_channels

    # Optional separate spatial MLP.
    self.separate_spatial = self.cfg.MODEL.ROI_RELATION_HEAD.CAUSAL.SEPARATE_SPATIAL
    if self.separate_spatial:
        spatial_in = self.feature_extractor.resize_channels
        spatial_out = self.feature_extractor.out_channels
        self.spatial_fc = nn.Sequential(
            make_fc(spatial_in, spatial_out // 2),
            nn.ReLU(inplace=True),
            make_fc(spatial_out // 2, spatial_out),
            nn.ReLU(inplace=True),
        )

    # Union rectangle size and the conv stack applied to the 2-channel input.
    self.rect_size = resolution * 4 - 1
    half_channels = in_channels // 2
    self.rect_conv = nn.Sequential(
        nn.Conv2d(2, half_channels, kernel_size=7, stride=2, padding=3, bias=True),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(half_channels, momentum=0.01),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        nn.Conv2d(half_channels, in_channels, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(in_channels, momentum=0.01),
    )
def __init__(self, cfg):
    """Build the re-id projection, the optional conv+fc head and the pooler.

    Always creates ``self.fc`` (1024 -> ``cfg.REID.OUT_CHANNELS``). With
    ``cfg.REID.USE_DIFF_FEAT`` it also creates ``xconvs`` and ``fc6``; with
    ``cfg.MODEL.RETINANET_ON`` it creates ``self.pooler``.

    Args:
        cfg: project config node.
    """
    super(FPNXconv1fcFeatureExtractor, self).__init__()
    self.cfg = cfg
    self.in_channels = 1024
    self.out_channels = cfg.REID.OUT_CHANNELS
    self.fc = make_fc(self.in_channels, self.out_channels)

    if self.cfg.REID.USE_DIFF_FEAT:
        box_head = cfg.MODEL.ROI_BOX_HEAD
        resolution = box_head.POOLER_RESOLUTION
        width = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
        use_gn = box_head.USE_GN
        conv_head_dim = box_head.CONV_HEAD_DIM
        num_stacked_convs = box_head.NUM_STACKED_CONVS
        dilation = box_head.DILATION

        conv_stack = []
        for _ in range(num_stacked_convs):
            conv_stack.append(
                nn.Conv2d(
                    width,
                    conv_head_dim,
                    kernel_size=3,
                    stride=1,
                    padding=dilation,
                    dilation=dilation,
                    # The conv bias is dropped when group norm follows.
                    bias=not use_gn,
                )
            )
            width = conv_head_dim
            if use_gn:
                conv_stack.append(group_norm(width))
            conv_stack.append(nn.ReLU(inplace=True))
        self.add_module("xconvs", nn.Sequential(*conv_stack))

        # Normal(std=0.01) weights; zero bias where a bias exists.
        for layer in self.xconvs.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                if not use_gn:
                    torch.nn.init.constant_(layer.bias, 0)

        flat_dim = conv_head_dim * resolution ** 2
        hidden_dim = box_head.MLP_HEAD_DIM
        self.fc6 = make_fc(flat_dim, hidden_dim, use_gn=False)

    # NOTE(review): indentation was lost in the source. This branch re-reads
    # the resolution itself, so it is laid out as independent of
    # USE_DIFF_FEAT -- confirm against the original file.
    if self.cfg.MODEL.RETINANET_ON:
        resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        self.pooler = Pooler(
            output_size=(resolution, resolution),
            scales=cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES,
            sampling_ratio=cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO,
        )
def __init__(self, config, obj_classes, rel_classes, in_channels):
    """Build the embeddings, position encoder and projection layers.

    Args:
        config: project config node; ``MODEL.ROI_RELATION_HEAD`` and
            ``GLOVE_DIR`` are read.
        obj_classes: object class names; its length sizes the embeddings
            and the ``pred_layer`` output.
        rel_classes: relation class names; stored only.
        in_channels: stored as ``self.obj_dim``.
    """
    super(VTransEFeature, self).__init__()
    self.cfg = config
    self.obj_classes = obj_classes
    self.rel_classes = rel_classes
    self.num_obj_classes = len(obj_classes)
    relation_cfg = self.cfg.MODEL.ROI_RELATION_HEAD

    # Evaluation mode derived from which ground-truth inputs are enabled.
    if not relation_cfg.USE_GT_BOX:
        self.mode = 'sgdet'
    elif relation_cfg.USE_GT_OBJECT_LABEL:
        self.mode = 'predcls'
    else:
        self.mode = 'sgcls'

    # Two word-embedding tables, both initialised from the same vectors.
    self.embed_dim = relation_cfg.EMBED_DIM
    pretrained_vecs = obj_edge_vectors(
        self.obj_classes, wv_dir=self.cfg.GLOVE_DIR, wv_dim=self.embed_dim)
    self.obj_embed1 = nn.Embedding(self.num_obj_classes, self.embed_dim)
    self.obj_embed2 = nn.Embedding(self.num_obj_classes, self.embed_dim)
    with torch.no_grad():
        for table in (self.obj_embed1, self.obj_embed2):
            table.weight.copy_(pretrained_vecs, non_blocking=True)

    # Position embedding: 9 inputs -> 32 -> 128.
    pos_dim = 128
    self.pos_embed = nn.Sequential(
        nn.Linear(9, 32),
        nn.BatchNorm1d(32, momentum=0.001),
        nn.Linear(32, pos_dim),
        nn.ReLU(inplace=True),
    )

    # Object and relation context.
    self.obj_dim = in_channels
    self.dropout_rate = relation_cfg.CONTEXT_DROPOUT_RATE
    self.hidden_dim = relation_cfg.CONTEXT_HIDDEN_DIM
    fused_dim = self.obj_dim + self.embed_dim + pos_dim
    self.pred_layer = make_fc(fused_dim, self.num_obj_classes)
    self.fc_layer = make_fc(fused_dim, self.hidden_dim)

    # "Untreated" average-feature buffers, only with effect analysis enabled.
    self.average_ratio = 0.0005
    self.effect_analysis = config.MODEL.ROI_RELATION_HEAD.CAUSAL.EFFECT_ANALYSIS
    if self.effect_analysis:
        self.register_buffer("untreated_obj_feat", torch.zeros(fused_dim))
        self.register_buffer("untreated_edg_feat", torch.zeros(self.obj_dim + pos_dim))
def __init__(self, cfg):
    """Build four 3x3 conv layers and two fully connected layers.

    Args:
        cfg: project config node; only ``MODEL.MASKIOU_USE_GN`` is read.
    """
    super(MaskIoUFeatureExtractor, self).__init__()

    group_norm_on = cfg.MODEL.MASKIOU_USE_GN
    stem_channels = 260  # original note: "default 257"
    conv_width = 256
    fc_width = 1024

    self.maskiou_fcn1 = make_conv3x3(stem_channels, conv_width, use_gn=group_norm_on)
    self.maskiou_fcn2 = make_conv3x3(conv_width, conv_width, use_gn=group_norm_on)
    self.maskiou_fcn3 = make_conv3x3(conv_width, conv_width, use_gn=group_norm_on)
    # Only the last conv is strided.
    self.maskiou_fcn4 = make_conv3x3(conv_width, conv_width, stride=2, use_gn=group_norm_on)

    # The first FC layer takes conv_width * 7 * 7 inputs.
    self.maskiou_fc1 = make_fc(conv_width * 7 * 7, fc_width, use_gn=group_norm_on)
    self.maskiou_fc2 = make_fc(fc_width, fc_width, use_gn=group_norm_on)
def __init__(self, cfg, in_channels):
    """Build the configurable MaskIoU conv stack and its two FC layers.

    Args:
        cfg: project config node; ``MODEL.ROI_MASKIOU_HEAD`` and
            ``MODEL.ROI_MASK_HEAD`` are read.
        in_channels: feature channels; one extra channel is added for the
            concatenated mask.

    Raises:
        NotImplementedError: if the mask ``RESOLUTION`` is neither equal to
            nor twice the mask head's ``POOLER_RESOLUTION``.
    """
    super(MaskIoUFeatureExtractor, self).__init__()

    stem_channels = in_channels + 1  # features concatenated with 1-channel mask
    maskiou_cfg = cfg.MODEL.ROI_MASKIOU_HEAD
    use_gn = maskiou_cfg.USE_GN
    representation_size = maskiou_cfg.MLP_HEAD_DIM

    resolution_key = "RESOLUTION"
    pooler_resolution_key = "POOLER_RESOLUTION"
    resolution = cfg.MODEL.ROI_MASK_HEAD[resolution_key]
    input_pooler_resolution = cfg.MODEL.ROI_MASK_HEAD[pooler_resolution_key]

    # Identity unless the mask is exactly twice the pooled feature size.
    self.max_pool2d = lambda x: x
    if resolution == input_pooler_resolution * 2:
        self.max_pool2d = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        resolution = resolution // 2  # size after the 2x2 max pool
    elif resolution != input_pooler_resolution:
        raise NotImplementedError(
            "Only supports %s == %s or %s == 2x%s. Received %d vs %d instead"
            % (resolution_key, pooler_resolution_key, resolution_key,
               pooler_resolution_key, resolution, input_pooler_resolution))

    conv_widths = maskiou_cfg.CONV_LAYERS
    # Stride 1 for every layer except the last, which uses stride 2.
    strides = [1] * len(conv_widths)
    strides[-1] = 2

    channels = stem_channels
    self.blocks = []
    for position, (width, stride) in enumerate(zip(conv_widths, strides), 1):
        layer_name = "maskiou_fcn{}".format(position)
        conv = make_conv3x3(channels, width, stride=stride, dilation=1, use_gn=use_gn)
        self.add_module(layer_name, conv)
        self.blocks.append(layer_name)
        channels = width
        if stride == 2:
            resolution = resolution // 2

    self.maskiou_fc1 = make_fc(channels * resolution**2, representation_size, use_gn=False)
    self.maskiou_fc2 = make_fc(representation_size, representation_size, use_gn=False)
    self.out_channels = representation_size
def __init__(self, cfg, in_channels):
    """Build the pooler (via ``make_pooler``) and the ``fc6``/``fc7`` head.

    Args:
        cfg: project config node.
        in_channels: channel count of the pooled feature maps.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    # NOTE(review): resolution and pooler come from cfg.MODEL[BOX_HEAD_KEY],
    # while MLP_HEAD_DIM / USE_GN are always read from ROI_BOX_HEAD --
    # confirm this is intended if BOX_HEAD_KEY can name another head.
    resolution = cfg.MODEL[BOX_HEAD_KEY].POOLER_RESOLUTION
    built_pooler = make_pooler(cfg, BOX_HEAD_KEY)

    box_head = cfg.MODEL.ROI_BOX_HEAD
    flat_dim = in_channels * resolution ** 2
    hidden_dim = box_head.MLP_HEAD_DIM
    use_gn = box_head.USE_GN

    self.pooler = built_pooler
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels, resolution, scales, sampling_ratio):
    """Build a pooler from explicit arguments plus the ``fc6``/``fc7`` head.

    Args:
        cfg: project config node; only ``MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM``
            and ``USE_GN`` are read.
        in_channels: channel count of the pooled feature maps.
        resolution: side of the square pooled output.
        scales: passed to ``Pooler``.
        sampling_ratio: passed to ``Pooler``.
    """
    super(FPN2MLPFeatureExtractorCustomized, self).__init__()

    built_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )

    box_head = cfg.MODEL.ROI_BOX_HEAD
    flat_dim = in_channels * resolution**2
    hidden_dim = box_head.MLP_HEAD_DIM
    use_gn = box_head.USE_GN

    self.pooler = built_pooler
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, config, in_channels, RCNN_top=None):
    """Build the pooler, a ResNet head stage, and the pooled FC projection.

    Args:
        config: project config node; ``MODEL.ROI_BOX_HEAD`` and
            ``MODEL.RESNETS`` are read.
        in_channels: accepted but not used by this constructor.
        RCNN_top: accepted but not used by this constructor.
    """
    super(ResNet50Conv5ROIFeatureExtractorFlatten, self).__init__()

    box_head = config.MODEL.ROI_BOX_HEAD
    resnets = config.MODEL.RESNETS

    resolution = box_head.POOLER_RESOLUTION
    built_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head.POOLER_SCALES,
        sampling_ratio=box_head.POOLER_SAMPLING_RATIO,
    )

    # A single stage spec: index 4, three blocks.
    conv5_stage = resnet.StageSpec(index=4, block_count=3, return_features=False)
    conv5_head = resnet.ResNetHead(
        block_module=resnets.TRANS_FUNC,
        stages=(conv5_stage, ),
        num_groups=resnets.NUM_GROUPS,
        width_per_group=resnets.WIDTH_PER_GROUP,
        stride_in_1x1=resnets.STRIDE_IN_1X1,
        stride_init=None,
        res2_out_channels=resnets.RES2_OUT_CHANNELS,
        dilation=resnets.RES5_DILATION)

    self.pooler = built_pooler
    self.head = conv5_head

    head_width = conv5_head.out_channels
    use_gn = box_head.USE_GN
    self.out_channels = box_head.MLP_HEAD_DIM
    self.avgpool = nn.AdaptiveAvgPool2d(1)
    self.fc = nn.Sequential(
        make_fc(head_width, self.out_channels, use_gn),
        nn.ReLU())
def __init__(self, cfg):
    """Build the ``PyramidRROIAlign`` pooler and the ``fc6``/``fc7`` head.

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_HEAD`` and
            ``MODEL.BACKBONE.OUT_CHANNELS`` are read.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
    # PyramidRROIAlign is given no sampling ratio here, so the previously
    # unused POOLER_SAMPLING_RATIO local has been dropped.
    pooler = PyramidRROIAlign(
        output_size=(resolution, resolution),
        scales=scales,
    )

    input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS * resolution**2
    representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
    use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN

    self.pooler = pooler
    self.fc6 = make_fc(input_size, representation_size, use_gn)
    self.fc7 = make_fc(representation_size, representation_size, use_gn)
def __init__(self, cfg, in_channels, out_channels=None):
    """Build the level-mix pooler, the 2D/3D conv stacks and the FC head.

    Args:
        cfg: project config node; ``MODEL.ROI_DEPTH_HEAD`` is read.
        in_channels: per-level channel count; ``inputconv`` takes
            ``in_channels * len(POOLER_SCALES)`` input channels.
        out_channels: width of the FC head; when ``None`` it is
            ``MLP_HEAD_DIM * 2``.
    """
    super(FPN2MLPLevelMixCostVolumeLRFeatureExtractor, self).__init__()

    depth_head = cfg.MODEL.ROI_DEPTH_HEAD
    resolution = depth_head.POOLER_RESOLUTION
    scales = depth_head.POOLER_SCALES
    built_pooler = PoolerLevelMix(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=depth_head.POOLER_SAMPLING_RATIO,
    )

    # The FC head takes resolution**3 inputs.
    flat_dim = resolution**3
    if out_channels is None:
        hidden_dim = depth_head.MLP_HEAD_DIM * 2
    else:
        hidden_dim = out_channels
    use_gn = depth_head.USE_GN

    # Zero-row tensor of width hidden_dim; a plain attribute, not a buffer.
    self.nullvalue = torch.zeros(0, hidden_dim)
    self.resolution = resolution
    self.pooler = built_pooler

    self.inputconv = nn.Sequential(
        convbn(in_channels * len(scales), 256, 3, 1, 1, 1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 32, kernel_size=1, padding=0, stride=1, bias=False))

    def _conv3d_pair():
        # convbn_3d -> ReLU -> convbn_3d, all 32 channels wide.
        return nn.Sequential(
            convbn_3d(32, 32, 3, 1, 1),
            nn.ReLU(inplace=True),
            convbn_3d(32, 32, 3, 1, 1))

    # dres0 starts from 64 channels and ends with an extra ReLU.
    self.dres0 = nn.Sequential(
        convbn_3d(64, 32, 3, 1, 1),
        nn.ReLU(inplace=True),
        convbn_3d(32, 32, 3, 1, 1),
        nn.ReLU(inplace=True))
    self.dres1 = _conv3d_pair()
    self.dres2 = _conv3d_pair()
    self.dres3 = _conv3d_pair()
    self.dres4 = _conv3d_pair()

    self.classify = nn.Sequential(
        convbn_3d(32, 32, 3, 1, 1),
        nn.ReLU(inplace=True),
        nn.Conv3d(32, 1, kernel_size=3, padding=1, stride=1, bias=False))

    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg):
    """Build the RoI pooler and the ``fc6``/``fc7`` head with deconv options.

    Reads ``MODEL.ROI_BOX_HEAD``, ``MODEL.BACKBONE.OUT_CHANNELS`` and
    ``MODEL.DECONV`` from ``cfg``. When ``USE_DECONV`` is set, ``use_gn`` and
    ``use_gw`` are forced off and ``block`` is taken from
    ``DECONV.BLOCK_FC``; all of these are forwarded to ``make_fc``.

    Args:
        cfg: project config node.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()
    resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    # Width of one flattened pooled RoI.
    input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS * resolution**2
    representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
    use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
    use_gw = cfg.MODEL.ROI_BOX_HEAD.USE_GW
    block = 0
    use_delinear = cfg.MODEL.ROI_BOX_HEAD.USE_DECONV
    if use_delinear:
        # The deconv linear layers replace both normalisation flags.
        use_gn = False
        use_gw = False
        block = cfg.MODEL.DECONV.BLOCK_FC  # original author's marker: "check here"
    if cfg.MODEL.DECONV.LAYERWISE_NORM:
        norm_type = cfg.MODEL.DECONV.BOX_NORM_TYPE
    else:
        norm_type = 'none'
        # NOTE(review): indentation was lost in the source. Creating box_norm
        # only on this non-layerwise path is a reconstruction -- confirm
        # against the original file.
        if cfg.MODEL.DECONV.BOX_NORM_TYPE == 'rfnorm' or cfg.MODEL.DECONV.BOX_NORM_TYPE == 'layernorm':
            self.box_norm = LayerNorm(eps=cfg.MODEL.DECONV.EPS)
    self.pooler = pooler
    self.fc6 = make_fc(input_size, representation_size, use_gn, use_gw, use_delinear, block=block, sync=cfg.MODEL.DECONV.SYNC, norm_type=norm_type)
    self.fc7 = make_fc(representation_size, representation_size, use_gn, use_gw, use_delinear, block=block, sync=cfg.MODEL.DECONV.SYNC, norm_type=norm_type)
def __init__(self, cfg, in_channels, out_channels):
    """Build the support-branch pooler and its ``fc6``/``fc7`` head.

    Args:
        cfg: project config node; ``MODEL.SUPP_POOLING`` is read, plus
            ``MODEL.ROI_BOX_HEAD.USE_GN``.
        in_channels: channel count of the pooled feature maps.
        out_channels: output width of ``fc7``.
    """
    super(SupportFPN2MLPFeatureExtractor, self).__init__()

    supp_cfg = cfg.MODEL.SUPP_POOLING
    resolution = supp_cfg.POOLER_RESOLUTION
    built_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=supp_cfg.POOLER_SCALES,
        sampling_ratio=supp_cfg.POOLER_SAMPLING_RATIO,
    )

    flat_dim = in_channels * resolution**2
    hidden_dim = supp_cfg.MLP_HEAD_DIM  # original note: defaults to 1024
    # The group-norm flag comes from the box head, not from SUPP_POOLING.
    use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN

    self.pooler = built_pooler
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, out_channels, use_gn)
    self.out_channels = out_channels
def __init__(self, cfg, in_channels):
    """Build the RoI pooler and the ``fc6``/``fc7`` head.

    The typical values in the comments below are the original author's
    annotations, not values enforced here.

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_HEAD`` is read.
        in_channels: channel count of the pooled feature maps.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_head = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head.POOLER_RESOLUTION  # e.g. 7
    built_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head.POOLER_SCALES,  # e.g. (0.25, 0.125, 0.0625, 0.03125)
        sampling_ratio=box_head.POOLER_SAMPLING_RATIO,  # e.g. 2
    )

    flat_dim = in_channels * resolution ** 2
    hidden_dim = box_head.MLP_HEAD_DIM  # e.g. 1024
    use_gn = box_head.USE_GN  # e.g. False

    self.pooler = built_pooler
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels):
    """Build the RoI pooler and the ``fc6``/``fc7`` head.

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_HEAD`` is read.
        in_channels: channel count of the pooled feature maps.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_head = cfg.MODEL.ROI_BOX_HEAD
    # Side of the feature map after RoI pooling (original note: usually 7).
    resolution = box_head.POOLER_RESOLUTION
    # Image-to-feature-map scale factors (original note: e.g. 1/4 for
    # ResNet-50 stage 2).
    scales = box_head.POOLER_SCALES
    # Forwarded to Pooler unchanged.
    sampling_ratio = box_head.POOLER_SAMPLING_RATIO
    built_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )

    flat_dim = in_channels * resolution**2
    hidden_dim = box_head.MLP_HEAD_DIM  # original note: 1024
    use_gn = box_head.USE_GN

    self.pooler = built_pooler
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels, RCNN_top=None):
    """Build the RoI pooler and the ``fc6``/``fc7`` head.

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_HEAD`` is read.
        in_channels: channel count of the pooled feature maps.
        RCNN_top: accepted but not used by this constructor.
    """
    super(BottomUpMLPFeatureExtractor, self).__init__()

    box_head = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head.POOLER_RESOLUTION
    built_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head.POOLER_SCALES,
        sampling_ratio=box_head.POOLER_SAMPLING_RATIO,
    )

    flat_dim = in_channels * resolution**2
    hidden_dim = box_head.MLP_HEAD_DIM
    use_gn = box_head.USE_GN

    self.pooler = built_pooler
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels):
    """Build the IoU-head pooler and its ``fc6_iou``/``fc7_iou`` layers.

    Args:
        cfg: project config node; ``MODEL.ROI_IOU_HEAD`` is read.
        in_channels: channel count of the pooled feature maps.
    """
    super(FPN2MLPFeatureExtractorRnd, self).__init__()

    iou_head = cfg.MODEL.ROI_IOU_HEAD
    resolution = iou_head.POOLER_RESOLUTION
    scales = iou_head.POOLER_SCALES
    # This pooler takes a pooler_type and no sampling ratio.
    built_pooler = Pooler(
        pooler_type=iou_head.POOLER_TYPE,
        output_size=(resolution, resolution),
        scales=scales,
    )

    flat_dim = in_channels * resolution ** 2
    hidden_dim = iou_head.MLP_HEAD_DIM
    use_gn = iou_head.USE_GN

    self.pooler = built_pooler
    self.fc6_iou = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7_iou = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels):
    """Build the prediction-head pooler and the ``fc6``/``fc7`` head.

    Args:
        cfg: project config node; ``MODEL.ROI_PRED_HEAD`` is read.
        in_channels: channel count of the pooled feature maps.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    pred_head = cfg.MODEL.ROI_PRED_HEAD
    resolution = pred_head.POOLER_RESOLUTION

    # Hard-coded switch: the contextual pooler is currently disabled.
    use_contextual_pooler = False
    build_pooler = make_contextual_pooler if use_contextual_pooler else make_pooler
    built_pooler = build_pooler(cfg, 'ROI_PRED_HEAD')

    flat_dim = in_channels * resolution**2
    hidden_dim = pred_head.MLP_HEAD_DIM
    use_gn = pred_head.USE_GN

    self.pooler = built_pooler
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels):
    """Build the ``ROI_BOX_UN_HEAD`` pooler and ``fc6_un``/``fc7_un`` layers.

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_UN_HEAD`` is read.
        in_channels: channel count of the pooled feature maps.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    un_head = cfg.MODEL.ROI_BOX_UN_HEAD
    resolution = un_head.POOLER_RESOLUTION
    scales = un_head.POOLER_SCALES
    sampling_ratio = un_head.POOLER_SAMPLING_RATIO
    built_pooler = Pooler(
        pooler_type=un_head.POOLER_TYPE,
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )

    flat_dim = in_channels * resolution**2
    hidden_dim = un_head.MLP_HEAD_DIM
    use_gn = un_head.USE_GN

    self.pooler = built_pooler
    self.fc6_un = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7_un = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels):
    """Build the RoI pooler (with a level-map function) and the FC head.

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_HEAD`` is read, plus
            ``MODEL.ROI_MASK_HEAD.LEVEL_MAP_FUNCTION``.
        in_channels: channel count of the pooled feature maps.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_head = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head.POOLER_RESOLUTION
    scales = box_head.POOLER_SCALES
    sampling_ratio = box_head.POOLER_SAMPLING_RATIO
    # The level-map function is read from the mask head's settings.
    level_mapper = cfg.MODEL.ROI_MASK_HEAD.LEVEL_MAP_FUNCTION
    built_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
        lvl_map_func=level_mapper
    )

    flat_dim = in_channels * resolution ** 2
    hidden_dim = box_head.MLP_HEAD_DIM
    use_gn = box_head.USE_GN

    self.pooler = built_pooler
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels):
    """Build the RoI pooler and FC head, and record a square image size.

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_HEAD`` and
            ``INPUT.MIN_SIZE_TEST`` are read.
        in_channels: channel count of the pooled feature maps.
    """
    super(FPN2MLPFeatureExtractor, self).__init__()

    box_head = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head.POOLER_RESOLUTION
    built_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head.POOLER_SCALES,
        sampling_ratio=box_head.POOLER_SAMPLING_RATIO,
    )

    flat_dim = in_channels * resolution**2
    hidden_dim = box_head.MLP_HEAD_DIM
    use_gn = box_head.USE_GN

    self.pooler = built_pooler
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim

    # Height and width are both taken from MIN_SIZE_TEST.
    side = cfg.INPUT.MIN_SIZE_TEST
    self.img_sizes = (side, side)
def __init__(self, cfg, in_channels):
    """Build the adaptive pooler, four ``fc6_*`` layers and a shared ``fc7``.

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_HEAD`` is read.
        in_channels: channel count of the pooled feature maps.
    """
    super(PANETAdaptiveFeatureExtractor, self).__init__()

    box_head = cfg.MODEL.ROI_BOX_HEAD
    # Side of the feature map after RoI pooling (original note: usually 7).
    resolution = box_head.POOLER_RESOLUTION
    # Image-to-feature-map scale factors (original note: e.g.
    # (0.25, 0.125, 0.0625, 0.03125); 1/4 for ResNet-50 stage 2).
    scales = box_head.POOLER_SCALES
    # Original note: a value of 2 gives 2*2 sampling points.
    sampling_ratio = box_head.POOLER_SAMPLING_RATIO
    built_pooler = AdaptivePooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )

    flat_dim = in_channels * resolution**2
    hidden_dim = box_head.MLP_HEAD_DIM  # original note: 1024
    use_gn = box_head.USE_GN

    self.pooler = built_pooler
    # fc6_0 .. fc6_3: identical shapes, registered in index order.
    for branch in range(4):
        setattr(self, "fc6_%d" % branch, make_fc(flat_dim, hidden_dim, use_gn))
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels):
    """Build the level-mix pooler, a 3D conv and the FC head.

    Args:
        cfg: project config node; ``MODEL.ROI_DEPTH_HEAD`` is read.
        in_channels: channel count of the pooled feature maps.
    """
    super(FPN2MLPLevelMix3DConvFeatureExtractor, self).__init__()

    depth_head = cfg.MODEL.ROI_DEPTH_HEAD
    resolution = depth_head.POOLER_RESOLUTION
    scales = depth_head.POOLER_SCALES
    built_pooler = PoolerLevelMix(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=depth_head.POOLER_SAMPLING_RATIO,
    )

    flat_dim = in_channels * resolution**2
    hidden_dim = depth_head.MLP_HEAD_DIM
    use_gn = depth_head.USE_GN

    self.resolution = resolution
    self.pooler = built_pooler
    # Kernel depth equals the number of pooler scales; 1x1 spatially.
    level_kernel = (len(scales), 1, 1)
    self.conv3d = nn.Conv3d(in_channels, in_channels, level_kernel, bias=False)
    self.fc6 = make_fc(flat_dim, hidden_dim, use_gn)
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels):
    """Build the all-level pooler, one ``fc6`` per scale and a shared ``fc7``.

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_HEAD`` is read.
        in_channels: channel count of the pooled feature maps.
    """
    super(PA2MLPFeatureExtractor, self).__init__()

    box_head = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head.POOLER_RESOLUTION
    scales = box_head.POOLER_SCALES
    built_pooler = AllLevelPooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=box_head.POOLER_SAMPLING_RATIO,
    )

    flat_dim = in_channels * resolution ** 2
    hidden_dim = box_head.MLP_HEAD_DIM
    use_gn = box_head.USE_GN

    self.pooler = built_pooler
    # One fc6 per entry in POOLER_SCALES.
    self.fc6s = nn.ModuleList(
        [make_fc(flat_dim, hidden_dim, use_gn) for _ in range(len(scales))]
    )
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels):
    """Build the PANet-mode pooler, four ``fc6`` layers and a shared ``fc7``.

    Args:
        cfg: project config node; ``MODEL.ROI_BOX_HEAD`` is read.
        in_channels: channel count of the pooled feature maps.
    """
    super(FPN2MLP_panet_FeatureExtractor, self).__init__()

    box_head = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_head.POOLER_RESOLUTION
    built_pooler = Pooler(
        output_size=(resolution, resolution),
        scales=box_head.POOLER_SCALES,
        sampling_ratio=box_head.POOLER_SAMPLING_RATIO,
        panet=True,
    )

    flat_dim = in_channels * resolution**2
    hidden_dim = box_head.MLP_HEAD_DIM
    use_gn = box_head.USE_GN

    self.pooler = built_pooler
    # Hard-coded level count (original note: assumed to be 4/5).
    num_levels = 4
    self.fc6 = nn.ModuleList(
        [make_fc(flat_dim, hidden_dim, use_gn) for _ in range(num_levels)]
    )
    self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    self.out_channels = hidden_dim
def __init__(self, cfg, in_channels):
    """Build the mask-head pooler, its conv stack and the extra fc branch.

    Args:
        cfg: project config node; ``MODEL.ROI_MASK_HEAD`` is read.
        in_channels: input channel count of the first ``mask_fcn`` layer.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()

    mask_head = cfg.MODEL.ROI_MASK_HEAD
    resolution = mask_head.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=mask_head.POOLER_SCALES,
        sampling_ratio=mask_head.POOLER_SAMPLING_RATIO,
    )

    use_gn = mask_head.USE_GN
    layers = mask_head.CONV_LAYERS
    dilation = mask_head.DILATION

    # mask_fcn1..N, registered by name; the names are kept in self.blocks.
    channels = in_channels
    self.blocks = []
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        conv = make_conv3x3(channels, layer_features,
                            dilation=dilation, stride=1, use_gn=use_gn)
        self.add_module(layer_name, conv)
        self.blocks.append(layer_name)
        channels = layer_features
    self.out_channels = layer_features

    # Two more convs (the second halves the width), then an FC layer with
    # RESOLUTION**2 outputs.
    dim_reduced = mask_head.CONV_LAYERS[-1]
    half_dim = int(dim_reduced / 2)
    self.conv4_fc = make_conv3x3(self.out_channels, dim_reduced, use_gn=use_gn)
    self.conv5_fc = make_conv3x3(dim_reduced, half_dim, use_gn=use_gn)
    self.fc_final = make_fc(
        half_dim * (mask_head.POOLER_RESOLUTION)**2,
        mask_head.RESOLUTION**2)