def __init__(self, classes, class_agnostic):
    """Create the RPN, the proposal-target layer and the RoI feature layers.

    :param classes: sized collection of class labels; stored unchanged, and
        its length becomes ``self.n_classes``.
    :param class_agnostic: stored unchanged on the instance; this constructor
        does not otherwise use it.

    ``self.dout_base_model`` is read below, so it has to be resolvable on the
    instance before this runs (presumably a class attribute supplied by the
    concrete subclass -- confirm against the subclasses).
    """
    super(_FPN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss attributes start out at zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network, then the layer that builds proposal targets.
    self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # NOTE: the original paper used pool_size = 7 for the cls branch and 14
    # for the mask branch; to save computation time, we first use 14 as the
    # pool_size and then do stride=2 pooling for the cls branch.
    pool_size = cfg.POOLING_SIZE
    roi_scale = 1.0 / 16.0  # third argument of both pooling layers
    self.RCNN_roi_pool = _RoIPooling(pool_size, pool_size, roi_scale)
    self.RCNN_roi_align = RoIAlignAvg(pool_size, pool_size, roi_scale)

    # The crop grid is twice the pooling size when a max-pool follows it.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool_size * 2
    else:
        self.grid_size = pool_size
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes, class_agnostic):
    """Initialise bookkeeping, the RPN and the RoI pooling/align/crop layers.

    :param classes: sized collection of class labels; kept as ``self.classes``
        and counted into ``self.n_classes``.
    :param class_agnostic: kept as ``self.class_agnostic``; not used further
        inside this constructor.

    Reads ``self.dout_base_model``, which therefore must already be available
    on the instance (NOTE(review): looks like subclasses are expected to
    provide it -- confirm).
    """
    super(_FPN, self).__init__()

    n_classes = len(classes)
    self.classes = classes
    self.n_classes = n_classes
    self.class_agnostic = class_agnostic

    # Both loss attributes begin at zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Proposal generation and proposal-to-target assignment.
    self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(n_classes)

    # NOTE: the original paper used pool_size = 7 for the cls branch and 14
    # for the mask branch; to save computation time, we first use 14 as the
    # pool_size and then do stride=2 pooling for the cls branch.
    side = cfg.POOLING_SIZE
    one_sixteenth = 1.0 / 16.0
    self.RCNN_roi_pool = _RoIPooling(side, side, one_sixteenth)
    self.RCNN_roi_align = RoIAlignAvg(side, side, one_sixteenth)

    # Doubled grid when cfg.CROP_RESIZE_WITH_MAX_POOL is set, else unchanged.
    grid_factor = 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else 1
    self.grid_size = side * grid_factor
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes):
    """Create the stereo RPN, the proposal-target layer and two RoI aligners.

    :param classes: sized collection of class labels; stored unchanged, and
        its length becomes ``self.n_classes``.

    ``self.dout_base_model`` is read below and must be resolvable on the
    instance beforehand (presumably provided by a subclass -- confirm).
    """
    super(_StereoRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)

    # Every loss attribute starts at zero.
    for loss_attr in (
        'RCNN_loss_cls',
        'RCNN_loss_bbox_left_right',
        'RCNN_loss_dis',
        'RCNN_loss_dim',
        'RCNN_loss_dim_orien',
        'RCNN_loss_kpts',
    ):
        setattr(self, loss_attr, 0)

    # Kernel size 1 with stride 2.
    self.maxpool2d = nn.MaxPool2d(1, stride=2)

    # Proposal generation and proposal-to-target assignment.
    self.RCNN_rpn = _Stereo_RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # The keypoint aligner uses twice the output size of the regular one;
    # both receive 1/16 and 0 as their remaining positional arguments.
    side = cfg.POOLING_SIZE
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_align = ROIAlign((side, side), roi_scale, 0)
    self.RCNN_roi_kpts_align = ROIAlign((side * 2, side * 2), roi_scale, 0)
def __init__(self, classes, class_agnostic):
    """Build the RPN, per-level RoI layers and the classification/box heads.

    :param classes: sized collection of class labels; stored unchanged, and
        its length becomes ``self.n_classes``.
    :param class_agnostic: when truthy the box head has 4 outputs, otherwise
        ``4 * (n_classes - 1)``.

    Calls ``self._init_modules()`` and then reads ``self.dout_base_model``;
    both must be provided elsewhere on the class (not visible here).
    """
    super(_FPN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss attributes start out at zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Kernel size 1 with stride 2.
    self.maxpool2d = nn.MaxPool2d(1, stride=2)

    # Runs before the RPN is created, same as the statement order dictates.
    self._init_modules()

    # Proposal generation and proposal-to-target assignment.
    self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # One RoI layer description shared across four feature-map strides.
    # NOTE(review): output_size is hard-coded to 7 while RCNN_top's input
    # width below is derived from cfg.POOLING_SIZE -- confirm they are meant
    # to agree.
    roi_layer = {'type': 'RoIAlign', 'output_size': 7, 'sampling_ratio': 0}
    featmap_strides = [4, 8, 16, 32]
    self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)

    side = cfg.POOLING_SIZE
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = side * 2
    else:
        self.grid_size = side

    # Heads operate on 1024-wide features.
    hidden = 1024
    self.RCNN_cls_score = nn.Linear(hidden, self.n_classes)
    bbox_outputs = 4 if self.class_agnostic else 4 * (self.n_classes - 1)
    self.RCNN_bbox_pred = nn.Linear(hidden, bbox_outputs)

    # Two fully connected layers with in-place ReLU after each.
    self.RCNN_top = nn.Sequential(
        nn.Linear(256 * side * side, hidden),
        nn.ReLU(True),
        nn.Linear(hidden, hidden),
        nn.ReLU(True),
    )

    self.upsample = Upsample()
    self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
    self.gradients = None
def __init__(self, baseModels, obj_classes, att_classes, rel_classes,
             dout_base_model, pooled_feat_dim):
    """Build the detector, relation-proposal, message-passing and GCN parts.

    :param baseModels: stored unchanged as ``self.RCNN_base_model``.
    :param obj_classes: sized collection of object labels.
    :param att_classes: sized collection of attribute labels, or ``None``,
        which is counted as zero attribute classes.
    :param rel_classes: sized collection of relation labels, or ``None``,
        which is counted as zero relation classes.
    :param dout_base_model: forwarded to ``_RPN``.
    :param pooled_feat_dim: forwarded to ``_RelPN`` and to the feature-level
        GCN variants.

    NOTE(review): the block nesting below was reconstructed from a source
    whose indentation was lost; in particular, confirm that the attention
    modules belong inside the ``cfg.GCN_LAYERS > 0`` branch.
    """
    super(_ISGG, self).__init__()

    self.obj_classes = obj_classes
    self.n_obj_classes = len(obj_classes)

    # Identity test against None: ``== None`` is evaluated element-wise by
    # array-like inputs and would not yield a single truth value.
    self.att_classes = att_classes
    self.n_att_classes = 0 if att_classes is None else len(att_classes)
    self.rel_classes = rel_classes
    self.n_rel_classes = 0 if rel_classes is None else len(rel_classes)

    # define base model
    self.RCNN_base_model = baseModels

    # define rpn
    self.RCNN_rpn = _RPN(dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(
        self.n_obj_classes, self.n_att_classes, self.n_rel_classes)
    self.RCNN_proposal_target_msdn = _ProposalTargetLayer_MSDN(
        self.n_obj_classes, self.n_att_classes, self.n_rel_classes)

    self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
    self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()

    # Relation proposal network and its own pooling/align/crop layers are
    # only created when relations are enabled.
    if cfg.HAS_RELATIONS:
        self.RELPN_rpn = _RelPN(pooled_feat_dim, self.n_obj_classes)
        self.RELPN_proposal_target = _RelProposalTargetLayer(
            self.n_rel_classes)
        self.RELPN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
        self.RELPN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
        self.RELPN_grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RELPN_roi_crop = _RoICrop()

    # No reduction is applied: the "reduced" width equals the pooled width.
    reduced_pooled_feat_dim = pooled_feat_dim

    # define mps
    nhidden = 512
    dropout = False
    gate_width = 1
    use_kernel_function = False
    # the hierarchical message passing structure
    self.imp = _IMP(nhidden, dropout,
                    gate_width=gate_width,
                    use_kernel_function=use_kernel_function)
    network.weights_normal_init(self.imp, 0.01)

    # self.fc4obj = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)
    # self.fc4att = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)
    # self.fc4rel = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)

    # self.RCNN_gcn_obj_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_obj_classes)
    # self.RCNN_gcn_att_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_att_classes)
    # self.RCNN_gcn_rel_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_rel_classes)

    if cfg.GCN_LAYERS > 0:
        if cfg.GCN_ON_SCORES:
            self.GRCNN_gcn_score = _GCN_1(
                self.n_obj_classes, self.n_att_classes, self.n_rel_classes)

        # These are independent checks, not an elif chain: when more than
        # one condition holds, the later assignment replaces the earlier one.
        if cfg.GCN_ON_FEATS and not cfg.GCN_SHARE_FEAT_PARAMS:
            self.GRCNN_gcn_feat = _GCN_2(reduced_pooled_feat_dim)
        if cfg.GCN_ON_FEATS and cfg.GCN_SHARE_FEAT_PARAMS:
            self.GRCNN_gcn_feat = _GCN_3(reduced_pooled_feat_dim)
        if cfg.GCN_ON_FEATS and cfg.GCN_LOW_RANK_PARAMS:
            self.GRCNN_gcn_feat = _GCN_4(reduced_pooled_feat_dim)

        if cfg.GCN_HAS_ATTENTION:
            self.GRCNN_gcn_att1 = _GCN_ATT(self.n_obj_classes)
            self.GRCNN_gcn_att2 = _GCN_ATT(self.n_obj_classes)

    # Loss attributes start out at zero.
    self.RCNN_loss_obj_cls = 0
    self.RCNN_loss_att_cls = 0
    self.RCNN_loss_rel_cls = 0
    self.RCNN_loss_bbox = 0
def __init__(self, baseModels, obj_classes, att_classes, rel_classes,
             dout_base_model, pooled_feat_dim):
    """Build the detector, relation-proposal and GCN parts of the model.

    :param baseModels: stored unchanged as ``self.RCNN_base_model``.
    :param obj_classes: sized collection of object labels.
    :param att_classes: sized collection of attribute labels, or ``None``,
        which is counted as zero attribute classes.
    :param rel_classes: sized collection of relation labels, or ``None``,
        which is counted as zero relation classes.
    :param dout_base_model: forwarded to ``_RPN``.
    :param pooled_feat_dim: forwarded to ``_RelPN`` and to the feature-level
        GCN variants ``_GCN_2``/``_GCN_3``/``_GCN_4``.

    NOTE(review): the block nesting below was reconstructed from a source
    whose indentation was lost; in particular, confirm that the attention
    modules belong inside the ``cfg.GCN_LAYERS > 0`` branch.
    """
    super(_graphRCNN, self).__init__()

    self.obj_classes = obj_classes
    self.n_obj_classes = len(obj_classes)

    # Identity test against None: ``== None`` is evaluated element-wise by
    # array-like inputs and would not yield a single truth value.
    self.att_classes = att_classes
    self.n_att_classes = 0 if att_classes is None else len(att_classes)
    self.rel_classes = rel_classes
    self.n_rel_classes = 0 if rel_classes is None else len(rel_classes)

    # define base model
    self.RCNN_base_model = baseModels

    # define rpn
    self.RCNN_rpn = _RPN(dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(
        self.n_obj_classes, self.n_att_classes, self.n_rel_classes)

    self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
    self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()

    # define aGCN
    # Relation proposal network and its own pooling/align/crop layers are
    # only created when relations are enabled.
    if cfg.HAS_RELATIONS:
        self.RELPN_rpn = _RelPN(pooled_feat_dim, self.n_obj_classes)
        self.RELPN_proposal_target = _RelProposalTargetLayer(
            self.n_rel_classes)
        self.RELPN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
        self.RELPN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
        self.RELPN_grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RELPN_roi_crop = _RoICrop()

    # No reduction is applied: the "reduced" width equals the pooled width.
    reduced_pooled_feat_dim = pooled_feat_dim

    # self.fc4obj = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)
    # self.fc4att = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)
    # self.fc4rel = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)

    # self.RCNN_gcn_obj_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_obj_classes)
    # self.RCNN_gcn_att_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_att_classes)
    # self.RCNN_gcn_rel_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_rel_classes)

    if cfg.GCN_LAYERS > 0:
        if cfg.GCN_ON_SCORES:
            self.GRCNN_gcn_score = _GCN_1(
                self.n_obj_classes, self.n_att_classes, self.n_rel_classes)

        # These are independent checks, not an elif chain: when more than
        # one condition holds, the later assignment replaces the earlier one.
        if cfg.GCN_ON_FEATS and not cfg.GCN_SHARE_FEAT_PARAMS:
            self.GRCNN_gcn_feat = _GCN_2(reduced_pooled_feat_dim)
        if cfg.GCN_ON_FEATS and cfg.GCN_SHARE_FEAT_PARAMS:
            self.GRCNN_gcn_feat = _GCN_3(reduced_pooled_feat_dim)
        if cfg.GCN_ON_FEATS and cfg.GCN_LOW_RANK_PARAMS:
            self.GRCNN_gcn_feat = _GCN_4(reduced_pooled_feat_dim)
        if cfg.GCN_ON_FEATS and cfg.GCN_DIFF_FEAT_DIM:
            self.GRCNN_gcn_feat = _GCN_5(
                cfg.OBJECT_DIM, cfg.ATTRIBUTE_DIM, cfg.RELATION_DIM)

        if cfg.GCN_HAS_ATTENTION:
            self.GRCNN_gcn_att1 = _GCN_ATT(self.n_obj_classes)
            self.GRCNN_gcn_att2 = _GCN_ATT(self.n_obj_classes)

    # Loss attributes start out at zero.
    self.RCNN_loss_obj_cls = 0
    self.RCNN_loss_att_cls = 0
    self.RCNN_loss_rel_cls = 0
    self.RCNN_loss_bbox = 0