def __init__(self, classes, class_agnostic):
    """Build the shared layers of the hierarchical R-CNN.

    Loads the label vectors from the pretrained HDF5 file, then creates the
    RPN, the proposal-target layer, the RoI pool/align/crop operators, the
    order-embedding head and its similarity scorer.

    Args:
        classes: Sized collection of class labels; ``len(classes)`` is
            stored as ``n_classes``.
        class_agnostic: Stored unchanged on the instance.
    """
    super(_HierRCNN, self).__init__()

    label_vec_path = 'data/pretrained_model/label_vec_vrd.h5'
    with h5py.File(label_vec_path, 'r') as label_file:
        # Materialised as an ndarray before the file is closed.
        self.label_vecs = np.array(label_file['label_vec'])

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss slots start at zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network. `self.dout_base_model` is read here but not
    # assigned in this constructor, so it must already exist on the
    # instance (where it is set is not visible from this block).
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI operators share the same pooled size and a 1/16 scale argument.
    pooled_size = cfg.POOLING_SIZE
    spatial_scale = 1.0 / 16.0
    self.RCNN_roi_pool = _RoIPooling(pooled_size, pooled_size, spatial_scale)
    self.RCNN_roi_align = RoIAlignAvg(pooled_size, pooled_size, spatial_scale)

    # The crop grid is doubled when CROP_RESIZE_WITH_MAX_POOL is enabled.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = cfg.POOLING_SIZE * 2
    else:
        self.grid_size = cfg.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()

    # Order-embedding head: 4096 -> 4096 -> cfg.HIER.EMBEDDING_LENGTH.
    embedding_layers = [
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(4096, cfg.HIER.EMBEDDING_LENGTH),
    ]
    self.order_embedding = nn.Sequential(*embedding_layers)
    self.order_score = _OrderSimilarity(cfg.HIER.ORDER_DISTANCE_NORM)
def __init__(self, classes, class_agnostic):
    """Create the layers shared by every Faster R-CNN backbone variant.

    Sets up the RPN, the proposal-target layer and the RoI pool/align/crop
    operators, and initialises the loss slots to zero.

    Args:
        classes: Sized collection of class labels; ``len(classes)`` is
            stored as ``n_classes``.
        class_agnostic: Stored unchanged on the instance.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # define rpn
    # `self.dout_base_model` is read here but not assigned in this
    # constructor, so it must already exist on the instance (where it is
    # set is not visible from this block).
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)

    # Author's note (previously kept in a no-op string literal):
    # cfg.POOLING_SIZE is the size of the pooled region after RoI pooling,
    # i.e. the size of the feature obtained after RoI pooling. With
    # POOLING_SIZE = 7 and CROP_RESIZE_WITH_MAX_POOL = True the grid size
    # evaluates to 7 * 2 = 14.
    self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, phase, cfg, size, base, extras, head, num_classes):
    """Assemble the SSD-based association network used for testing.

    Args:
        phase: Mode string; ``'vid_test'`` additionally creates the softmax
            and detection layers.
        cfg: Accepted for interface compatibility.
            NOTE(review): this argument is not used -- the configuration is
            taken from the module-level ``vid`` instead; confirm intent.
        size: Stored as ``self.size``.
        base: Layers wrapped into the ``vgg`` module list.
        extras: Layers wrapped into the ``extras`` module list.
        head: Pair ``(loc_layers, conf_layers)``.
        num_classes: Number of classes, stored and passed to the detector.
    """
    super(test_association, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    self.cfg = vid

    # Prior boxes are generated once from the configuration.
    self.priorbox = PriorBox(self.cfg)
    self.priors = Variable(self.priorbox.forward(), volatile=True)
    self.size = size

    # SSD network
    self.vgg = nn.ModuleList(base)
    # Layer learns to scale the l2 normalized features from conv4_3
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)
    loc_layers, conf_layers = head[0], head[1]
    self.loc = nn.ModuleList(loc_layers)
    self.conf = nn.ModuleList(conf_layers)

    # RoI operators share the configured pooled size and a 1/16 scale argument.
    pooled_size = self.cfg['POOLING_SIZE']
    spatial_scale = 1.0 / 16.0
    self.roi_pool = _RoIPooling(pooled_size, pooled_size, spatial_scale)
    self.roi_align = RoIAlignAvg(pooled_size, pooled_size, spatial_scale)

    # The crop grid is doubled when CROP_RESIZE_WITH_MAX_POOL is enabled.
    if self.cfg['CROP_RESIZE_WITH_MAX_POOL']:
        self.grid_size = self.cfg['POOLING_SIZE'] * 2
    else:
        self.grid_size = self.cfg['POOLING_SIZE']
    self.roi_crop = _RoICrop()

    min_dim = self.cfg['min_dim']
    self.img_shape = (min_dim, min_dim)

    if phase == 'vid_test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = test_target(num_classes, 200, 0.5, 0.01, 0.45)
def __init__(self, classes, class_agnostic):
    """Create the RPN, proposal-target layer and RoI pooling operator.

    Args:
        classes: Sized collection of class labels; ``len(classes)`` is
            stored as ``n_classes``.
        class_agnostic: Stored unchanged on the instance.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Both loss slots start at zero.
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # Region proposal network. `self.dout_base_model` is read here but not
    # assigned in this constructor, so it must already exist on the
    # instance (where it is set is not visible from this block).
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    pooled_size = cfg.POOLING_SIZE
    self.RCNN_roi_pool = _RoIPooling(pooled_size, pooled_size, 1.0 / 16.0)

    # The grid is doubled when CROP_RESIZE_WITH_MAX_POOL is enabled.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = cfg.POOLING_SIZE * 2
    else:
        self.grid_size = cfg.POOLING_SIZE
def __init__(self, phase, cfg, size, base, extras, head, num_classes):
    """Assemble the SSD backbone plus the LSTM association head.

    Args:
        phase: Mode string; ``'vid_train'`` additionally creates the
            softmax and the training target layer.
        cfg: Accepted for interface compatibility.
            NOTE(review): this argument is not used -- the configuration is
            taken from the module-level ``vid`` instead; confirm intent.
        size: Stored as ``self.size``.
        base: Layers wrapped into the ``vgg`` module list.
        extras: Layers wrapped into the ``extras`` module list.
        head: Pair ``(loc_layers, conf_layers)``.
        num_classes: Number of classes; sizes the class predictor and is
            passed to the loss and target layers.
    """
    super(association_lstm, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    self.cfg = vid

    # Prior boxes are generated once from the configuration.
    self.priorbox = PriorBox(self.cfg)
    self.priors = Variable(self.priorbox.forward(), volatile=True)
    self.size = size

    # SSD network
    self.vgg = nn.ModuleList(base)
    # Layer learns to scale the l2 normalized features from conv4_3
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)
    loc_layers, conf_layers = head[0], head[1]
    self.loc = nn.ModuleList(loc_layers)
    self.conf = nn.ModuleList(conf_layers)

    # RoI operators share the configured pooled size and a 1/16 scale argument.
    pooled_size = self.cfg['POOLING_SIZE']
    spatial_scale = 1.0 / 16.0
    self.roi_pool = _RoIPooling(pooled_size, pooled_size, spatial_scale)
    self.roi_align = RoIAlignAvg(pooled_size, pooled_size, spatial_scale)

    # The crop grid is doubled when CROP_RESIZE_WITH_MAX_POOL is enabled.
    if self.cfg['CROP_RESIZE_WITH_MAX_POOL']:
        self.grid_size = self.cfg['POOLING_SIZE'] * 2
    else:
        self.grid_size = self.cfg['POOLING_SIZE']
    self.roi_crop = _RoICrop()

    min_dim = self.cfg['min_dim']
    self.img_shape = (min_dim, min_dim)

    # Length bookkeeping: 4 box values + one entry per class + 49 extra values.
    self.tensor_len = 4 + self.num_classes + 49

    # Two stacked LSTMs (84 -> 150 -> 300) feed three linear heads.
    self.bnlstm1 = BNLSTM(input_size=84, hidden_size=150,
                          batch_first=False, bidirectional=False)
    self.bnlstm2 = BNLSTM(input_size=150, hidden_size=300,
                          batch_first=False, bidirectional=False)
    lstm_out_features = 300
    self.cls_pred = nn.Linear(lstm_out_features, self.num_classes)
    self.bbox_pred = nn.Linear(lstm_out_features, 4)
    self.association_pred = nn.Linear(lstm_out_features, 49)

    self.MultiProjectLoss = MultiProjectLoss(self.num_classes, 0, True, 3, 0.5)

    if phase == 'vid_train':
        self.softmax = nn.Softmax(dim=-1)
        # An earlier variant used Trnsform_target with the same arguments.
        self.detect = train_target(num_classes, 200, 0.5, 0.01, 0.45)
def __init__(self, out_size, phase, in_im_sz, fm_use):
    """Initializes RoI_layer module.

    Args:
        out_size: Output size passed (for both dimensions) to the RoI
            align and RoI pooling operators.
        phase: ``'train'`` or ``'eval'``; selects which index pickle is
            loaded.
        in_im_sz: Input image size; used to build the full-frame RoI
            tables and the ``Torch_ROI`` feature scale.
        fm_use: Value from which ``fm_ROI = int(fm_use / 4)``, the number
            of rows in the scene RoI tables, is derived.

    Raises:
        AssertionError: If ``phase`` is neither ``'train'`` nor ``'eval'``.
    """
    super(RoI_layer, self).__init__()
    self.phase = phase

    # in order to get the RoI region
    self.out_size = out_size
    self.in_img_sz = in_im_sz
    self.tm_scale = 8
    self.fm_ROI = int(fm_use / 4)
    self.Dense_scale = int(self.tm_scale / 2)

    if phase == 'train':
        data_index_file = './data/Charades_train.pkl'
    elif phase == 'eval':
        data_index_file = './data/Charades_Val_Video.pkl'
    else:
        # Raised explicitly (same exception type and message as the former
        # `assert 0`) so the check is not stripped when running with -O.
        raise AssertionError('The data can not find')
    self.bx_dir = '/VIDEO_DATA/BBOX/'

    # The context manager closes the index file once it has been read.
    # NOTE: pickle can execute arbitrary code while loading; only point
    # this at trusted index files.
    with open(data_index_file, 'rb') as index_file:
        self.data_index = pickle.load(index_file)

    # in order to get the bbox (RPN)
    # define rpn
    self.ROI_Align = RoIAlignAvg(out_size, out_size, 1 / 16.0)  # scale need to change
    self.ROI_Pool = _RoIPooling(out_size, out_size, 1 / 16.0)  # scale need to change
    self.Ptorch_ROI = Torch_ROI(feature_scal=(self.in_img_sz / 16))

    # Full-frame RoI tables, one row [index, 0, 0, x2, y2] per index.
    # 32 = scale * 2 = 16*2 for ROI Align
    self.Scene_Roi = np.array(
        [[i, 0, 0, self.in_img_sz - 32, self.in_img_sz - 32]
         for i in range(self.fm_ROI)])
    self.Scens_Full = np.array(
        [[i, 0, 0, self.in_img_sz - 16, self.in_img_sz - 16]
         for i in range(self.fm_ROI)])
    self.Scens_Pytorch = np.array(
        [[i, 0, 0, self.in_img_sz, self.in_img_sz]
         for i in range(self.fm_ROI)])
    # Sparse variant: only the odd indices 1, 3, 5, ...
    self.Scens_Sparse = np.array(
        [[i, 0, 0, self.in_img_sz, self.in_img_sz]
         for i in range(1, self.fm_ROI, 2)])
def __init__(self, classes, class_agnostic):
    """Create the FPN-flavoured RPN, proposal targets and RoI operators.

    Args:
        classes: Sized collection of class labels; ``len(classes)`` is
            stored as ``n_classes``.
        class_agnostic: Stored unchanged on the instance.
    """
    super(_FPN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Both loss slots start at zero.
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # Region proposal network. `self.dout_base_model` is read here but not
    # assigned in this constructor, so it must already exist on the
    # instance (where it is set is not visible from this block).
    self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # Author's note: the original paper used pool_size = 7 for the cls
    # branch and 14 for the mask branch; to save computation time, 14 is
    # used first as the pool_size, followed by stride=2 pooling for the
    # cls branch.
    pooled_size = cfg.POOLING_SIZE
    spatial_scale = 1.0 / 16.0
    self.RCNN_roi_pool = _RoIPooling(pooled_size, pooled_size, spatial_scale)
    self.RCNN_roi_align = RoIAlignAvg(pooled_size, pooled_size, spatial_scale)

    # The crop grid is doubled when CROP_RESIZE_WITH_MAX_POOL is enabled.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = cfg.POOLING_SIZE * 2
    else:
        self.grid_size = cfg.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, phase, base, extras, head, extras_lstd, num_classes):
    """Assemble the SSD network with an extra RoI classifier head.

    Args:
        phase: Mode string, stored as ``self.phase``.
        base: Layers wrapped into the ``vgg`` module list.
        extras: Layers wrapped into the ``extras`` module list.
        head: Pair ``(loc_layers, conf_layers)``.
        extras_lstd: Accepted for interface compatibility; not used by
            this constructor (see the note in the body).
        num_classes: Number of classes, stored and passed to the
            ``Post_rois`` and ``Detect`` layers.
    """
    super(SSD, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    # TODO: implement __call__ in PriorBox
    self.priorbox = PriorBox(v2)
    self.priors = Variable(self.priorbox.forward(), volatile=True)
    self.size = 300

    # SSD network
    self.vgg = nn.ModuleList(base)
    # Layer learns to scale the l2 normalized features from conv4_3
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)
    self.loc = nn.ModuleList(head[0])
    self.conf = nn.ModuleList(head[1])

    # NOTE: `extras_lstd` is currently unused. A disabled earlier variant
    # (previously kept here inside a no-op string literal) registered it as
    # a module list and used a single Linear(256*3*3, 21) classifier.
    # NOTE(review): the final layer is fixed at 21 outputs rather than
    # `num_classes` -- confirm this is intended.
    self.classifier = nn.ModuleList([
        nn.Linear(1024 * 5 * 5, 4096),
        nn.ReLU(True),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(True),
        nn.Dropout(0.5),
        nn.Linear(4096, 21),
    ])

    self.softmax = nn.Softmax(dim=-1)
    self.post_rois = Post_rois(num_classes, 0, 100, 0, 0.65)
    self.detect = Detect(num_classes, 0, 100, 0, 0.65)

    # 5x5 RoI pooling with a 1/16 scale argument (an earlier, disabled
    # variant used RoIPoolFunction with 1/19).
    self.roi_pool = _RoIPooling(5, 5, 1.0 / 16.0)
if __name__ == '__main__':
    # Manual smoke test: runs PSRoIPool and the plain RoI pooling layer on
    # the same random feature map and RoIs, printing outputs and sizes.
    # Requires a CUDA device, since the tensors are moved with .cuda().
    import torch
    import numpy as np
    from torch.autograd import Variable
    from lib.model.roi_pooling.modules.roi_pool import _RoIPooling

    features = torch.randn(2, 21 * 7 * 7, 50, 72)

    # Each RoI row is [batch_index, x1, y1, x2, y2]; the same two boxes are
    # used for batch indices 0 and 1.
    boxes = [
        [350.6689, 211.0240, 779.0886, 777.7496],
        [744.0627, 277.4919, 988.4307, 602.7589],
    ]
    roi_rows = [[float(batch_idx)] + box for batch_idx in (0, 1) for box in boxes]
    rois = torch.from_numpy(np.array(roi_rows)).float()

    ps_pool = PSRoIPool(7, 7, 1 / 16.0, 7, 21)
    features = Variable(features.cuda())
    rois = Variable(rois.cuda())
    print(rois.size(), features.size())
    print(features)

    ps_out = ps_pool(features, rois)
    print(ps_out)
    print(ps_out.size())

    print('============================')

    plain_pool = _RoIPooling(7, 7, 1 / 16.0)
    plain_out = plain_pool(features, rois.view(-1, 5))
    print(plain_out)
    print(plain_out.size())