def forward(self, x, rois, roi_indices):
    # Prepend the batch index to each RoI:
    # (R,) indices + (R, 4) rois -> (R, 5) as (index, ymin, xmin, ymax, xmax)
    roi_indices = at.totensor(roi_indices).float()
    rois = at.totensor(rois).float()
    indices_and_rois = t.cat([roi_indices[:, None], rois], dim=1)
    # Reorder yx -> xy, since the RoI pooling layer expects
    # (index, xmin, ymin, xmax, ymax)
    xy_indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]]
    indices_and_rois = xy_indices_and_rois.contiguous()

    pool = self.roi(x, indices_and_rois)
    pool = pool.view(pool.size(0), -1)
    fc7 = self.classifier(pool)
    roi_cls_locs = self.cls_loc(fc7)
    roi_scores = self.scores(fc7)
    return roi_cls_locs, roi_scores
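The `[0, 2, 1, 4, 3]` indexing above is nothing more than a column permutation from `(index, ymin, xmin, ymax, xmax)` to `(index, xmin, ymin, xmax, ymax)`. A minimal sketch to verify it (the RoI values are made up for illustration):

import torch as t

# one RoI with batch index 0, in (ymin, xmin, ymax, xmax) order
indices_and_rois = t.tensor([[0., 34., 76., 82., 95.]])

# permute the columns to (index, xmin, ymin, xmax, ymax)
xy = indices_and_rois[:, [0, 2, 1, 4, 3]]
print(xy)  # tensor([[ 0., 76., 34., 95., 82.]])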
def predict(self, imgs, sizes=None, visualize=False):
    """Used when computing mAP.

    :param imgs: a batch of images
    :param sizes: the input size of each image in the batch
    :return: the bounding boxes, classes and class probabilities for every
        image in the batch; all three are lists of numpy arrays
    """
    self.eval()
    if visualize:
        self.use_preset('visualize')
    prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    for img, size in zip(prepared_imgs, sizes):
        img = at.totensor(img[None]).float()
        scale = img.shape[3] / size[1]
        roi_cls_loc, roi_scores, rois = self(img, scale=scale)
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(rois) / scale

        # undo the loc normalization applied during training
        mean = torch.Tensor(self.loc_normalize_mean).repeat(self.n_class)[None]
        std = torch.Tensor(self.loc_normalize_std).repeat(self.n_class)[None]
        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        # Note that roi_cls_loc holds offsets, not positions: it is used to
        # refine each roi towards a better box, not to predict coordinates
        # directly.
        cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # clamp cls_bbox to the image boundary
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))
        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = at.tonumpy(prob)
        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    self.use_preset('evaluate')
    self.train()
    # return the predicted bboxes, labels and scores of each image
    return bboxes, labels, scores
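`loc2bbox` applies the standard Faster R-CNN decoding: the offsets `(dy, dx, dh, dw)` move and rescale the source box. A minimal numpy sketch of that decoding, assuming float arrays in `(ymin, xmin, ymax, xmax)` order (not the project's actual implementation, just the formula):

import numpy as np

def loc2bbox_sketch(src_bbox, loc):
    # src_bbox: (R, 4) as (ymin, xmin, ymax, xmax); loc: (R, 4) as (dy, dx, dh, dw)
    h = src_bbox[:, 2] - src_bbox[:, 0]
    w = src_bbox[:, 3] - src_bbox[:, 1]
    ctr_y = src_bbox[:, 0] + 0.5 * h
    ctr_x = src_bbox[:, 1] + 0.5 * w

    # inverse of the encoding dy = (gy - cy) / h, dh = log(gh / h), same for x/w
    new_ctr_y = loc[:, 0] * h + ctr_y
    new_ctr_x = loc[:, 1] * w + ctr_x
    new_h = np.exp(loc[:, 2]) * h
    new_w = np.exp(loc[:, 3]) * w

    dst = np.empty_like(src_bbox)
    dst[:, 0] = new_ctr_y - 0.5 * new_h
    dst[:, 1] = new_ctr_x - 0.5 * new_w
    dst[:, 2] = new_ctr_y + 0.5 * new_h
    dst[:, 3] = new_ctr_x + 0.5 * new_w
    return dst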
def forward(self, x):
    # (B, C, H, W) -> (B, C, H*W): treat each of the C channels as one
    # descriptor of dimension H*W
    y = x.view(-1, x.size(1), x.size(2) * x.size(3))  # b x c x hw
    vladres = np.zeros([y.size(0), self.k, y.size(-1)])
    for i in range(y.size(0)):
        feature = tonumpy(y[i])
        c, hw = feature.shape
        # cluster the c descriptors into self.k centers
        # (self.model is assumed to be a clusterer such as sklearn's KMeans)
        self.model.fit(feature)
        clusters = self.model.cluster_centers_
        labels = self.model.labels_
        # accumulate the residual of each descriptor w.r.t. its cluster center
        for j in range(c):
            label = labels[j]
            vladres[i, label, :] += feature[j] - clusters[label]
    vladres = totensor(vladres)
    return vladres
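For context, a minimal sketch of how such a VLAD-style module might be set up and called; the class name, `k`, and the KMeans configuration are assumptions for illustration, not taken from the original code:

import torch
import torch.nn as nn
from sklearn.cluster import KMeans

class VLADPool(nn.Module):  # hypothetical wrapper around the forward above
    def __init__(self, k=8):
        super().__init__()
        self.k = k
        self.model = KMeans(n_clusters=k)

# usage: a (B, C, H, W) feature map becomes a (B, k, H*W) residual tensor
# feats = torch.randn(2, 64, 7, 7)
# out = VLADPool(k=8)(feats)  # requires the forward method shown above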
def forward(self, features, rois):
    rois = at.totensor(rois).float()
    roi_list = []
    for roi in rois:
        # Scale each RoI from image coordinates onto the feature map.
        # E.g. with a (300, 500) image, roi = (34, 76, 82, 95) is a box drawn
        # on the original image; the feature map is (37, 50), i.e. 1/16 of
        # the resolution, so roi * spatial_scale = roi / 16 =
        # (34/16, 76/16, 82/16, 95/16), truncated to (2, 4, 5, 5).
        # Those values are indices on the feature map, so the slice
        # (2:5+1, 4:5+1) picks out the height and width of the patch.
        # The +1 is presumably there to avoid a zero-length slice when ymin
        # and ymax (or xmin and xmax) round to the same index.
        # (Note: the RoI must be multiplied by spatial_scale, the height/width
        # slices must use (ymin:ymax+1, xmin:xmax+1), and the pooled output
        # must be (7, 7) to match the B,C,7,7 shape below.)
        roi_part = features[:, :,
                            (roi[0] * self.spatial_scale).int():
                            (roi[2] * self.spatial_scale).int() + 1,
                            (roi[1] * self.spatial_scale).int():
                            (roi[3] * self.spatial_scale).int() + 1]
        roi_part = nn.AdaptiveMaxPool2d((7, 7))(roi_part)
        roi_list.append(roi_part)
    pool = torch.cat(roi_list)  # B,C,7,7
    pool = pool.reshape(pool.shape[0], -1)  # B,C*7*7
    return pool
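The worked example in the comment can be checked directly. A tiny sketch (values taken from the comment; spatial_scale assumed to be 1/16, i.e. a feature-map stride of 16):

spatial_scale = 1. / 16  # assumed down-sampling ratio of the backbone
roi = (34, 76, 82, 95)   # (ymin, xmin, ymax, xmax) on the original image
print(tuple(int(v * spatial_scale) for v in roi))  # (2, 4, 5, 5)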
def forward(self, x, rois):
    rois = at.totensor(rois).float()
    roi_list = []
    for roi in rois:
        # Same scaling as above: map the RoI from image coordinates onto the
        # feature map (spatial_scale is the down-sampling ratio, e.g. 1/16),
        # slice out the corresponding patch, and keep the +1 so the slice is
        # never empty.
        roi_part = x[:, :,
                     (roi[0] * self.spatial_scale).int():
                     (roi[2] * self.spatial_scale).int() + 1,
                     (roi[1] * self.spatial_scale).int():
                     (roi[3] * self.spatial_scale).int() + 1]
        roi_part = nn.AdaptiveMaxPool2d((7, 7))(roi_part)
        roi_list.append(roi_part)
    pool = torch.cat(roi_list)  # B,C,7,7
    pool = pool.reshape(pool.shape[0], -1)  # B,C*7*7
    # pool(B, C*7*7)
    # pool = self.roi(x, rois)
    fc7 = self.classifier(pool)
    roi_cls_locs = self.cls_loc(fc7)
    roi_scores = self.cls_score(fc7)
    # return the RoI refinement offsets and the class scores
    return roi_cls_locs, roi_scores
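The Python loop above is easy to follow but slow; torchvision ships an equivalent operator. A minimal sketch using `torchvision.ops.roi_pool`, which expects boxes as `(batch_index, xmin, ymin, xmax, ymax)`, so the yx-ordered RoIs used here must have their columns swapped first:

import torch
from torchvision.ops import roi_pool

# feature map and two RoIs in (ymin, xmin, ymax, xmax) image coordinates
x = torch.randn(1, 512, 37, 50)
rois = torch.tensor([[34., 76., 82., 95.],
                     [10., 20., 120., 200.]])

# build (batch_index, xmin, ymin, xmax, ymax) boxes
idx = torch.zeros(len(rois), 1)
boxes = torch.cat([idx, rois[:, [1, 0, 3, 2]]], dim=1)
pool = roi_pool(x, boxes, output_size=(7, 7), spatial_scale=1. / 16)
print(pool.shape)  # torch.Size([2, 512, 7, 7])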
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot losses
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm',
                                at.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # reload the best checkpoint and decay the learning rate
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
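At epoch 9 the learning rate is decayed through `faster_rcnn.scale_lr`. A minimal sketch of what such a method might look like, assuming it simply multiplies every parameter group's learning rate by the decay factor:

def scale_lr(self, decay=0.1):
    # multiply the learning rate of every parameter group by `decay`
    for param_group in self.optimizer.param_groups:
        param_group['lr'] *= decay
    return self.optimizer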
def forward(self, imgs, bboxes, labels, scale):
    # one image per call, with its bboxes and labels
    n = bboxes.shape[0]
    if n != 1:
        raise ValueError('Currently only batch size 1 is supported.')

    _, _, H, W = imgs.shape
    img_size = (H, W)

    # compute the feature map
    features = self.faster_rcnn.extractor(imgs)

    # the RPN returns its loc/score outputs, the RoIs and all anchors
    # rpn_locs(N, H*W*A, 4)
    rpn_locs, rpn_scores, rois, anchor = self.faster_rcnn.rpn(
        features, img_size, scale)

    # bbox(R, 4) label(R, 1)
    bbox = bboxes[0]
    label = labels[0]
    rpn_score = rpn_scores[0]
    rpn_loc = rpn_locs[0]
    roi = rois

    # bbox and label are the ground truth; roi are the predicted boxes.
    # Returns the sampled RoIs, the loc offsets from each sampled RoI to its
    # matched ground-truth bbox, and the label assigned to each RoI.
    sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
        roi,
        at.tonumpy(bbox),
        at.tonumpy(label),
        self.loc_normalize_mean,
        self.loc_normalize_std)
    # the head returns the loc offsets and class scores of the sampled RoIs
    # after RoI pooling on the feature map and the fully connected layers
    roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi)

    # ------------------ RPN losses ------------------ #
    # assign targets to anchors: loc defaults to 0, positives get the offsets;
    # label defaults to -1 (ignore), positives get 1
    gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
        at.tonumpy(bbox), anchor, img_size)
    gt_rpn_label = at.totensor(gt_rpn_label).long()
    gt_rpn_loc = at.totensor(gt_rpn_loc)
    # rpn_loc(H * W * A, 4)
    rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                       gt_rpn_label.data, self.rpn_sigma)
    # RPN classification loss, ignoring anchors with label = -1
    rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label, ignore_index=-1)

    # ------------------ ROI losses ------------------ #
    n_sample = roi_cls_loc.shape[0]
    roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
    # pick, for each sample, the loc prediction of its ground-truth class
    roi_loc = roi_cls_loc[torch.arange(0, n_sample).long(),
                          at.totensor(gt_roi_label).long()]
    gt_roi_label = at.totensor(gt_roi_label).long()
    gt_roi_loc = at.totensor(gt_roi_loc)
    roi_loc_loss = _fast_rcnn_loc_loss(roi_loc, gt_roi_loc,
                                       gt_roi_label.data, self.roi_sigma)
    roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label)

    losses = {
        'rpn_loc_loss': rpn_loc_loss,
        'rpn_cls_loss': rpn_cls_loss,
        'roi_loc_loss': roi_loc_loss,
        'roi_cls_loss': roi_cls_loss
    }
    losses['total_loss'] = sum(losses.values())
    return losses
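Both loc losses above go through `_fast_rcnn_loc_loss`. A minimal sketch of the usual implementation, assuming a smooth L1 loss applied only to positive samples and normalized by the number of non-ignored samples (the function names here are illustrative):

import torch

def _smooth_l1_loss(x, t, in_weight, sigma):
    # smooth L1 with the quadratic/linear transition point at 1/sigma^2
    sigma2 = sigma ** 2
    diff = in_weight * (x - t)
    abs_diff = diff.abs()
    flag = (abs_diff < (1. / sigma2)).float()
    y = flag * (sigma2 / 2.) * (diff ** 2) + (1 - flag) * (abs_diff - 0.5 / sigma2)
    return y.sum()

def fast_rcnn_loc_loss_sketch(pred_loc, gt_loc, gt_label, sigma):
    # only positive samples (label > 0) contribute to the loc loss
    in_weight = torch.zeros(gt_loc.shape)
    in_weight[(gt_label > 0).view(-1, 1).expand_as(in_weight)] = 1
    loc_loss = _smooth_l1_loss(pred_loc, gt_loc, in_weight.detach(), sigma)
    # normalize by the number of samples that are not ignored (label >= 0)
    return loc_loss / (gt_label >= 0).sum().float()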
def forward(self, imgs, bboxes, labels, scale):
    """Forward Faster R-CNN and calculate losses.

    Here are notations used.

    * :math:`N` is the batch size.
    * :math:`R` is the number of bounding boxes per image.

    Currently, only :math:`N=1` is supported.

    Args:
        imgs (~torch.autograd.Variable): A variable with a batch of images.
        bboxes (~torch.autograd.Variable): A batch of bounding boxes.
            Its shape is :math:`(N, R, 4)`.
        labels (~torch.autograd.Variable): A batch of labels.
            Its shape is :math:`(N, R)`. The background is excluded from
            the definition, which means that the range of the value is
            :math:`[0, L - 1]`. :math:`L` is the number of foreground
            classes.
        scale (float): Amount of scaling applied to
            the raw image during preprocessing.

    Returns:
        namedtuple of 5 losses
    """
    n = bboxes.shape[0]
    if n != 1:
        raise ValueError('Currently only batch size 1 is supported.')

    _, _, H, W = imgs.shape
    img_size = (H, W)

    features = self.faster_rcnn.extractor(imgs)

    rpn_locs, rpn_scores, rois, roi_indices, anchor = \
        self.faster_rcnn.rpn(features, img_size, scale)

    # Since batch size is one, convert variables to singular form
    bbox = bboxes[0]
    label = labels[0]
    rpn_score = rpn_scores[0]
    rpn_loc = rpn_locs[0]
    roi = rois

    # Sample RoIs and forward
    # it's fine to break the computation graph of rois,
    # consider them as constant input
    sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
        roi,
        at.tonumpy(bbox),
        at.tonumpy(label),
        self.loc_normalize_mean,
        self.loc_normalize_std)
    # NOTE it's all zero because it only supports batch=1 for now
    sample_roi_index = t.zeros(len(sample_roi))
    roi_cls_loc, roi_score = self.faster_rcnn.head(
        features,
        sample_roi,
        sample_roi_index)

    # ------------------ RPN losses -------------------#
    gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
        at.tonumpy(bbox),
        anchor,
        img_size)
    gt_rpn_label = at.totensor(gt_rpn_label).long()
    gt_rpn_loc = at.totensor(gt_rpn_loc)
    rpn_loc_loss = _fast_rcnn_loc_loss(
        rpn_loc,
        gt_rpn_loc,
        gt_rpn_label.data,
        self.rpn_sigma)

    # NOTE: default value of ignore_index is -100 ...
    rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(),
                                   ignore_index=-1)
    _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
    _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
    self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

    # ------------------ ROI losses (fast rcnn loss) -------------------#
    n_sample = roi_cls_loc.shape[0]
    roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
    roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                          at.totensor(gt_roi_label).long()]
    gt_roi_label = at.totensor(gt_roi_label).long()
    gt_roi_loc = at.totensor(gt_roi_loc)

    roi_loc_loss = _fast_rcnn_loc_loss(
        roi_loc.contiguous(),
        gt_roi_loc,
        gt_roi_label.data,
        self.roi_sigma)

    roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

    self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

    losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
    losses = losses + [sum(losses)]

    return LossTuple(*losses)
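The return value is the 5-field namedtuple mentioned in the docstring. A minimal sketch of how `LossTuple` could be defined, with field names inferred from the four losses computed above plus their sum:

from collections import namedtuple

LossTuple = namedtuple('LossTuple',
                       ['rpn_loc_loss',
                        'rpn_cls_loss',
                        'roi_loc_loss',
                        'roi_cls_loss',
                        'total_loss'])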