def __init__(self, faster_rcnn):
    """Attach a Faster R-CNN model and build everything needed to train it.

    Args:
        faster_rcnn: model to train. This method reads its
            ``loc_normalize_mean`` / ``loc_normalize_std`` attributes and
            calls its ``get_optimizer()``.
    """
    super(FasterRCNNTrainer, self).__init__()

    model = faster_rcnn
    self.faster_rcnn = model

    # Sigma hyper-parameters come from the global ``opt`` config.
    self.rpn_sigma = opt.rpn_sigma
    self.roi_sigma = opt.roi_sigma

    # Target creators build gt_bbox / gt_label etc. as training targets.
    self.anchor_target_creator = AnchorTargetCreator()
    self.proposal_target_creator = ProposalTargetCreator()

    # Normalisation statistics are taken over from the model.
    self.loc_normalize_mean = model.loc_normalize_mean
    self.loc_normalize_std = model.loc_normalize_std

    self.optimizer = self.faster_rcnn.get_optimizer()

    # Visdom wrapper.
    self.vis = Visualizer(env=opt.env)

    # Indicators of training status: two confusion meters plus one
    # running-average meter per field of LossTuple.
    self.rpn_cm = ConfusionMeter(2)
    self.roi_cm = ConfusionMeter(21)
    loss_meters = {}
    for field_name in LossTuple._fields:
        loss_meters[field_name] = AverageValueMeter()
    self.meters = loss_meters
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training a Faster R-CNN; returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label etc. as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        # One running-average meter per loss field.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of
                images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: if the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Only anchors whose label is > -1 feed the RPN confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep only the 4 values at its ground-truth label.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will contain ``path`` if it is missing.

        A bare file name has no directory component; nothing is created then.
        """
        import os  # local import: the file-level import block is not visible here

        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model (optionally the optimizer) and other info.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generated using a time string and the values of kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        # The target directory (e.g. ``checkpoints/``) may not exist yet;
        # without this the very first save fails.
        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally config / optimizer) state from ``path``.

        Args:
            path: checkpoint file to read.
            load_optimizer (bool): restore the optimizer state when the
                checkpoint contains one.
            parse_opt (bool): feed the stored ``'config'`` entry to
                ``opt._parse``.

        Returns:
            self
        """
        # NOTE(review): t.load presumably unpickles the file (assuming ``t`` is
        # torch); only load checkpoints from trusted sources.
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            # The whole file is the model state dict: there is no config or
            # optimizer entry to restore, so return right away.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every field of ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training a Faster R-CNN; returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN), scaled by ``opt.rpn_pen``.
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module,
      scaled by ``opt.roi_pen``.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma
        # Loss weights applied in forward().
        self.rpn_pen = opt.rpn_pen
        self.roi_pen = opt.roi_pen

        # Target creators build gt_bbox / gt_label etc. as training targets.
        # FLAG: add params
        # Initial best: pos 0.2, neg 0.1
        self.anchor_target_creator = AnchorTargetCreator(
            pos_ratio=0.5, pos_iou_thresh=0.7, neg_iou_thresh=0.3)
        # Initial best: pos 0.2, neg 0.2
        self.proposal_target_creator = ProposalTargetCreator(
            pos_ratio=0.5, pos_iou_thresh=0.5, neg_iou_thresh_hi=0.5)

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(4)
        # One running-average meter per loss field.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale, stop=False):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of
                images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.
            stop (bool): when true, open an ``ipdb`` debugger session right
                before returning. Defaults to ``False`` so the module can be
                called the same way as ``train_step``.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: if the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Only anchors whose label is > -1 feed the RPN confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep only the 4 values at its ground-truth label.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        # The RoI term is weighted by roi_pen. It used to be multiplied by
        # rpn_pen, which left the configured roi_pen without any effect.
        losses = [self.rpn_pen * rpn_loc_loss, rpn_cls_loss,
                  roi_loc_loss, self.roi_pen * roi_cls_loss]
        losses = losses + [sum(losses)]

        if stop:
            # Debugging hook requested by the caller.
            import ipdb; ipdb.set_trace()

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale, stop=False):
        """Run one optimisation step and return the losses of that step."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale, stop)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model (optionally the optimizer) and other info.

        Two files are written into the output directory:
        ``<opt.model_name>.pt`` with the checkpoint and ``infos.json`` with
        ``kwargs``.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): output *directory*. If it's None,
                ``<opt.logs_path>/<opt.model_name>/results`` is used.
            **kwargs: extra info stored in the checkpoint and dumped with
                ``json.dump`` to ``infos.json``.

        Returns:
            save_path(str): the directory the files were written to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            save_path = os.path.join(opt.logs_path, opt.model_name, 'results')
        # Create the directory for explicitly passed paths as well, not only
        # for the default one.
        os.makedirs(save_path, exist_ok=True)

        t.save(save_dict, os.path.join(save_path, opt.model_name + '.pt'))
        self.vis.save([self.vis.env])

        with open(os.path.join(save_path, 'infos.json'), 'w') as fp:
            json.dump(save_dict['other_info'], fp)

        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally config / optimizer) state from ``path``.

        Args:
            path: checkpoint file to read.
            load_optimizer (bool): restore the optimizer state when the
                checkpoint contains one.
            parse_opt (bool): feed the stored ``'config'`` entry to
                ``opt._parse``.

        Returns:
            self
        """
        # NOTE(review): t.load presumably unpickles the file (assuming ``t`` is
        # torch); only load checkpoints from trusted sources.
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            # The whole file is the model state dict: there is no config or
            # optimizer entry to restore, so return right away.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every field of ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
# log parser.add_argument('--env_name', dest='env_name', help='name of visdom environment', default='HopeNet', type=str) args = parser.parse_args() return args if __name__ == '__main__': args = parse_args() # os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu device = torch.device('cuda:{}'.format(args.gpu)) # logger vis = Visualizer(env=args.env_name) # dataset trainset = graph_dataset(subsets=('zara01', 'eth', 'hotel', 'univ')) validset = graph_dataset(subsets=('zara02', )) train_dataloader = DataLoader(trainset, batch_size=args.bs, shuffle=True, collate_fn=trainset.collate_fn, num_workers=args.num_workers, pin_memory=True) valid_dataloader = DataLoader(validset, batch_size=args.bs, shuffle=False, collate_fn=validset.collate_fn, num_workers=args.num_workers, pin_memory=True)
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training a Faster R-CNN; returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Hyper-parameters passed to _fast_rcnn_loc_loss when computing the
        # localization losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label etc. as training targets.
        # Per the original author's notes: picks 256 anchors out of roughly
        # 20000 candidates for binary classification and box regression;
        # used to train the RPN.
        self.anchor_target_creator = AnchorTargetCreator()
        # Per the original author's notes: picks 128 of the 2000 filtered
        # RoIs for training the RoI head.
        self.proposal_target_creator = ProposalTargetCreator()

        # Mean / std of the location values; locations fed to the network
        # for training are all normalised.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        # One running-average meter per loss field.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of
                images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: if the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # Extract image features.
        features = self.faster_rcnn.extractor(imgs)

        # ProposalCreator process (per the original author's notes):
        # 1. For each image, use its feature map to compute the foreground
        #    probability and location parameters of the
        #    (H/16) x (W/16) x 9 (roughly 20000) anchors.
        # 2. Keep the 12000 anchors with the highest probability.
        # 3. Refine those 12000 anchors with the regressed location
        #    parameters to obtain RoIs.
        # 4. Apply non-maximum suppression (NMS) and keep the 2000 RoIs with
        #    the highest probability.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        # proposal_target_creator yields the sampled sample_roi together with
        # the matching location targets and labels.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        # Run the head network to get the predictions.
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Per the original author's notes, 256 of the ~20000 anchors are
        # picked for the RPN training loss:
        # 1. For every ground-truth box (gt_bbox) the anchor with the highest
        #    IoU is a positive sample.
        # 2. Of the remaining anchors, those whose IoU with any gt_bbox
        #    exceeds 0.7 are positives, up to 128 positives in total.
        # 3. Anchors whose IoU with the gt_bbox is below 0.3 are randomly
        #    chosen as negatives; positives plus negatives total 256.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        # Localization loss (the original notes describe it as an L1 loss).
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # Classification loss uses cross-entropy.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Only anchors whose label is > -1 feed the RPN confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep only the 4 values at its ground-truth label.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        # Two kinds of loss (location and label), each with an RPN part and a
        # RoI-head part: four losses in total, plus their sum.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step."""
        # Clear all gradients.
        self.optimizer.zero_grad()
        # Compute every loss with a forward pass.
        losses = self.forward(imgs, bboxes, labels, scale)
        # Back-propagate to compute the gradients.
        losses.total_loss.backward()
        # Apply one parameter update.
        self.optimizer.step()
        # Push the loss values into the meters.
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will contain ``path`` if it is missing.

        A bare file name has no directory component; ``os.makedirs('')``
        would raise, so nothing is created in that case.
        """
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model (optionally the optimizer) and other info.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generated using a time string and the values of kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoint_caffe/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        # Guarded so that a bare file name (empty dirname) no longer crashes.
        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
            self,
            path,
            load_optimizer=True,
            parse_opt=False,
    ):
        """Restore model (and optionally config / optimizer) state from ``path``.

        Args:
            path: checkpoint file to read.
            load_optimizer (bool): restore the optimizer state when the
                checkpoint contains one.
            parse_opt (bool): feed the stored ``'config'`` entry to
                ``opt._parse``.

        Returns:
            self
        """
        # NOTE(review): t.load presumably unpickles the file (assuming ``t`` is
        # torch); only load checkpoints from trusted sources.
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            # The whole file is the model state dict: there is no config or
            # optimizer entry to restore, so return right away.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    # update_meters, reset_meters and get_meter_data() update, reset and read
    # the values that are sent to the visualisation UI. (Original author's
    # remark: not fully understood, but unrelated to the main logic.)
    def update_meters(self, losses):
        """Add every field of ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training a Faster R-CNN; returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label etc. as training targets.
        # anchor_target_creator (per the original author's notes): selects
        # 256 anchors out of the 20000+ candidates for classification and
        # box regression.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        # One running-average meter per loss field.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of
                images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: if the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # Per the original author's notes the extractor is the first 10
        # layers of VGG16; it produces the feature map.
        features = self.faster_rcnn.extractor(imgs)

        # ------------------ RPN Network -------------------#

        # ------------------ RPN prediction -------------------#
        # Extract RoIs through the RPN. Shapes as noted by the original author:
        # rpn_locs: offset of every anchor, [1, 9*hh*ww, 4]
        # rpn_scores: binary (object / not object) score of every anchor,
        #     [1, 9*hh*ww, 2]
        # rois: RoIs (candidate regions) produced by the RPN, about 2000
        #     during training, [2000, 4]
        # roi_indices: all zeros, same length as rois, not used below. They
        #     are zero because only batch size 1 is supported; the index is
        #     the position of the image inside the batch.
        # rpn_locs and rpn_scores are used to compute the training loss;
        # rois are handed to the head network below for classification.
        # Note that location and class are predicted for every anchor, i.e.
        # for all 9*hh*ww anchors.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        # (only batch size 1 is supported, so element 0 is taken directly).
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]  # [n_anchor, 2]
        rpn_loc = rpn_locs[0]  # [n_anchor, 4]
        roi = rois

        # ------------------ RPN targets -------------------#
        # Per the original author's notes: the RPN predicts for all
        # (9*hh*ww) anchors, so gt_rpn_loc and gt_rpn_label hold a value for
        # every anchor, but only 256 sampled positives / negatives are used
        # for the loss. Positives have label 1, negatives label 0, invalid
        # or ignored anchors label -1.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)

        # ------------------ RPN loss computation -------------------#
        # loc loss (box regression loss); per the original notes it is
        # computed on positive samples only.
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # cls loss (classification loss, two classes here);
        # samples with label -1 are ignored.
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(),
                                       ignore_index=-1)
        # Only anchors whose label is > -1 feed the RPN confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI Network -------------------#

        # ------------------ ROI targets -------------------#
        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        # Sample a number of positive / negative RoIs for training the
        # RoI head classifier (per the original author's notes):
        # gt_roi_loc: location offsets, i.e. the second round of box
        #     regression
        # gt_roi_label: N+1 classes, with an extra background class
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))

        # ------------------ ROI prediction -------------------#
        # Not every RoI needs a prediction, so the head runs only on the
        # RoIs sampled above. It returns the predicted class scores
        # roi_score and predicted offsets roi_cls_loc for sample_roi.
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)  # [n_sample, n_class+1, 4]
        # roi_cls_loc holds a box prediction for every class, but the loss
        # only needs the prediction at the ground-truth class; roi_loc is
        # that prediction.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]  # [n_sample, 4]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        # loc loss (box regression loss)
        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        # cls loss (classification loss; 21 classes per the original notes)
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will contain ``path`` if it is missing.

        A bare file name has no directory component; nothing is created then.
        """
        import os  # local import: the file-level import block is not visible here

        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model (optionally the optimizer) and other info.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generated using a time string and the values of kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        # The target directory (e.g. ``checkpoints/``) may not exist yet;
        # without this the very first save fails.
        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
            self,
            path,
            load_optimizer=True,
            parse_opt=False,
    ):
        """Restore model (and optionally config / optimizer) state from ``path``.

        Args:
            path: checkpoint file to read.
            load_optimizer (bool): restore the optimizer state when the
                checkpoint contains one.
            parse_opt (bool): feed the stored ``'config'`` entry to
                ``opt._parse``.

        Returns:
            self
        """
        # NOTE(review): t.load presumably unpickles the file (assuming ``t`` is
        # torch); only load checkpoints from trusted sources.
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            # The whole file is the model state dict: there is no config or
            # optimizer entry to restore, so return right away.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every field of ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}