def train(**kwargs):
    """Train the style-transfer network against a single style image.

    Keyword arguments are handed to ``opt._parse`` and override the global
    configuration before anything else happens.

    For every batch the content loss (MSE between the ``relu2_2`` features of
    the generated and the input batch) and the style loss (MSE between Gram
    matrices of the generated batch and of the style image) are weighted,
    summed and minimised with Adam.  Loss curves and sample images are sent to
    the visualizer every ``opt.plot_every`` iterations, and the transformer
    weights are written to ``checkpoints/<epoch>_style.pth`` after each epoch.
    """
    opt._parse(kwargs)

    device = t.device('cuda') if opt.use_gpu else t.device('cpu')
    vis = Visualizer(opt.env)

    # Data loading
    preprocess = tv.transforms.Compose([
        tv.transforms.Resize(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x * 255)
    ])
    dataset = tv.datasets.ImageFolder(opt.data_root, preprocess)
    dataloader = data.DataLoader(dataset, opt.batch_size)

    # style transformer network
    transformer = TransformerNet()
    if opt.model_path:
        # map_location returns the storage untouched, so the checkpoint is
        # first loaded without moving it; ``.to(device)`` below places it.
        transformer.load_state_dict(
            t.load(opt.model_path, map_location=lambda _s, _: _s))
    transformer.to(device)

    # Vgg16 for Perceptual Loss; it is only a fixed feature extractor here.
    vgg = Vgg16().eval()
    vgg.to(device)
    for param in vgg.parameters():
        param.requires_grad = False

    # Optimizer: use Adam
    optimizer = t.optim.Adam(transformer.parameters(), opt.lr)

    # Get style image
    style = utils.get_style_data(opt.style_path)
    vis.img('style', (style.data[0] * 0.225 + 0.45).clamp(min=0, max=1))
    style = style.to(device)

    # gram matrix for style image
    with t.no_grad():
        features_style = vgg(style)
        gram_style = [utils.gram_matrix(y) for y in features_style]

    # Loss meter
    style_meter = tnt.meter.AverageValueMeter()
    content_meter = tnt.meter.AverageValueMeter()

    # Make sure the checkpoint directory exists before training starts, so a
    # missing directory cannot throw away a finished epoch at save time.
    checkpoint_dir = 'checkpoints'
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    for epoch in range(opt.epoches):
        content_meter.reset()
        style_meter.reset()

        for ii, (x, _) in tqdm.tqdm(enumerate(dataloader)):
            # Train
            optimizer.zero_grad()
            x = x.to(device)
            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)
            features_y = vgg(y)
            features_x = vgg(x)

            # content loss
            content_loss = opt.content_weight * F.mse_loss(
                features_y.relu2_2, features_x.relu2_2)

            # style loss
            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gram_y = utils.gram_matrix(ft_y)
                style_loss += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            # Loss smooth for visualization
            content_meter.add(content_loss.item())
            style_meter.add(style_loss.item())

            if (ii + 1) % opt.plot_every == 0:
                # Dropping a file at opt.debug_file opens a debugger here.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # visualization
                vis.plot('content_loss', content_meter.value()[0])
                vis.plot('style_loss', style_meter.value()[0])
                # denorm input/output, since we have applied
                # (utils.normalize_batch)
                vis.img('output', (y.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))
                vis.img('input', (x.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))

        # save checkpoint
        vis.save([opt.env])
        t.save(transformer.state_dict(), 'checkpoints/%s_style.pth' % epoch)
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training; ``forward`` returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
        nclasses (int): Number of classes tracked by the RoI confusion
            meter.  Defaults to 21, the value that used to be hard-coded.
    """

    def __init__(self, faster_rcnn, nclasses=21):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(nclasses)
        # one running average per loss field
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: If the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Only anchors whose label is not -1 feed the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep the 4 offsets predicted for its target label.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will hold ``path`` when it names one."""
        # Imported locally so the class does not rely on a module-level
        # ``import os`` being present in this file.
        import os

        parent = os.path.dirname(path)
        # A bare filename has an empty dirname; nothing to create then.
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, optionally the optimizer, and other info.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generated using a time string and the values in kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        # Without this the write fails when the directory does not exist yet.
        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally config / optimizer) state from path.

        A checkpoint without a ``'model'`` key is treated as a bare model
        state dict; in that case nothing else is restored.

        Returns:
            self, to allow chaining.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every field of ``losses`` to its running-average meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training; ``forward`` returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
        nclasses (int): Number of classes tracked by the RoI confusion
            meter.
    """

    def __init__(self, faster_rcnn, nclasses):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(nclasses)
        # one running average per loss field
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: If the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            at.tonumpy(anchor),
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Only anchors whose label is not -1 feed the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep the 4 offsets predicted for its target label.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will hold ``path`` when it names one."""
        parent = os.path.dirname(path)
        # A bare filename has an empty dirname and os.makedirs('') raises,
        # so only create a directory when there is one to create.
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, optionally the optimizer, and other info.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generated using a time string and the values in kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
        self,
        path,
        load_optimizer=True,
        parse_opt=False,
    ):
        """Restore model (and optionally config / optimizer) state from path.

        A checkpoint without a ``'model'`` key is treated as a bare model
        state dict; in that case nothing else is restored.

        Returns:
            self, to allow chaining.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every field of ``losses`` to its running-average meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class BRFasterRcnnTrainer(nn.Module):
    """Faster R-CNN trainer that can also drive an attached attacker.

    Args:
        faster_rcnn: The Faster R-CNN model whose losses are computed.
        attacker: Optional object exposing ``forward`` and ``save``; it is
            used by :meth:`train_step` and :meth:`save`.
        layer_idx: Stored on the instance; not used inside this class.
        attack_mode (bool): When true, :meth:`train_step` runs the attacker
            instead of printing a placeholder.
    """

    def __init__(self, faster_rcnn, attacker=None, layer_idx=None, attack_mode=False):
        super(BRFasterRcnnTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.attacker = attacker
        self.layer_idx = layer_idx
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma
        self.attack_mode = attack_mode

        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        self.vis = Visualizer(env=opt.env)

        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}
        self.BR_meters = {k: AverageValueMeter() for k in LossTupleBR._fields}

    @staticmethod
    def _zero_losses():
        """Return four zero losses followed by their (zero) sum."""
        losses = [Variable(torch.zeros(1)).cuda() for _ in range(4)]
        return losses + [sum(losses)]

    def forward(self, imgs, bboxes, labels, scale, attack=False):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.
            attack (bool): When true, skip the loss computation and return
                the intermediate RoI outputs instead.

        Returns:
            The return shape depends on the path taken:

            * normal case: ``LossTuple`` of 5 losses;
            * ``attack=True``: ``(roi_score, gt_roi_label, feature_maps)``;
            * extracted features sum to zero: a plain list of 5 zero losses;
            * the RPN produced no RoIs: ``(list of 5 zero losses, features)``.

        Raises:
            ValueError: If the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # The extractor is expected to record its feature maps (via a hook)
        # on ``faster_rcnn.feature_maps`` - presumably; confirm in the model.
        features = self.faster_rcnn.extractor(imgs)
        feature_maps = self.faster_rcnn.feature_maps

        # NOTE(review): ``sum()[0]`` only works when ``sum()`` returns a
        # one-element tensor rather than a 0-dim one - confirm against the
        # PyTorch version in use.
        if features.sum()[0] == 0:
            print("Features are 0 for some reason")
            return self._zero_losses()

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        if rois.size == 0:
            print("Features are 0 for some reason")
            return self._zero_losses(), features

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Start from zero so the names are always bound, whichever of the
        # branches below runs.
        rpn_loc_loss = 0
        rpn_cls_loss = 0
        if not attack and anchor.size != 0:
            gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
                at.tonumpy(bbox),
                anchor,
                img_size)
            gt_rpn_label = at.tovariable(gt_rpn_label).long()
            gt_rpn_loc = at.tovariable(gt_rpn_loc)
            rpn_loc_loss = _fast_rcnn_loc_loss(
                rpn_loc,
                gt_rpn_loc,
                gt_rpn_label.data,
                self.rpn_sigma)

            # NOTE: default value of ignore_index is -100 ...
            rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
            _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
            _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
            self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep the 4 offsets predicted for its target label.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        if attack:
            return roi_score, gt_roi_label, feature_maps

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale, target_feature=None, rois=None, roi_scores=None):
        """Run the attacker for one step and record its losses.

        Outside attack mode this only prints a placeholder.  Returns None.
        """
        if not self.attack_mode:
            print ('....')
        else:
            BR_losses = self.attacker.forward(imgs, self, labels, bboxes, scale,
                                              target_feature, rois, roi_scores)
            BR_losses = LossTupleBR(*BR_losses)
            self.update_meters(BR_losses, BR=True)

    # save_rcnn defaults to False because the R-CNN parameters are left
    # untouched while the generator is being trained.
    def save(self, save_optimizer=False, save_path=None, save_rcnn=False, **kwargs):
        """Save the attacker and, when ``save_rcnn`` is true, the detector.

        Args:
            save_optimizer (bool): whether to include optimizer.state_dict().
            save_path (str): where to save the detector checkpoint; when None
                a path is built from a time string (plus the kwargs values
                outside attack mode).
            save_rcnn (bool): whether to actually write the detector
                checkpoint to ``save_path``.
            **kwargs: extra info stored in the checkpoint; ``epochs`` is
                required when an attacker is attached.

        Returns:
            str: the detector checkpoint path (written only if ``save_rcnn``).

        Raises:
            KeyError: If an attacker is attached and ``epochs`` is missing.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        # Computed unconditionally: the attacker file name below needs it
        # even when the caller supplied save_path.
        timestr = time.strftime('%m%d%H%M')
        if save_path is None:
            save_path = 'checkpoints/faterrcnn_full_%s' % timestr
            if not self.attack_mode:
                for k_, v_ in kwargs.items():
                    save_path += '%s' % v_

        if self.attacker is not None:
            self.attacker.save('checkpoints/attack_%s_%d.path' % (timestr, kwargs['epochs']))
        if save_rcnn:
            t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False):
        """Restore model (and optionally config / optimizer) state from path.

        A checkpoint without a ``'model'`` key is treated as a bare model
        state dict; in that case nothing else is restored.

        Returns:
            self, to allow chaining.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            # The optimizer state belongs to the optimizer, not the module.
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses, BR=False):
        """Add each loss field to the regular meters, or BR meters if BR."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        meters = self.BR_meters if BR else self.meters
        for key, meter in meters.items():
            meter.add(loss_d[key])

    def reset_meters(self, BR=False):
        """Reset loss meters (plus BR meters if BR) and confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        if BR:
            for meter in self.BR_meters.values():
                meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self, BR=False):
        """Return ``{name: first element of meter.value()}`` for one set."""
        meters = self.BR_meters if BR else self.meters
        return {k: v.value()[0] for k, v in meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training; ``forward`` returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
        nclasses (int): Number of classes tracked by the RoI confusion
            meter.  Defaults to 21, the value that used to be hard-coded.
    """

    def __init__(self, faster_rcnn, nclasses=21):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(nclasses)
        # one running average per loss field
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    # Walk-through of forward() (the original notes refer to flowcharts
    # pic6.png and pic7.png):
    #
    # The first half is a "pseudo" forward pass.  It is not a plain inference
    # pass because it calls proposal_target_creator, whose only job is to
    # supply sampled RoIs together with their ground-truth locations and
    # labels for training the RoI head; that step is not needed at test time.
    # The steps are: check that the batch size is 1, read the image height
    # and width, extract features with faster_rcnn.extractor, run the RPN to
    # obtain rpn_locs, rpn_scores, rois, roi_indices and anchor, sample RoIs
    # with proposal_target_creator, and run the head on the sampled RoIs.
    #
    # The second half computes two groups of losses.  The RPN losses train
    # the network that turns anchors into proposals; the RoI losses train
    # the head that predicts class and box offsets from the feature map and
    # the RoIs.  rpn_cm and roi_cm are confusion-matrix meters kept for later
    # visualisation.
    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: If the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Only anchors whose label is not -1 feed the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep the 4 offsets predicted for its target label.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step.

        Clears the gradients, computes all losses with :meth:`forward`,
        back-propagates the total loss, applies one optimizer update and
        records the losses in the running meters.
        """
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will hold ``path`` when it names one."""
        parent = os.path.dirname(path)
        # A bare filename has an empty dirname and os.makedirs('') raises,
        # so only create a directory when there is one to create.
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, optionally the optimizer, and other info.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generated using a time string and the values in kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
        self,
        path,
        load_optimizer=True,
        parse_opt=False,
    ):
        """Restore model (and optionally config / optimizer) state from path.

        A checkpoint without a ``'model'`` key is treated as a bare model
        state dict; in that case nothing else is restored.

        Returns:
            self, to allow chaining.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every field of ``losses`` to its running-average meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Training wrapper for a Faster R-CNN model with an FPN neck.

    ``forward`` returns the RPN / RoI localization and classification losses
    plus their sum; ``train_step`` additionally applies one optimizer update.

    Args:
        faster_rcnn: The model to train.  It must expose ``C2``, ``C3``,
            ``C4``, ``fpn``, ``rpn``, ``head``, ``get_optimizer`` and the
            ``loc_normalize_mean`` / ``loc_normalize_std`` attributes.
        nclasses (int): Number of classes tracked by the RoI confusion
            meter.  Defaults to 21, the value that used to be hard-coded.
    """

    def __init__(self, faster_rcnn, nclasses=21):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Sigma hyper-parameters passed to _fast_rcnn_loc_loss.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Selects the anchors used to train the RPN.
        self.anchor_target_creator = AnchorTargetCreator()
        # Selects the RPN proposals used to train the RoI head.
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # Visualization
        self.vis = Visualizer(env=opt.env)

        # Confusion matrices comparing predictions with ground truth; the
        # constructor argument is the number of classes.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(nclasses)
        # Running average of every loss field.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        '''Forward the network and compute the training losses.

        :param imgs: (~torch.autograd.Variable) one batch of images
        :param bboxes: (~torch.autograd.Variable) (N, R, 4)
        :param labels: (~torch.autograd.Variable) (N, R), values in
            [0, L - 1] where L is the number of classes
        :param scale: (float) scaling applied to the raw image during
            preprocessing
        :return: namedtuple of 5 losses
        :raises ValueError: if the batch size is not 1
        '''
        n = bboxes.shape[0]  # batch size
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        c2_out = self.faster_rcnn.C2(imgs)
        c3_out = self.faster_rcnn.C3(c2_out)
        c4_out = self.faster_rcnn.C4(c3_out)
        p2, p3, p4, p5 = self.faster_rcnn.fpn(c2_out, c3_out, c4_out)
        # The RPN sees every pyramid level; the head only p2-p4.
        feature_maps = [p2, p3, p4, p5]
        rcnn_maps = [p2, p3, p4]

        # The RPN scores the anchors and proposes RoIs; roi_indices is not
        # used below.  H and W are the sizes after preprocessing.
        # NOTE(review): the original author's notes give per-anchor shapes of
        # (num_anchors, 4) for locs/anchor and (num_anchors, 2) for scores,
        # with about 2000 RoIs - confirm against the RPN implementation.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            feature_maps, img_size, scale)

        # Batch size is 1, so drop the batch dimension.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # proposal_target_creator produces the sampled RoIs together with
        # gt_roi_loc and gt_roi_label.  The RoI head takes the sampled RoIs
        # and the features and predicts class scores and box refinements, so
        # gt_roi_label / gt_roi_loc serve as its ground truth.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            array_tool.tonumpy(bbox),
            array_tool.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        sample_roi_index = torch.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            rcnn_maps, sample_roi, sample_roi_index)

        # ------------------RPN loss------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            array_tool.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = array_tool.totensor(gt_rpn_label).long()
        gt_rpn_loc = array_tool.totensor(gt_rpn_loc)

        # RPN regression loss
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # RPN classification cross-entropy loss; label -1 is ignored.
        rpn_cls_loss = functional.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)

        # Only anchors whose label is not -1 feed the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = array_tool.tonumpy(rpn_score)[array_tool.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(array_tool.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------------ROI loss------------------------#
        n_sample = roi_cls_loc.shape[0]
        # Reshape to (n_sample, classes, 4).
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep the 4 offsets predicted for its target label,
        # going from (n_sample, classes, 4) to (n_sample, 4).
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().cuda(),
                              array_tool.totensor(gt_roi_label).long()]
        gt_roi_label = array_tool.totensor(gt_roi_label).long()
        gt_roi_loc = array_tool.totensor(gt_roi_loc)

        # RoI regression loss
        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        # RoI classification cross-entropy loss
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(array_tool.totensor(roi_score, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        # Append the total loss, making the list 5 entries long.
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will hold ``path`` when it names one."""
        parent = os.path.dirname(path)
        # A bare filename has an empty dirname and os.makedirs('') raises,
        # so only create a directory when there is one to create.
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, optionally the optimizer, and other info.

        Args:
            save_optimizer (bool): whether to include optimizer.state_dict().
            save_path (str): where to save; when None a path is built from a
                time string and the values in kwargs.

        Returns:
            str: the path the checkpoint was written to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        self._ensure_parent_dir(save_path)

        torch.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally config / optimizer) state from path.

        A checkpoint without a ``'model'`` key is treated as a bare model
        state dict; in that case nothing else is restored.

        Returns:
            self, to allow chaining.
        """
        state_dict = torch.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every field of ``losses`` to its running-average meter."""
        loss_d = {k: array_tool.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training a Faster R-CNN; returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        # The model being trained (original note: a FasterRCNNVGG16
        # instance, i.e. a FasterRCNN subclass, is what gets passed in).
        self.faster_rcnn = faster_rcnn

        # Sigma values handed to _fast_rcnn_loc_loss for the two
        # localization losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label etc. as training targets:
        # one for the RPN anchors, one for the sampled RoIs.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # Normalisation statistics forwarded to proposal_target_creator.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        # The optimizer is supplied by the model itself.
        self.optimizer = self.faster_rcnn.get_optimizer()

        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status: two confusion matrices
        # (2 classes for the RPN, 21 for the head; original note:
        # foreground/background and 20 object classes + background)
        # plus one running-average meter per LossTuple field.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd..Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses: the four individual losses followed by
            their sum.

        Raises:
            ValueError: If the batch size (``bboxes.shape[0]``) is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        # Height and width of the (preprocessed) input image.
        _, _, H, W = imgs.shape
        img_size = (H, W)

        # Feature extraction (original note: the pretrained VGG16 extractor).
        features = self.faster_rcnn.extractor(imgs)

        # Region proposal network. roi_indices is not used in this method.
        # Original author's example shapes (unverified here):
        # rpn_locs [1, 17316, 4], rpn_scores [1, 17316, 2], rois [2000, 4],
        # roi_indices [2000], anchor [17316, 4].
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        # (i.e. drop the leading batch dimension).
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward.
        # It's fine to break the computation graph of rois,
        # consider them as constant input.
        # Original note (unverified here): about 2000 rois go in and 128 are
        # sampled; gt_roi_label holds one class index per sampled roi (it is
        # later used to index the per-class axis of roi_cls_loc).
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        # RoI head: classifies each sampled roi and refines its box.
        # Original note (unverified here): roi_cls_loc is [128, 84],
        # roi_score is [128, 21].
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Assign the ground-truth boxes to the anchors to obtain per-anchor
        # regression targets and labels.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # Anchors labelled -1 are excluded from the classification loss.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Apply the same "label > -1" filter to labels and scores before
        # feeding the RPN confusion matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Regroup the flat regression output as (n_sample, n_class, 4) ...
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # ... and keep, for every sample, only the 4 values that belong to
        # its ground-truth class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),  # indexing result made contiguous first
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        # Four individual losses plus their sum as the fifth entry.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step.

        Zeroes the gradients, runs :meth:`forward`, back-propagates the
        total loss, steps the optimizer and records the losses in the meters.
        """
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(save_path):
        """Create the directory that will contain ``save_path`` if needed."""
        import os  # local import: this block did not use ``os`` before

        save_dir = os.path.dirname(save_path)
        # A bare file name has an empty dirname; os.makedirs('') would raise.
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, optionally the optimizer, and other info.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generated using a time string and the values of kwargs.
            **kwargs: extra info stored under ``'other_info'``; the values
                are also appended to an auto-generated file name.

        Returns:
            save_path(str): the path the checkpoint was written to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        # Make sure the target directory exists before writing; without this
        # the default 'checkpoints/...' path fails on a fresh checkout.
        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally config / optimizer) from ``path``.

        Args:
            path: checkpoint file, either written by :meth:`save` or a
                legacy file that holds only the model state dict.
            load_optimizer (bool): restore the optimizer state if present.
            parse_opt (bool): re-parse the stored config into ``opt``.

        Returns:
            self, to allow chaining.
        """
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from trusted sources.
        state_dict = t.load(path)
        if 'model' not in state_dict:
            # legacy way, for backward compatibility: the file is the bare
            # model state dict, so there is nothing else to restore.
            self.faster_rcnn.load_state_dict(state_dict)
            return self

        self.faster_rcnn.load_state_dict(state_dict['model'])
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every loss of ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{name: meter.value()[0]}`` for every loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training a Faster R-CNN; returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Hyper-parameters passed to _fast_rcnn_loc_loss when computing the
        # localization losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators create gt_bbox / gt_label etc. as training targets
        # (ground truth); they are only needed while training.
        # Original note (unverified here): AnchorTargetCreator picks 256 of
        # roughly 20000 candidate anchors for the RPN's binary
        # classification and box regression.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # Original note: mean (0., 0., 0., 0.), std (0.1, 0.1, 0.2, 0.2).
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        # Original note: SGD.
        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status. The ConfusionMeter argument is the
        # number of classes (original note: 21 = 20 object classes + 1
        # background for the head).
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        # One running-average meter per loss.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd..Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses: the four individual losses followed by
            their sum.

        Raises:
            ValueError: If the batch size (``bboxes.shape[0]``) is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape  # N, C, H, W
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        # roi_indices is returned by the RPN but not used in this method.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois  # original note: (2000, 4)

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # Cross entropy between the RPN scores and the anchor labels;
        # anchors labelled -1 are ignored.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Drop the ignored (-1) anchors from labels and scores alike before
        # updating the RPN confusion matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        # Original note (unverified here): roi_cls_loc is the VGG16RoIHead
        # output of shape (128, 84), so n_sample is 128.
        n_sample = roi_cls_loc.shape[0]
        # Regroup as (n_sample, n_class, 4); original note: (128, 21, 4).
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep only the regression row of its gt class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        # Original note: offsets (dx, dy, dw, dh) between the sampled
        # proposals and their boxes, produced by proposal_target_creator.
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        # Four individual losses plus their sum as the fifth entry.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step."""
        self.optimizer.zero_grad()  # clear accumulated gradients
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()  # update the parameters
        self.update_meters(losses)  # record the losses in the meters
        return losses

    @staticmethod
    def _ensure_parent_dir(save_path):
        """Create the directory that will contain ``save_path`` if needed."""
        save_dir = os.path.dirname(save_path)
        # A bare file name has an empty dirname; os.makedirs('') would raise.
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, optionally the optimizer, and other info.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generated using a time string and the values of kwargs.
            **kwargs: extra info stored under ``'other_info'``; the values
                are also appended to an auto-generated file name.

        Returns:
            save_path(str): the path the checkpoint was written to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally config / optimizer) from ``path``.

        Args:
            path: checkpoint file, either written by :meth:`save` or a
                legacy file that holds only the model state dict.
            load_optimizer (bool): restore the optimizer state if present.
            parse_opt (bool): re-parse the stored config into ``opt``.

        Returns:
            self, to allow chaining.
        """
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from trusted sources.
        state_dict = t.load(path)
        if 'model' not in state_dict:
            # legacy way, for backward compatibility: the file is the bare
            # model state dict, so there is nothing else to restore.
            self.faster_rcnn.load_state_dict(state_dict)
            return self

        self.faster_rcnn.load_state_dict(state_dict['model'])
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every loss of ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{name: meter.value()[0]}`` for every loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Training wrapper around a Faster R-CNN model.

    :meth:`forward` computes the four training losses (RPN localization,
    RPN classification, head localization, head classification) plus their
    sum; :meth:`train_step` performs one optimisation step on that sum.

    Args:
        faster_rcnn: the Faster R-CNN model to train. It must provide
            ``extractor``, ``rpn``, ``head``, ``get_optimizer()``,
            ``loc_normalize_mean`` and ``loc_normalize_std``.
    """

    def __init__(self, faster_rcnn):
        # Initialise the parent module.
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Hyper-parameters passed to _fast_rcnn_loc_loss when computing the
        # localization losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators create gt_bbox / gt_label etc. as training targets
        # (ground truth); they are only needed while training.
        # Original note (unverified here): AnchorTargetCreator picks 256 of
        # roughly 20000 candidate anchors for the RPN's binary
        # classification and box regression.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # Original note: (0., 0., 0., 0.)
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        # Original note: (0.1, 0.1, 0.2, 0.2)
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        # Original note: SGD
        self.optimizer = self.faster_rcnn.get_optimizer()
        # Visualisation wrapper (original note: see vis_tool.py).
        self.vis = Visualizer(env=opt.env)

        # Confusion matrices; the argument is the number of classes
        # (original note: 21 = 20 object classes + 1 background).
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        # One running-average meter per loss.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward the model on one image and compute the training losses.

        Args:
            imgs: batch of images; its shape is unpacked as (N, C, H, W).
            bboxes: batch of ground-truth boxes; the first dimension is the
                batch size and must be 1.
            labels: batch of ground-truth labels, indexed like ``bboxes``.
            scale: scaling applied during preprocessing; forwarded to the
                RPN.

        Returns:
            LossTuple holding the four individual losses followed by their
            sum.

        Raises:
            ValueError: If the batch size (``bboxes.shape[0]``) is not 1.
        """
        # Batch size.
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # Original note: the part of VGG16 before conv5_3 extracts the
        # image features.
        features = self.faster_rcnn.extractor(imgs)

        # roi_indices is returned by the RPN but not used in this method.
        # Original author's notes (unverified here): with hh x ww the
        # feature-map size there are hh*ww*9 (about 20000) anchors; the
        # 12000 most likely foreground ones are kept and NMS reduces them
        # to 2000 rois of shape (2000, 4).
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward.
        # Original note (unverified here): proposal_target_creator yields
        # sample_roi (128, 4), gt_roi_loc (128, 4) and gt_roi_label (128,).
        # The head takes sample_roi + features and predicts class scores
        # and refined boxes, for which gt_roi_label and gt_roi_loc are the
        # ground truth.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        # Original note (unverified here): outputs are 128x84 and 128x21
        # while the targets are 128x4 and 128 labels.
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Match anchors against the ground-truth boxes to get per-anchor
        # offsets and labels.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        # rpn_loc: offsets regressed by the RPN; gt_rpn_loc: target offsets
        # from anchor_target_creator.
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # Cross entropy between the RPN scores and the anchor labels;
        # anchors labelled -1 are ignored.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Drop the ignored (-1) anchors from labels and scores alike before
        # updating the RPN confusion matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Regroup as (n_sample, n_class, 4); original note: (128, 21, 4).
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep only the regression row of its gt class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        # Original note: offsets (dx, dy, dw, dh) between the sampled
        # proposals and their boxes, produced by proposal_target_creator.
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        # Cross-entropy loss for the head's classification.
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        # Four individual losses plus their sum as the fifth entry.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step.

        Zeroes the gradients, computes all losses with :meth:`forward`,
        back-propagates ``total_loss``, updates the parameters and records
        the losses in the meters.
        """
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(save_path):
        """Create the directory that will contain ``save_path`` if needed."""
        save_dir = os.path.dirname(save_path)
        # A bare file name has an empty dirname; os.makedirs('') would raise.
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Write a checkpoint and return the path it was written to.

        Args:
            save_optimizer (bool): also store ``optimizer.state_dict()``.
            save_path: target file; when None a name of the form
                ``checkpoints/fasterrcnn_<time>`` is generated and the
                values of ``kwargs`` are appended to it.
            **kwargs: extra info stored under ``'other_info'``.

        Returns:
            The path of the written checkpoint.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally config / optimizer) from ``path``.

        Args:
            path: checkpoint file, either written by :meth:`save` or a
                legacy file that holds only the model state dict.
            load_optimizer (bool): restore the optimizer state if present.
            parse_opt (bool): re-parse the stored config into ``opt``.

        Returns:
            self, to allow chaining.
        """
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from trusted sources.
        state_dict = t.load(path)
        if 'model' not in state_dict:
            # legacy way, for backward compatibility: the file is the bare
            # model state dict, so there is nothing else to restore.
            self.faster_rcnn.load_state_dict(state_dict)
            return self

        self.faster_rcnn.load_state_dict(state_dict['model'])
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every loss of ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{name: meter.value()[0]}`` for every loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training a Faster R-CNN; returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators create gt_bbox / gt_label etc. as training targets.
        # Original note (unverified here): AnchorTargetCreator picks 256 of
        # roughly 20000 candidate anchors and so provides the ground truth
        # for the RPN's predicted locations and classes.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        # Original note: SGD
        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status: confusion matrices for the two
        # classification problems and one average meter per loss.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd..Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses: the four individual losses followed by
            their sum.

        Raises:
            ValueError: If the batch size (``bboxes.shape[0]``) is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        # Original author's example shapes in this method (unverified
        # here): imgs (1, 3, 600, 800), features (1, 512, 38, 50),
        # rpn_locs (1, 38*50*9, 4), rpn_scores (1, 38*50*9, 2),
        # rois (2000, 4), anchor (38*50*9, 4).
        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        # roi_indices is returned by the RPN but not used in this method.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        # Original note (unverified here): yields sample_roi (128, 4),
        # gt_roi_loc (128, 4) and gt_roi_label (128,); the latter two are
        # the ground truth for the head's regression and classification.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        # Original note (unverified here): outputs are 128x84 and 128x21.
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Match anchors against the ground-truth boxes to get per-anchor
        # offsets and labels.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        # rpn_loc: offsets regressed by the RPN; gt_rpn_loc: target offsets
        # from anchor_target_creator.
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # Cross-entropy loss; anchors labelled -1 are ignored.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Drop the ignored (-1) anchors from labels and scores alike before
        # updating the RPN confusion matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Regroup as (n_sample, n_class, 4); original note: (128, 21, 4).
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep only the regression row of its gt class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        # Four individual losses plus their sum as the fifth entry.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step.

        Zeroes the gradients, runs :meth:`forward`, back-propagates the
        total loss, steps the optimizer and records the losses in the meters.
        """
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(save_path):
        """Create the directory that will contain ``save_path`` if needed."""
        save_dir = os.path.dirname(save_path)
        # A bare file name has an empty dirname; os.makedirs('') would raise.
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, optionally the optimizer, and other info.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generated using a time string and the values of kwargs.
            **kwargs: extra info stored under ``'other_info'``; the values
                are also appended to an auto-generated file name.

        Returns:
            save_path(str): the path the checkpoint was written to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally config / optimizer) from ``path``.

        Args:
            path: checkpoint file, either written by :meth:`save` or a
                legacy file that holds only the model state dict.
            load_optimizer (bool): restore the optimizer state if present.
            parse_opt (bool): re-parse the stored config into ``opt``.

        Returns:
            self, to allow chaining.
        """
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from trusted sources.
        state_dict = t.load(path)
        if 'model' not in state_dict:
            # legacy way, for backward compatibility: the file is the bare
            # model state dict, so there is nothing else to restore.
            self.faster_rcnn.load_state_dict(state_dict)
            return self

        self.faster_rcnn.load_state_dict(state_dict['model'])
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every loss of ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{name: meter.value()[0]}`` for every loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training Faster R-CNN; ``forward`` returns losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the Region Proposal
      Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Sigma values forwarded to _fast_rcnn_loc_loss in forward().
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label etc. as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Training-status meters are disabled in this variant (their
        # construction had been commented out). The attributes are still
        # defined so that train_step(), reset_meters() and get_meter_data()
        # keep working instead of raising AttributeError.
        self.rpn_cm = None
        self.roi_cm = None
        self.meters = {}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: If the batch size (``bboxes.shape[0]``) is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        # Author's notes (example shapes from one run, not enforced here):
        # rpn_locs (1, 18648, 4), rpn_scores (1, 18648, 2), rois (1714, 4),
        # roi_indices (1714,), anchor (18648, 4). Per those notes the RPN
        # scores every anchor as foreground/background, predicts a location
        # correction for each, and proposes RoIs after NMS -- TODO confirm
        # against the RPN implementation.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward.
        # It's fine to break the computation graph of rois;
        # consider them as constant input.
        # Author's note: the proposal target creator picks a subset of the
        # RoIs (128 in the noted run) and returns them with their regression
        # targets and labels -- TODO confirm in ProposalTargetCreator.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)

        # NOTE: all zeros because only batch size 1 is supported for now.
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Author's note: the anchor target creator labels each anchor
        # (1 positive, 0 negative, -1 ignored) and returns per-anchor
        # regression targets -- TODO confirm in AnchorTargetCreator.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: the default ignore_index is -100; labels equal to -1 are
        # excluded from the loss here.
        rpn_cls_loss = F.cross_entropy(
            rpn_score, gt_rpn_label.cuda(), ignore_index=-1)

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Regroup the flat per-class offsets into (n_sample, n_class, 4).
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep only the offsets of its ground-truth class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses from ``forward``."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, optionally the optimizer, and other info.

        Args:
            save_optimizer (bool): whether to save optimizer.state_dict().
            save_path (string): where to save the model. If it is None, the
                path is generated from a time string and the values in kwargs.

        Returns:
            save_path (str): the path the checkpoint was written to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        # Create the target directory when there is one; a bare file name has
        # an empty dirname and must not be passed to os.makedirs.
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Load a checkpoint written by ``save`` (or a bare model state dict).

        Args:
            path: checkpoint file passed to ``t.load``.
            load_optimizer (bool): restore the optimizer state when present.
            parse_opt (bool): re-apply the stored config through ``opt._parse``.

        Returns:
            self
        """
        # NOTE(security): t.load unpickles the file; only load trusted files.
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add each loss to its meter; a no-op when no meters are configured."""
        if not self.meters:
            return
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset every configured loss meter and confusion meter."""
        for meter in self.meters.values():
            meter.reset()
        for confusion_meter in (self.roi_cm, self.rpn_cm):
            if confusion_meter is not None:
                confusion_meter.reset()

    def get_meter_data(self):
        """Return ``{name: value()[0]}`` for every configured loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training Faster R-CNN; ``forward`` returns losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the Region Proposal
      Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        # Initialise the parent nn.Module.
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Hyper-parameters passed to _fast_rcnn_loc_loss in forward().
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label etc. as training targets.
        # Author's note: AnchorTargetCreator supplies ground truth for the RPN
        # and ProposalTargetCreator for the RoI head; both are only needed
        # while training -- TODO confirm against their implementations.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # Author's note: mean is (0., 0., 0., 0.) and std is
        # (0.1, 0.1, 0.2, 0.2) -- values come from the model, verify there.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        # Author's note: presumably SGD -- defined by get_optimizer().
        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper used for visualisation
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status: confusion matrices for the RPN
        # (2 classes) and the RoI head (21 classes), plus one averaging
        # meter per loss field.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: If the batch size (``bboxes.shape[0]``) is not 1.
        """
        n = bboxes.shape[0]  # batch size
        print("trainer.py - batch个数", n)
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # Extract image features with the model's backbone.
        features = self.faster_rcnn.extractor(imgs)

        # Author's note: the RPN scores every anchor as foreground or
        # background, predicts offsets, and proposes about 2000 RoIs after
        # NMS; roi_indices is not used below -- TODO confirm shapes in the
        # RPN implementation.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward.
        # It's fine to break the computation graph of rois;
        # consider them as constant input.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # gt_roi_loc / gt_roi_label serve as the ground truth for the head's
        # regression and classification outputs computed next.

        # NOTE: all zeros because only batch size 1 is supported for now.
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Build per-anchor regression targets and labels from the ground
        # truth boxes.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,            # offsets predicted by the RPN
            gt_rpn_loc,         # target offsets from anchor_target_creator
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: the default ignore_index is -100; labels equal to -1 are
        # excluded from the cross-entropy loss here.
        rpn_cls_loss = F.cross_entropy(
            rpn_score,
            gt_rpn_label.cuda(),
            ignore_index=-1)
        # Feed only anchors whose label is not -1 to the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Regroup the flat per-class offsets into (n_sample, n_class, 4).
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep only the offsets of its ground-truth class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        # Localisation loss of the head (author's note: smooth L1).
        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        # Classification loss of the head.
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        # Append the sum of the four losses as the total loss.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses from ``forward``."""
        self.optimizer.zero_grad()                          # clear gradients
        losses = self.forward(imgs, bboxes, labels, scale)  # compute losses
        losses.total_loss.backward()                        # back-propagate
        self.optimizer.step()                               # update parameters
        self.update_meters(losses)                          # record losses
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, optionally the optimizer, and other info.

        Args:
            save_optimizer (bool): whether to save optimizer.state_dict().
            save_path (string): where to save the model. If it is None, the
                path is generated from a time string and the values in kwargs.

        Returns:
            save_path (str): the path the checkpoint was written to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        # A bare file name has an empty dirname; os.makedirs('') would raise,
        # so only create the directory when there is one.
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
            self,
            path,
            load_optimizer=True,
            parse_opt=False,
    ):
        """Load a checkpoint written by ``save`` (or a bare model state dict).

        Args:
            path: checkpoint file passed to ``t.load``.
            load_optimizer (bool): restore the optimizer state when present.
            parse_opt (bool): re-apply the stored config through ``opt._parse``.

        Returns:
            self
        """
        # NOTE(security): t.load unpickles the file; only load trusted files.
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add each loss (converted with ``at.scalar``) to its meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset every loss meter and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{name: value()[0]}`` for every loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training Faster R-CNN; ``forward`` returns losses.

    This variant adds helpers for pruned / sparse / quantized models.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the Region Proposal
      Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label etc. as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status.
        self.rpn_cm = ConfusionMeter(2)
        # 4 classes here; the original line kept ConfusionMeter(21) as a
        # commented-out alternative.
        self.roi_cm = ConfusionMeter(4)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}
        # True while module weights are held as sparse tensors.
        self.sparse = False

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: If the batch size (``bboxes.shape[0]``) is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward.
        # It's fine to break the computation graph of rois;
        # consider them as constant input.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE: all zeros because only batch size 1 is supported for now.
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: the default ignore_index is -100; labels equal to -1 are
        # excluded from the cross-entropy loss here.
        rpn_cls_loss = F.cross_entropy(
            rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Feed only anchors whose label is not -1 to the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Regroup the flat per-class offsets into (n_sample, n_class, 4).
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep only the offsets of its ground-truth class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale, prune_train=False):
        """Run one optimisation step and return the losses from ``forward``.

        Args:
            prune_train (bool): when True, multiply the gradient of every
                module that has both ``mask`` and ``weight`` by its mask
                before the optimizer step.
        """
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        if prune_train:
            for _, m in self.named_modules():
                if not (hasattr(m, 'mask') and hasattr(m, 'weight')):
                    continue
                # Weights that received no gradient (e.g. frozen layers)
                # have grad None; there is nothing to mask for them.
                if m.weight.grad is None:
                    continue
                m.weight.grad.data = m.weight.grad.data * m.mask.data
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, prune=False, **kwargs):
        """Serialize the model, optionally the optimizer, and other info.

        When ``self.sparse`` is set, weights of modules flagged ``sparse`` are
        converted to dense in place before saving, and those modules are
        recorded (by ``str(module)``) in the checkpoint's ``sparse_list``.

        Args:
            save_optimizer (bool): whether to save optimizer.state_dict().
            save_path (string): where to save the model. If it is None, the
                path is generated from a time string and the values in kwargs.
            prune (bool): append ``"_prune"`` to a generated path.

        Returns:
            save_path (str): the path the checkpoint was written to.
        """
        save_dict = dict()
        save_dict['sparse_list'] = []
        if self.sparse:
            for _, m in self.named_modules():
                if hasattr(m, "sparse"):
                    if m.sparse and hasattr(m, 'weight'):
                        w_dev = m.weight.device
                        w = m.weight.data.coalesce().to_dense()
                        m.weight.data = w.to(w_dev)
                        save_dict['sparse_list'].append(str(m))

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()
        save_dict['sparse'] = self.sparse

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_
            if prune:
                save_path += "_prune"

        # A bare file name has an empty dirname; os.makedirs('') would raise,
        # so only create the directory when there is one.
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def generate_simple_state_dict(self, pre_trained, debug=False):
        """Copy pre-trained weights into the current state dict by position.

        Keys containing ``"mask"`` are skipped without consuming a source
        entry; ``head.cls_loc`` / ``head.score`` keys keep their current
        values but do consume one source entry.

        Returns:
            The current model's state dict with the copied values.
        """
        new = list(pre_trained.items())
        curr_model_kvpair = self.faster_rcnn.state_dict()

        if debug:
            for k, v in curr_model_kvpair.items():
                print("curr :", str(k))
            for i in new:
                print("new :", str(i[0]))

        count = 0
        # Only values of existing keys are replaced, so iterating the dict
        # while assigning is safe (its size never changes).
        for k, v in curr_model_kvpair.items():
            if "mask" in k:
                continue
            if "head.cls_loc" in str(k) or "head.score" in str(k):
                count += 1
                continue
            _, weights = new[count]
            curr_model_kvpair[k] = weights
            count += 1

        return curr_model_kvpair

    def generate_state_dict(self, pre_trained, simple=False, debug=False):
        """Merge ``pre_trained`` into the current state dict by key name.

        Switches the model to dense first. With ``simple`` set, delegates to
        ``generate_simple_state_dict``. Keys missing from the current model
        are reported and skipped.
        """
        self.set_dense()

        if simple:
            return self.generate_simple_state_dict(pre_trained, debug)

        new = list(pre_trained.items())
        curr_model_kvpair = self.faster_rcnn.state_dict()

        if debug:
            for k, v in curr_model_kvpair.items():
                print("curr :", str(k))
            for i in new:
                print("new :", str(i[0]))

        for k, v in new:
            if k in curr_model_kvpair:
                curr_model_kvpair[k] = v
            else:
                print(f"Key Weight Mismatch at: {str(k)} -- Not Loading")

        return curr_model_kvpair

    def to_sparse(self, sparse_mx, n, m):
        """Convert a scipy sparse matrix to a float32 torch sparse COO tensor.

        Args:
            sparse_mx: object providing ``tocoo()`` (e.g. a scipy matrix).
            n, m: module name and description, used only for the log line.
        """
        print(f"Turning Sparse: {n}: {m}")
        sparse_mx = sparse_mx.tocoo().astype(np.float32)
        indices = t.from_numpy(np.vstack(
            (sparse_mx.row, sparse_mx.col))).long()
        values = t.from_numpy(sparse_mx.data)
        shape = t.Size(sparse_mx.shape)
        # sparse_coo_tensor is the documented replacement for the legacy
        # t.sparse.FloatTensor(indices, values, shape) constructor.
        return t.sparse_coo_tensor(indices, values, shape)

    def revert_to_sparse(self, sparse_list):
        """Re-sparsify the weights of every module listed in ``sparse_list``.

        Modules are matched by ``str(module)``.

        Raises:
            ValueError: If a listed module's weight cannot be converted.
        """
        self.sparse = True
        for n, m in self.named_modules():
            if str(m) not in sparse_list:
                continue
            m.sparse = True
            if hasattr(m, 'weight') and not m.weight.is_sparse:
                try:
                    dev = m.weight.device
                    weight = m.weight.data.cpu().numpy()
                    matrix = coo_matrix(weight)
                    tensor = self.to_sparse(matrix, n, str(m))
                    m.weight.data = tensor.to(dev)
                except Exception as err:
                    # Keep the ValueError contract but preserve the cause,
                    # and let KeyboardInterrupt/SystemExit propagate.
                    raise ValueError(
                        f"Couldn't convert {n},{str(m)} to sparse") from err
        return self

    def load(
            self,
            path,
            load_optimizer=False,
            parse_opt=False,
            debug=False,
            simple=None,
    ):
        """Load a checkpoint written by ``save`` (or a bare model state dict).

        Args:
            path: checkpoint file passed to ``t.load``.
            load_optimizer (bool): restore the optimizer state when present.
            parse_opt (bool): re-apply the stored config through ``opt._parse``.
            debug (bool): print current and checkpoint key names.
            simple: positional (True) or by-name (False) weight matching.
                ``None`` (default) reads ``opt.use_simple`` at call time, so
                configuration parsed after import is honoured.

        Returns:
            self
        """
        if simple is None:
            simple = opt.use_simple

        # NOTE(security): t.load unpickles the file; only load trusted files.
        state_dict = t.load(path)
        if 'model' in state_dict:
            sd = self.generate_state_dict(state_dict['model'], simple, debug)
            self.faster_rcnn.load_state_dict(sd)
        else:  # legacy way, for backward compatibility
            sd = self.generate_state_dict(state_dict, simple, debug)
            self.faster_rcnn.load_state_dict(sd)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        if 'sparse' in state_dict and state_dict['sparse'] == True:
            print("Reverting to Sparse")
            self.revert_to_sparse(state_dict.get('sparse_list', []))
        print(f"Successfully Loaded Model: {path}")
        return self

    def update_meters(self, losses):
        """Add each loss (converted with ``at.scalar``) to its meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset every loss meter and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{name: value()[0]}`` for every loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}

    def quantize(self, bits=5, verbose=False):
        """Replace the model with ``quantization.quantize``'s result."""
        self.sparse = True
        self.faster_rcnn = quantization.quantize(
            self.faster_rcnn, bits=bits, verbose=verbose)

    def replace_with_sparsedense(self):
        """Delegate to the model's ``replace_with_sparsedense``."""
        self.faster_rcnn.replace_with_sparsedense()

    def set_sparse(self):
        """Mark the trainer sparse and switch the model to sparse mode."""
        self.sparse = True
        self.faster_rcnn.set_sparse()

    def set_dense(self):
        """Mark the trainer dense and switch the model to dense mode."""
        self.sparse = False
        self.faster_rcnn.set_dense()
class FasterRCNNTrainer(nn.Module):
    """Training wrapper around a Faster R-CNN model.

    ``forward`` computes the four training losses plus their sum;
    ``train_step`` additionally back-propagates and steps the optimizer.

    Args:
        faster_rcnn: model exposing ``extractor``, ``rpn``, ``head``,
            ``get_optimizer``, ``loc_normalize_mean`` and
            ``loc_normalize_std`` (all are used below).
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Sigma values forwarded to _fast_rcnn_loc_loss in forward().
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label etc. as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status: confusion meters for the RPN and
        # the RoI head, plus one averaging meter per loss field.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Run the model on one image and compute the training losses.

        Args:
            imgs: 4-D image batch; the last two dimensions are read as
                height and width.
            bboxes: ground-truth boxes; the first dimension is the batch
                size and must be 1.
            labels: ground-truth labels, indexed like ``bboxes``.
            scale: passed through to the RPN.

        Returns:
            LossTuple of rpn_loc_loss, rpn_cls_loss, roi_loc_loss,
            roi_cls_loss and their sum.

        Raises:
            ValueError: If the batch size (``bboxes.shape[0]``) is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Batch size is one, so take the single element of each batch.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs together with their regression targets and labels.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # All-zero batch indices: only batch size 1 is supported.
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # Labels equal to -1 are excluded from the cross-entropy loss.
        rpn_cls_loss = F.cross_entropy(
            rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Feed only anchors whose label is not -1 to the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Regroup the flat per-class offsets into (n_sample, n_class, 4).
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep only the offsets of its ground-truth class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        # Append the sum of the four losses as the total loss.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses from ``forward``."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Write a checkpoint and return its path.

        Args:
            save_optimizer (bool): also store ``optimizer.state_dict()``.
            save_path (str): destination file. When None, a path under
                ``checkpoints/`` is built from a time string and the values
                in ``kwargs``.
            **kwargs: stored under ``'other_info'``; values are appended to a
                generated path.

        Returns:
            str: the path the checkpoint was written to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        # A bare file name has an empty dirname; os.makedirs('') would raise,
        # so only create the directory when there is one.
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
            self,
            path,
            load_optimizer=True,
            parse_opt=False,
    ):
        """Load a checkpoint written by ``save`` (or a bare model state dict).

        Args:
            path: checkpoint file passed to ``t.load``.
            load_optimizer (bool): restore the optimizer state when present.
            parse_opt (bool): re-apply the stored config through ``opt._parse``.

        Returns:
            self
        """
        # NOTE(security): t.load unpickles the file; only load trusted files.
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:
            # Bare model state dict: nothing else to restore.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add each loss (converted with ``at.scalar``) to its meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset every loss meter and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{name: value()[0]}`` for every loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training a Faster R-CNN model.

    Calling the trainer runs the wrapped model and returns its losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Sigma hyper-parameters forwarded to _fast_rcnn_loc_loss when the
        # localization losses are computed.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label etc. as training targets.
        # Per the original author's notes: the anchor target creator samples
        # 256 anchors out of the ~20000 candidates for RPN training ...
        self.anchor_target_creator = AnchorTargetCreator()
        # ... and the proposal target creator samples 128 RoIs out of the
        # ~2000 proposals for training the RoI head.
        self.proposal_target_creator = ProposalTargetCreator()

        # Mean / std handed to the proposal target creator so the
        # localization targets it produces are normalized.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        # One averaging meter per loss field.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of
                images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded
                from the definition, which means that the range of the
                value is :math:`[0, L - 1]`. :math:`L` is the number of
                foreground classes.
            scale (float): Amount of scaling applied to the raw image
                during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: If the batch size of ``bboxes`` is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # Extract the image features.
        features = self.faster_rcnn.extractor(imgs)

        # Proposal creation, as described by the original author:
        # 1. from the feature map, predict a foreground probability and
        #    location offsets for each of the (H/16) x (W/16) x 9
        #    (roughly 20000) anchors;
        # 2. keep the 12000 anchors with the highest probability;
        # 3. apply the regressed offsets to those anchors to obtain RoIs;
        # 4. run non-maximum suppression (NMS) and keep the 2000
        #    highest-scoring RoIs.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward.
        # It's fine to break the computation graph of rois,
        # consider them as constant input.
        # The proposal target creator returns the sampled RoIs together
        # with their ground-truth location targets and labels.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE: it's all zero because only batch=1 is supported for now.
        sample_roi_index = t.zeros(len(sample_roi))
        # Run the head network to get the per-RoI predictions.
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Anchor sampling for the RPN loss, as described by the original
        # author:
        # 1. for every ground-truth box, the anchor with the highest IoU
        #    is a positive sample;
        # 2. of the remaining anchors, those whose IoU with any
        #    ground-truth box exceeds 0.7 are positive too (at most 128
        #    positives);
        # 3. anchors with IoU below 0.3 are randomly chosen as negatives
        #    so that positives + negatives total 256.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        # Localization loss (the original author notes it is L1-based).
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # Classification loss is cross entropy.
        # NOTE: default value of ignore_index is -100, so -1 is passed
        # explicitly to skip anchors labelled -1.
        rpn_cls_loss = F.cross_entropy(
            rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Feed only the non-ignored anchors to the RPN confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sampled RoI keep only the 4 values predicted for its
        # ground-truth class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        # Two kinds of loss (loc and label), each for the RPN and for the
        # RoI head: four losses, plus their sum appended as the fifth.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step and return the losses of that step.

        Takes the same arguments as :meth:`forward`.
        """
        # Clear the accumulated gradients.
        self.optimizer.zero_grad()
        # Compute every loss with a forward pass.
        losses = self.forward(imgs, bboxes, labels, scale)
        # Back-propagate the summed loss.
        losses.total_loss.backward()
        # Update the parameters once.
        self.optimizer.step()
        # Record the losses in the averaging meters.
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will hold ``path`` if it is missing.

        Returns the directory part of ``path`` ('' for a bare file name).
        """
        parent = os.path.dirname(path)
        # dirname is '' for a bare file name; os.makedirs('') raises and
        # the current working directory does not need to be created.
        if parent and not os.path.exists(parent):
            os.makedirs(parent)
        return parent

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model (optionally the optimizer) and other info.

        Args:
            save_optimizer (bool): whether to save
                ``optimizer.state_dict()``.
            save_path (string): where to save the model. If it's None,
                the path is generated from a time string and the values
                in ``kwargs``.
            **kwargs: extra info stored under ``'other_info'``; the values
                are also appended to an auto-generated path.

        Returns:
            save_path(str): the path the model was saved to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoint_caffe/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        # A save_path without a directory component (e.g. 'model.pth')
        # must not trigger os.makedirs('').
        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore the model (and optionally optimizer / config) from disk.

        NOTE(review): ``t.load`` unpickles the file, so only load
        checkpoints from a trusted source.

        Args:
            path (string): checkpoint file to read.
            load_optimizer (bool): restore the optimizer state when the
                checkpoint contains one.
            parse_opt (bool): re-parse ``opt`` from the stored config.

        Returns:
            self
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            # The file is a bare model state dict: nothing else to restore.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    # update_meters, reset_meters and get_meter_data feed, clear and read
    # the loss meters (and confusion meters) kept on the trainer.
    def update_meters(self, losses):
        """Add every field of ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: meter.value()[0]}`` for every loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training a Faster R-CNN model.

    Calling the trainer runs the wrapped model and returns its losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label etc. as training targets.
        # Per the original author's notes, anchor_target_creator picks 256
        # of the 20000+ candidate anchors for classification and box
        # regression.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        # One averaging meter per loss field.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of
                images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded
                from the definition, which means that the range of the
                value is :math:`[0, L - 1]`. :math:`L` is the number of
                foreground classes.
            scale (float): Amount of scaling applied to the raw image
                during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: If the batch size of ``bboxes`` is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # The extractor produces the feature map (the original author
        # notes it is the first layers of VGG16 here).
        features = self.faster_rcnn.extractor(imgs)

        # ------------------ RPN network -------------------#
        # ------------------ RPN prediction -------------------#
        # Get RoIs from the RPN. Shapes below are the original author's
        # notes and are not checked in this method:
        #   rpn_locs:    per-anchor offsets, [1, 9*hh*ww, 4]
        #   rpn_scores:  per-anchor object / not-object scores,
        #                [1, 9*hh*ww, 2]
        #   rois:        proposals from the RPN, about 2000 in training,
        #                [2000, 4]
        #   roi_indices: index of each RoI within the batch; all zeros
        #                because only batch size 1 is supported. It is not
        #                used below.
        # rpn_locs / rpn_scores are used for the RPN loss, rois are handed
        # to the RoI head for classification. Every one of the 9*hh*ww
        # anchors gets a location and a class prediction.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]  # [n_anchor, 2] per the original notes
        rpn_loc = rpn_locs[0]  # [n_anchor, 4] per the original notes
        roi = rois

        # ------------------ RPN targets -------------------#
        # The RPN predicts for all anchors, so gt_rpn_loc / gt_rpn_label
        # carry a value for every anchor, but only a sample of positives
        # and negatives (256 in total, per the original notes) contributes
        # to the loss. Labelling: positive = 1, negative = 0, invalid or
        # ignored = -1.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)

        # ------------------ RPN losses -------------------#
        # Localization (box regression) loss; per the original notes only
        # positive samples contribute.
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # Classification loss (two classes); anchors labelled -1 are
        # ignored.
        rpn_cls_loss = F.cross_entropy(
            rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Feed only the non-ignored anchors to the RPN confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI network -------------------#
        # ------------------ ROI targets -------------------#
        # Sample RoIs and forward.
        # It's fine to break the computation graph of rois,
        # consider them as constant input.
        # Sample positive / negative RoIs for training the RoI head:
        #   gt_roi_loc:   location offsets (the second box regression)
        #   gt_roi_label: N + 1 classes, the extra one being background
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE: it's all zero because only batch=1 is supported for now.
        sample_roi_index = t.zeros(len(sample_roi))

        # ------------------ ROI prediction -------------------#
        # Only the sampled RoIs are run through the head, which is why
        # sampling happens before prediction. The head returns the
        # per-class location offsets and the class scores of sample_roi.
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        n_sample = roi_cls_loc.shape[0]
        # -> [n_sample, n_class + 1, 4] per the original notes
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # roi_cls_loc holds a box prediction for every class; the loss only
        # needs the one for each RoI's ground-truth class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        # Localization (box regression) loss.
        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        # Classification loss (21 classes per the original notes).
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        # Four losses plus their sum appended as the fifth.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step and return the losses of that step.

        Takes the same arguments as :meth:`forward`.
        """
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will hold ``path`` if it is missing.

        Returns the directory part of ``path`` ('' for a bare file name).
        """
        # Imported locally so the helper does not rely on a module-level
        # import of os.
        import os

        parent = os.path.dirname(path)
        # dirname is '' for a bare file name; os.makedirs('') raises and
        # the current working directory does not need to be created.
        if parent and not os.path.exists(parent):
            os.makedirs(parent)
        return parent

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model (optionally the optimizer) and other info.

        Args:
            save_optimizer (bool): whether to save
                ``optimizer.state_dict()``.
            save_path (string): where to save the model. If it's None,
                the path is generated from a time string and the values
                in ``kwargs``.
            **kwargs: extra info stored under ``'other_info'``; the values
                are also appended to an auto-generated path.

        Returns:
            save_path(str): the path the model was saved to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        # The target directory (e.g. 'checkpoints/') may not exist yet;
        # create it so t.save does not fail on a fresh checkout.
        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore the model (and optionally optimizer / config) from disk.

        NOTE(review): ``t.load`` unpickles the file, so only load
        checkpoints from a trusted source.

        Args:
            path (string): checkpoint file to read.
            load_optimizer (bool): restore the optimizer state when the
                checkpoint contains one.
            parse_opt (bool): re-parse ``opt`` from the stored config.

        Returns:
            self
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            # The file is a bare model state dict: nothing else to restore.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every field of ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: meter.value()[0]}`` for every loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}