def __init__(self, faster_rcnn):
    """Wrap *faster_rcnn* with everything a training loop needs.

    Args:
        faster_rcnn: the Faster R-CNN network to be trained.
    """
    super(FasterRCNNTrainer, self).__init__()
    self.faster_rcnn = faster_rcnn

    # Builders for ground-truth training targets (gt_bbox, gt_label, ...).
    self.anchor_target_creator = AnchorTargetCreator()
    self.proposal_target_creator = ProposalTargetCreator()

    # Normalization constants for the regression targets, taken from the model.
    self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
    self.loc_normalize_std = faster_rcnn.loc_normalize_std

    # Smooth-L1 hyper-parameters for the two localization losses.
    self.rpn_sigma = opt.rpn_sigma
    self.roi_sigma = opt.roi_sigma

    # The model supplies its own optimizer.
    self.optimizer = self.faster_rcnn.get_optimizer()

    # Visdom wrapper for plotting training progress.
    self.vis = Visualizer(env=opt.env)

    # Running training indicators: a 2-way confusion matrix for the RPN,
    # a 21-way one for the head, and one average meter per loss term.
    self.rpn_cm = ConfusionMeter(2)
    self.roi_cm = ConfusionMeter(21)
    self.meters = {k: AverageValueMeter() for k in LossTuple._fields}
def model_analysis(model, dataloader, params, temperature=1., num_classes=10):
    """Generate softmax scores, per-sample correctness flags and a confusion
    matrix for ``model`` over the evaluation set.

    Args:
        model: network mapping a batch of inputs to class logits.
        dataloader: iterable of ``(data_batch, labels_batch)`` pairs.
        params: run configuration; only ``params.cuda`` is read here.
        temperature (float): softmax temperature applied to the logits.
        num_classes (int): number of classes for the confusion matrix.

    Returns:
        tuple: ``(softmax_scores, predict_correct, confusion)`` where the
        first two are stacked numpy arrays (one row per sample) and
        ``confusion`` is the integer-valued confusion-matrix array.
    """
    model.eval()
    confusion_matrix = ConfusionMeter(num_classes)
    softmax_scores = []
    predict_correct = []
    with tqdm(total=len(dataloader)) as t:
        for idx, (data_batch, labels_batch) in enumerate(dataloader):
            if params.cuda:
                # BUG FIX: `.cuda(async=True)` is a SyntaxError on
                # Python 3.7+ (`async` became a keyword); the PyTorch
                # replacement keyword is `non_blocking`.
                data_batch = data_batch.cuda(non_blocking=True)
                labels_batch = labels_batch.cuda(non_blocking=True)
            data_batch, labels_batch = Variable(data_batch), Variable(labels_batch)

            output_batch = model(data_batch)
            confusion_matrix.add(output_batch.data, labels_batch.data)

            # Temperature-scaled softmax, moved to CPU numpy for stacking.
            softmax_scores_batch = F.softmax(output_batch / temperature, dim=1)
            softmax_scores.append(softmax_scores_batch.data.cpu().numpy())

            # extract data from torch Variable, move to cpu, convert to numpy arrays
            output_batch = output_batch.data.cpu().numpy()
            labels_batch = labels_batch.data.cpu().numpy()
            predict_correct_batch = (np.argmax(output_batch, axis=1) == labels_batch).astype(int)
            predict_correct.append(np.reshape(predict_correct_batch, (labels_batch.size, 1)))
            t.update()
    softmax_scores = np.vstack(softmax_scores)
    predict_correct = np.vstack(predict_correct)
    return softmax_scores, predict_correct, confusion_matrix.value().astype(int)
def __init__(self, faster_rcnn, attacker=None, layer_idx=None, attack_mode=False):
    """Training wrapper that pairs a Faster R-CNN model with an optional attacker.

    Args:
        faster_rcnn: the Faster R-CNN network to be trained.
        attacker: optional adversarial-attack component.
        layer_idx: optional index of the layer the attacker targets.
        attack_mode (bool): whether attack training is enabled.
    """
    super(BRFasterRcnnTrainer, self).__init__()
    self.faster_rcnn = faster_rcnn

    # Attack-related configuration.
    self.attacker = attacker
    self.layer_idx = layer_idx
    self.attack_mode = attack_mode

    # Smooth-L1 hyper-parameters for the localization losses.
    self.rpn_sigma = opt.rpn_sigma
    self.roi_sigma = opt.roi_sigma

    # Builders for ground-truth training targets.
    self.anchor_target_creator = AnchorTargetCreator()
    self.proposal_target_creator = ProposalTargetCreator()

    # Regression-target normalization constants from the model.
    self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
    self.loc_normalize_std = faster_rcnn.loc_normalize_std

    # Optimizer and visdom plotting wrapper.
    self.optimizer = self.faster_rcnn.get_optimizer()
    self.vis = Visualizer(env=opt.env)

    # Training indicators: RPN fg/bg and 21-way head confusion matrices,
    # plus one average meter per loss term (standard and BR variants).
    self.rpn_cm = ConfusionMeter(2)
    self.roi_cm = ConfusionMeter(21)
    self.meters = {k: AverageValueMeter() for k in LossTuple._fields}
    self.BR_meters = {k: AverageValueMeter() for k in LossTupleBR._fields}
class FasterRCNNTrainer(nn.Module):
    """Convenience wrapper that turns a Faster R-CNN model into a trainable
    module returning its losses.

    The losses include:

    * :obj:`rpn_loc_loss`: localization loss of the Region Proposal Network.
    * :obj:`rpn_cls_loss`: classification loss of the RPN.
    * :obj:`roi_loc_loss`: localization loss of the head module.
    * :obj:`roi_cls_loss`: classification loss of the head module.
    * :obj:`total_loss`: the sum of the four losses above.

    Args:
        faster_rcnn (model.FasterRCNN): the Faster R-CNN model to train.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()
        self.faster_rcnn = faster_rcnn

        # Ground-truth target builders (gt_bbox, gt_label, ...) for training.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # Regression-target normalization constants from the model.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        # Smooth-L1 hyper-parameters for the localization losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Optimizer supplied by the model, and a visdom plotting wrapper.
        self.optimizer = self.faster_rcnn.get_optimizer()
        self.vis = Visualizer(env=opt.env)

        # Training-status indicators: foreground/background confusion matrix
        # for the RPN and a 21-class confusion matrix for the head.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        # One running-average meter per loss term.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}
def evaluate(model, dataloader, crf, criterion, f1=False):
    """Evaluate `model` on `dataloader`, returning accuracy and summed loss,
    plus precision/recall/F1 when `f1` is True.

    Args:
        model: sequence-labeling network; with `crf` it must also expose
            `forward_alg` for the loss.
        dataloader: yields (features, labels, _, lengths) batches.
        crf: if truthy, use the model's CRF decoding/loss path.
        criterion: loss used on the non-CRF path (8-way classification).
        f1: also accumulate an 8-class confusion matrix and derive
            precision/recall/F1 from it.

    NOTE(review): this uses pre-0.4 PyTorch idioms (`volatile=True`,
    `loss.data[0]`) — assumes a legacy torch version; confirm before porting.
    """
    model.eval()
    if f1:
        confusion_matrix = ConfusionMeter(8)
        confusion_matrix.reset()
    correct, total = 0, 0
    loss_total = 0
    for i, (features, labels, _, lengths) in enumerate(dataloader):
        # Trim padding down to the longest sequence in this batch.
        max_length = max(lengths)
        features = cuda_var_wrapper(features[:, :, :max_length], volatile=True)
        labels = cuda_var_wrapper(labels[:, :max_length], volatile=True)
        lengths = cuda_var_wrapper(lengths)
        if crf:
            # CRF path: model decodes predictions; loss via forward algorithm.
            _, output = model(features, lengths)
            loss = model.forward_alg(features, labels, lengths)
            preds = output
        else:
            # Plain path: flatten to (batch*seq, 8) for the criterion,
            # then take the argmax over the class dimension as predictions.
            output = model(features)
            loss = criterion(output.contiguous().view(-1, 8), labels.contiguous().view(-1))
            _, preds = torch.max(F.softmax(output, 2), 2)
        correct_batch, total_batch = get_batch_accuracy(labels, output, lengths)
        # Weight the (mean) batch loss by batch size before summing.
        loss_total += loss.data[0] * features.size()[0]
        correct += correct_batch
        total += total_batch
        if f1:
            if isinstance(preds, Variable):
                preds = preds.data
            # Only the first `length` positions of each sequence are real tokens.
            for pred, truth, length in zip(preds, labels.data, lengths.data):
                confusion_matrix.add(pred[:length], truth[:length])
    if f1:
        # print(confusion_matrix.value())
        precision, recall, f1 = precision_recall_f1(confusion_matrix)
        return correct / total, loss_total, precision, recall, f1
    return correct / total, loss_total
import os
import torch
import os.path as osp
from torch.utils import data
from torchnet.meter import ConfusionMeter, AverageValueMeter

# Minimal ConfusionMeter demo: three 3-class score rows vs. their targets.
meter = ConfusionMeter(3)
scores = torch.tensor([[0.3, 0.7, 0.1], [0.7, 0.2, 0.9], [0.9, 0.2, 1]])
targets = torch.tensor([0, 1, 1])
meter.add(scores, targets)
matrix = meter.value()
pass  # no-op: convenient breakpoint anchor for inspecting `matrix`
def test(
        main_options: MainOptions,
        test_options: TestOptions
) -> None:
    """Evaluate a trained multi-agent model on a held-out image split and
    print per-class precision/recall from a confusion matrix.

    Args:
        main_options: run-wide options (steps, agent count, cuda flag).
        test_options: paths (model JSON, state dict, images, output dir)
            and batch size.
    """
    steps = main_options.step

    json_path = test_options.json_path
    state_dict_path = test_options.state_dict_path
    image_root = test_options.image_root
    output_dir = test_options.output_dir

    # Fail fast on missing / malformed input paths.
    assert exists(json_path), \
        f"JSON path \"{json_path}\" does not exist"
    assert isfile(json_path), \
        f"\"{json_path}\" is not a file"
    assert exists(state_dict_path), \
        f"State dict path {state_dict_path} does not exist"
    assert isfile(state_dict_path), \
        f"{state_dict_path} is not a file"

    # Create the output directory if needed; refuse a non-directory path.
    if exists(output_dir) and isdir(output_dir):
        print(f"File in {output_dir} will be overwritten")
    elif exists(output_dir) and not isdir(output_dir):
        raise Exception(f"\"{output_dir}\" is not a directory")
    else:
        print(f"Create \"{output_dir}\"")
        mkdir(output_dir)

    img_pipeline = tr.Compose([
        tr.ToTensor(),
        custom_tr.NormalNorm()
    ])

    # Keep the last 15% of a shuffled index list as the test subset.
    # NOTE(review): the shuffle is unseeded here, so the split differs per
    # run — presumably matched to the training split elsewhere; confirm.
    img_dataset = ImageFolder(image_root, transform=img_pipeline)
    idx = list(range(len(img_dataset)))
    shuffle(idx)
    idx_test = idx[int(0.85 * len(idx)):]
    test_dataset = Subset(img_dataset, idx_test)

    # Rebuild the networks and the multi-agent wrapper from the JSON config,
    # then restore trained weights.
    nn_models = ModelsWrapper.from_json(json_path)
    nn_models.load_state_dict(th.load(state_dict_path))
    marl_m = MultiAgent.load_from(
        json_path,
        main_options.nb_agent,
        nn_models,
        obs_2d_img,
        trans_2d_img
    )

    data_loader = DataLoader(
        test_dataset,
        batch_size=test_options.batch_size,
        shuffle=True,
        num_workers=8,
        drop_last=False
    )

    cuda = main_options.cuda
    device_str = "cpu"

    # Pass pytorch stuff to GPU
    # for agents hidden tensors (belief etc.)
    if cuda:
        nn_models.cuda()
        marl_m.cuda()
        device_str = "cuda"

    conf_meter = ConfusionMeter(nn_models.nb_class)

    # Run one evaluation episode per batch and accumulate the confusion matrix.
    for x, y in tqdm(data_loader):
        x, y = x.to(th.device(device_str)), y.to(th.device(device_str))
        preds, probas = episode(marl_m, x, 0., steps)
        conf_meter.add(preds.detach(), y)

    print(conf_meter.value())

    # Derive and report per-class precision / recall plus their means.
    precs, recs = prec_rec(conf_meter)
    precs_str = format_metric(precs, img_dataset.class_to_idx)
    recs_str = format_metric(recs, img_dataset.class_to_idx)
    print(f"Precision : {precs_str}")
    print(f"Precision mean = {precs.mean()}")
    print(f"Recall : {recs_str}")
    print(f"Recall mean : {recs.mean()}")
class FasterRCNNTrainer(nn.Module):
    """wrapper for conveniently training. return losses

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()
        self.faster_rcnn = faster_rcnn
        # Smooth-L1 hyper-parameters for the two localization losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # Regression-target normalization constants, taken from the model.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status: fg/bg confusion matrix for the RPN
        # and a 21-way confusion matrix for the head.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # here -1 marks anchors excluded from the classification loss.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Feed only non-ignored anchors into the RPN confusion matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # Pick, for each sampled RoI, the loc prediction of its gt class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        # Append the total so LossTuple carries all five values.
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """One optimization step: forward, backward, update, log meters."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            # Auto-generate a path from a timestamp plus the kwargs values.
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally optimizer/config) from `path`."""
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Fold one LossTuple into the running-average meters."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Clear all loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return {loss_name: current average value} for logging."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class Trainer(nn.Module):
    """Training wrapper around an R-FCN model: builds RPN/RoI targets,
    computes the four losses, and tracks confusion matrices and running
    loss averages."""

    def __init__(self, rfcn, config):
        super().__init__()
        self.rfcn = rfcn
        # Ground-truth target builders for the RPN and the RoI head.
        self.rpn_target_generator = RPNTargetGenerator()
        self.roi_target_generator = RoITargetGenerator()
        # Smooth-L1 hyper-parameters for the localization losses.
        self.rpn_sigma = config.rpn_sigma
        self.roi_sigma = config.roi_sigma
        # Regression-target normalization constants.
        self.loc_normalize_mean = (0., 0., 0., 0.)
        self.loc_normalize_std = (.1, .1, .2, .2)
        # Confusion matrices: fg/bg for the RPN, num_classes for the head.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(config.num_classes)
        # One running-average meter per loss term.
        self.loss_avgmeter = {
            k: AverageValueMeter()
            for k in [
                'rpn_loc_loss', 'rpn_fg_loss', 'roi_loc_loss',
                'roi_cls_loss', 'tot_loss'
            ]
        }
        self.optimizer = self._get_optimizer(config)
        self.vis = Visualizer()
        self.train()

    def forward(self, imgs, bboxes, labels, scale):
        """Compute all training losses for one image.

        Args:
            imgs: (N, C, H, W)
            bboxes: (N, R, 4)
            labels: (N, R)
            scale: scale factor of preprocessing

        Returns:
            dict mapping loss names to scalar loss tensors.
        """
        if imgs.size(0) != 1:
            raise ValueError("Only batch_size 1 is supported.")
        img_size = imgs.size()[2:]

        features = self.rfcn.extractor(imgs)
        rpn_scores, rpn_locs, rois, roi_indices, anchors = self.rfcn.rpn(
            features, img_size)

        # Batch size is 1, so drop the batch dimension.
        bbox = bboxes[0]
        label = labels[0]
        rpn_locs = rpn_locs[0]
        rpn_scores = rpn_scores[0]

        sample_roi, gt_roi_loc, gt_roi_label = self.roi_target_generator(
            rois, bbox, label, self.loc_normalize_mean,
            self.loc_normalize_std)
        roi_score, roi_loc = self.rfcn.RoIhead(features, sample_roi,
                                               torch.zeros(len(sample_roi)))

        # RPN losses
        gt_rpn_locs, gt_rpn_labels = self.rpn_target_generator(
            anchors, bboxes[0], img_size)
        rpn_loc_loss = _loc_loss(rpn_locs, gt_rpn_locs, gt_rpn_labels,
                                 self.rpn_sigma)
        # -1 labels anchors excluded from the classification loss.
        rpn_fg_loss = F.cross_entropy(rpn_scores, gt_rpn_labels,
                                      ignore_index=-1)
        self.rpn_cm.add(rpn_scores[gt_rpn_labels > -1].detach(),
                        gt_rpn_labels[gt_rpn_labels > -1].detach())

        # RoI losses
        roi_loc = roi_loc.view(roi_loc.size(0), -1, 4)
        # BUG FIX: the original `roi_loc[:, gt_roi_label]` indexed every
        # label's prediction for every sample, yielding an (N, N, 4) tensor.
        # Gather exactly one loc prediction per sample — the one belonging
        # to that RoI's ground-truth class.
        roi_loc = roi_loc[torch.arange(roi_loc.size(0)),
                          gt_roi_label].contiguous()
        roi_loc_loss = _loc_loss(roi_loc, gt_roi_loc, gt_roi_label,
                                 self.roi_sigma)
        roi_cls_loss = F.cross_entropy(roi_score, gt_roi_label)
        self.roi_cm.add(roi_score.detach(), gt_roi_label)

        tot_loss = rpn_loc_loss + rpn_fg_loss + roi_loc_loss + roi_cls_loss
        return {
            'rpn_loc_loss': rpn_loc_loss,
            'rpn_fg_loss': rpn_fg_loss,
            'roi_loc_loss': roi_loc_loss,
            'roi_cls_loss': roi_cls_loss,
            'tot_loss': tot_loss
        }

    def train_step(self, imgs, bboxes, labels, scale):
        """One optimization step: forward, log meters, backward, update."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        for k, v in losses.items():
            # NOTE(review): `v` is a 0-dim tensor here; AverageValueMeter
            # usually expects a Python number — confirm `v.item()` is not
            # needed with the torchnet version in use.
            self.loss_avgmeter[k].add(v)
        losses['tot_loss'].backward()
        self.optimizer.step()
        return losses

    def save(self, save_path):
        """Serialize the model weights to `save_path`."""
        # BUG FIX: `{'model', self.rfcn.state_dict()}` was a *set* literal,
        # so the checkpoint had no 'model' key. Save a dict instead.
        torch.save({'model': self.rfcn.state_dict()}, save_path)

    def reset_meters(self):
        """Clear all loss meters and both confusion matrices."""
        for meter in self.loss_avgmeter.values():
            meter.reset()
        self.rpn_cm.reset()
        self.roi_cm.reset()

    def get_meter(self):
        """Return {loss_name: meter} for inspection."""
        # BUG FIX: `{(k, v) for ...}` built a set of tuples; return a dict
        # so callers can look meters up by name.
        return {k: v for k, v in self.loss_avgmeter.items()}

    def _get_optimizer(self, config):
        """Build an Adam optimizer with doubled lr / no weight decay on
        biases, standard lr and `config.weight_decay` elsewhere."""
        lr = config.lr
        params = []
        for key, value in dict(self.rfcn.named_parameters()).items():
            if value.requires_grad:
                if 'bias' in key:
                    params += [{
                        'params': [value],
                        'lr': lr * 2,
                        'weight_decay': 0
                    }]
                else:
                    params += [{
                        'params': [value],
                        'lr': lr,
                        'weight_decay': config.weight_decay
                    }]
        return torch.optim.Adam(params)
class FasterRCNNTrainer(nn.Module):
    """Wraps the whole training procedure in one class for convenience."""

    def __init__(self, faster_rcnn):
        """`faster_rcnn` is a subclass of the Faster R-CNN base network."""
        super(FasterRCNNTrainer, self).__init__()
        self.faster_rcnn = faster_rcnn
        # True anchor offsets w.r.t. ground-truth boxes + fg/bg labels.
        self.anchor_target_creator = AnchorTargetCreator()
        # True proposal offsets w.r.t. ground-truth boxes + class labels.
        self.proposal_target_creator = ProposalTargetCreator()
        # Mean and std used to normalize the location estimates.
        self.loc_normalize_mean = self.faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = self.faster_rcnn.loc_normalize_std
        # Optimizer supplied by the model itself.
        self.optimizer = self.faster_rcnn.get_optimizer()
        # Hyper-parameters of the smooth-L1 localization losses.
        self.rpn_sigma = OPT.rpn_sigma
        self.roi_sigma = OPT.roi_sigma
        # Training metrics:
        # confusion matrix of the RPN stage — only foreground vs background.
        self.rpn_cm = ConfusionMeter(2)
        # confusion matrix of the Fast R-CNN stage — foreground classes + background.
        self.roi_cm = ConfusionMeter(OPT.n_fg_class + 1)
        # One AverageValueMeter per loss term to track running averages.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward pass that computes the training losses.

        Args:
            imgs: [N, C, H, W]
            bboxes: [N, R, 4]
            labels: [N, R]
            scale: a single scalar value.

        Returns: the 5 losses (as a LossTuple)."""
        num_batch = bboxes.shape[0]
        if num_batch != 1:
            raise ValueError("仅支持batch_size=1")
        # Image height / width.
        _, _, H, W = imgs.shape
        img_size = (H, W)
        # Backbone feature map.
        features = self.faster_rcnn.extractor(imgs)
        # RPN outputs: predicted anchor offsets and scores, plus proposals.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            features, img_size, scale)
        # Batch size is 1, so drop the batch dimension.
        bbox = bboxes[0]
        label = labels[0]
        rpn_loc = rpn_locs[0]
        rpn_score = rpn_scores[0]
        roi = rois
        # Ground-truth offsets and fg/bg labels for the anchors.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            bbox=tonumpy(data=bbox), anchor=anchor, img_size=img_size)
        # Ground-truth offsets and class labels for the sampled proposals.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi=roi, bbox=tonumpy(bbox), label=tonumpy(label),
            loc_normalize_mean=self.loc_normalize_mean,
            loc_normalize_std=self.loc_normalize_std)
        # All-zero RoI indices because batch_size is 1.
        sample_roi_index = torch.zeros(len(sample_roi))
        # Head outputs: per-class loc offsets and class scores for the RoIs.
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            x=features, rois=sample_roi, roi_indices=sample_roi_index)
        # ------------------------rpn loss----------------------------------#
        gt_rpn_label = totensor(data=gt_rpn_label).long()
        gt_rpn_loc = totensor(data=gt_rpn_loc)
        rpn_loc_loss = _faster_rcnn_loc_loss(pred_loc=rpn_loc,
                                             gt_loc=gt_rpn_loc,
                                             gt_label=gt_rpn_label.data,
                                             sigma=self.rpn_sigma)
        rpn_cls_loss = F.cross_entropy(input=rpn_score,
                                       target=gt_rpn_label.cuda(),
                                       ignore_index=-1)
        # Keep every ground-truth label except the ignored (-1) ones.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = tonumpy(data=rpn_score)[tonumpy(data=gt_rpn_label) > -1]
        self.rpn_cm.add(predicted=totensor(data=_rpn_score, cuda=False),
                        target=_gt_rpn_label.data.long())
        # ---------------------roi loss---------------------------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # Select, for each RoI, the predicted offsets of its gt_roi_label class.
        roi_loc = roi_cls_loc[torch.arange(0, n_sample),
                              totensor(data=gt_roi_label).long()]
        gt_roi_loc = totensor(data=gt_roi_loc)
        gt_roi_label = totensor(data=gt_roi_label).long()
        roi_loc_loss = _faster_rcnn_loc_loss(pred_loc=roi_loc.contiguous(),
                                             gt_loc=gt_roi_loc,
                                             gt_label=gt_roi_label.data,
                                             sigma=self.roi_sigma)
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())
        self.roi_cm.add(predicted=totensor(roi_score, False),
                        target=gt_roi_label.data.long())
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        # Append the total so the tuple carries all five values.
        losses = losses + [sum(losses)]
        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """One training step."""
        # Zero out the gradients.
        self.optimizer.zero_grad()
        # Compute the losses.
        losses = self.forward(imgs, bboxes, labels, scale)
        # Backpropagate the total loss.
        losses.total_loss.backward()
        # Apply the gradient update.
        self.optimizer.step()
        # Accumulate each loss term into its meter.
        self.update_meters(losses)
        return losses  # return the losses

    def val_step(self, imgs, sizes, bboxes, labels):
        """One validation step (no gradient update)."""
        self.optimizer.zero_grad()
        scale = imgs.shape[2] / (sizes[0].item())
        with torch.no_grad():
            losses = self.forward(imgs, bboxes, labels, scale)
        self.update_meters(losses)
        return losses

    def update_meters(self, losses):
        """Average each loss term over time."""
        # train_step returns a namedtuple, so convert it to a dict first.
        loss_dict = {k: scalar(v) for k, v in losses._asdict().items()}
        # Feed every loss value into its corresponding meter.
        for key, meter in self.meters.items():
            meter.add(loss_dict[key])

    def reset_meters(self):
        # Reset the loss meters; used after each epoch.
        for key, meter in self.meters.items():
            meter.reset()
        self.rpn_cm.reset()
        self.roi_cm.reset()

    def get_meter_data(self):
        # Fetch the current average loss values.
        return {k: v.value()[0] for k, v in self.meters.items()}

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Save the model and return the path it was saved to."""
        save_dict = dict()  # information to be stored
        # Model weights and biases.
        save_dict["model"] = self.faster_rcnn.state_dict()
        # Configuration.
        save_dict["config"] = OPT._state_dict()
        # Any extra information passed by the caller.
        save_dict["other_info"] = kwargs
        if save_optimizer:
            # Optionally include the optimizer state.
            save_dict["optimizer"] = self.optimizer.state_dict()
        # If no path was given, generate one from a timestamp.
        if save_path is None:
            timestr = time.strftime("%m%d%H%M")
            save_path = "checkpoints/fasterrcnn_%s" % timestr
            for k_, v_ in kwargs.items():
                save_path += "_%s" % v_
        # Make sure the target directory exists.
        save_dir = os.path.dirname(save_path)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        # Write the checkpoint.
        torch.save(save_dict, save_path)
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False):
        """Load model weights, and optionally optimizer state and config."""
        state_dict = torch.load(path)
        if "model" in state_dict:
            self.faster_rcnn.load_state_dict(state_dict["model"])
        else:
            # Legacy checkpoint: the file itself is the model state dict.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        # Optionally restore the configuration.
        if parse_opt:
            OPT._parse(state_dict["config"])
        # Optionally restore the optimizer.
        if "optimizer" in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict["optimizer"])
        return self
def reset_before_test_epoch(self):
    """Start each test epoch with a brand-new (unnormalized) confusion matrix."""
    fresh_matrix = ConfusionMeter(self.num_classes, normalized=False)
    self.cm = fresh_matrix
class FasterRCNNTrainer(nn.Module):
    """Training wrapper: builds RPN/RoI targets, computes the four losses,
    and tracks confusion matrices plus running loss averages."""

    def __init__(self, faster_rcnn):
        # Initialize the parent nn.Module.
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Hyper-parameters used by _fast_rcnn_loc_loss for the location losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        # Samples 256 of the ~20000 candidate anchors for binary
        # classification and location regression — i.e. it provides the
        # ground truth for the RPN's predictions.
        self.anchor_target_creator = AnchorTargetCreator()
        # AnchorTargetCreator and ProposalTargetCreator generate training
        # targets (ground truth) and are used only while training;
        # ProposalCreator (inside the RPN) produces the RoIs for Fast R-CNN
        # and runs in both training and testing.
        self.proposal_target_creator = ProposalTargetCreator()
        # (0., 0., 0., 0.)
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        # (0.1, 0.1, 0.2, 0.2)
        self.loc_normalize_std = faster_rcnn.loc_normalize_std
        # SGD
        self.optimizer = self.faster_rcnn.get_optimizer()
        # Visualization wrapper, see vis_tool.py.
        self.vis = Visualizer(env=opt.env)

        # Confusion matrices comparing predictions against ground truth;
        # the constructor argument is the number of classes.
        self.rpn_cm = ConfusionMeter(2)
        # The head predicts 21 classes (20 object classes + 1 background).
        self.roi_cm = ConfusionMeter(21)
        # Running average of each loss term.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        # Batch size; only 1 is supported.
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape  # (n, c, hh, ww)
        img_size = (H, W)

        # Feature extraction: everything up to VGG16 conv5_3.
        features = self.faster_rcnn.extractor(imgs)

        # RPN outputs: rpn_locs (hh*ww*9, 4), rpn_scores (hh*ww*9, 2),
        # rois (2000, 4), roi_indices (unused here), anchor (hh*ww*9, 4).
        # H and W are post-preprocessing sizes; the RPN scores roughly
        # (H/16)x(W/16)x9 (~20000) anchors for foreground, keeps the top
        # 12000 and applies NMS to obtain ~2000 proposal boxes.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]       # (R, 4)
        label = labels[0]      # (R,)
        rpn_score = rpn_scores[0]  # hh*ww*9
        rpn_loc = rpn_locs[0]      # hh*ww*9
        roi = rois                 # (2000, 4)

        # Sample RoIs and forward
        # proposal_target_creator yields sample_roi (128, 4),
        # gt_roi_loc (128, 4) and gt_roi_label (128,); the RoI head consumes
        # sample_roi + features, and these targets serve as its ground truth.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        # Head regression outputs are 128x84 (locs) and 128x21 (scores).
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        # From ~20000 anchors and the gt bbox, derive per-anchor offsets
        # and fg/bg labels for the sampled anchors.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        # Smooth-L1 location loss between the RPN's predicted offsets and
        # the anchor-target offsets (rpn_sigma controls the transition).
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # Cross-entropy between RPN scores and anchor-target labels;
        # -1 marks anchors excluded from the classification loss.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(),
                                       ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]  # skip ignored anchors
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        # roi_cls_loc is the head output (128*84); n_sample = 128.
        n_sample = roi_cls_loc.shape[0]
        # Reshape to (128, 21, 4).
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # Pick, per RoI, the loc prediction of its ground-truth class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        # Offsets (dx, dy, dw, dh) between the 128 proposals and the gt bbox.
        gt_roi_label = at.totensor(gt_roi_label).long()  # 128 labels
        gt_roi_loc = at.totensor(gt_roi_loc)

        # Smooth-L1 loss on the head's location predictions.
        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)
        # Cross-entropy loss on the head's class scores.
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        # Sum the four losses and append the total.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]
        return LossTuple(*losses)

    # One full optimization step: zero the gradients, compute every loss via
    # forward(), backpropagate the total loss, apply the optimizer update,
    # push the loss values to the meters/visualization, and return them.
    def train_step(self, imgs, bboxes, labels, scale):
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    # Model checkpointing.
    def save(self, save_optimizer=False, save_path=None, **kwargs):
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        # Auto-generate a timestamped path when none is given.
        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        # Make sure the target directory exists before writing.
        save_dir = os.path.dirname(save_path)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    # Model loading.
    def load(
            self,
            path,
            load_optimizer=True,
            parse_opt=False,
    ):
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        # Fold one LossTuple into the running-average meters.
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        # Clear all loss meters and both confusion matrices.
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        # Current average value of every loss term, for logging.
        return {k: v.value()[0] for k, v in self.meters.items()}
def on_loader_start(self, state):
    """Allocate a fresh confusion matrix whenever a loader pass begins."""
    new_matrix = ConfusionMeter(self.num_classes)
    self.confusion_matrix = new_matrix
class AdversarialAutoencoderTrainer(CommonTrainer):
    """Trainer for a semi-supervised adversarial autoencoder (AAE).

    Each training batch runs four phases, each with its own optimizer(s):
    reconstruction (encoder + decoder on unlabeled input), discriminator
    training (categorical and prior discriminators), generator training
    (encoder tries to fool the discriminators), and a weighted
    semi-supervised classification phase on labeled input.
    """

    def __init__(self, args, problem, device):
        super().__init__(args, problem, device)
        # One optimizer per training phase; all created in init_model().
        self.encoder_semisup_opt = None
        self.encoder_generator_opt = None
        self.encoder_reconstruction_opt = None
        self.decoder_opt = None
        self.discriminator_prior_opt = None
        self.discriminator_cat_opt = None
        self.optimizers = []
        # If set, supervised examples are weighted by the estimated density
        # of their latent code under a standard normal.
        self.use_pdf = args.use_density_weights
        self.normalize_inputs = None
        self.schedulers = None

    def get_test_metric_name(self):
        # Model selection is driven by validation accuracy.
        return "test_accuracy"

    def is_better(self, metric, previous_metric):
        # Accuracy: larger is better.
        return metric > previous_metric

    def create_training_performance_estimators(self):
        """Build the per-phase loss trackers used during training."""
        performance_estimators = PerformanceList()
        performance_estimators += [FloatHelper("reconstruction_loss")]
        performance_estimators += [FloatHelper("discriminator_loss")]
        performance_estimators += [FloatHelper("generator_loss")]
        performance_estimators += [FloatHelper("semisup_loss")]
        performance_estimators += [FloatHelper("weight")]
        self.training_performance_estimators = performance_estimators
        return performance_estimators

    def create_test_performance_estimators(self):
        """Build the metric trackers used during evaluation."""
        performance_estimators = PerformanceList()
        performance_estimators += [FloatHelper("reconstruction_loss")]
        performance_estimators += [LossHelper("test_loss")]
        performance_estimators += [AccuracyHelper("test_")]
        performance_estimators += [FloatHelper("weight")]
        self.test_performance_estimators = performance_estimators
        return performance_estimators

    def init_model(self, create_model_function, class_frequencies=None):
        """Create the network, then one Adam optimizer per phase, a scheduler
        per optimizer, and (optionally) the input normalization closure.

        NOTE(review): discriminator phases use 0.6x the base lr and the
        decoder/reconstruction/generator phases 0.8x — presumably tuned
        constants; confirm before changing.
        """
        super().init_model(create_model_function, class_frequencies)
        self.encoder_semisup_opt = torch.optim.Adam(self.net.encoder.parameters(),
                                                    lr=self.args.lr,
                                                    weight_decay=self.args.L2)
        self.encoder_generator_opt = torch.optim.Adam(self.net.encoder.parameters(),
                                                      lr=self.args.lr * 0.8,
                                                      weight_decay=self.args.L2)
        self.encoder_reconstruction_opt = torch.optim.Adam(self.net.encoder.parameters(),
                                                           lr=self.args.lr * 0.8,
                                                           weight_decay=self.args.L2)
        self.decoder_opt = torch.optim.Adam(self.net.decoder.parameters(),
                                            lr=self.args.lr * 0.8,
                                            weight_decay=self.args.L2)
        self.discriminator_prior_opt = torch.optim.Adam(self.net.discriminator_prior.parameters(),
                                                        lr=self.args.lr * 0.6,
                                                        weight_decay=self.args.L2)
        self.discriminator_cat_opt = torch.optim.Adam(self.net.discriminator_cat.parameters(),
                                                      lr=self.args.lr * 0.6,
                                                      weight_decay=self.args.L2)
        self.optimizers = [
            self.encoder_semisup_opt,
            self.encoder_generator_opt,
            self.encoder_reconstruction_opt,
            self.decoder_opt,
            self.discriminator_prior_opt,
            self.discriminator_cat_opt,
        ]
        self.schedulers = []
        for optimizer in self.optimizers:
            self.schedulers += [self.create_scheduler_for_optimizer(optimizer)]
        if self.args.normalize:
            problem_mean = self.problem.load_tensor("input", "mean")
            problem_std = self.problem.load_tensor("input", "std")
        # The lambda re-checks args.normalize so it degrades to the identity
        # when normalization is off (problem_mean/std are then never read).
        self.normalize_inputs = lambda x: normalize_mean_std(x, problem_mean=problem_mean,
                                                             problem_std=problem_std) if self.args.normalize else x

    def train_one_batch(self, performance_estimators, batch_idx, input_s, target_s, meta_data, input_u):
        """Run the four AAE phases on one (labeled input_s, unlabeled input_u)
        batch pair and record the per-phase losses."""
        self.zero_grad_all_optimizers()
        self.num_classes = len(target_s[0])

        # Phase 1 — reconstruction: encoder + decoder on unlabeled input.
        self.net.encoder.train()
        self.net.decoder.train()
        reconstruction_loss = self.net.get_reconstruction_loss(input_u)
        reconstruction_loss.backward()
        for opt in [self.decoder_opt, self.encoder_reconstruction_opt]:
            opt.step()

        # Phase 2 — discriminators (categorical and prior).
        self.net.encoder.train()
        self.net.discriminator_cat.train()
        self.net.discriminator_prior.train()
        self.zero_grad_all_optimizers()
        # Empirical class frequencies define the categorical prior.
        genotype_frequencies = self.class_frequencies["softmaxGenotype"]
        category_prior = (genotype_frequencies / torch.sum(genotype_frequencies)).numpy()
        discriminator_loss = self.net.get_discriminator_loss(common_trainer=self, model_input=input_u,
                                                             category_prior=category_prior,
                                                             recode_labels=lambda x: recode_for_label_smoothing(x, epsilon=self.epsilon))
        discriminator_loss.backward()
        for opt in [self.discriminator_cat_opt, self.discriminator_prior_opt]:
            opt.step()
        self.zero_grad_all_optimizers()

        # Phase 3 — generator: train the encoder to fool the discriminators.
        self.net.encoder.train()
        generator_loss = self.net.get_generator_loss(input_u)
        generator_loss.backward()
        for opt in [self.encoder_generator_opt]:
            opt.step()
        self.zero_grad_all_optimizers()

        # Phase 4 — weighted semi-supervised classification on labeled input.
        weight = 1
        if self.use_pdf:
            self.net.encoder.train()
            _, latent_code = self.net.encoder(input_s)
            weight *= self.estimate_example_density_weight(latent_code)
        indel_weight = self.args.indel_weight_factor
        snp_weight = 1.0
        weight *= self.estimate_batch_weight(meta_data, indel_weight=indel_weight,
                                             snp_weight=snp_weight)
        self.net.encoder.train()
        semisup_loss = self.net.get_semisup_loss(input_s, target_s) * weight
        semisup_loss.backward()
        for opt in [self.encoder_semisup_opt]:
            opt.step()
        self.zero_grad_all_optimizers()

        performance_estimators.set_metric(batch_idx, "reconstruction_loss", reconstruction_loss.item())
        performance_estimators.set_metric(batch_idx, "discriminator_loss", discriminator_loss.item())
        performance_estimators.set_metric(batch_idx, "generator_loss", generator_loss.item())
        performance_estimators.set_metric(batch_idx, "semisup_loss", semisup_loss.item())
        performance_estimators.set_metric(batch_idx, "weight", weight)

        if self.args.latent_code_output is not None:
            _, latent_code = self.net.encoder(input_u)
            # Randomly select n rows from the minibatch to keep track of the latent codes for
            idxs_to_sample = torch.randperm(latent_code.size()[0])[:self.args.latent_code_n_per_minibatch]
            for row_idx in idxs_to_sample:
                latent_code_row = latent_code[row_idx]
                # Paired draw from N(0, 1) for later visual comparison.
                self.gaussian_codes.append(torch.squeeze(draw_from_gaussian(latent_code_row.size()[0], 1)))
                self.latent_codes.append(latent_code_row)

        if not self.args.no_progress:
            progress_bar(batch_idx * self.mini_batch_size,
                         self.max_training_examples,
                         performance_estimators.progress_message(
                             ["reconstruction_loss", "discriminator_loss", "generator_loss", "semisup_loss"]))

    def reset_before_train_epoch(self):
        # Latent-code bookkeeping accumulated by train_one_batch.
        self.latent_codes = []
        self.gaussian_codes = []

    def train_semisup_aae(self, epoch, performance_estimators=None):
        """Run one training epoch over paired (labeled, unlabeled) loaders and
        optionally dump per-dimension latent-code histograms to disk."""
        if performance_estimators is None:
            performance_estimators = self.create_training_performance_estimators()
        print('\nTraining, epoch: %d' % epoch)
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()
        self.net.train()
        supervised_grad_norm = 1.
        unsupervised_loss_acc = 0
        num_batches = 0
        train_loader_subset = self.problem.train_loader_subset_range(0, self.args.num_training)
        unlabeled_loader = self.problem.unlabeled_loader()
        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(train_loader_subset, unlabeled_loader),
            device=self.device,
            batch_names=["training", "unlabeled"],
            requires_grad={"training": ["input"], "unlabeled": ["input"]},
            recode_functions={
                "softmaxGenotype": lambda x: recode_for_label_smoothing(x, self.epsilon),
                "input": self.normalize_inputs
            },
            vectors_to_keep=["metaData"]
        )
        self.reset_before_train_epoch()
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s = data_dict["training"]["input"]
                target_s = data_dict["training"]["softmaxGenotype"]
                input_u = data_dict["unlabeled"]["input"]
                meta_data = data_dict["training"]["metaData"]
                num_batches += 1
                self.train_one_batch(performance_estimators, batch_idx, input_s, target_s, meta_data, input_u)
                if ((batch_idx + 1) * self.mini_batch_size) > self.max_training_examples:
                    break
        finally:
            # Always release the provider's worker threads.
            data_provider.close()

        latent_code_device = torch.device("cpu")
        if self.args.latent_code_output is not None:
            # Each dimension in latent code should be Gaussian distributed, so take histogram of each column
            # Plot histograms later to see how they compare to Gaussian
            latent_code_tensor = torch.stack(self.latent_codes).to(latent_code_device)
            latent_code_histograms = [torch.histc(latent_code_tensor[:, col_idx],
                                                  bins=self.args.latent_code_bins).data.numpy()
                                      for col_idx in range(latent_code_tensor.size()[1])]
            gaussian_code_tensor = torch.stack(self.gaussian_codes).to(latent_code_device)
            gaussian_code_histograms = [torch.histc(gaussian_code_tensor[:, col_idx],
                                                    bins=self.args.latent_code_bins).data.numpy()
                                        for col_idx in range(gaussian_code_tensor.size()[1])]
            torch.save({
                "latent": latent_code_histograms,
                "gaussian": gaussian_code_histograms,
            }, "{}_{}.pt".format(self.args.latent_code_output, epoch))
        return performance_estimators

    def estimate_example_density_weight(self, latent_code):
        """Average the standard-normal pdf over the latent dimensions and
        return max(p, 1 - p); pdf values above 1 are skipped."""
        cumulative_pdf = 0
        n_pdf = 0
        for z in latent_code:
            pdf = norm.pdf(z.data)
            # in the early stages of training, pdf larger than 1 if latent variable far from normally distributed
            valid_pdf = pdf[pdf <= 1]
            cumulative_pdf += numpy.sum(valid_pdf)
            n_pdf = n_pdf + len(valid_pdf)
        cumulative_pdf /= n_pdf
        return max(cumulative_pdf, 1 - cumulative_pdf)

    def zero_grad_all_optimizers(self):
        # All optimizers share the encoder's parameters, so grads must be
        # cleared on every one of them between phases.
        for optimizer in self.optimizers:
            optimizer.zero_grad()

    def reset_before_test_epoch(self):
        # num_classes is set by train_one_batch from the label width.
        self.cm = ConfusionMeter(self.num_classes, normalized=False)

    def test_semisup_aae(self, epoch, performance_estimators=None):
        """Evaluate on the validation set, then advance LR schedulers using
        the configured test metric."""
        print('\nTesting, epoch: %d' % epoch)
        if performance_estimators is None:
            performance_estimators = self.create_test_performance_estimators()
        self.net.eval()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()
        validation_loader_subset = self.problem.validation_loader_range(0, self.args.num_validation)
        data_provider = MultiThreadedCpuGpuDataProvider(iterator=zip(validation_loader_subset),
                                                        device=self.device,
                                                        batch_names=["validation"],
                                                        requires_grad={"validation": []},
                                                        recode_functions={
                                                            "input": self.normalize_inputs
                                                        },
                                                        vectors_to_keep=["softmaxGenotype", "metaData"]
                                                        )
        self.reset_before_test_epoch()
        errors = None
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s = data_dict["validation"]["input"]
                target_s = data_dict["validation"]["softmaxGenotype"]
                meta_data = data_dict["validation"]["metaData"]
                self.test_one_batch(performance_estimators, batch_idx, input_s, target_s, meta_data, errors)
                if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
                    break
            # print()
        finally:
            data_provider.close()
        # Apply learning rate schedules:
        test_metric = performance_estimators.get_metric(self.get_test_metric_name())
        assert test_metric is not None, (self.get_test_metric_name() + "must be found among estimated performance metrics")
        if not self.args.constant_learning_rates:
            for scheduler in self.schedulers:
                scheduler.step(test_metric, epoch)
        self.compute_after_test_epoch()
        return performance_estimators

    def test_one_batch(self, performance_estimators, batch_idx, input_s, target_s, metadata=None, errors=None):
        """Score one validation batch: reconstruction loss, weighted
        classification loss, accuracy, and the confusion matrix."""
        # Estimate the reconstruction loss on validation examples:
        reconstruction_loss = self.net.get_reconstruction_loss(input_s)
        # now evaluate prediction of categories:
        categories_predicted, latent_code = self.net.encoder(input_s)
        categories_predicted_p = self.get_p(categories_predicted)
        # NaN != NaN, so this replaces NaN probabilities with zero.
        categories_predicted_p[categories_predicted_p != categories_predicted_p] = 0.0
        _, target_index = torch.max(target_s, dim=1)
        _, output_index = torch.max(categories_predicted_p, dim=1)
        categories_loss = self.net.semisup_loss_criterion(categories_predicted, target_s)

        weight = 1
        indel_weight = self.args.indel_weight_factor
        snp_weight = 1.0
        if self.use_pdf:
            weight *= self.estimate_example_density_weight(latent_code)
        else:
            weight *= self.estimate_batch_weight(metadata, indel_weight=indel_weight,
                                                 snp_weight=snp_weight)
        self.cm.add(predicted=output_index.data, target=target_index.data)
        performance_estimators.set_metric(batch_idx, "reconstruction_loss", reconstruction_loss.item())
        performance_estimators.set_metric(batch_idx, "weight", weight)
        performance_estimators.set_metric_with_outputs(batch_idx, "test_accuracy", reconstruction_loss.item(),
                                                       categories_predicted_p, target_index)
        performance_estimators.set_metric_with_outputs(batch_idx, "test_loss", categories_loss.item() * weight,
                                                       categories_predicted_p, target_s)
        if not self.args.no_progress:
            progress_bar(batch_idx * self.mini_batch_size, self.max_validation_examples,
                         performance_estimators.progress_message(["test_loss", "test_accuracy", "reconstruction_loss"]))

    def compute_after_test_epoch(self):
        # Keep the transposed confusion matrix; remember the first one as the
        # best-model matrix until a better checkpoint replaces it.
        self.confusion_matrix = self.cm.value().transpose()
        if self.best_model_confusion_matrix is None:
            self.best_model_confusion_matrix = torch.from_numpy(self.confusion_matrix).to(self.device)
class Trainer(nn.Module):
    """Training wrapper for a Faster R-CNN model.

    Computes the four standard Faster R-CNN losses (RPN localization /
    classification and RoI-head localization / classification) plus their
    sum, tracks fg/bg and per-class confusion meters, and appends losses
    and confusion matrices to text files on every step.

    Args:
        faster_rcnn: the Faster R-CNN model to train; must expose
            ``extractor``, ``rpn``, ``head``, ``norm_mean`` and ``norm_std``.
        LR (float): Adam learning rate.
    """

    def __init__(self, faster_rcnn, LR=0.001):
        super(Trainer, self).__init__()
        self.faster_rcnn = faster_rcnn
        # Weights for the two localization loss terms.
        self.rpn_lamda = 3
        self.roi_lamda = 1
        # Target creators produce gt offsets/labels for anchors and proposals.
        self.anchor_target = tools_gpu.TargetAnchor()
        self.proposal_target = tools_gpu.TargetProposal()
        self.norm_mean = faster_rcnn.norm_mean
        self.norm_std = faster_rcnn.norm_std
        self.optimizer = torch.optim.Adam(self.faster_rcnn.parameters(), lr=LR)
        # 2 classes (fg/bg) for the RPN; 1002 head classes — presumably
        # 1001 foreground classes + background, TODO confirm.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(1002)
        # Most recent scalar values of the five losses (filled by train_step).
        self.loss = [[], [], [], [], []]
        # FIX: the original opened 'losses.txt', 'rpn_cm' and 'roi_cm' here
        # into self.file1/2/3 but never used or closed those handles (all real
        # logging reopens the files with `with open(...)`), leaking file
        # descriptors.  The unused handles have been removed.

    def forward(self, imgs, bboxes, labels, scale):
        """Compute all losses for a single image (batch size must be 1).

        Returns:
            list of 5 tensors: [rpn_loc_loss, rpn_cls_loss, roi_loc_loss,
            roi_cls_loss, total].
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')
        _, _, h, w = imgs.shape
        img_size = (h, w)
        features = self.faster_rcnn.extractor(imgs)
        rpn_params, rpn_scores, rois, roi_index, anchor = self.faster_rcnn.rpn(
            features, img_size, scale)
        # Batch size is 1, so drop the batch dimension everywhere.
        bbox = bboxes[0]
        label = labels[0]
        rpn_params = rpn_params[0]
        rpn_score = rpn_scores[0]
        # Sample proposals and compute their regression targets and labels.
        sample_roi, sample_params, sample_label = self.proposal_target(
            rois, bbox.detach().cpu().numpy(), label.detach().cpu().numpy(),
            self.norm_mean, self.norm_std)
        # All zeros because only batch size 1 is supported.
        sample_roi_index = np.zeros((len(sample_roi), ))
        roi_params, roi_scores = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)
        # ------------------ RPN losses ------------------
        gt_rpn_params, gt_rpn_label = self.anchor_target(
            bbox.detach().cpu().numpy(), anchor, img_size)
        gt_rpn_params = torch.from_numpy(gt_rpn_params).float().cuda()
        gt_rpn_label = torch.from_numpy(gt_rpn_label).long().cuda()
        rpn_params_loss = self.rpn_lamda * params_loss(
            rpn_params, gt_rpn_params, gt_rpn_label)
        # Label -1 marks anchors ignored during training.
        rpn_class_loss = F.cross_entropy(rpn_score, gt_rpn_label, ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = rpn_score[gt_rpn_label > -1]
        self.rpn_cm.add(_rpn_score.data, _gt_rpn_label.data)
        with open('rpn_cm.txt', 'a') as file1:
            file1.write(str(self.rpn_cm.value()) + '\n\n')
        # ------------------ RoI head losses ------------------
        n_sample = roi_params.shape[0]
        roi_params = roi_params.view(n_sample, -1, 4)
        sample_label = torch.from_numpy(sample_label).long().cuda()
        sample_params = torch.from_numpy(sample_params).float().cuda()
        # Select, per sampled RoI, the regression row of its gt class.
        roi_params = roi_params[torch.arange(n_sample).long().cuda(), sample_label]
        roi_params_loss = self.roi_lamda * params_loss(
            roi_params.contiguous(), sample_params, sample_label)
        roi_class_loss = F.cross_entropy(roi_scores, sample_label)
        self.roi_cm.add(roi_scores.data, sample_label.data)
        with open('roi_cm.txt', 'a') as file2:
            file2.write(str(self.roi_cm.value()) + '\n\n')
        losses = [
            rpn_params_loss, rpn_class_loss, roi_params_loss, roi_class_loss
        ]
        losses = losses + [sum(losses)]
        return losses

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step, append the scalar losses to
        'losses.txt', and return the list of loss tensors.

        Returning the losses is new (the original returned None) and is
        backward compatible; it matches the sibling trainer classes.
        """
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        for i in range(len(losses)):
            self.loss[i] = losses[i].cpu().item()
        loss_copy = copy.deepcopy(self.loss)
        with open('losses.txt', 'a') as file3:
            file3.write(str(loss_copy) + '\n\n')
        # losses[4] is the sum of the four component losses.
        losses[4].backward()
        self.optimizer.step()
        return losses

    def save(self):
        """Save only the model weights (not the optimizer)."""
        torch.save(self.faster_rcnn.state_dict(), 'faster_rcnn_parameters.pkl')

    def load(self):
        """Restore model weights saved by :meth:`save`."""
        self.faster_rcnn.load_state_dict(
            torch.load('faster_rcnn_parameters.pkl'))
class fasterrcnn_train(nn.Module):
    """Training wrapper for a Faster R-CNN model.

    Computes RPN and RoI-head localization/classification losses, keeps
    running-average meters and confusion meters, and provides
    save/load/train_step helpers.
    """

    def __init__(self, faster_rcnn):
        super(fasterrcnn_train, self).__init__()
        self.faster_rcnn = faster_rcnn
        # Sigmas for the smooth-L1 localization losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma
        # target creator create gt_bbox gt_label etc as training targets.
        self.anchortarget = gtor.Anchortarget_generator()
        self.propasaltarget = gtor.ProposalTargetCreator()
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std
        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)
        # indicators for training status: fg/bg for the RPN, 21 classes
        # (20 foreground + background) for the head.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bbox, label, n_pre_nms, n_post_nms, scale):
        """Compute the four component losses plus their sum for one image.

        Returns:
            LossTuple of 5 losses.
        """
        _, _, H, W = imgs.shape
        img_size = (H, W)
        feature = self.faster_rcnn.extractor(imgs)
        rpn_loc, rpn_cls, roi, roi_indices, anchor = self.faster_rcnn.rpn(feature, img_size, n_pre_nms, n_post_nms, scale)
        # Ground-truth offsets/labels for anchors and sampled proposals.
        gt_rpn_loc, gt_rpn_label = self.anchortarget(at.tonumpy(bbox), anchor, img_size)
        sample_roi, gt_roi_loc, gt_roi_label = self.propasaltarget(roi, at.tonumpy(bbox), at.tonumpy(label))
        sample_roi_index = t.zeros(len(sample_roi))  # batch size = 1, so all indices are 0
        roi_cls, roi_loc = self.faster_rcnn.roihead(feature, sample_roi, sample_roi_index)

        # ---- RPN loss ----
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc[0, :, :], gt_rpn_loc, gt_rpn_label.data, self.rpn_sigma)
        # Label -1 marks ignored anchors (ignore_index=-1).
        rpn_cls_loss = F.cross_entropy(rpn_cls[0, :, :], gt_rpn_label.cuda(), ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_cls[0, :, :])[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ---- RoI head loss ----
        n_sample = roi_loc.shape[0]
        # Reshape to (n_sample, n_class, 4) and pick each RoI's gt-class row.
        roi_loc = roi_loc.view(n_sample, -1, 4)
        roi_loc = roi_loc[t.arange(0, n_sample).long().cuda(), at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)
        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc, gt_roi_label.data, self.roi_sigma)
        roi_cls_loss = F.cross_entropy(roi_cls, gt_roi_label.cuda())
        self.roi_cm.add(at.totensor(roi_cls, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]
        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, n_pre_nms, n_post_nms, scale):
        """One optimization step: forward, backprop total_loss, optimizer
        step, update meters; returns the LossTuple."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, n_pre_nms, n_post_nms, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()
        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()
        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()
        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_
        save_dir = os.path.dirname(save_path)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Load a checkpoint saved by :meth:`save`; also supports a legacy
        bare state_dict (config/optimizer are then not restored)."""
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        # Push each named loss into its running-average meter.
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        # Reset loss averages and both confusion meters (e.g. per epoch).
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        # value()[0] is the running mean of each AverageValueMeter.
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """ Wrapper for conveniently training. returns losses

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn, n_fg_class=20):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.n_fg_class = n_fg_class
        # Sigmas for the smooth-L1 localization losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # indicators for training status: fg/bg for the RPN and
        # n_fg_class + 1 (background) classes for the head.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(n_fg_class + 1)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """ Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd..Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        # This variant also handles images with zero ground-truth boxes.
        n_bbox = bbox.shape
        if len(n_bbox) > 0:
            n_bbox = n_bbox[0]
        if n_bbox > 0:
            gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
                at.tonumpy(bbox),
                anchor,
                img_size)
            gt_rpn_label = at.tovariable(gt_rpn_label).long()
            gt_rpn_loc = at.tovariable(gt_rpn_loc)
            rpn_loc_loss = _fast_rcnn_loc_loss(
                rpn_loc,
                gt_rpn_loc,
                gt_rpn_label.data,
                self.rpn_sigma)
        else:
            # if no bboxes, should have no rpn loc loss
            rpn_loc_loss = t.tensor(0.)
            if opt.use_cuda:
                rpn_loc_loss = rpn_loc_loss.cuda()

        # if no bboxes, all region labels are 0 (background)
        if n_bbox == 0:
            gt_rpn_label = t.tensor([0 for i in range(anchor.shape[0])])

        # NOTE: default value of ignore_index is -100 ...
        # Counts per non-ignored label (index 0 of unique() output is the
        # ignore label -1 when present, hence the [1:] slice) — used to
        # reweight the fg/bg cross-entropy.
        fg_bg_count = np.unique(gt_rpn_label.detach().cpu(), return_counts=True)[1][1:]
        if opt.reduce_bg_weight:
            # Reweight foreground / background for the case we couldn't sample identical numbers
            rpn_class_weights = 1.0 / fg_bg_count
            rpn_class_weights = t.FloatTensor(rpn_class_weights / np.sum(rpn_class_weights) * 2)
        else:
            rpn_class_weights = None

        if opt.use_cuda:
            rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1,
                                           weight=rpn_class_weights.cuda() if rpn_class_weights is not None else None)
        else:
            rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label, ignore_index=-1,
                                           weight=rpn_class_weights)
        # Track fg/bg classification quality on non-ignored anchors only.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Reshape to (n_sample, n_class, 4), then select per RoI the
        # regression row of its ground-truth class.
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        if opt.use_cuda:
            roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                                  at.totensor(gt_roi_label).long()]
        else:
            roi_loc = roi_cls_loc[t.arange(0, n_sample).long(),
                                  at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        if n_bbox > 0:
            roi_loc_loss = _fast_rcnn_loc_loss(
                roi_loc.contiguous(),
                gt_roi_loc,
                gt_roi_label.data,
                self.roi_sigma)
        else:
            # no roi loc loss if no gt bboxes
            roi_loc_loss = t.tensor(0.)
            if opt.use_cuda:
                roi_loc_loss = roi_loc_loss.cuda()

        if opt.reduce_bg_weight:
            # Down-weight the background class relative to the foregrounds.
            bg_weight = 1.0 / gt_roi_label.size()[0]
            class_weights = t.FloatTensor(np.hstack([bg_weight, np.ones((self.n_fg_class,))]))
        else:
            class_weights = None

        if opt.use_cuda:
            roi_cls_loss = nn.CrossEntropyLoss(weight=class_weights.cuda() if class_weights is not None else None)(roi_score, gt_roi_label.cuda())
        else:
            roi_cls_loss = nn.CrossEntropyLoss(weight=class_weights)(roi_score, gt_roi_label)

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        sum_losses = sum(losses)
        losses = losses + [sum_losses]

        return LossTuple(*losses)
        # ...def forward(self, imgs, bboxes, labels, scale)

    def train_step(self, imgs, bboxes, labels, scale):
        """One optimization step: forward, backprop total_loss, optimizer
        step, update meters; returns the LossTuple."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """ Serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        # kwargs are additionally flattened into top-level keys.
        for k_, v_ in kwargs.items():
            save_dict[k_] = v_

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            if 'best_map' in kwargs.keys():
                save_path += '_%s' % kwargs['best_map']

        t.save(save_dict, save_path)
        return save_path

    def load(self, state_dict, load_optimizer=True, parse_opt=False, ):
        """Restore from an already-loaded checkpoint dict (NOTE: unlike the
        sibling trainers this takes the dict itself, not a file path, and
        returns the dict)."""
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return state_dict

    def update_meters(self, losses):
        # Push each named loss into its running-average meter; values are
        # detached and moved to CPU before accumulation.
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key].detach().cpu().numpy())

    def reset_meters(self):
        # Reset loss averages and both confusion meters (e.g. per epoch).
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        # value()[0] is the running mean of each AverageValueMeter.
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """wrapper for conveniently training. return losses

    This variant trains a two-extractor Faster R-CNN with the classic
    4-step alternating scheme (step1..step4) instead of a single joint
    forward/backward.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN): A Faster R-CNN model that is
            going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = configurations.rpn_sigma
        self.roi_sigma = configurations.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        # Build the SGD optimizer by hand: biases get double learning rate
        # and no weight decay (standard Faster R-CNN recipe).
        lr = configurations.lr
        params = []
        for key, value in dict(faster_rcnn.named_parameters()).items():
            if value.requires_grad:
                if 'bias' in key:
                    params += [{
                        'params': [value],
                        'lr': lr * 2,
                        'weight_decay': 0
                    }]
                else:
                    params += [{
                        'params': [value],
                        'lr': lr,
                        'weight_decay': configurations.weight_decay
                    }]
        self.optimizer = t.optim.SGD(params, momentum=0.9)
        #self.optimizer = self.faster_rcnn.get_optimizer()

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter()
                       for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale, epoch):
        # Intentionally unused: training is driven by the four explicit
        # step1..step4 methods below.
        pass

    def step1(self, imgs, bboxes, labels, scale, epoch):
        """Alternating-training step 1: train the RPN alone on extractor1
        features; ROI losses are zeroed and only total_rpn is backpropped."""
        # train RPN alone
        self.optimizer.zero_grad()
        _, _, H, W = imgs.shape
        img_size = (H, W)

        ############ EXTRACTOR STEP #################
        features1 = self.faster_rcnn.extractor1(imgs)

        ############ RPN STEP #######################
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            features1,
            img_size,
            scale)

        # Batch size is one, so drop the batch dimension.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)
        # label -1 marks anchors ignored by the target creator.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(),
                                       ignore_index=-1)

        # ROI losses are not trained in this step; keep zeros so the
        # LossTuple shape stays consistent.
        roi_loc_loss = t.tensor([0]).cuda()
        roi_cls_loss = t.tensor([0]).cuda()

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        # LossTuple: 4 parts + total + total_rpn + total_roi.
        losses = losses + [sum(losses)] + [rpn_loc_loss + rpn_cls_loss] \
            + [roi_loc_loss + roi_cls_loss]
        all_losses = LossTuple(*losses)
        all_losses.total_rpn.backward()
        self.optimizer.step()
        self.update_meters(all_losses)
        return all_losses

    def step2(self, imgs, bboxes, labels, scale, epoch):
        """Alternating-training step 2: train the detection head (extractor2
        features) using proposals from the (frozen-purpose) extractor1 RPN."""
        self.optimizer.zero_grad()
        _, _, H, W = imgs.shape
        img_size = (H, W)

        ############ EXTRACTOR STEP #################
        features1 = self.faster_rcnn.extractor1(imgs)
        features2 = self.faster_rcnn.extractor2(imgs)

        ############ RPN STEP #######################
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            features1,
            img_size,
            scale)

        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        ############ HEAD STEP #######################
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # All-zero indices because only batch size 1 is supported.
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features2,
                                                       sample_roi,
                                                       sample_roi_index)

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # Pick the predicted loc of each sample's ground-truth class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)
        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        # RPN losses are not trained in this step.
        rpn_loc_loss = t.tensor([0]).cuda()
        rpn_cls_loss = t.tensor([0]).cuda()

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)] + [rpn_loc_loss + rpn_cls_loss] \
            + [roi_loc_loss + roi_cls_loss]
        all_losses = LossTuple(*losses)
        all_losses.total_roi.backward()
        self.optimizer.step()
        self.update_meters(all_losses)
        return all_losses

    def step3(self, imgs, bboxes, labels, scale, epoch):
        """Alternating-training step 3: retrain the RPN, now on extractor2
        features (same loss structure as step1)."""
        self.optimizer.zero_grad()
        _, _, H, W = imgs.shape
        img_size = (H, W)

        ############ EXTRACTOR STEP #################
        features2 = self.faster_rcnn.extractor2(imgs)

        ############ RPN STEP #######################
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            features2,
            img_size,
            scale)

        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(),
                                       ignore_index=-1)

        roi_loc_loss = t.tensor([0]).cuda()
        roi_cls_loss = t.tensor([0]).cuda()

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)] + [rpn_loc_loss + rpn_cls_loss] \
            + [roi_loc_loss + roi_cls_loss]
        all_losses = LossTuple(*losses)
        all_losses.total_rpn.backward()
        self.optimizer.step()
        self.update_meters(all_losses)
        return all_losses

    def step4(self, imgs, bboxes, labels, scale, epoch):
        """Alternating-training step 4: fine-tune the head with both RPN and
        head running on extractor2 features (same loss structure as step2)."""
        self.optimizer.zero_grad()
        _, _, H, W = imgs.shape
        img_size = (H, W)

        ############ EXTRACTOR STEP #################
        features2 = self.faster_rcnn.extractor2(imgs)

        ############ RPN STEP #######################
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            features2,
            img_size,
            scale)

        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        ############ HEAD STEP #######################
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features2,
                                                       sample_roi,
                                                       sample_roi_index)

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)
        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        rpn_loc_loss = t.tensor([0]).cuda()
        rpn_cls_loss = t.tensor([0]).cuda()

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)] + [rpn_loc_loss + rpn_cls_loss] \
            + [roi_loc_loss + roi_cls_loss]
        all_losses = LossTuple(*losses)
        all_losses.total_roi.backward()
        self.optimizer.step()
        self.update_meters(all_losses)
        return all_losses

    ######################################################################################

    def update_meters(self, losses):
        """Push each named loss of a LossTuple into its AverageValueMeter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Clear all loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return the running mean of every tracked loss."""
        return {k: v.value()[0] for k, v in self.meters.items()}
def train(
        main_options: MainOptions,
        train_options: TrainOptions
) -> None:
    """Train the multi-agent RL image classifier.

    Builds dataset, models and optimizer from the option bundles, runs
    ``nb_epoch`` epochs of policy-gradient training with an epsilon
    exploration schedule decayed per batch, evaluates after every epoch,
    logs metrics/artifacts to mlflow, and finally renders the agents'
    steps on one random test image.

    Args:
        main_options: run-level options (run id, agent count, step count,
            cuda flag).
        train_options: training hyper-parameters, model sizes and paths.

    Raises:
        Exception: if the model output path exists but is not a directory.
    """
    assert train_options.dim == 2 or train_options.dim == 3, \
        "Only 2D is supported at the moment " \
        "for data loading and observation / transition. " \
        "See torchvision.datasets.ImageFolder"

    output_dir = train_options.output_dir

    model_dir = "models"
    if not exists(join(output_dir, model_dir)):
        mkdir(join(output_dir, model_dir))
    if exists(join(output_dir, model_dir)) \
            and not isdir(join(output_dir, model_dir)):
        raise Exception(f"\"{join(output_dir, model_dir)}\""
                        f"is not a directory.")

    exp_name = "MARLClassification"
    mlflow.set_experiment(exp_name)

    mlflow.start_run(run_name=f"train_{main_options.run_id}")

    mlflow.log_param("output_dir", output_dir)
    mlflow.log_param("model_dir", join(output_dir, model_dir))

    img_pipeline = tr.Compose([
        tr.ToTensor(),
        custom_tr.NormalNorm()
    ])

    # Choose the dataset from the feature-extractor identifier prefix.
    if train_options.ft_extr_str.startswith("resisc"):
        dataset_constructor = RESISC45Dataset
    elif train_options.ft_extr_str.startswith("mnist"):
        dataset_constructor = MNISTDataset
    else:
        dataset_constructor = KneeMRIDataset

    nn_models = ModelsWrapper(
        train_options.ft_extr_str,
        train_options.window_size,
        train_options.hidden_size_belief,
        train_options.hidden_size_action,
        train_options.hidden_size_msg,
        train_options.hidden_size_state,
        train_options.dim,
        train_options.action,
        train_options.nb_class,
        train_options.hidden_size_linear_belief,
        train_options.hidden_size_linear_action
    )
    dataset = dataset_constructor(img_pipeline)
    marl_m = MultiAgent(
        main_options.nb_agent,
        nn_models,
        train_options.hidden_size_belief,
        train_options.hidden_size_action,
        train_options.window_size,
        train_options.hidden_size_msg,
        train_options.action,
        obs_generic,
        trans_generic
    )

    mlflow.log_params({
        "ft_extractor": train_options.ft_extr_str,
        "window_size": train_options.window_size,
        "hidden_size_belief": train_options.hidden_size_belief,
        "hidden_size_action": train_options.hidden_size_action,
        "hidden_size_msg": train_options.hidden_size_msg,
        "hidden_size_state": train_options.hidden_size_state,
        "dim": train_options.dim,
        "action": train_options.action,
        "nb_class": train_options.nb_class,
        "hidden_size_linear_belief":
            train_options.hidden_size_linear_belief,
        "hidden_size_linear_action":
            train_options.hidden_size_linear_action,
        "nb_agent": main_options.nb_agent,
        "frozen_modules": train_options.frozen_modules,
        "epsilon": train_options.epsilon,
        "epsilon_decay": train_options.epsilon_decay,
        "nb_epoch": train_options.nb_epoch,
        "learning_rate": train_options.learning_rate,
        "img_size": train_options.img_size,
        "retry_number": train_options.retry_number,
        "step": main_options.step,
        "batch_size": train_options.batch_size
    })

    # FIX: use a context manager so the file handle is closed even if
    # json.dump raises (original used bare open()/close()).
    with open(join(output_dir, "class_to_idx.json"), "w") as json_f:
        json.dump(dataset.class_to_idx, json_f)

    mlflow.log_artifact(join(output_dir, "class_to_idx.json"))

    cuda = main_options.cuda
    device_str = "cpu"

    # Pass pytorch stuff to GPU
    # for agents hidden tensors (belief etc.)
    if cuda:
        nn_models.cuda()
        marl_m.cuda()
        device_str = "cuda"

    mlflow.log_param("device", device_str)

    module_to_train = ModelsWrapper.module_list \
        .difference(train_options.frozen_modules)

    # for RL agent models parameters
    optim = th.optim.Adam(
        nn_models.get_params(list(module_to_train)),
        lr=train_options.learning_rate
    )

    # 85% / 15% random train/test split.
    idx = th.randperm(len(dataset))
    idx_train = idx[:int(0.85 * idx.size(0))]
    idx_test = idx[int(0.85 * idx.size(0)):]

    train_dataset = Subset(dataset, idx_train)
    test_dataset = Subset(dataset, idx_test)

    train_dataloader = DataLoader(
        train_dataset, batch_size=train_options.batch_size,
        shuffle=True, num_workers=3, drop_last=False
    )
    test_dataloader = DataLoader(
        test_dataset, batch_size=train_options.batch_size,
        shuffle=True, num_workers=3, drop_last=False
    )

    epsilon = train_options.epsilon

    curr_step = 0

    for e in range(train_options.nb_epoch):
        nn_models.train()

        sum_loss = 0.
        i = 0

        conf_meter = ConfusionMeter(train_options.nb_class)

        tqdm_bar = tqdm(train_dataloader)
        for x_train, y_train in tqdm_bar:
            x_train, y_train = x_train.to(th.device(device_str)), \
                y_train.to(th.device(device_str))

            # pred = [Nr, Ns, Nb, Nc]
            # prob = [Nr, Ns, Nb]
            retry_pred, retry_prob = episode_retry(
                marl_m, x_train, epsilon,
                main_options.step,
                train_options.retry_number,
                train_options.nb_class, device_str
            )

            # Class one hot encoding
            y_eye = th.eye(
                train_options.nb_class,
                device=th.device(device_str)
            )[y_train.unsqueeze(0)].unsqueeze(1).repeat(
                1, main_options.step, 1, 1)

            # Update confusion meter
            # mean between trials
            conf_meter.add(
                retry_pred.detach()[:, -1, :, :].mean(dim=0),
                y_train
            )

            # L2 Loss - Classification error / reward
            # reward = -error(y_true, y_step_pred).mean(class_dim)
            r = -th.pow(y_eye - retry_pred, 2.).mean(dim=-1)

            # Compute loss
            losses = retry_prob * r.detach() + r

            # Losses mean on images batch and trials
            # maximize(E[reward]) -> minimize(-E[reward])
            loss = -losses.mean()

            # Reset gradient
            optim.zero_grad()

            # Backward on compute graph
            loss.backward()

            # Update weights
            optim.step()

            # Update epoch loss sum
            sum_loss += loss.item()

            # Compute global score
            precs, recs = prec_rec(conf_meter)

            if curr_step % 100 == 0:
                mlflow.log_metrics(
                    {"loss": loss.item(),
                     "train_prec": precs.mean().item(),
                     "train_rec": recs.mean().item(),
                     "epsilon": epsilon},
                    step=curr_step
                )

            tqdm_bar.set_description(
                f"Epoch {e} - Train, "
                f"loss = {sum_loss / (i + 1):.4f}, "
                f"eps = {epsilon:.4f}, "
                f"train_prec = {precs.mean():.3f}, "
                f"train_rec = {recs.mean():.3f}"
            )

            # Epsilon decays once per batch (not per epoch).
            epsilon *= train_options.epsilon_decay
            epsilon = max(epsilon, 0.)

            i += 1
            curr_step += 1

        sum_loss /= len(train_dataloader)

        save_conf_matrix(conf_meter, e, output_dir, "train")

        mlflow.log_artifact(
            join(output_dir, f"confusion_matrix_epoch_{e}_train.png")
        )

        nn_models.eval()
        conf_meter.reset()

        with th.no_grad():
            tqdm_bar = tqdm(test_dataloader)
            for x_test, y_test in tqdm_bar:
                x_test, y_test = x_test.to(th.device(device_str)), \
                    y_test.to(th.device(device_str))

                # Greedy policy (epsilon = 0) at evaluation time.
                preds, _ = episode(marl_m, x_test, 0., main_options.step)

                conf_meter.add(preds.detach(), y_test)

                # Compute score
                precs, recs = prec_rec(conf_meter)

                tqdm_bar.set_description(
                    f"Epoch {e} - Eval, "
                    f"eval_prec = {precs.mean():.4f}, "
                    f"eval_rec = {recs.mean():.4f}"
                )

        # Compute score
        precs, recs = prec_rec(conf_meter)

        save_conf_matrix(conf_meter, e, output_dir, "eval")

        mlflow.log_metrics(
            {"eval_prec": precs.mean(),
             "eval_recs": recs.mean()},
            step=curr_step
        )

        nn_models.json_args(
            join(output_dir, model_dir, f"marl_epoch_{e}.json")
        )
        th.save(
            nn_models.state_dict(),
            join(output_dir, model_dir, f"nn_models_epoch_{e}.pt")
        )

        mlflow.log_artifact(
            join(output_dir, model_dir, f"marl_epoch_{e}.json")
        )
        mlflow.log_artifact(
            join(output_dir, model_dir, f"nn_models_epoch_{e}.pt")
        )
        mlflow.log_artifact(
            join(output_dir, f"confusion_matrix_epoch_{e}_eval.png")
        )

    # Re-load the test split without normalisation for visualisation.
    empty_pipe = tr.Compose([
        tr.ToTensor()
    ])
    dataset_tmp = dataset_constructor(empty_pipe)
    test_dataloader_ori = Subset(dataset_tmp, idx_test)
    test_dataloader = Subset(dataset, idx_test)

    # FIX: random.randint is inclusive on both ends, so the original
    # randint(0, len(...)) could produce an index one past the end of the
    # subset and raise IndexError.
    test_idx = randint(0, len(test_dataloader_ori) - 1)

    visualize_steps(
        marl_m, test_dataloader[test_idx][0],
        test_dataloader_ori[test_idx][0],
        main_options.step, train_options.window_size,
        output_dir, train_options.nb_class,
        device_str, dataset.class_to_idx
    )

    mlflow.end_run()
class FasterRCNNTrainer(nn.Module):
    """Training wrapper for an FPN-based Faster R-CNN: runs one forward
    pass and returns the five named losses as a LossTuple."""

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Hyper-parameters used by _fast_rcnn_loc_loss when computing the
        # localisation (smooth L1) losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Samples 256 of the ~20k anchors to train the RPN
        # (at most 128 positives).
        self.anchor_target_creator = AnchorTargetCreator()
        # Samples 128 of the 2000 RPN proposals to train the RoI head
        # (at most 32 positives).
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom visualisation wrapper
        self.vis = Visualizer(env=opt.env)

        # Confusion matrices comparing predictions against ground truth;
        # the constructor argument is the number of classes.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter()
                       for k in LossTuple._fields}  # running average loss

    def forward(self, imgs, bboxes, labels, scale):
        '''
        :param imgs: (~torch.autograd.Variable) a batch of images
        :param bboxes: (~torch.autograd.Variable) (N, R, 4)
        :param labels: (~torch.autograd..Variable) (N, R) in [0, L-1],
            L being the number of foreground classes
        :param scale: (float) scale factor applied to the raw image
            during preprocessing
        :return: namedtuple of 5 losses
        '''
        n = bboxes.shape[0]  # batch size
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # Backbone stages feeding the FPN.
        c2_out = self.faster_rcnn.C2(imgs)
        c3_out = self.faster_rcnn.C3(c2_out)
        c4_out = self.faster_rcnn.C4(c3_out)
        p2, p3, p4, p5 = self.faster_rcnn.fpn(c2_out, c3_out, c4_out)
        feature_maps = [p2, p3, p4, p5]
        # Only p2..p4 are used by the RoI head.
        rcnn_maps = [p2, p3, p4]

        # rpn_locs has shape (hh*ww*9, 4), rpn_scores (hh*ww*9, 2),
        # rois (2000, 4); roi_indices is unused and anchor is
        # (hh*ww*9, 4).  H and W are the preprocessed image size.
        # The RPN scores ~(H/16)x(W/16)x9 (~20000) anchors as
        # foreground/background, keeps the top 12000 and applies NMS to
        # obtain the 2000 proposal boxes.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            feature_maps, img_size, scale)

        # Batch size is one, so drop the batch dimension.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]   # (hh*ww*9, 2)
        rpn_loc = rpn_locs[0]       # (hh*ww*9, 4)
        roi = rois                  # (2000, 4)

        # proposal_target_creator yields sample_roi (128, 4),
        # gt_roi_loc (128, 4) and gt_roi_label (128, 1); the RoI head
        # consumes sample_roi + features and predicts classification
        # (21 classes) plus bbox refinement, supervised by gt_roi_label
        # and gt_roi_loc.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            array_tool.tonumpy(bbox),
            array_tool.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        sample_roi_index = torch.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            rcnn_maps, sample_roi, sample_roi_index)

        # ------------------ RPN loss ------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            array_tool.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = array_tool.totensor(gt_rpn_label).long()
        gt_rpn_loc = array_tool.totensor(gt_rpn_loc)
        # RPN smooth-L1 localisation loss.
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)
        # RPN cross-entropy classification loss (-1 marks ignored anchors).
        rpn_cls_loss = functional.cross_entropy(rpn_score,
                                                gt_rpn_label.cuda(),
                                                ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _gt_rpn_score = rpn_score[gt_rpn_label > -1]
        _rpn_score = array_tool.tonumpy(rpn_score)[
            array_tool.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(array_tool.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------------ ROI loss ------------------------#
        # n_sample is 128; roi_cls_loc is the head output (128 x 84).
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)  # (128, 21, 4)
        # Select the loc predicted for each sample's ground-truth class:
        # (128, 21, 4) -> (128, 4).
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().cuda(), \
                              array_tool.totensor(gt_roi_label).long()]
        gt_roi_label = array_tool.totensor(gt_roi_label).long()
        gt_roi_loc = array_tool.totensor(gt_roi_loc)
        # RoI smooth-L1 localisation loss.
        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)
        # RoI cross-entropy classification loss.
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())
        self.roi_cm.add(array_tool.totensor(roi_score, False),
                        gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        # Append the total loss, extending the list to 5 entries.
        losses = losses + [sum(losses)]
        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """One optimisation step: forward, backward on total loss, update."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize model, config, visdom state and (optionally) the
        optimizer; return the checkpoint path."""
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        save_dir = os.path.dirname(save_path)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        torch.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore a checkpoint from disk (handles both the dict format
        and the legacy raw-state-dict format)."""
        state_dict = torch.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Push each named loss of a LossTuple into its AverageValueMeter."""
        loss_d = {k: array_tool.scalar(v)
                  for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Clear all loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return the running mean of every tracked loss."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class BRFasterRcnnTrainer(nn.Module):
    """Trainer wrapper around Faster R-CNN for adversarial (background
    robustness) training with an optional attacker model.

    In normal mode, :meth:`forward` computes the standard Faster R-CNN
    losses; with ``attack=True`` it short-circuits and returns the head
    scores, ground-truth RoI labels and the recorded feature maps for
    the attacker to consume.
    """

    def __init__(self, faster_rcnn, attacker=None, layer_idx=None,
                 attack_mode=False):
        super(BRFasterRcnnTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.attacker = attacker
        self.layer_idx = layer_idx
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma
        self.attack_mode = attack_mode

        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        self.vis = Visualizer(env=opt.env)

        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}
        # Separate meters for the attacker (background-robustness) losses.
        self.BR_meters = {k: AverageValueMeter() for k in LossTupleBR._fields}

    def forward(self, imgs, bboxes, labels, scale, attack=False):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd..Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.
            attack (bool): when True, return
                ``(roi_score, gt_roi_label, feature_maps)`` for the attacker
                instead of losses.

        Returns:
            namedtuple of 5 losses (or the attack triple, see above).
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # Hooks registered on the model record the intermediate feature maps.
        features = self.faster_rcnn.extractor(imgs)
        feature_maps = self.faster_rcnn.feature_maps

        # NOTE(review): indexing a 0-dim tensor with [0] only works on old
        # PyTorch versions -- confirm the targeted torch version.
        if not features.sum()[0] == 0:
            rpn_locs, rpn_scores, rois, roi_indices, anchor = \
                self.faster_rcnn.rpn(features, img_size, scale)

            # Since batch size is one, convert variables to singular form
            bbox = bboxes[0]
            label = labels[0]
            rpn_score = rpn_scores[0]
            rpn_loc = rpn_locs[0]
            roi = rois

            # Sample RoIs and forward
            # it's fine to break the computation graph of rois,
            # consider them as constant input
            if rois.size == 0:
                print("Features are 0 for some reason")
                losses = [Variable(torch.zeros(1)).cuda(),
                          Variable(torch.zeros(1)).cuda(),
                          Variable(torch.zeros(1)).cuda(),
                          Variable(torch.zeros(1)).cuda()]
                losses = losses + [sum(losses)]
                return losses, features

            sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
                roi,
                at.tonumpy(bbox),
                at.tonumpy(label),
                self.loc_normalize_mean,
                self.loc_normalize_std)
            # NOTE it's all zero because now it only support for batch=1 now
            sample_roi_index = t.zeros(len(sample_roi))
            roi_cls_loc, roi_score = self.faster_rcnn.head(
                features,
                sample_roi,
                sample_roi_index)

            # ------------------ RPN losses -------------------#
            if not attack:
                if anchor.size != 0:
                    gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
                        at.tonumpy(bbox),
                        anchor,
                        img_size)
                    gt_rpn_label = at.tovariable(gt_rpn_label).long()
                    gt_rpn_loc = at.tovariable(gt_rpn_loc)
                    rpn_loc_loss = _fast_rcnn_loc_loss(
                        rpn_loc,
                        gt_rpn_loc,
                        gt_rpn_label.data,
                        self.rpn_sigma)

                    # NOTE: default value of ignore_index is -100 ...
                    rpn_cls_loss = F.cross_entropy(rpn_score,
                                                   gt_rpn_label.cuda(),
                                                   ignore_index=-1)
                    _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
                    _rpn_score = at.tonumpy(rpn_score)[
                        at.tonumpy(gt_rpn_label) > -1]
                    self.rpn_cm.add(at.totensor(_rpn_score, False),
                                    _gt_rpn_label.data.long())
                else:
                    # No anchors: fall back to zero RPN losses so the
                    # LossTuple below stays well-formed.
                    rpn_cls_loss = 0
                    rpn_loc_loss = 0

            # ------------------ ROI losses (fast rcnn loss) -------------------#
            n_sample = roi_cls_loc.shape[0]
            roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
            # Pick the predicted loc of each sample's ground-truth class.
            roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                                  at.totensor(gt_roi_label).long()]
            gt_roi_label = at.tovariable(gt_roi_label).long()
            gt_roi_loc = at.tovariable(gt_roi_loc)

            if attack:
                # Attack mode: hand the attacker what it needs, no losses.
                return roi_score, gt_roi_label, feature_maps
            else:
                roi_loc_loss = _fast_rcnn_loc_loss(
                    roi_loc.contiguous(),
                    gt_roi_loc,
                    gt_roi_label.data,
                    self.roi_sigma)

                roi_cls_loss = nn.CrossEntropyLoss()(roi_score,
                                                     gt_roi_label.cuda())

                self.roi_cm.add(at.totensor(roi_score, False),
                                gt_roi_label.data.long())

                losses = [rpn_loc_loss, rpn_cls_loss,
                          roi_loc_loss, roi_cls_loss]
                losses = losses + [sum(losses)]
                return LossTuple(*losses)
        else:
            # Degenerate case: the extractor produced an all-zero feature map.
            print("Features are 0 for some reason")
            losses = [Variable(torch.zeros(1)).cuda(),
                      Variable(torch.zeros(1)).cuda(),
                      Variable(torch.zeros(1)).cuda(),
                      Variable(torch.zeros(1)).cuda()]
            losses = losses + [sum(losses)]
            return losses

    def train_step(self, imgs, bboxes, labels, scale, target_feature=None,
                   rois=None, roi_scores=None):
        """In attack mode, run the attacker and record its losses; otherwise
        this wrapper does nothing (the R-CNN itself is not trained here)."""
        if not self.attack_mode:
            print('....')
        else:
            BR_losses = self.attacker.forward(imgs, self, labels, bboxes,
                                              scale, target_feature, rois,
                                              roi_scores)
            BR_losses = LossTupleBR(*BR_losses)
            self.update_meters(BR_losses, BR=True)

    # save_rcnn defaults to False because the R-CNN parameters are frozen
    # while the generator (attacker) is being trained.
    def save(self, save_optimizer=False, save_path=None, save_rcnn=False,
             **kwargs):
        """Serialize the (optional) R-CNN checkpoint and the attacker;
        return the checkpoint path."""
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/faterrcnn_full_%s' % timestr
            if not self.attack_mode:
                for k_, v_ in kwargs.items():
                    save_path += '%s' % v_

        if self.attacker is not None:
            self.attacker.save('checkpoints/attack_%s_%d.path'
                               % (timestr, kwargs['epochs']))
        if save_rcnn:
            t.save(save_dict, save_path)
            self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False):
        """Restore a checkpoint from disk (dict or legacy raw format)."""
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            # FIX: the original called self.load_state_dict(...), which would
            # try to load *optimizer* state into this nn.Module and fail /
            # silently corrupt; the optimizer state belongs to the optimizer.
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses, BR=False):
        """Push each named loss into its meter; BR selects the attacker set."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        if not BR:
            for key, meter in self.meters.items():
                meter.add(loss_d[key])
        else:
            for key, meter in self.BR_meters.items():
                meter.add(loss_d[key])

    def reset_meters(self, BR=False):
        """Clear loss meters (optionally the BR set too) and both CMs."""
        for key, meter in self.meters.items():
            meter.reset()
        if BR:
            for key, meter in self.BR_meters.items():
                meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self, BR=False):
        """Return running loss means from the normal or BR meter set."""
        if BR:
            return {k: v.value()[0] for k, v in self.BR_meters.items()}
        else:
            return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """wrapper for conveniently training. return losses

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN): A Faster R-CNN model that is
            going to be trained.
    """

    def __init__(self, faster_rcnn, nclasses):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        # Unlike the sibling variants, the head confusion matrix size is
        # configurable here via nclasses.
        self.roi_cm = ConfusionMeter(nclasses)
        self.meters = {k: AverageValueMeter()
                       for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd..Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features,
                                                       sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            at.tonumpy(anchor),
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc,
                                           gt_rpn_loc,
                                           gt_rpn_label.data,
                                           self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(),
                                       ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # Pick the predicted loc of each sample's ground-truth class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(),
                                           gt_roi_loc,
                                           gt_roi_label.data,
                                           self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]
        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """One optimisation step: forward, backward on total loss, update."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        save_dir = os.path.dirname(save_path)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
            self,
            path,
            load_optimizer=True,
            parse_opt=False,
    ):
        """Restore a checkpoint from disk (handles both the dict format
        and the legacy raw-state-dict format)."""
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Push each named loss of a LossTuple into its AverageValueMeter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Clear all loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return the running mean of every tracked loss."""
        return {k: v.value()[0] for k, v in self.meters.items()}
def evaluate(config, model, criterion, validation_loader, method, gpu, test_flag=False, save_dir=None):
    """Run one evaluation pass over ``validation_loader``.

    Accumulates the average loss and a confusion matrix over the whole
    loader, optionally saving per-image predictions as grayscale PNGs when
    ``test_flag`` is set, and prints the summary metrics.

    Returns:
        (avg_loss, accuracy, mIoU, class_precision, class_iou) where
        ``class_precision`` is the precision of class index 1.
    """
    losses = AverageMeter('Loss', ':.5f')
    # Confusion matrix sized by the number of classes in the config.
    conf_meter = ConfusionMeter(config['n_class'])

    # No gradients are needed for evaluation; eval() also disables dropout
    # and switches batch norm to its running statistics.
    with torch.no_grad():
        model.eval()
        for inputs, labels, names in tqdm(validation_loader, file=sys.stdout):
            use_cuda = gpu and torch.cuda.is_available()
            if use_cuda:
                inputs = inputs.cuda()
                labels = labels.cuda().long()
            else:
                labels = labels.long()

            if method == 'pixelnet':
                model.set_train_flag(False)

            # Forward pass and loss.
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Per-pixel class predictions; persist them when testing.
            predictions = outputs.cpu().argmax(1)
            if test_flag:
                for sample_idx, prediction in enumerate(predictions):
                    plt.imsave('%s/%s.png' % (save_dir, names[sample_idx][:-4]),
                               prediction.squeeze(), cmap='gray')

            losses.update(loss.item(), inputs.size(0))
            # Flatten (N, C, H, W) -> (N*H*W, C) so every pixel counts as one
            # confusion-matrix observation.
            flat_scores = outputs.permute(0, 2, 3, 1).contiguous().view(-1, config['n_class'])
            conf_meter.add(flat_scores, labels.view(-1))

    print('--- evaluation result ---' if test_flag else '--- validation result ---')

    conf_mat = conf_meter.value()
    acc, iou, precision, recall, class_iou = metrics(conf_mat, verbose=test_flag)

    print('loss: %.5f, accuracy: %.5f, mIU: %.5f' % (losses.avg, acc, iou))
    if not test_flag:
        print('precision:', np.round(precision, 5))

    # Precision of class index 1 (e.g. the foreground class of interest).
    class_precision = precision[1]
    return losses.avg, acc, iou, class_precision, class_iou
class GenotypingSemisupervisedMixupTrainer(CommonTrainer):
    """Train a genotyping model using semisupervised mixup (labels on the unlabeled set are made up by sampling)."""

    def __init__(self, args, problem, device):
        super().__init__(args, problem, device)
        # Classification criterion; built lazily by rebuild_criterions().
        self.criterion_classifier = None
        # Confusion matrix; (re)built by reset_before_test_epoch().
        self.cm = None
        # problem_mean/problem_std only exist when --normalize is set; the
        # lambda below only dereferences them in that same case.
        if self.args.normalize:
            problem_mean = self.problem.load_tensor("input", "mean")
            problem_std = self.problem.load_tensor("input", "std")
        self.categorical_distribution = None
        # Identity when normalization is disabled.
        self.normalize_inputs = lambda x: (normalize_mean_std(
            x, problem_mean=problem_mean, problem_std=problem_std)
            if self.args.normalize else x)

    def rebuild_criterions(self, output_name, weights=None):
        # Only the softmaxGenotype output has a criterion in this trainer.
        if output_name == "softmaxGenotype":
            self.criterion_classifier = MultiLabelSoftMarginLoss(
                weight=weights)

    def get_test_metric_name(self):
        # Metric used for model selection / LR scheduling.
        return "test_accuracy"

    def is_better(self, metric, previous_metric):
        # Higher accuracy is better.
        return metric > previous_metric

    def create_training_performance_estimators(self):
        # Estimators tracked during training: supervised loss + accuracy.
        performance_estimators = PerformanceList()
        performance_estimators += [FloatHelper("supervised_loss")]
        performance_estimators += [AccuracyHelper("train_")]
        self.training_performance_estimators = performance_estimators
        return performance_estimators

    def create_test_performance_estimators(self):
        # Estimators tracked during testing: supervised loss + accuracy.
        performance_estimators = PerformanceList()
        performance_estimators += [LossHelper("test_supervised_loss")]
        performance_estimators += [AccuracyHelper("test_")]
        self.test_performance_estimators = performance_estimators
        return performance_estimators

    def train_semisupervised_mixup(self, epoch):
        """Run one epoch of semisupervised-mixup training.

        Pairs a labelled loader with the unlabeled loader and feeds both
        through a multi-threaded CPU/GPU data provider.
        """
        performance_estimators = self.create_training_performance_estimators()

        print('\nTraining, epoch: %d' % epoch)
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        unsupervised_loss_acc = 0  # NOTE(review): accumulated nowhere below
        num_batches = 0
        train_loader_subset = self.problem.train_loader_subset_range(
            0, self.args.num_training)
        unlabeled_loader_subset = self.problem.unlabeled_loader()
        # Labels are smoothed on the fly; inputs normalized per __init__.
        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(train_loader_subset, unlabeled_loader_subset),
            device=self.device,
            batch_names=["training", "unlabeled"],
            requires_grad={
                "training": ["input"],
                "unlabeled": ["input"]
            },
            recode_functions={
                "softmaxGenotype":
                lambda x: recode_for_label_smoothing(x, self.epsilon),
                "input": self.normalize_inputs
            },
            vectors_to_keep=["metaData"])
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s_1 = data_dict["training"]["input"]
                target_s_1 = data_dict["training"]["softmaxGenotype"]
                input_u_2 = data_dict["unlabeled"]["input"]
                metadata_1 = data_dict["training"]["metaData"]
                num_batches += 1

                self.train_one_batch(performance_estimators, batch_idx,
                                     input_s_1, target_s_1, metadata_1,
                                     input_u_2)
                # Stop once the configured number of examples was consumed.
                if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
                    break
        finally:
            # Always stop the provider's worker threads.
            data_provider.close()

        return performance_estimators

    def train_one_batch(self, performance_estimators, batch_idx, input_s_1,
                        target_s_1, metadata_1, input_u_2):
        """One optimization step on a mixed labelled/unlabeled batch.

        A pseudo-target for the unlabeled input is sampled from the class
        prior, then the labelled and unlabeled examples are mixed up.
        """
        self.net.train()
        self.num_classes = len(target_s_1[0])

        # Class prior estimated from training-set genotype frequencies.
        genotype_frequencies = self.class_frequencies["softmaxGenotype"]
        category_prior = (genotype_frequencies /
                          torch.sum(genotype_frequencies)).numpy()
        indel_weight = self.args.indel_weight_factor
        snp_weight = 1.0

        # "Dream up" a target for the unlabeled input by sampling the prior.
        target_s_2 = self.dreamup_target_for(num_classes=self.num_classes,
                                             category_prior=category_prior,
                                             input=input_u_2).to(self.device)
        # Mixup under the lock — presumably shared RNG/state; confirm.
        with self.lock:
            input_s_mixup, target_s_mixup = self._recreate_mixup_batch(
                input_s_1, input_u_2, target_s_1, target_s_2)

        self.optimizer_training.zero_grad()
        self.net.zero_grad()

        # outputs used to calculate the loss of the supervised model
        # must be done with the model prior to regularization:
        output_s = self.net(input_s_mixup)
        output_s_p = self.get_p(output_s)
        _, target_index = torch.max(target_s_mixup, dim=1)

        supervised_loss = self.criterion_classifier(output_s, target_s_mixup)
        # assume weight is the same for the two batches (we don't know metadata on the unlabeled batch):
        with self.lock:
            batch_weight = self.estimate_batch_weight(
                metadata_1,
                indel_weight=indel_weight,
                snp_weight=snp_weight)

        supervised_loss = supervised_loss * batch_weight
        supervised_loss.backward()
        self.optimizer_training.step()

        performance_estimators.set_metric(batch_idx, "supervised_loss",
                                          supervised_loss.item())
        performance_estimators.set_metric_with_outputs(
            batch_idx, "train_accuracy", supervised_loss.item(), output_s_p,
            targets=target_index)

        if not self.args.no_progress:
            progress_bar(
                batch_idx * self.mini_batch_size, self.max_training_examples,
                performance_estimators.progress_message([
                    "supervised_loss", "reconstruction_loss", "train_accuracy"
                ]))

    def reset_before_test_epoch(self):
        # Fresh (unnormalized) confusion matrix for the upcoming test epoch.
        self.cm = ConfusionMeter(self.num_classes, normalized=False)

    def test_one_batch(self, performance_estimators, batch_idx, input_s,
                       target_s, metadata=None, errors=None):
        """Evaluate one validation batch and update metrics and the CM."""
        #if errors is None:
        #    errors = torch.zeros(target_s[0].size())

        output_s = self.net(input_s)
        output_s_p = self.get_p(output_s)

        # Hard class indices for the confusion matrix.
        _, target_index = torch.max(recode_as_multi_label(target_s), dim=1)
        _, output_index = torch.max(recode_as_multi_label(output_s_p), dim=1)
        self.cm.add(predicted=output_index.data, target=target_index.data)

        supervised_loss = self.criterion_classifier(output_s, target_s)
        #self.estimate_errors(errors, output_s_p, target_s)

        performance_estimators.set_metric(batch_idx, "test_supervised_loss",
                                          supervised_loss.item())
        performance_estimators.set_metric_with_outputs(
            batch_idx, "test_accuracy", supervised_loss.item(), output_s_p,
            targets=target_index)

        if not self.args.no_progress:
            progress_bar(
                batch_idx * self.mini_batch_size,
                self.max_validation_examples,
                performance_estimators.progress_message([
                    "test_supervised_loss", "test_reconstruction_loss",
                    "test_accuracy"
                ]))

    def test_semisupervised_mixup(self, epoch):
        """Run one evaluation epoch and step the LR scheduler."""
        print('\nTesting, epoch: %d' % epoch)
        errors = None
        performance_estimators = self.create_test_performance_estimators()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()
        validation_loader_subset = self.problem.validation_loader_range(
            0, self.args.num_validation)
        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(validation_loader_subset),
            device=self.device,
            batch_names=["validation"],
            requires_grad={"validation": []},
            recode_functions={"input": self.normalize_inputs},
            vectors_to_keep=["softmaxGenotype"])
        if self.best_model is None:
            self.best_model = self.net
        self.reset_before_test_epoch()
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s = data_dict["validation"]["input"]
                target_s = data_dict["validation"]["softmaxGenotype"]
                self.net.eval()
                self.test_one_batch(performance_estimators, batch_idx,
                                    input_s, target_s, errors=None)
                if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
                    break
            # print()
        finally:
            data_provider.close()
        # NOTE(review): errors is always None here — per-class error
        # estimation above is commented out.
        print("test errors by class: ", str(errors))
        # Apply learning rate schedule:
        test_metric = performance_estimators.get_metric(
            self.get_test_metric_name())
        assert test_metric is not None, (
            self.get_test_metric_name() +
            "must be found among estimated performance metrics")
        if not self.args.constant_learning_rates:
            self.scheduler_train.step(test_metric, epoch)
        self.compute_after_test_epoch()

        return performance_estimators

    def compute_after_test_epoch(self):
        """Call this method after an epoch of calling test_one_batch. This is
        used to compute variables in the trainer after each test epoch.
        """
        self.confusion_matrix = self.cm.value().transpose()
        if self.best_model_confusion_matrix is None:
            self.best_model_confusion_matrix = torch.from_numpy(
                self.confusion_matrix).to(self.device)
class RFCN_Trainer(nn.Module):
    """
    trainer for RFCN, return loss:
    rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss
    params:
        r_fcn --RFCN model
    """

    def __init__(self, r_fcn: RFCN):
        super(RFCN_Trainer, self).__init__()
        self.r_fcn = r_fcn
        # sigmas of the smooth-L1 localization losses
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # generate anchor for RPN training
        self.anchor_target_creator = AnchorTargetCreator()
        # OHEM keeps more candidates (300) so hard examples can be mined.
        proposal_target_num = 300 if opt.use_OHEM else 128
        self.proposal_target_creator = ProposalTargetCreator(n_sample=proposal_target_num)
        self.loc_normalize_mean = r_fcn.loc_normalize_mean
        self.loc_normalize_std = r_fcn.loc_normalize_std
        self.optimizer = self.get_optimizer()

        # visdom wrapper
        self.viz = visdom.Visdom(env=opt.viz_env)
        self.viz_index = 0
        self.log_text = ''

        # record training status: RPN fg/bg and per-class confusion matrices
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(self.r_fcn.class_num)
        # FIX_HEAD trains the RPN only, so fewer loss meters are needed.
        if opt.FIX_HEAD:
            self.meters = {k: AverageValueMeter() for k in RPN_LossTuple._fields}
        else:
            self.meters = {k: AverageValueMeter() for k in RFCN_LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """
        :param imgs: variable with a batch of images.
        :param bboxes: A batch of GT bounding boxes.
        :param labels: labels of gt bboxes. Its shape is :math:`(N, R)`.
            The background is excluded from the definition, which means
            that the range of the value
        :param scale: Amount of scaling applied to the raw image during
            preprocessing.
        :return: namedtuple of losses
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.r_fcn.extractor_phase1(imgs)
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.r_fcn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]            # shape: (gt_num,)
        rpn_score = rpn_scores[0]    # shape: (anchor_num, 2)
        rpn_loc = rpn_locs[0]        # shape: (anchor_num, 4)
        roi = rois[np.where(roi_indices == 0)[0]]  # shape(R, 4)

        # --------------- rpn losses ------------ #
        anchor_loc_gt, anchor_label_gt = self.anchor_target_creator(
            tonumpy(bbox), anchor, img_size)
        anchor_loc_gt = totensor(anchor_loc_gt)
        anchor_label_gt = totensor(anchor_label_gt).long()
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc, anchor_loc_gt, anchor_label_gt.data, self.rpn_sigma)
        # label -1 marks anchors to be ignored by the classification loss
        rpn_cls_loss = F.cross_entropy(rpn_score, anchor_label_gt.cuda(),
                                       ignore_index=-1)
        # Confusion-meter bookkeeping must not build a graph.
        with torch.no_grad():
            _anchor_label_gt = anchor_label_gt[anchor_label_gt > -1]
            _rpn_score = rpn_score[anchor_label_gt > -1]
            self.rpn_cm.add(_rpn_score, _anchor_label_gt.data.long())

        # **************** for head ****************
        if opt.FIX_HEAD:
            # RPN-only training: return just the two RPN losses (+ total).
            losses = [rpn_loc_loss, rpn_cls_loss]
            losses = losses + [sum(losses)]
            return RPN_LossTuple(*losses)
        else:
            # sample rois for Head training
            sample_roi, roi_loc_gt, roi_label_gt = self.proposal_target_creator(
                roi, tonumpy(bbox), tonumpy(label),
                self.loc_normalize_mean, self.loc_normalize_std)
            # Note: set all value to zero(batch_size == 1)
            sample_roi_index = torch.zeros(len(sample_roi), dtype=torch.float).cuda()
            sample_roi = totensor(sample_roi).float()
            roi_locs, roi_scores = self.r_fcn.head(
                features, sample_roi, sample_roi_index)

            # ----------- PsROI losses ----------- #
            roi_label_gt = totensor(roi_label_gt).long()
            roi_loc_gt = totensor(roi_loc_gt)

            n_sample = roi_locs.shape[0]
            roi_locs = roi_locs.view(n_sample, -1, 4)
            if opt.cls_reg_specific:
                # class-specific regression: pick the gt-class offset row
                roi_locs = roi_locs[torch.arange(n_sample), roi_label_gt]
            else:
                # class-agnostic regression: a single shared offset row
                roi_locs = roi_locs[torch.arange(n_sample), 1]

            roi_loc_loss = _fast_rcnn_loc_loss(
                roi_locs.contiguous(), roi_loc_gt, roi_label_gt.data,
                self.roi_sigma, ohem=opt.use_OHEM)

            if opt.use_OHEM:
                # Keep per-RoI losses (reduction='none') for hard-example mining.
                roi_cls_loss = F.cross_entropy(roi_scores, roi_label_gt.cuda(),
                                               reduction='none')
                roi_cls_loss, roi_loc_loss = self.ohem_dectect_loss(
                    roi_cls_loss, roi_loc_loss, roi_label_gt, sample_roi,
                    use_nms=True, hard_num=opt.hard_num)
            else:
                roi_cls_loss = F.cross_entropy(roi_scores, roi_label_gt.cuda())
            with torch.no_grad():
                self.roi_cm.add(roi_scores, roi_label_gt.data.long())

            losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
            losses = losses + [sum(losses)]
            return RFCN_LossTuple(*losses)

    def ohem_dectect_loss(self, cls_loss, loc_loss, gt_label, rois,
                          hard_num=128, use_nms=True, nms_thresh=0.7):
        """Online hard example mining over the per-RoI losses.

        :param cls_loss: cls loss
        :param loc_loss: reg loss
        :param gt_label: gt label of rois
        :param rois: sampled rois by proposalTarget module
        :param hard_num: the number of rois for backward
        :param use_nms: filter ROI with excessive overlap
        :param nms_thresh: NMS threshold
        :return: (mean cls loss, mean loc loss) over the mined hard examples
        """
        bbox_loss = cls_loss + loc_loss
        if use_nms:
            # nms based on loss: suppress overlapping RoIs, keeping the
            # highest-loss one per cluster
            keep = torchvision.ops.nms(rois, bbox_loss, iou_threshold=nms_thresh)
            bbox_loss = bbox_loss[keep]
            cls_loss = cls_loss[keep]
            loc_loss = loc_loss[keep]
            gt_label = gt_label[keep]

        # the number of rois for backward
        back_size = min(len(bbox_loss), hard_num)
        # hard example mining: take the top-k highest-loss RoIs
        if back_size < len(bbox_loss):
            _, top_idx = torch.topk(bbox_loss, back_size)
            top_cls_loss = cls_loss[top_idx]
            isFg = (gt_label > 0)[top_idx]
            top_fg_idx = top_idx[isFg]
            # loc loss only counts for foreground RoIs; fall back to 0 if none
            top_loc_loss = loc_loss[top_fg_idx] if len(top_fg_idx) > 0 else torch.tensor([0.]).cuda()
        else:
            top_cls_loss = cls_loss
            top_loc_loss = loc_loss
        top_cls_loss_normal = top_cls_loss.mean()
        top_loc_loss_normal = top_loc_loss.mean()
        return top_cls_loss_normal, top_loc_loss_normal

    def train_step(self, imgs, bboxes, labels, scale):
        # One optimization step; meters are updated before optimizer.step()
        # (order differs from the other trainers in this file but the meter
        # values are already final after backward()).
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.update_meters(losses)
        self.optimizer.step()
        return losses

    def update_meters(self, losses):
        # Convert losses to scalars and feed the running-average meters.
        loss_d = {k: toscalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        # Clear loss averages and both confusion matrices.
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        # AverageValueMeter.value() returns (mean, std); report the mean.
        return {k: v.value()[0] for k, v in self.meters.items()}

    def save(self, save_optimizer=False, save_path=None, best_map=0., **kwargs):
        """Serialize model/config/visdom info (and optionally optimizer);
        returns the path written."""
        save_dict = dict()

        save_dict['model'] = self.r_fcn.state_dict()
        save_dict['config'] = opt.state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = {'viz_index': self.viz_index}
        save_dict['best_map'] = best_map

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            save_path = './checkPoints/rfcn_'
            if opt.head_ver is not None:
                save_path += 'vgg_roi_'
            time_str = time.strftime('%m%d%H%M')
            save_path += '{}_{}.pth'.format(time_str, best_map)

        # Create the checkpoint directory on demand.
        save_dir = os.path.dirname(save_path)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        torch.save(save_dict, save_path)
        return save_path

    def load(self, path, load_optimizer=True, load_viz_idx=False, parse_opt=False):
        """Restore a checkpoint produced by save(); returns self."""
        state_dict = torch.load(path)
        if 'model' in state_dict:
            self.r_fcn.load_state_dict(state_dict['model'])
        else:
            raise ValueError("Cannot find the model parameters of RFCN, load_path:\n", path)
        if load_viz_idx:
            self.viz_index = state_dict['vis_info']['viz_index']
        if parse_opt:
            print("Load history configuration!")
            opt.parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def scale_lr(self, epoch, gamma=0.1):
        # Multiply all LRs by gamma at the configured milestone epochs.
        if (epoch + 1) in opt.LrMilestones:
            for param_group in self.optimizer.param_groups:
                param_group['lr'] *= gamma
        return self.optimizer

    def get_optimizer(self):
        """
        return optimizer
        """
        lr = opt.rfcn_init_lr
        params = []
        # Conventional detection setup: biases get 2x LR and no weight decay.
        for key, value in dict(self.named_parameters()).items():
            if value.requires_grad:
                if 'bias' in key:
                    params += [{'params': [value], 'lr': lr * 2, 'weight_decay': 0}]
                else:
                    params += [{'params': [value], 'lr': lr, 'weight_decay': opt.weight_decay}]
        return torch.optim.SGD(params=params, momentum=0.9)

    def log(self, info, win='log_text'):
        """
        self.log({'loss':1, 'lr':0.0001})
        """
        # Append a timestamped HTML line and re-render the visdom text pane.
        self.log_text += ('[{time}] {info} <br>'.format(
            time=time.strftime('%m%d_%H%M%S'),
            info=info))
        self.viz.text(self.log_text, win, opts={"title": 'log_text'})
class FasterRCNNTrainer(nn.Module):
    """Wrapper that makes training a FasterRCNN model convenient.

    Given images ``imgs``, labels ``labels``, ground-truth boxes ``bboxes``
    and the preprocessing scale ``scale``, the forward pass returns the
    training losses:

    * ``rpn_loc_loss``: localization loss of the region proposal network
      (class-agnostic refinement of foreground anchors).
    * ``rpn_cls_loss``: classification loss of the RPN
      (class-agnostic foreground/background).
    * ``roi_loc_loss``: per-class localization loss of the RoI head.
    * ``roi_cls_loss``: per-class classification loss of the RoI head.
    * ``total_loss``: rpn_loc_loss + rpn_cls_loss + roi_loc_loss + roi_cls_loss

    Args:
        faster_rcnn (FasterRCNN): the model to train.
    """

    def __init__(self, faster_rcnn):
        """
        :type faster_rcnn: FasterRCNN
        """
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # sigmas of the smooth-L1 localization losses
        self.rpn_sigma = faster_rcnn_config.rpn_sigma
        self.roi_sigma = faster_rcnn_config.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # indicators for training status: 2x2 fg/bg and 21-class (20 + bg)
        # confusion matrices
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value is
                :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to the raw image during
                preprocessing.

        Returns:
            namedtuple of 5 losses
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            bbox,
            label,
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = torch.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            bbox, anchor, img_size)
        gt_rpn_label = Variable(gt_rpn_label).long()
        gt_rpn_loc = Variable(gt_rpn_loc)
        rpn_loc_loss = fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                          gt_rpn_label.data, self.rpn_sigma)

        # NOTE: default value of ignore_index is -100; ignored anchors are
        # labelled -1 here.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label, ignore_index=-1)
        # Only labelled anchors enter the RPN confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = rpn_score[gt_rpn_label > -1]
        self.rpn_cm.add(_rpn_score, _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # roi_cls_loc has one 4-vector per class; pick each sample's
        # ground-truth class row.
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long(),
                              gt_roi_label.long()]
        gt_roi_label = Variable(gt_roi_label).long()
        gt_roi_loc = Variable(gt_roi_loc)

        roi_loc_loss = fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                          gt_roi_label.data, self.roi_sigma)
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label)
        self.roi_cm.add(roi_score, gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """One optimization step: forward, backward on the total loss,
        parameter update, then record the losses in the meters."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = faster_rcnn_config.state_dict()
        save_dict['other_info'] = kwargs

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        # FIX: torch.save does not create missing directories; without this
        # the default 'checkpoints/...' path fails on a fresh checkout.
        # (Consistent with the other trainer implementations in this file.)
        import os
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        torch.save(save_dict, save_path)
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore a checkpoint produced by save(); returns self."""
        state_dict = torch.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            faster_rcnn_config.parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add each loss to its running-average meter.

        FIX: convert loss tensors to Python floats before adding, matching
        the ``at.scalar``/``toscalar`` conversion done by the other trainer
        variants in this file (AverageValueMeter expects numbers).
        """
        loss_d = {k: (v.item() if hasattr(v, 'item') else v)
                  for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Clear loss averages and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return the running mean of every loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Convenience wrapper that turns a FasterRCNN model into a trainable
    module returning its training losses.

    The forward pass computes:

    * :obj:`rpn_loc_loss`: localization loss of the Region Proposal Network.
    * :obj:`rpn_cls_loss`: classification loss of the RPN.
    * :obj:`roi_loc_loss`: localization loss of the head module.
    * :obj:`roi_cls_loss`: classification loss of the head module.
    * :obj:`total_loss`: sum of the four losses above.

    Args:
        faster_rcnn (model.FasterRCNN): A Faster R-CNN model that is
            going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # sigmas of the smooth-L1 localization losses
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # creators that turn ground-truth boxes/labels into training targets
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # training-status indicators: fg/bg and 21-way confusion matrices,
        # plus one running-average meter per loss field
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {field: AverageValueMeter() for field in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Only batch size ``N = 1`` is supported; ``R`` is the number of
        ground-truth boxes per image.

        Args:
            imgs (~torch.autograd.Variable): a batch of images.
            bboxes (~torch.autograd.Variable): bounding boxes, shape
                :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): foreground labels in
                :math:`[0, L - 1]` (background excluded), shape :math:`(N, R)`.
            scale (float): scaling applied to the raw image during
                preprocessing.

        Returns:
            LossTuple: namedtuple of 5 losses.
        """
        if bboxes.shape[0] != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, img_h, img_w = imgs.shape
        img_size = (img_h, img_w)

        feature_map = self.faster_rcnn.extractor(imgs)
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            feature_map, img_size, scale)

        # batch size is one, so drop the leading batch dimension
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Subsample RoIs for head training. It is fine to break the
        # computation graph of the proposals and treat them as constants.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # all-zero RoI indices because only batch=1 is supported
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            feature_map, sample_roi, sample_roi_index)

        # ---------------------- RPN losses ---------------------- #
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc, gt_rpn_loc, gt_rpn_label.data, self.rpn_sigma)

        # NOTE: cross_entropy's default ignore_index is -100; "don't care"
        # anchors are labelled -1 here, so pass ignore_index explicitly.
        rpn_cls_loss = F.cross_entropy(
            rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # only labelled anchors enter the confusion meter
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ---------------- RoI (Fast R-CNN) losses ---------------- #
        n_sample = roi_cls_loc.shape[0]
        # one 4-vector of offsets per class; pick each sample's gt-class row
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(), gt_roi_loc, gt_roi_label.data, self.roi_sigma)
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())
        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        loss_list = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        return LossTuple(*loss_list, sum(loss_list))

    def train_step(self, imgs, bboxes, labels, scale):
        """One optimization step; records the losses in the meters."""
        self.optimizer.zero_grad()
        batch_losses = self.forward(imgs, bboxes, labels, scale)
        batch_losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(batch_losses)
        return batch_losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = {
            'model': self.faster_rcnn.state_dict(),
            'config': opt._state_dict(),
            'other_info': kwargs,
            'vis_info': self.vis.state_dict(),
        }
        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            save_path += ''.join('_%s' % value for value in kwargs.values())

        # create the checkpoint directory on demand
        save_dir = os.path.dirname(save_path)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore a checkpoint produced by save(); returns self."""
        state_dict = t.load(path)
        if 'model' not in state_dict:
            # legacy way, for backward compatibility: the file itself is the
            # raw model state dict
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        self.faster_rcnn.load_state_dict(state_dict['model'])
        if parse_opt:
            opt._parse(state_dict['config'])
        if load_optimizer and 'optimizer' in state_dict:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add each scalar loss to its running-average meter."""
        for name, value in losses._asdict().items():
            self.meters[name].add(at.scalar(value))

    def reset_meters(self):
        """Clear loss averages and both confusion matrices."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return the running mean of every loss meter."""
        return {name: meter.value()[0] for name, meter in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper that makes training a Faster R-CNN convenient.

    forward() returns the training losses as a namedtuple:

    * :obj:`rpn_loc_loss`: The localization loss for Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN): A Faster R-CNN model that is
            going to be trained (typically a FasterRCNNVGG16 instance,
            a subclass of FasterRCNN).
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # sigma for l1_smooth_loss
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators produce gt_bbox / gt_label training targets:
        # AnchorTargetCreator assigns ground-truth boxes to anchors (RPN
        # targets); ProposalTargetCreator samples RoIs for the head.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # loc normalisation constants taken from the network.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Training-status indicators: a 2x2 confusion matrix for the RPN
        # (foreground/background) and a 21x21 one for the head
        # (20 classes + background).
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of ground-truth bounding
                boxes. Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels. Its shape is
                :math:`(N, R)`. The background is excluded from the definition,
                which means that the range of the value is :math:`[0, L - 1]`.
                :math:`L` is the number of foreground classes.
            scale (float): Amount of scaling applied to the raw image during
                preprocessing.

        Returns:
            namedtuple of 5 losses.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # Backbone (pretrained VGG16) feature extraction.
        features = self.faster_rcnn.extractor(imgs)

        # Region proposal network: per-anchor offsets/scores plus proposed
        # RoIs (roughly 2000 proposals at train time per the original notes).
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward.
        # It's fine to break the computation graph of rois,
        # consider them as constant input.
        # ProposalTargetCreator subsamples the proposals (e.g. to 128) and
        # builds their loc/label targets; used at training time only.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only supports batch=1.
        sample_roi_index = t.zeros(len(sample_roi))
        # RoI head classifies each sampled RoI and refines its box:
        # roi_cls_loc [n_sample, n_class*4], roi_score [n_sample, n_class].
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Assign ground-truth boxes to anchors to obtain RPN targets.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100; -1 marks anchors that
        # were not sampled and must not contribute to the loss.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        # Track RPN fg/bg classification quality.
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # (n_sample, n_class*4) -> (n_sample, n_class, 4); then select, per
        # RoI, the loc prediction belonging to its ground-truth class.
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        # Smooth-L1 localisation loss for the head.
        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        # Classification loss (cross-entropy) over classes + background.
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        # Four individual losses plus their sum appended as total_loss.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the LossTuple."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model (and optionally the optimizer and other info).

        Args:
            save_optimizer (bool): whether to save optimizer.state_dict().
            save_path (string): where to save the model; if None, a path is
                generated from a timestamp plus the kwargs values.

        Returns:
            save_path (str): the path the model was saved to.
        """
        import os  # local import: the file-level import block is outside this chunk

        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        # FIX: create the target directory first — otherwise t.save raises
        # FileNotFoundError when e.g. checkpoints/ does not exist yet
        # (the sibling implementation of save() in this file already guards
        # this; make the two consistent).
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore weights (and optionally optimizer state / config) from path."""
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            # Old checkpoints stored the bare model state_dict; nothing else
            # to restore, so return immediately.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Push the latest loss values into the running average meters."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Zero the loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return the current mean of every tracked loss."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class ConfusionMatrixCallback(Callback):
    """Callback that accumulates a confusion matrix over one loader pass
    and renders it to Tensorboard when the loader finishes.

    ``version="tnt"`` keeps a running torchnet ``ConfusionMeter``;
    ``version="sklearn"`` stores raw predictions/targets and defers the
    matrix computation to sklearn at the end of the loader.
    """

    def __init__(self,
                 input_key: str = "targets",
                 output_key: str = "logits",
                 prefix: str = "confusion_matrix",
                 version: str = "tnt",
                 class_names: List[str] = None,
                 num_classes: int = None,
                 plot_params: Dict = None):
        self.prefix = prefix
        self.output_key = output_key
        self.input_key = input_key

        assert version in ["tnt", "sklearn"]
        self._version = version
        self._plot_params = plot_params or {}

        self.class_names = class_names
        # Explicit class names win over a bare class count.
        self.num_classes = (
            len(class_names) if class_names is not None else num_classes
        )
        assert self.num_classes is not None

        self._reset_stats()

    @staticmethod
    def _get_tensorboard_logger(state: RunnerState) -> SummaryWriter:
        # @TODO: remove this hack, simplify state
        found = next(
            (lg for lg in state.loggers if isinstance(lg, TensorboardLogger)),
            None)
        if found is None:
            raise RuntimeError(
                f"Cannot find Tensorboard logger for loader {state.loader_name}")
        return found.loggers[state.loader_name]

    def _reset_stats(self):
        # Start a fresh accumulator for the configured backend.
        if self._version == "sklearn":
            self.outputs = []
            self.targets = []
        elif self._version == "tnt":
            self.confusion_matrix = ConfusionMeter(self.num_classes)

    def _add_to_stats(self, outputs, targets):
        if self._version == "sklearn":
            # Move to host memory, reduce logits to class ids, accumulate.
            predictions = np.argmax(outputs.cpu().numpy(), axis=1)
            self.outputs.extend(predictions)
            self.targets.extend(targets.cpu().numpy())
        elif self._version == "tnt":
            self.confusion_matrix.add(predicted=outputs, target=targets)

    def _compute_confusion_matrix(self):
        if self._version == "tnt":
            matrix = self.confusion_matrix.value()
        elif self._version == "sklearn":
            matrix = confusion_matrix_fn(y_true=self.targets,
                                         y_pred=self.outputs)
        return matrix

    def _plot_confusion_matrix(self,
                               logger,
                               epoch,
                               confusion_matrix,
                               class_names=None):
        figure = plot_confusion_matrix(confusion_matrix,
                                       class_names=class_names,
                                       normalize=True,
                                       show=False,
                                       **self._plot_params)
        logger.add_image(f"{self.prefix}/epoch",
                         render_figure_to_tensor(figure),
                         global_step=epoch)

    def on_loader_start(self, state: RunnerState):
        self._reset_stats()

    def on_batch_end(self, state: RunnerState):
        self._add_to_stats(state.output[self.output_key].detach(),
                           state.input[self.input_key].detach())

    def on_loader_end(self, state: RunnerState):
        if self.class_names:
            names = self.class_names
        else:
            names = [str(i) for i in range(self.num_classes)]
        self._plot_confusion_matrix(
            logger=self._get_tensorboard_logger(state),
            epoch=state.epoch,
            confusion_matrix=self._compute_confusion_matrix(),
            class_names=names)
class FasterRCNNTrainer(nn.Module):
    """Training wrapper around a Faster R-CNN model.

    forward() computes the four training losses (RPN loc/cls, RoI loc/cls)
    plus their sum, and returns them as a LossTuple.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # sigma hyper-parameters for the smooth-L1 localisation losses
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # produce ground-truth loc/label targets for anchors and sampled RoIs
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # loc normalisation constants taken from the network
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # confusion matrices: 2x2 for RPN fg/bg, 21x21 for the head
        # (presumably 20 foreground classes + background, VOC-style — confirm)
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        # one running-average meter per LossTuple field
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward the network on one image and return the LossTuple.

        Only batch size 1 is supported; `scale` is the preprocessing
        scaling factor applied to the raw image.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # backbone feature extraction
        features = self.faster_rcnn.extractor(imgs)

        # region proposal network: per-anchor offsets/scores plus proposals
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # batch size is one, so drop the batch dimension
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # subsample the proposals and build their loc/label training targets
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # all-zero image indices, since batch=1
        sample_roi_index = t.zeros(len(sample_roi))
        # RoI head: classify each sampled RoI and refine its box
        roi_cls_loc, roi_score = self.faster_rcnn.head(features,
                                                       sample_roi,
                                                       sample_roi_index)

        # ----- RPN losses: assign gt boxes to anchors, then loc + cls loss
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc,
                                           gt_rpn_loc,
                                           gt_rpn_label.data,
                                           self.rpn_sigma)

        # ignore_index=-1 skips anchors that were not sampled as targets
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # confusion-matrix bookkeeping over the sampled anchors only
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # (n_sample, n_class*4) -> (n_sample, n_class, 4); pick, per RoI,
        # the loc prediction of its ground-truth class
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        # smooth-L1 localisation loss for the head
        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(),
                                           gt_roi_loc,
                                           gt_roi_label.data,
                                           self.roi_sigma)

        # classification loss over classes + background
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        # four individual losses plus their sum appended as total_loss
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """One optimisation step: forward, backprop total_loss, update."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    # fold the latest LossTuple into the running average meters
    def update_meters(self, losses):
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    # reset the loss meters and both confusion matrices
    def reset_meters(self):
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    # current mean of every tracked loss, keyed by LossTuple field name
    def get_meter_data(self):
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Training wrapper around a Faster R-CNN model.

    forward() computes the four training losses (RPN loc/cls, RoI loc/cls)
    plus their sum, returned as a LossTuple; save()/load() handle
    checkpointing.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # sigma hyper-parameters for the smooth-L1 localisation losses
        self.rpn_sigma = 3.
        self.roi_sigma = 1.

        # produce ground-truth loc/label targets for anchors and sampled RoIs
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # confusion matrices: 2x2 for RPN fg/bg, 21x21 for the head
        # (presumably 20 foreground classes + background — confirm)
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward the network on one image and return the LossTuple.

        Only batch size 1 is supported; `scale` is the preprocessing
        scaling factor applied to the raw image.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        '''
        rpn_locs: [N, H*W*A, 4]
        rpn_scores: [N, H*W*A, 2]
        rois: [R, 4]
        roi_indices: which images [R, ]
        '''
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            features, img_size, scale)

        # batch size is one, so drop the batch dimension
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # subsample the proposals and build their loc/label training targets
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            to_numpy(bbox),
            to_numpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # all-zero image indices, since batch=1
        sample_roi_index = torch.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features,
                                                       sample_roi,
                                                       sample_roi_index)

        # ----- RPN losses: assign gt boxes to anchors, then loc + cls loss
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            to_numpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = to_tensor(gt_rpn_label).long()
        gt_rpn_loc = to_tensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc,
                                           gt_rpn_loc,
                                           gt_rpn_label.data,
                                           self.rpn_sigma)

        # ignore_index=-1 skips anchors that were not sampled as targets
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # confusion-matrix bookkeeping over the sampled anchors only
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = to_numpy(rpn_score)[to_numpy(gt_rpn_label) > -1]
        self.rpn_cm.add(to_tensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ----- RoI losses: pick per-RoI loc of the gt class, then loc + cls
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().cuda(),
                              to_tensor(gt_roi_label).long()]
        gt_roi_label = to_tensor(gt_roi_label).long()
        gt_roi_loc = to_tensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(),
                                           gt_roi_loc,
                                           gt_roi_label.data,
                                           self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(to_tensor(roi_score, False), gt_roi_label.data.long())

        # four individual losses plus their sum appended as total_loss
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """One optimisation step: forward, backprop total_loss, update."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model (and optionally the optimizer).

        Args:
            save_optimizer (bool): whether to save optimizer.state_dict().
            save_path (string): where to save the model; if None, a path is
                generated from a timestamp plus the kwargs values.

        Returns:
            save_path (str): the path the model was saved to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['other_info'] = kwargs
        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                # FIX: was `save += '_%s' % v_`, which referenced the method
                # name as a local and raised UnboundLocalError whenever
                # kwargs were passed; the suffix belongs on save_path.
                save_path += '_%s' % v_

        # make sure the target directory exists before writing
        save_dir = os.path.dirname(save_path)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        torch.save(save_dict, save_path)
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore weights (and optionally optimizer state) from path.

        NOTE(review): `parse_opt` is accepted for interface compatibility
        but unused in this variant.
        """
        state_dict = torch.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy checkpoints stored the bare model state_dict
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Push the latest loss values into the running average meters."""
        loss_d = {k: scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Zero the loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return the current mean of every tracked loss."""
        return {k: v.value()[0] for k, v in self.meters.items()}
def _reset_stats(self): if self._version == "tnt": self.confusion_matrix = ConfusionMeter(self.num_classes) elif self._version == "sklearn": self.outputs = [] self.targets = []