def forward(self, data):
    """Training-only forward pass for the bidirectional matching RPN.

    Computes classification/localization losses in both matching
    directions (template->search, "12", and search->template, "21")
    and averages them into a single weighted total loss.
    """
    template = data['template'].cuda()
    search = data['search'].cuda()
    template_box = data['template_box'].cuda()
    search_box = data['search_box'].cuda()
    # Direction 12: template -> search.
    label_cls12 = data['label_cls12'].cuda()
    label_loc12 = data['label_loc12'].cuda()
    label_loc_weight12 = data['label_loc_weight12'].cuda()
    # Direction 21: search -> template.
    label_cls21 = data['label_cls21'].cuda()
    label_loc21 = data['label_loc21'].cuda()
    label_loc_weight21 = data['label_loc_weight21'].cuda()

    # Backbone features followed by the adjustment neck.
    zf = self.neck(self.backbone(template))
    xf = self.neck(self.backbone(search))

    # Crop per-image ROI features around the annotated boxes
    # (one box per batch element after the split).
    template_box = torch.split(template_box, 1, dim=0)
    search_box = torch.split(search_box, 1, dim=0)
    if isinstance(zf, (list, tuple)):
        zf_crop = [self.roi_align(zi, template_box) for zi in zf]
        xf_crop = [self.roi_align(xi, search_box) for xi in xf]
    else:
        zf_crop = self.roi_align(zf, template_box)
        xf_crop = self.roi_align(xf, search_box)

    # The same RPN head serves both matching directions.
    cls12, loc12 = self.rpn_head(zf_crop, xf)
    cls21, loc21 = self.rpn_head(xf_crop, zf)

    # Per-direction losses.
    cls_loss12 = select_cross_entropy_loss(self.log_softmax(cls12), label_cls12)
    loc_loss12 = weight_l1_loss(loc12, label_loc12, label_loc_weight12)
    cls_loss21 = select_cross_entropy_loss(self.log_softmax(cls21), label_cls21)
    loc_loss21 = weight_l1_loss(loc21, label_loc21, label_loc_weight21)

    cls_loss = 0.5 * (cls_loss12 + cls_loss21)
    loc_loss = 0.5 * (loc_loss12 + loc_loss21)

    return {
        'total_loss': cfg.TRAIN.CLS_WEIGHT * cls_loss
                      + cfg.TRAIN.LOC_WEIGHT * loc_loss,
        'cls_loss': cls_loss,
        'loc_loss': loc_loss,
    }
def forward(self, data):
    """Training-only forward pass.

    Dispatches to the transformer head when cfg.TRANSFORMER.TRANSFORMER
    is set; otherwise runs the classic RPN (optionally mask) pipeline.
    """
    template = data['template'].cuda()
    search = data['search'].cuda()
    label_cls = data['label_cls'].type(torch.FloatTensor).cuda()
    label_loc = data['label_loc'].cuda()

    # Backbone features for template and search crops.
    zf = self.backbone(template)
    xf = self.backbone(search)

    if cfg.TRANSFORMER.TRANSFORMER:
        # Transformer path: only the deepest feature map is used, and the
        # criterion produces the full outputs dict itself.
        zf, xf = zf[-1], xf[-1]
        if cfg.ADJUST.ADJUST:
            zf = self.neck(zf)
            xf = self.neck(xf)
        output = self.tr_head(zf, xf)
        return self.criterion(output, (label_cls, label_loc))

    # Classic RPN path.
    label_loc_weight = data['label_loc_weight'].cuda()
    if cfg.MASK.MASK:
        # Keep the shallower search maps around for mask refinement.
        zf = zf[-1]
        self.xf_refine = xf[:-1]
        xf = xf[-1]
    if cfg.ADJUST.ADJUST:
        zf = self.neck(zf)
        xf = self.neck(xf)

    cls, loc = self.rpn_head(zf, xf)

    # Losses: masked cross-entropy over anchors plus weighted L1.
    cls = self.log_softmax(cls)
    cls_loss = select_cross_entropy_loss(cls, label_cls)
    loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)

    outputs = {
        'total_loss': cfg.TRAIN.CLS_WEIGHT * cls_loss
                      + cfg.TRAIN.LOC_WEIGHT * loc_loss,
        'cls_loss': cls_loss,
        'loc_loss': loc_loss,
    }

    if cfg.MASK.MASK:
        # TODO: mask loss is not implemented (mask_loss stays None).
        mask, self.mask_corr_feature = self.mask_head(zf, xf)
        mask_loss = None
        outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
        outputs['mask_loss'] = mask_loss
    return outputs
def forward(self, data):
    """Training-only forward pass for the anchor-grading variant.

    Combines three classification losses, an L1 + IoU localization loss,
    and a shape loss on the graded anchor map into one total loss.
    """
    template = data['template'].cuda()
    search = data['search'].cuda()
    bbox = data['bbox'].cuda()
    labelcls2 = data['label_cls2'].cuda()
    labelxff = data['labelxff'].cuda()
    weightcls3 = data['weightcls3'].cuda()
    labelcls3 = data['labelcls3'].cuda()
    weightxff = data['weightxff'].cuda()

    # Backbone yields a shallow and a deep feature per input.
    zf1, zf = self.backbone(template)
    xf1, xf = self.backbone(search)

    # Grade anchors from the shallow features, then derive training
    # targets for those anchors.
    xff, ress = self.grader(xf1, zf1)
    anchors = self.getcenter(xff)
    label_cls, label_loc, label_loc_weight = self.fin2.get(
        anchors, bbox, xff.size()[3])

    cls1, cls2, cls3, loc = self.new(xf, zf, ress)

    # Classification: three heads combined with configured weights.
    cls_loss1 = select_cross_entropy_loss(self.log_softmax(cls1), label_cls)
    cls_loss2 = select_cross_entropy_loss(self.log_softmax(cls2), labelcls2)
    cls_loss3 = self.cls3loss(cls3, labelcls3)
    cls_loss = (cfg.TRAIN.w3 * cls_loss3 + cfg.TRAIN.w1 * cls_loss1
                + cfg.TRAIN.w2 * cls_loss2)

    # Localization: weighted L1 plus IoU loss on the decoded boxes.
    loc_loss1 = weight_l1_loss(loc, label_loc, label_loc_weight)
    pre_bbox = self._convert_bbox(loc, anchors)
    label_bbox = self._convert_bbox(label_loc, anchors)
    loc_loss2 = self.IOULOSS(pre_bbox, label_bbox, label_loc_weight)
    loc_loss = cfg.TRAIN.w4 * loc_loss1 + cfg.TRAIN.w5 * loc_loss2

    # Shape loss on the graded anchor map.
    shapeloss = l1loss(xff, labelxff, weightxff)

    return {
        'total_loss': (cfg.TRAIN.LOC_WEIGHT * loc_loss
                       + cfg.TRAIN.CLS_WEIGHT * cls_loss
                       + cfg.TRAIN.SHAPE_WEIGHT * shapeloss),
        'cls_loss': cls_loss,
        'loc_loss': loc_loss,
        'shapeloss': shapeloss,
    }
def get_grads(self, cls_feas, loc_feas, label_cls, label_loc,
              label_loc_weight):
    """Compute gradients of the RPN loss w.r.t. the per-branch head inputs.

    Re-runs only the final conv of each branch's cls/loc head on detached
    copies of the features (weights also detached), builds the usual
    cls/loc loss, and backpropagates so that the only populated gradients
    are those of the detached feature leaves.

    Note: mutates `cls_feas` / `loc_feas` in place, replacing each entry
    with its detached, grad-tracking copy.

    Returns:
        (cls_grads, loc_grads): lists of scaled gradient tensors, one per
        branch, aligned with the input feature lists.
    """
    cls = []
    loc = []
    cls_lws, loc_lws = [], []
    # start=2: RPN modules are named rpn2, rpn3, ... to match the
    # backbone stages they consume.
    for idx, (cls_fea, loc_fea) in enumerate(zip(cls_feas, loc_feas), start=2):
        rpn = getattr(self, 'rpn' + str(idx))
        # Detach from the main graph and make the feature a leaf that
        # records gradients; backward() below stops at these leaves.
        cls_fea = cls_fea.data.detach()
        cls_fea.requires_grad = True
        c = F.conv2d(cls_fea, weight=rpn.cls.last_weights.detach(),
                     bias=rpn.cls.last_bias.detach())
        loc_fea = loc_fea.data.detach()
        loc_fea.requires_grad = True
        l = F.conv2d(loc_fea, weight=rpn.loc.last_weights.detach(),
                     bias=rpn.loc.last_bias.detach())
        cls.append(c)
        loc.append(l)
        # Store the leaf tensors back so their .grad can be read later.
        cls_feas[idx - 2] = cls_fea
        loc_feas[idx - 2] = loc_fea
        clw = rpn.cls.layer_weight
        llw = rpn.loc.layer_weight
        cls_lws.append(clw)
        loc_lws.append(llw)
    if self.weighted:
        # Branch-fusion weights; detached so backward() only reaches the
        # feature leaves above.
        cls_weight = F.softmax(
            self.cls_weight + torch.cat(cls_lws).detach(), 0)
        loc_weight = F.softmax(
            self.loc_weight + torch.cat(loc_lws).detach(), 0)
    if self.weighted:
        cls, loc = self.weighted_avg(cls, cls_weight), self.weighted_avg(
            loc, loc_weight)
    else:
        cls, loc = self.avg(cls), self.avg(loc)
    # get loss
    cls = self.log_softmax(cls)
    cls_loss = select_cross_entropy_loss(cls, label_cls)
    loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)
    loss = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
        cfg.TRAIN.LOC_WEIGHT * loc_loss
    loss.backward()
    cls_grads = []
    loc_grads = []
    # Gradients scaled by 1e4 — presumably to keep magnitudes in a usable
    # range for the consumer; TODO confirm against the caller.
    for idx, (cls_fea, loc_fea) in enumerate(zip(cls_feas, loc_feas)):
        cls_grads.append(cls_fea.grad.data.detach() * 10000)
        loc_grads.append(loc_fea.grad.data.detach() * 10000)
    return cls_grads, loc_grads
def forward(self, data):
    """Training-only forward pass: RPN head or plain correlation.

    When cfg.RPN.RPN is set, uses the RPN head with cls+loc losses;
    otherwise falls back to a SiamFC-style cross-correlation classifier.
    """
    template = data['template'].cuda()
    search = data['search'].cuda()
    label_cls = data['label_cls'].cuda()
    label_loc = data['label_loc'].cuda()
    label_loc_weight = data['label_loc_weight'].cuda()

    zf = self.backbone(template)
    xf = self.backbone(search)
    if cfg.MASK.MASK:
        # Shallower search maps are kept for mask refinement.
        zf = zf[-1]
        self.xf_refine = xf[:-1]
        xf = xf[-1]
    if cfg.ADJUST.ADJUST:
        zf = self.neck(zf)
        xf = self.neck(xf)

    if cfg.RPN.RPN:
        cls, loc = self.rpn_head(zf, xf)
        cls_loss = select_cross_entropy_loss(self.log_softmax(cls), label_cls)
        loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)
        outputs = {
            'total_loss': cfg.TRAIN.CLS_WEIGHT * cls_loss
                          + cfg.TRAIN.LOC_WEIGHT * loc_loss,
            'cls_loss': cls_loss,
            'loc_loss': loc_loss,
        }
    else:
        # Batched cross-correlation via grouped conv: each search map is
        # correlated with its own template kernel, then scaled and biased.
        b, _, h, w = xf.size()
        cls = F.conv2d(xf.view(1, -1, h, w), zf, groups=b) * 1e-3 \
            + self.backbone.corr_bias
        cls = cls.transpose(0, 1)
        cls_loss = select_cross_entropy_loss(self.log_softmax(cls), label_cls)
        outputs = {'total_loss': cls_loss}

    if cfg.MASK.MASK:
        # TODO: mask loss is not implemented (mask_loss stays None).
        mask, self.mask_corr_feature = self.mask_head(zf, xf)
        mask_loss = None
        outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
        outputs['mask_loss'] = mask_loss
    return outputs
def forward(self, data):
    """Training-only forward pass; YOLO-style head or classic RPN."""
    template = data['template'].cuda()
    search = data['search'].cuda()
    label_cls = data['label_cls'].cuda()
    label_loc = data['label_loc'].cuda()
    label_loc_weight = data['label_loc_weight'].cuda()
    label = data['label'].cuda()

    # Backbone features.
    zf = self.backbone(template)
    xf = self.backbone(search)
    if cfg.MASK.MASK:
        # Shallower search maps kept for mask refinement.
        zf = zf[-1]
        self.xf_refine = xf[:-1]
        xf = xf[-1]
    if cfg.ADJUST.ADJUST:
        zf = self.neck(zf)
        xf = self.neck(xf)

    if self.use_yolo:
        # The YOLO-style head computes its losses internally.
        loss, loss_cls, loss_l2 = self.rpn_head(zf, xf, label)
        outputs = {'total_loss': loss,
                   'cls_loss': loss_cls,
                   'loc_loss': loss_l2}
    else:
        cls, loc = self.rpn_head(zf, xf)
        cls_loss = select_cross_entropy_loss(self.log_softmax(cls), label_cls)
        loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)
        outputs = {
            'total_loss': cfg.TRAIN.CLS_WEIGHT * cls_loss
                          + cfg.TRAIN.LOC_WEIGHT * loc_loss,
            'cls_loss': cls_loss,
            'loc_loss': loc_loss,
        }

    if cfg.MASK.MASK:
        # TODO: mask loss is not implemented (mask_loss stays None).
        mask, self.mask_corr_feature = self.mask_head(zf, xf)
        mask_loss = None
        outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
        outputs['mask_loss'] = mask_loss
    return outputs
def forward(self, data, epsilon):
    """Training-only forward pass with an additive perturbation applied to
    the search image.

    Fixes: removed a leftover `import pdb; pdb.set_trace()` breakpoint
    that halted every training step, and the dead score/argmax
    computations whose results were never used.

    Args:
        data: batch dict with 'template', 'search' and the RPN labels.
        epsilon: accepted for interface compatibility; not used here.

    Returns:
        dict with the perturbed 'search', 'total_loss', 'cls_loss',
        'loc_loss'.
    """
    template = data['template']
    search = data['search']
    label_cls = data['label_cls']
    label_loc = data['label_loc']
    label_loc_weight = data['label_loc_weight']

    # get feature
    zf = self.backbone(template)
    # Perturb the search region with the (scaled) noise-generator output.
    search = search + 0.1 * self.cn(search)
    xf = self.backbone(search)
    if cfg.ADJUST.ADJUST:
        zf = self.neck(zf)
        xf = self.neck(xf)
    cls, loc = self.rpn_head(zf, xf)

    # get loss
    cls = self.log_softmax(cls)
    cls_loss = select_cross_entropy_loss(cls, label_cls)
    loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)

    outputs = {'search': search, 'cls_loss': cls_loss}
    outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
        cfg.TRAIN.LOC_WEIGHT * loc_loss
    outputs['loc_loss'] = loc_loss
    return outputs
def forward(self, data):
    """Training-only forward pass (variant for a different training set).

    Runs backbone -> optional neck -> RPN head and returns the weighted
    classification + localization loss.
    """
    template = data['template'].cuda()
    search = data['search'].cuda()
    label_cls = data['label_cls'].cuda()
    label_loc = data['label_loc'].cuda()
    label_loc_weight = data['label_loc_weight'].cuda()

    # Backbone features.
    zf = self.backbone(template)
    xf = self.backbone(search)
    if cfg.MASK.MASK:
        # Only the deepest map feeds the heads; shallower search maps are
        # kept for mask refinement.
        zf = zf[-1]
        self.xf_refine = xf[:-1]
        xf = xf[-1]
    if cfg.ADJUST.ADJUST:
        # Downsample/adjust layer.
        zf = self.neck(zf)
        xf = self.neck(xf)

    cls, loc = self.rpn_head(zf, xf)

    # Masked cross-entropy over anchors plus weighted L1 regression.
    cls_loss = select_cross_entropy_loss(self.log_softmax(cls), label_cls)
    loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)

    outputs = {
        'total_loss': cfg.TRAIN.CLS_WEIGHT * cls_loss
                      + cfg.TRAIN.LOC_WEIGHT * loc_loss,
        'cls_loss': cls_loss,
        'loc_loss': loc_loss,
    }
    if cfg.MASK.MASK:
        # No mask loss yet (placeholder); correlation feature cached on
        # self for the refinement stage.
        mask, self.mask_corr_feature = self.mask_head(zf, xf)
        mask_loss = None
        outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
        outputs['mask_loss'] = mask_loss
    return outputs
def forward(self, data): """ only used in training 对应到特征图上每个anchor的信息: , , overlap(正样本和所有anchor的IOU) """ #如果不使用gru,对于模板和搜索区域均只在单帧上提取信息 if not cfg.GRU.USE_GRU: template = data['template'].cuda() search = data['search'].cuda() label_cls = data['label_cls'].cuda() #cls(此anchor是正样本:1、负样本:0、忽略:-1 label_loc = data['label_loc'].cuda() #delta(正样本框相对于anchor的编码偏移量 label_loc_weight = data['label_loc_weight'].cuda() #正样本对应的那些anchor的权重,其他位置为0 # get feature zf = self.backbone(template) xf = self.backbone(search) #如果使用gru,模板需要在前t帧中累积提取,搜索区域只在最后一帧中提取 else: zfs = [None] * self.grus.seq_in_len # 多帧模板图z的特征f for i in range(self.grus.seq_in_len): # 每个data[i]中包含的信息为 'template','search','label_cls','label_loc','label_loc_weight','t_bbox','s_bbox''neg' zfs[i] = self.backbone(data[i]["template"].cuda()) zfs=torch.stack(zfs,dim=1) #将输入变为[n,t,c,h,w]的形式 zf =self.grus(zfs).squeeze() #grus输出为[n,1,c,h,w]的形式转化为【n,c,h,w】的形式 #搜索区域只需要取模板序列组输入完成后的下一帧搜索区域图像就可以 xf = self.backbone(data[self.grus.seq_in_len]["search"].cuda()) #-------------------------------特征提取并行化----------------------------------------------------- # batch, _, _, _ = data[0]["template"].shape # zfs = [None] * (self.grus.seq_in_len) # 多帧模板图z的特征f # for i in range(self.grus.seq_in_len): # # 每个data[i]中包含的信息为 'template','search','label_cls','label_loc','label_loc_weight','t_bbox','s_bbox''neg' # zfs[i] = data[i]["template"] # # #连续t个序列在batch层面上并行,加快计算速度 # zfs = self.backbone( torch.cat(zfs,dim=0).cuda()) # zfs =zfs.reshape(self.grus.seq_in_len, batch, self.grus.input_channels, self.grus.input_height, self.grus.input_width) # zfs =zfs.permute(1, 0, 2, 3, 4).contiguous() # # # zf =self.grus(zfs).squeeze() #grus输出为[n,1,c,h,w]的形式转化为【n,c,h,w】的形式 # # #搜索区域只需要取模板序列组输入完成后的下一帧搜索区域图像就可以 # xf = self.backbone(data[self.grus.seq_in_len]["search"].cuda()) # ------------------------------------------------------------------------------------ # 标签信息的提取方式和搜索区域的提取保持同步 label_cls = data[self.grus.seq_in_len]['label_cls'].cuda() #cls(此anchor是正样本:1、负样本:0、忽略:-1 
label_loc = data[self.grus.seq_in_len]['label_loc'].cuda() #delta(正样本框相对于anchor的编码偏移量 label_loc_weight = data[self.grus.seq_in_len]['label_loc_weight'].cuda() #正样本对应的那些anchor的权重,其他位置为0 if cfg.MASK.MASK: #siamese mask zf = zf[-1] self.xf_refine = xf[:-1] xf = xf[-1] if cfg.ADJUST.ADJUST: #siamese rpn++ zf = self.neck(zf) xf = self.neck(xf) cls, loc = self.rpn_head(zf, xf) #rpn相关计算 # get loss cls_log = self.log_softmax(cls) #softmax之后在log,将【0,1】之间的概率拉到【-inf,0】之间,后面紧接着的应该使用nlloss, 其中softmax+log+nllloss 等价于CrossEntropyLoss,这里之所以要拆解开的原因是我们需要按照anchor的mask来计算损失 cls_loss = select_cross_entropy_loss(cls_log, label_cls) loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight) outputs = {} outputs['total_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \ cfg.TRAIN.LOC_WEIGHT * loc_loss outputs['cls_loss'] = cls_loss*cfg.GRU.NONE_GRU_LR_COFF outputs['loc_loss'] = loc_loss*cfg.GRU.NONE_GRU_LR_COFF # 是否计算GRU预测特征的损失 if cfg.GRU.FeatLoss: zf_gt = self.backbone(data[self.grus.seq_in_len]["template"].cuda()) feat_loss=weight_feat_loss(zf, zf_gt, data[self.grus.seq_in_len]["t_bbox"]) outputs['total_loss'] += cfg.TRAIN.FEAT_WEIGHT * feat_loss outputs['feat_loss'] =feat_loss #传出去tensorboard监视看 outputs['zf_gt'] = zf_gt outputs['zf'] = zf outputs['zfs'] = zfs if cfg.MASK.MASK: # TODO mask, self.mask_corr_feature = self.mask_head(zf, xf) mask_loss = None outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss outputs['mask_loss'] = mask_loss*cfg.GRU.NONE_GRU_LR_COFF if data[0]['iter']%cfg.TRAIN.ShowPeriod==0: #截断梯度,只取数据 locd = loc.detach() clsd = cls.detach() outputs['box_img'] =self.show_result(clsd, locd, data[self.grus.seq_in_len]["search"]) return outputs
def forward(self, data):
    """Training-only forward pass.

    Uses the SiamCAR head (cls/loc/centerness) when cfg.TRACK.TYPE is
    'SiamCARTracker', otherwise the classic RPN (optionally mask) path.
    """
    template = data['template'].cuda()
    search = data['search'].cuda()
    label_cls = data['label_cls'].cuda()

    if cfg.TRACK.TYPE == 'SiamCARTracker':
        label_loc = data['bbox'].cuda()  # CAR head regresses the raw bbox

        zf = self.backbone(template)
        xf = self.backbone(search)
        if cfg.ADJUST.ADJUST:
            zf = self.neck(zf)
            xf = self.neck(xf)

        # Depthwise cross-correlation per level, concatenated on the
        # channel axis and reduced by the down layer.
        features = self.xcorr_depthwise(xf[0], zf[0])
        for i in range(len(xf) - 1):
            features_new = self.xcorr_depthwise(xf[i + 1], zf[i + 1])
            features = torch.cat([features, features_new], 1)
        features = self.down(features)

        cls, loc, cen = self.car_head(features)
        locations = compute_locations(cls, cfg.TRACK.STRIDE)
        cls = self.log_softmax(cls)
        cls_loss, loc_loss, cen_loss = self.loss_evaluator(
            locations, cls, loc, cen, label_cls, label_loc)
        return {
            'total_loss': (cfg.TRAIN.CLS_WEIGHT * cls_loss
                           + cfg.TRAIN.LOC_WEIGHT * loc_loss
                           + cfg.TRAIN.CEN_WEIGHT * cen_loss),
            'cls_loss': cls_loss,
            'loc_loss': loc_loss,
            'cen_loss': cen_loss,
        }

    # Classic RPN path.
    label_loc = data['label_loc'].cuda()
    label_loc_weight = data['label_loc_weight'].cuda()

    zf = self.backbone(template)
    xf = self.backbone(search)
    if cfg.MASK.MASK:
        # Shallower search maps kept for mask refinement.
        zf = zf[-1]
        self.xf_refine = xf[:-1]
        xf = xf[-1]
    if cfg.ADJUST.ADJUST:
        zf = self.neck(zf)
        xf = self.neck(xf)

    cls, loc = self.rpn_head(zf, xf)
    cls_loss = select_cross_entropy_loss(self.log_softmax(cls), label_cls)
    loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)
    outputs = {
        'total_loss': cfg.TRAIN.CLS_WEIGHT * cls_loss
                      + cfg.TRAIN.LOC_WEIGHT * loc_loss,
        'cls_loss': cls_loss,
        'loc_loss': loc_loss,
    }
    if cfg.MASK.MASK:
        # TODO: mask loss is not implemented (mask_loss stays None).
        mask, self.mask_corr_feature = self.mask_head(zf, xf)
        mask_loss = None
        outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
        outputs['mask_loss'] = mask_loss
    return outputs
def forward(self, data):
    """Training-only forward pass for the latent-weight RPN variant.

    Returns:
        (outputs, features): `outputs` holds 'inner_loop_loss',
        'cls_loss', 'loc_loss' (and 'out_loop_loss' when cfg.LATENTS is
        truthy); `features` holds the raw cls/loc head features, or None
        entries when cfg.LATENT is off.
    """
    template = data['template'].cuda()
    search = data['search'].cuda()
    label_cls = data['label_cls'].cuda()
    label_loc = data['label_loc'].cuda()
    label_loc_weight = data['label_loc_weight'].cuda()
    bbox = data['bbox']  # NOTE(review): loaded but unused here — confirm whether it is needed

    # get feature
    zf = self.backbone(template)
    xf = self.backbone(search)
    if cfg.MASK.MASK:
        zf = zf[-1]
        self.xf_refine = xf[:-1]
        xf = xf[-1]
    if cfg.ADJUST.ADJUST:
        zf = self.neck(zf)
        xf = self.neck(xf)

    # BUG FIX: cls_features/loc_features were referenced unconditionally
    # below but were only bound inside the cfg.LATENT branch, raising
    # NameError whenever cfg.LATENT is falsy. Default them to None.
    cls_features = loc_features = None
    if cfg.LATENT:
        cls_features, loc_features = self.rpn_head(zf, xf)
        if cfg.LATENTS.NEW_LABEL:
            label_cls = self.get_new_label_cls(cls_features, loc_features,
                                               label_cls)
        kl = self.rpn_head.update_weights(cls_features, loc_features,
                                          label_cls)
        cls, loc = self.rpn_head.get_cls_loc(cls_features, loc_features)
    else:
        cls, loc = self.rpn_head(zf, xf)

    # get loss
    cls = self.log_softmax(cls)
    cls_loss = select_cross_entropy_loss(cls, label_cls)
    loc_loss = weight_l1_loss(loc, label_loc, label_loc_weight)

    features = {'cls': cls_features, 'loc': loc_features}

    outputs = {}
    outputs['inner_loop_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
        cfg.TRAIN.LOC_WEIGHT * loc_loss  # + 0.001*kl
    outputs['cls_loss'] = cls_loss
    outputs['loc_loss'] = loc_loss
    # NOTE(review): `cfg.LATENT` and `cfg.LATENTS` are both used as flags
    # in this function — confirm which config key is intended.
    if cfg.LATENTS:
        outputs['out_loop_loss'] = cfg.TRAIN.CLS_WEIGHT * cls_loss + \
            cfg.TRAIN.LOC_WEIGHT * loc_loss

    if cfg.MASK.MASK:  # TODO
        mask, self.mask_corr_feature = self.mask_head(zf, xf)
        mask_loss = None
        # NOTE(review): outputs has no 'total_loss' key and mask_loss is
        # None — this branch raises as written; confirm intended behavior.
        outputs['total_loss'] += cfg.TRAIN.MASK_WEIGHT * mask_loss
        outputs['mask_loss'] = mask_loss
    return outputs, features