def forward(self, x, targets=None):
    """Run the input through every configured module and gather the yolo outputs.

    Args:
        x: Input tensor; ``x.shape[2]`` is forwarded to the yolo layers as the
            image dimension.
        targets: Optional ground truth handed to each yolo layer.

    Returns:
        The concatenated yolo outputs (passed through ``to_cpu``) when
        ``targets`` is ``None``; otherwise ``(loss, outputs)`` where ``loss`` is
        the sum of the per-layer losses.
    """
    img_dim = x.shape[2]
    total_loss = 0
    feature_maps = []
    detections = []

    for module_def, module in zip(self.module_defs, self.module_list):
        kind = module_def["type"]
        if kind in ["convolutional", "upsample", "maxpool"]:
            x = module(x)
        elif kind == "route":
            # Concatenate the referenced earlier outputs along dimension 1.
            sources = [
                feature_maps[int(layer_i)]
                for layer_i in module_def["layers"].split(",")
            ]
            x = torch.cat(sources, 1)
        elif kind == "shortcut":
            # Add the previous output to the one referenced by "from".
            x = feature_maps[-1] + feature_maps[int(module_def["from"])]
        elif kind == "yolo":
            x, layer_loss = module[0](x, targets, img_dim)
            total_loss += layer_loss
            detections.append(x)
        # Every module's result is recorded so later route/shortcut entries
        # can index into it.
        feature_maps.append(x)

    detections = to_cpu(torch.cat(detections, 1))
    if targets is None:
        return detections
    return total_loss, detections
def forward(self, x, targets=None):
    """Evaluate the module list in order and collect the yolo-layer outputs.

    Args:
        x: Input tensor; its size along dimension 2 is passed to the yolo
            layers as ``img_dim``.
        targets: Optional ground truth forwarded to each yolo layer.

    Returns:
        The yolo outputs concatenated along dimension 1 and passed through
        ``to_cpu`` when ``targets`` is ``None``; otherwise the tuple
        ``(loss, outputs)``.
    """
    img_dim = x.shape[2]
    loss = 0
    outputs_per_layer = []
    yolo_results = []

    for spec, layer in zip(self.module_defs, self.module_list):
        layer_type = spec["type"]
        if layer_type == "yolo":
            x, layer_loss = layer[0](x, targets, img_dim)
            loss += layer_loss
            yolo_results.append(x)
        elif layer_type == "shortcut":
            skip_index = int(spec["from"])
            x = outputs_per_layer[-1] + outputs_per_layer[skip_index]
        elif layer_type == "route":
            # Plain concatenation of the referenced layers.
            # NOTE: an alternative that returned a single referenced layer
            # directly and fell back to F.interpolate(scale_factor=[0.5, 0.5])
            # on a failed concatenation was sketched here but is not active.
            picked = []
            for layer_i in spec["layers"].split(","):
                picked.append(outputs_per_layer[int(layer_i)])
            x = torch.cat(picked, 1)
        elif layer_type in ["convolutional", "upsample", "maxpool"]:
            x = layer(x)
        outputs_per_layer.append(x)

    yolo_results = to_cpu(torch.cat(yolo_results, 1))
    if targets is not None:
        return loss, yolo_results
    return yolo_results
def forward(self, x, targets=None):
    """Feed ``x`` through the configured modules and return the yolo outputs.

    Args:
        x: Input tensor; ``x.shape[2]`` is handed to each yolo layer.
        targets: Optional ground truth; when given, per-layer losses are summed.

    Returns:
        ``to_cpu`` of the yolo outputs concatenated along dimension 1, or
        ``(loss, outputs)`` when ``targets`` is provided.
    """
    img_dim = x.shape[2]
    accumulated_loss = 0
    history = []
    heads = []

    for definition, block in zip(self.module_defs, self.module_list):
        block_type = definition['type']

        if block_type in ['convolutional', 'upsample', 'maxpool']:
            x = block(x)
        elif block_type == 'route':
            indices = definition['layers'].split(',')
            x = torch.cat([history[int(layer_i)] for layer_i in indices], 1)
        elif block_type == 'shortcut':
            residual = history[int(definition['from'])]
            x = history[-1] + residual
        elif block_type == 'yolo':
            x, layer_loss = block[0](x, targets, img_dim)
            accumulated_loss += layer_loss
            heads.append(x)

        # Keep each module's output for later route/shortcut lookups.
        history.append(x)

    merged = to_cpu(torch.cat(heads, 1))
    return merged if targets is None else (accumulated_loss, merged)
def forward(self, x, targets=None):
    """Run all modules sequentially and return the collected yolo outputs.

    Args:
        x: Input tensor. Only ``x.shape[2]`` is read as the image dimension
            (the original author notes the input is assumed square).
        targets: Optional ground truth forwarded to the yolo layers.

    Returns:
        The yolo outputs concatenated along dimension 1 (via ``to_cpu``) when
        ``targets`` is ``None``, else ``(loss, outputs)``.
    """
    # Depends on the input image size; only the height is considered.
    img_dim = x.shape[2]
    loss = 0
    saved_outputs = []
    yolo_outputs = []

    pass_through_types = ["convolutional", "upsample", "maxpool"]
    for module_def, module in zip(self.module_defs, self.module_list):
        module_type = module_def["type"]
        if module_type in pass_through_types:
            x = module(x)
        elif module_type == "route":
            routed = [
                saved_outputs[int(layer_i)]
                for layer_i in module_def["layers"].split(",")
            ]
            x = torch.cat(routed, 1)
        elif module_type == "shortcut":
            layer_i = int(module_def["from"])
            x = saved_outputs[-1] + saved_outputs[layer_i]
        elif module_type == "yolo":
            # Original note: indices [82, 94, 106] for yolov3.
            # The module is indexed with [0] to reach the yolo layer itself.
            yolo_layer = module[0]
            x, layer_loss = yolo_layer(x, targets, img_dim)
            loss += layer_loss
            yolo_outputs.append(x)
        # Store the output of every block.
        saved_outputs.append(x)

    # Only the yolo layers' outputs are returned.
    yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
    if targets is None:
        return yolo_outputs
    return (loss, yolo_outputs)
test_embed1 = DataLoadDf(df_test_embed1, encode_function_label, transform=Compose(trans_embedding)) test_embed_loader1 = DataLoader(test_embed1, batch_size=batch_size_classif, shuffle=False, num_workers=num_workers, drop_last=False) test_df10 = dfs["test10"] test_dl10 = DataLoadDf(test_df10, encode_function_label, transform=Compose(trans_fr_sc_embed)) embed_set10 = "final_test10" test_embed_dir10 = os.path.join(embed_dir, embed_set10) df_test_embed10, _ = calculate_embedding(test_dl10, model_triplet, savedir=test_embed_dir10, concatenate="append") test_embed10 = DataLoadDf(df_test_embed10, encode_function_label, transform=Compose(trans_embedding)) test_embed_loader10 = DataLoader(test_embed10, batch_size=batch_size_classif, shuffle=False, num_workers=num_workers, drop_last=False) model_triplet = to_cpu(model_triplet) classif_model = to_cuda_if_available(classif_model) classif_model.eval() mean_test_results1 = measure_classif(classif_model, test_embed_loader1, classes=classes, suffix_print="test1") mean_test_results10 = measure_classif(classif_model, test_embed_loader10, classes=classes, suffix_print="test10") print(f"Time of the program: {time.time() - t}") from orion.client import report_results report_results( [dict(
def forward(self, x, targets=None, img_dim=None):
    """Decode raw detection-head activations and optionally compute the loss.

    Args:
        x: Head activations; reshaped to
            (batch, num_anchors, num_classes + 5, grid, grid), with the grid
            size read from ``x.size(2)``.
        targets: Ground truth forwarded to ``build_targets``. When ``None``
            only decoded predictions are returned.
        img_dim: Stored on ``self.img_dim``; not otherwise used here.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``; otherwise
        ``(output, total_loss)``. ``output`` has shape
        (batch, num_anchors * grid * grid, 5 + num_classes) with the four box
        values multiplied by ``self.stride``.

    Side effects:
        Sets ``self.img_dim`` and, on the training path, ``self.metrics``.
    """
    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    # Reorder so the per-box attributes are the last axis:
    # (batch, anchors, grid, grid, num_classes + 5),
    # e.g. (1, 3, 13, 13, 85) for a COCO-sized head per the original notes.
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()  # fixed spelling; ``contigous`` raised AttributeError
    )

    # Get outputs
    x = torch.sigmoid(prediction[..., 0])  # sigmoid of the x offset
    y = torch.sigmoid(prediction[..., 1])  # sigmoid of the y offset
    w = prediction[..., 2]
    h = prediction[..., 3]
    pred_conf = torch.sigmoid(prediction[..., 4])
    pred_cls = torch.sigmoid(prediction[..., 5:])

    # If the grid size does not match the cached one, compute new offsets.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Add cell offsets and scale with anchors. The buffer is allocated on the
    # same device/dtype as the predictions; values are detached so the decoded
    # boxes do not take part in back-propagation.
    pred_boxes = torch.empty(
        prediction[..., :4].shape, dtype=prediction.dtype, device=prediction.device
    )
    pred_boxes[..., 0] = x.detach() + self.grid_x
    pred_boxes[..., 1] = y.detach() + self.grid_y
    pred_boxes[..., 2] = torch.exp(w.detach()) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.detach()) * self.anchor_h

    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )

    # Loss: mask outputs to ignore non-existing objects (except with conf. loss)
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    # The 1e-16 terms guard against division by zero.
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss
def forward(self, x, targets=None, img_dim=None):
    """Decode detection-head activations and optionally compute the loss.

    Args:
        x: Head activations; reshaped to
            (batch, num_anchors, num_classes + 5, grid, grid).
        targets: Ground truth forwarded to ``build_targets``; ``None`` selects
            the inference path.
        img_dim: Stored on ``self.img_dim``.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``; otherwise
        ``(output, total_loss)``. When no cell is marked as containing an
        object, ``total_loss`` is only the scaled no-object confidence loss
        and ``self.metrics`` is not updated.
    """
    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # Get outputs
    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

    # If grid size does not match current we compute new offsets
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Add offset and scale with anchors (detached: boxes carry no gradient).
    pred_boxes = torch.empty(
        prediction[..., :4].shape, dtype=prediction.dtype, device=prediction.device
    )
    pred_boxes[..., 0] = x.detach() + self.grid_x
    pred_boxes[..., 1] = y.detach() + self.grid_y
    pred_boxes[..., 2] = torch.exp(w.detach()) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.detach()) * self.anchor_h

    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )

    # The confidence target is derived from the object mask. It has to exist
    # before the early return below, which previously read it while unbound.
    tconf = obj_mask.float()

    if not obj_mask.any():
        total_loss = self.noobj_scale * self.bce_loss(
            pred_conf[noobj_mask], tconf[noobj_mask]
        )
        return output, total_loss

    # Loss: mask outputs to ignore non-existing objects (except with conf. loss)
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss
def forward(self, x, targets=None, img_dim=None):
    """Decode detection-head activations, log tensor sizes, optionally compute loss.

    Args:
        x: Head activations; reshaped to
            (batch, num_anchors, num_classes + 5, grid, grid).
        targets: Ground truth forwarded to ``build_targets``; ``None`` selects
            the inference path.
        img_dim: Stored on ``self.img_dim``.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``; otherwise
        ``(output, total_loss)``.

    Side effects:
        Emits three ``logger.info`` records per call, sets ``self.img_dim``
        and, on the training path, ``self.metrics``.
    """
    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)
    logger.info(
        f"YOLOLayer input: {x.size(0)}, {x.size(1)}, {x.size(2)}, {x.size(3)}"
    )

    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )
    logger.info(
        f"After resize, prediction: {prediction.size(0)}, {prediction.size(1)}, {prediction.size(2)}, {prediction.size(3)}, {prediction.size(4)}"
    )

    # Get outputs
    x = torch.sigmoid(prediction[..., 0])
    y = torch.sigmoid(prediction[..., 1])
    w = prediction[..., 2]
    h = prediction[..., 3]
    pred_conf = torch.sigmoid(prediction[..., 4])
    pred_cls = torch.sigmoid(prediction[..., 5:])

    # If grid size does not match current we compute new offsets
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Add offset and scale with anchors (detached: boxes carry no gradient).
    pred_bboxes = torch.empty(
        prediction[..., :4].shape, dtype=prediction.dtype, device=prediction.device
    )
    pred_bboxes[..., 0] = x.detach() + self.grid_x
    pred_bboxes[..., 1] = y.detach() + self.grid_y
    # Multiply by the (scaled) anchor width / height.
    pred_bboxes[..., 2] = torch.exp(w.detach()) * self.anchor_w
    pred_bboxes[..., 3] = torch.exp(h.detach()) * self.anchor_h

    output = torch.cat(
        (
            pred_bboxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )
    logger.info(
        f"YOLOLayer output: {output.size(0)}, {output.size(1)}, {output.size(2)}\n"
    )

    if targets is None:
        return output, 0

    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_bboxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )

    # Loss: mask outputs to ignore non-existing objects (except with conf loss).
    # Box terms use MSE on the raw (undecoded) values.
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])  # was ``self.mse.loss``
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])  # was ``self.mse.loss``
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    # Confidence uses BCE; object and no-object terms are weighted separately.
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    # Classification BCE.
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    # Total = coordinate + confidence + classification losses.
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    # Elementwise product of: confidence > 0.5, class_mask, and tconf.
    detected_mask = conf50 * class_mask * tconf
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss
def forward(self, x, targets=None, img_dim=None):
    """Turn detection-head activations into box predictions and, if asked, a loss.

    Args:
        x: Head activations; viewed as
            (batch, num_anchors, num_classes + 5, grid, grid). The original
            notes mention (N, 255, 13, 13), (N, 255, 26, 26) and
            (N, 255, 52, 52) as the three expected inputs.
        targets: Ground truth forwarded to ``build_targets``; ``None`` means
            inference only.
        img_dim: Stored on ``self.img_dim``.

    Returns:
        ``(output, 0)`` without targets, else ``(output, total_loss)``.
    """
    # Tensor type for the decoded boxes, chosen from the input's device.
    if x.is_cuda:
        box_tensor_type = torch.cuda.FloatTensor
    else:
        box_tensor_type = torch.FloatTensor

    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    # Split the channel axis into (anchors, attributes), then move the
    # attributes last: (N, num_anchors, grid, grid, num_classes + 5).
    reshaped = x.view(
        num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size
    )
    prediction = reshaped.permute(0, 1, 3, 4, 2).contiguous()

    # Get outputs
    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

    # Recompute the cached offsets when the grid size changed.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Add offset and scale with anchors
    pred_boxes = box_tensor_type(prediction[..., :4].shape)
    pred_boxes[..., 0] = x.data + self.grid_x
    pred_boxes[..., 1] = y.data + self.grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

    scaled_boxes = pred_boxes.view(num_samples, -1, 4) * self.stride
    flat_conf = pred_conf.view(num_samples, -1, 1)
    flat_cls = pred_cls.view(num_samples, -1, self.num_classes)
    output = torch.cat((scaled_boxes, flat_conf, flat_cls), -1)

    if targets is None:
        return output, 0

    # Meaning of the returned tensors, per the original author's notes
    # (not verifiable from this block):
    #   iou_scores: IoU between predicted and true boxes where objects exist
    #   class_mask: whether the predicted class is correct at those positions
    #   obj_mask:   best-IoU anchor positions in cells that contain an object
    #   noobj_mask: positions that are neither obj_mask nor above the ignore
    #               threshold
    #   tx, ty, tw, th: target centre coordinates and sizes
    #   tcls: class labels;  tconf: obj_mask.float()
    (iou_scores, class_mask, obj_mask, noobj_mask,
     tx, ty, tw, th, tcls, tconf) = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )

    # Loss: mask outputs to ignore non-existing objects (except with conf. loss)
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()  # class accuracy in percent
    conf_obj = pred_conf[obj_mask].mean()  # mean confidence at object positions
    conf_noobj = pred_conf[noobj_mask].mean()  # mean confidence at no-object positions
    conf50 = (pred_conf > 0.5).float()  # confidence above 0.5
    iou50 = (iou_scores > 0.5).float()  # IoU above 0.5
    iou75 = (iou_scores > 0.75).float()  # IoU above 0.75
    detected_mask = conf50 * class_mask * tconf
    object_count = obj_mask.sum() + 1e-16
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / object_count
    recall75 = torch.sum(iou75 * detected_mask) / object_count

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss
def compute_loss(predictions, targets, model):
    """Compute the combined box / objectness / classification loss.

    Args:
        predictions: Iterable of per-layer prediction tensors; each is indexed
            as ``[image, anchor, grid_y, grid_x, attribute]`` below.
        targets: Ground-truth tensor; only ``targets.device`` is read here, the
            rest is handled by ``build_targets``.
        model: Passed to ``build_targets``; ``model.hyperparams`` is read and
            ``model.gr`` is overwritten with ``1``.

    Returns:
        A tuple ``(loss * batch_size, components)`` where ``components`` is
        ``to_cpu(torch.cat((lbox, lobj, lcls, loss)))``.
    """
    device = targets.device
    # One-element accumulators for the class, box and objectness losses.
    lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(
        1, device=device), torch.zeros(1, device=device)
    tcls, tbox, indices, anchors = build_targets(predictions, targets, model)  # targets
    # NOTE(review): read but never used in this function.
    hyperparams = model.hyperparams  # hyperparameters

    # Define criteria
    BCEcls = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([1.0], device=device))
    BCEobj = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([1.0], device=device))

    # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
    # presumably cp / cn are the positive / negative label values — TODO confirm
    cp, cn = smooth_BCE(eps=0.0)

    # Focal loss
    # gamma is hard-coded to 0, so the FocalLoss wrapping below never runs.
    gamma = 0  # hyperparams['fl_gamma']  # focal loss gamma
    if gamma > 0:
        BCEcls, BCEobj = FocalLoss(BCEcls, gamma), FocalLoss(BCEobj, gamma)

    # Losses
    # Per-layer weights for the objectness term; indexing fails if there are
    # more than four prediction layers.
    balance = [4.0, 1.0, 0.4, 0.1]  # P3-P6
    for layer_index, layer_predictions in enumerate(
            predictions):  # layer index, layer predictions
        b, anchor, grid_j, grid_i = indices[
            layer_index]  # image, anchor, gridy, gridx
        tobj = torch.zeros_like(layer_predictions[..., 0],
                                device=device)  # target obj

        num_targets = b.shape[0]  # number of targets
        if num_targets:
            ps = layer_predictions[
                b, anchor, grid_j,
                grid_i]  # prediction subset corresponding to targets

            # Regression
            pxy = ps[:, :2].sigmoid() * 2. - 0.5
            pwh = (ps[:, 2:4].sigmoid() * 2)**2 * anchors[layer_index]
            pbox = torch.cat((pxy, pwh), 1)  # predicted box
            iou = bbox_iou(pbox.T,
                           tbox[layer_index],
                           x1y1x2y2=False,
                           CIoU=True)  # iou(prediction, target)
            lbox += (1.0 - iou).mean()  # iou loss

            # NOTE(review): mutates the caller's model; with gr == 1 the
            # objectness target below reduces to the clamped, detached IoU.
            model.gr = 1
            # Objectness
            tobj[b, anchor, grid_j,
                 grid_i] = (1.0 -
                            model.gr) + model.gr * iou.detach().clamp(0).type(
                                tobj.dtype)  # iou ratio

            # Classification
            t = torch.full_like(ps[:, 5:], cn, device=device)  # targets
            t[range(num_targets), tcls[layer_index]] = cp
            lcls += BCEcls(ps[:, 5:], t)  # BCE

        lobj += BCEobj(layer_predictions[..., 4],
                       tobj) * balance[layer_index]  # obj loss

    # Fixed gains applied to each component (constants are hard-coded here).
    lbox *= 0.05 * (3. / 2)
    lobj *= (3. / 2)
    lcls *= 0.31
    # NOTE(review): tobj comes from the last loop iteration and is unbound if
    # predictions is empty.
    batch_size = tobj.shape[0]  # batch size

    loss = lbox + lobj + lcls

    return loss * batch_size, to_cpu(torch.cat((lbox, lobj, lcls, loss)))
def forward(self, x, targets=None, img_dim=None, Half=False):
    """Decode detection-head activations and optionally compute the loss.

    Args:
        x: Output of the last convolution; viewed as
            (batch, num_anchors, num_classes + 5, grid, grid). The original
            notes give (batch, 255, 13, 13) / (batch, 255, 26, 26) for a
            416x416 input with 255 = 3 * (4 + 1 + 80).
        targets: Ground truth forwarded to ``build_targets``; ``None`` selects
            inference.
        img_dim: Stored on ``self.img_dim`` and forwarded to
            ``compute_grid_offsets``.
        Half: Forwarded to ``compute_grid_offsets``.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``; otherwise
        ``(output, total_loss)``.
    """
    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    # prediction: (batch, num_anchors, grid, grid, num_classes + 5)
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)  # move the attribute axis last
        .contiguous()
    )

    # Centre x/y, confidence and class scores are squashed to (0, 1) with a
    # sigmoid; width and height stay raw because they feed an exponential.
    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

    # If grid size does not match current we compute new offsets
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, img_dim, cuda=x.is_cuda, Half=Half)

    # Boxes predicted by the network: add offsets and scale with anchors.
    # Allocating with the prediction's own dtype/device covers CPU float,
    # CUDA float and CUDA half. The former type selection always picked a
    # CUDA tensor type, even for CPU input.
    pred_boxes = torch.empty(
        prediction[..., :4].shape, dtype=prediction.dtype, device=prediction.device
    )
    pred_boxes[..., 0] = x.detach() + self.grid_x
    pred_boxes[..., 1] = y.detach() + self.grid_y
    pred_boxes[..., 2] = torch.exp(w.detach()) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.detach()) * self.anchor_h

    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    # ``targets`` distinguishes training (given) from inference (None).
    if targets is None:
        return output, 0

    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )

    # Loss: mask outputs to ignore non-existing objects (except with conf. loss)
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    # Positive and negative samples carry their own weights.
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss
transform=Compose(list_trans_val)) embed_set10 = "final_test10" test_embed_dir10 = os.path.join(embed_dir, embed_set10) df_test_embed10, _ = calculate_embedding(test_dl10, model, savedir=test_embed_dir10, concatenate="append") test_embed10 = DataLoadDf(df_test_embed10, many_hot_encoder.encode_weak, transform=Compose(trans_embedding)) test_embed_loader10 = DataLoader(test_embed10, batch_size=cfg.batch_size_classif, shuffle=False, num_workers=num_workers, drop_last=False) model = to_cpu(model) classif_model = to_cuda_if_available(classif_model) classif_model.eval() mean_test_results1 = measure_classif(classif_model, test_embed_loader1, classes=classes, suffix_print="test1") mean_test_results10 = measure_classif(classif_model, test_embed_loader10, classes=classes, suffix_print="test10") print(f"Time of the program: {time.time() - t}")
def forward(self, x, targets=None, img_dim=None):
    """Decode detection-head activations and optionally compute loss and metrics.

    Args:
        x: Head activations in NCHW layout; viewed as
            (batch, num_anchors, num_classes + 5, grid_y, grid_x), e.g.
            n x 255 x 13 x 13 -> n x 3 x 85 x 13 x 13 per the original notes.
        targets: Ground truth forwarded to ``build_targets``; ``None`` selects
            inference.
        img_dim: Stored on ``self.img_dim``.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``; otherwise
        ``(output, total_loss)``.

    Side effects:
        On the training path sets ``self.metrics`` and overwrites
        ``self.noobj_scale`` with ``100000``.
    """
    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    # Convert predictions: move the attribute axis last. The attributes are
    # tx_ctr, ty_ctr, tw, th, objectness, then the class scores.
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # Get and parse outputs; each is [batch_size, anchors, grid_y, grid_x].
    x = torch.sigmoid(prediction[..., 0])  # tx_ctr range: (0, 1)
    y = torch.sigmoid(prediction[..., 1])  # ty_ctr range: (0, 1)
    w = prediction[..., 2]  # tw
    h = prediction[..., 3]  # th
    pred_conf = torch.sigmoid(prediction[..., 4])  # objectness
    pred_cls = torch.sigmoid(prediction[..., 5:])  # [batch, anchors, gy, gx, cls]

    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Add offset and scale with anchors. Allocated on the prediction's own
    # device so the layer also works for CPU input; CUDA tensor types used to
    # be hard-coded here.
    pred_boxes = torch.empty(
        prediction[..., :4].shape, dtype=prediction.dtype, device=prediction.device
    )
    pred_boxes[..., 0] = x.detach() + self.grid_x  # x centre in grid units
    pred_boxes[..., 1] = y.detach() + self.grid_y  # y centre in grid units
    pred_boxes[..., 2] = torch.exp(w.detach()) * self.anchor_w  # width in grid units
    pred_boxes[..., 3] = torch.exp(h.detach()) * self.anchor_h  # height in grid units

    output = torch.cat(
        (
            # Scale (x_ctr, y_ctr, w, h) by the layer stride.
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    # (tx, ty, tw, th) are the target offsets.
    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )

    # Positive-sample losses: localisation + class + objectness.
    # Indexing with obj_mask selects the positive positions.
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])

    # Negative-sample loss: no-object confidence.
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])

    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    # Threshold was 0.5, which made recall75 identical to recall50.
    iou75 = (iou_scores > 0.75).float()
    # objectness > 0.5 and predicted class correct, at target positions
    detected_mask = conf50 * class_mask * tconf
    # precision = TP / (TP + FP); the denominator counts objectness > 0.5
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    # recall = TP / (TP + FN); the denominator counts all positives
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "grid_size": grid_size,
        "loss": to_cpu(total_loss).item(),
        "loss-tx": to_cpu(loss_x).item(),
        "loss-ty": to_cpu(loss_y).item(),
        "loss-tw": to_cpu(loss_w).item(),
        "loss-th": to_cpu(loss_h).item(),
        "loss-conf": to_cpu(loss_conf).item(),
        "loss-cls": to_cpu(loss_cls).item(),
        "loss-obj": to_cpu(loss_conf_obj).item(),
        "loss-noobj x scale": to_cpu(loss_conf_noobj * self.noobj_scale).item(),
        "loss-noobj": to_cpu(loss_conf_noobj).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
    }

    # NOTE(review): this permanently changes the no-object weight after the
    # first training call; kept as-is because callers may rely on it, but it
    # looks like leftover experimentation — confirm before relying on it.
    self.noobj_scale = 100000

    return output, total_loss
def forward(self, x, target=None, img_dim=None):
    """Decode one detection head and, when ``target`` is given, compute its loss.

    Args:
        x: Raw head output. It is viewed as
            (num_samples, num_anchors, num_classes + 5, grid, grid), so it must
            hold exactly that many elements.
        target: Ground-truth tensor forwarded unchanged to ``build_targets``.
            ``None`` selects inference mode.
        img_dim: Stored on ``self.img_dim``; not otherwise used here.

    Returns:
        ``(output, 0)`` when ``target`` is ``None``, else ``(output, total_loss)``.
        ``output`` has shape (num_samples, num_anchors * grid * grid,
        num_classes + 5): box (x, y, w, h) multiplied by ``self.stride``,
        objectness, then per-class scores.

    Side effects:
        Sets ``self.img_dim``; may call ``self.compute_grid_offsets``; in
        training mode replaces ``self.metrics``.
    """
    # Typed constructor living on the same device family as the input.
    FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    # Move the per-anchor channel axis last: (N, A, G, G, num_classes + 5).
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    x = torch.sigmoid(prediction[..., 0])
    y = torch.sigmoid(prediction[..., 1])
    w = prediction[..., 2]
    h = prediction[..., 3]
    pred_conf = torch.sigmoid(prediction[..., 4])
    # Every remaining channel is a class score, hence the open-ended slice.
    pred_cls = torch.sigmoid(prediction[..., 5:])

    # Cached offsets are only valid for the grid size they were built for.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Cell offset for the centre, exponential anchor scaling for the size.
    pred_boxes = FloatTensor(prediction[..., :4].shape)
    pred_boxes[..., 0] = x.data + self.grid_x
    pred_boxes[..., 1] = y.data + self.grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if target is None:
        return output, 0

    iou_score, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=target,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )
    # Index with boolean masks; uint8 mask indexing is deprecated in PyTorch.
    obj_mask = obj_mask.bool()
    noobj_mask = noobj_mask.bool()

    # Box terms only look at cells that own an object.
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    # Class scores are compared with the class targets, not the objectness targets.
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_score > 0.5).float()
    iou75 = (iou_score > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    # The small epsilon keeps the ratios finite when a denominator is zero.
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss
def forward(self, x, targets=None, img_dim=None):
    """Decode a head that predicts (x, y, d) per anchor and optionally compute its loss.

    Each anchor carries ``num_classes + 4`` channels: centre x, centre y, a
    single size value ``d`` (scaled by ``self.anchor_w``), objectness, and the
    class scores.

    Args:
        x: Raw head output, viewed as
            (num_samples, num_anchors, num_classes + 4, grid, grid).
        targets: Ground truth forwarded unchanged to ``build_targets``;
            ``None`` selects inference mode.
        img_dim: Stored on ``self.img_dim``; not otherwise used here.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``, else
        ``(output, total_loss)``. ``output`` has shape
        (num_samples, num_anchors * grid * grid, num_classes + 4).

    Side effects:
        Sets ``self.img_dim``; may call ``self.compute_grid_offsets``; in
        training mode replaces ``self.metrics``.
    """
    # Typed constructor living on the same device family as the input.
    FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    # Move the per-anchor channel axis last: (N, A, G, G, num_classes + 4).
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 4, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    d = prediction[..., 2]  # Single size channel (named "diameter" by the author)
    pred_conf = torch.sigmoid(prediction[..., 3])  # Objectness
    pred_cls = torch.sigmoid(prediction[..., 4:])  # Class scores

    # Cached offsets are only valid for the grid size they were built for.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Only three box channels exist; the size uses the width anchor.
    pred_boxes = FloatTensor(prediction[..., :3].shape)
    pred_boxes[..., 0] = x.data + self.grid_x
    pred_boxes[..., 1] = y.data + self.grid_y
    pred_boxes[..., 2] = torch.exp(d.data) * self.anchor_w

    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 3) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    # build_targets still returns ten values; the ``tw`` slot is used as the
    # size target and the ``th`` slot is ignored.
    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, _, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )
    # Index with boolean masks; uint8 mask indexing is deprecated in PyTorch.
    obj_mask = obj_mask.bool()
    noobj_mask = noobj_mask.bool()

    # Box terms only look at cells that own an object.
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_d = self.mse_loss(d[obj_mask], tw[obj_mask])
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    # The size term enters the total at half weight.
    total_loss = loss_x + loss_y + 0.5 * loss_d + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    # The small epsilon keeps the ratios finite when a denominator is zero.
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "d": to_cpu(loss_d).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss
def forward(self, x, targets=None, img_dim=None):
    """Decode a head predicting (u, v, Z, quaternion) and optionally compute its loss.

    Each anchor carries ``num_classes + 8`` channels: image-plane point (u, v),
    depth Z, quaternion (Qw, Qx, Qy, Qz), objectness, and the class scores.

    Args:
        x: Raw head output, viewed as
            (num_samples, num_anchors, num_classes + 8, grid, grid).
        targets: Ground truth forwarded unchanged to ``build_targets``;
            ``None`` selects inference mode.
        img_dim: Stored on ``self.img_dim``; not otherwise used here.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``, else
        ``(output, total_loss)``. ``output`` has shape
        (num_samples, num_anchors * grid * grid, num_classes + 8); only the
        (u, v) pair is multiplied by ``self.stride``.

    Side effects:
        Sets ``self.img_dim``; may call ``self.compute_grid_offsets``; in
        training mode replaces ``self.metrics``.
    """
    # Typed constructor living on the same device family as the input.
    FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    # Move the per-anchor channel axis last: (N, A, G, G, num_classes + 8).
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 8, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # (u, v): projected point on the image plane.
    u = torch.sigmoid(prediction[..., 0])
    v = torch.sigmoid(prediction[..., 1])
    # Z: depth in the 3D coordinates.
    Z = prediction[..., 2]
    # Quaternion Qw + Qx*i + Qy*j + Qz*k (raw channel values).
    Qw = prediction[..., 3]
    Qx = prediction[..., 4]
    Qy = prediction[..., 5]
    Qz = prediction[..., 6]
    pred_conf = torch.sigmoid(prediction[..., 7])  # Objectness
    pred_cls = torch.sigmoid(prediction[..., 8:])  # Class scores

    # Cached offsets are only valid for the grid size they were built for.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Decoded predictions: cell offset for (u, v), squashing for the quaternion.
    pred_uvZQ = FloatTensor(prediction[..., :7].shape)
    pred_uvZQ[..., 0] = u.data + self.grid_x
    pred_uvZQ[..., 1] = v.data + self.grid_y
    pred_uvZQ[..., 2] = Z.data
    pred_uvZQ[..., 3] = torch.sigmoid(Qw.data)
    pred_uvZQ[..., 4] = torch.tanh(Qx.data)
    pred_uvZQ[..., 5] = torch.tanh(Qy.data)
    pred_uvZQ[..., 6] = torch.tanh(Qz.data)

    output = torch.cat(
        (
            pred_uvZQ[..., :2].view(num_samples, -1, 2) * self.stride,
            pred_uvZQ[..., 2:].view(num_samples, -1, 5),
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    (z_scores, class_mask, obj_mask, noobj_mask,
     tu, tv, tZ, tQw, tQx, tQy, tQz, tcls, tconf) = build_targets(
        pred_uvZQ=pred_uvZQ,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.anchors,
        ignore_thres=self.ignore_thres,
    )
    # Index with boolean masks; uint8 mask indexing is deprecated in PyTorch.
    obj_mask = obj_mask.bool()
    noobj_mask = noobj_mask.bool()

    # Position and depth terms are weighted 10x relative to the quaternion terms.
    loss_u = 10 * self.mse_loss(u[obj_mask], tu[obj_mask])
    loss_v = 10 * self.mse_loss(v[obj_mask], tv[obj_mask])
    loss_Z = 10 * self.mse_loss(Z[obj_mask], tZ[obj_mask])
    # NOTE(review): the quaternion losses use the raw channels, while the
    # decoded output applies sigmoid/tanh - confirm the targets match.
    loss_Qw = self.mse_loss(Qw[obj_mask], tQw[obj_mask])
    loss_Qx = self.mse_loss(Qx[obj_mask], tQx[obj_mask])
    loss_Qy = self.mse_loss(Qy[obj_mask], tQy[obj_mask])
    loss_Qz = self.mse_loss(Qz[obj_mask], tQz[obj_mask])
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = (
        loss_u + loss_v + loss_Z
        + loss_Qw + loss_Qx + loss_Qy + loss_Qz
        + loss_conf + loss_cls
    )

    # Metrics: a detection counts when its z score is below the threshold.
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    z5 = (z_scores < 0.5).float()
    z05 = (z_scores < 0.05).float()
    detected_mask = conf50 * class_mask * tconf
    # The small epsilon keeps the ratios finite when no object is present.
    recall5 = torch.sum(z5 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall05 = torch.sum(z05 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "u": to_cpu(loss_u).item(),
        "v": to_cpu(loss_v).item(),
        "Z": to_cpu(loss_Z).item(),
        "Qw": to_cpu(loss_Qw).item(),
        "Qx": to_cpu(loss_Qx).item(),
        "Qy": to_cpu(loss_Qy).item(),
        "Qz": to_cpu(loss_Qz).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall5": to_cpu(recall5).item(),
        "recall05": to_cpu(recall05).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss
def forward(self, x, targets=None, img_dim=None):
    """Decode one detection head and, when targets are given, compute its loss.

    Args:
        x: Raw head output, viewed as
            (num_samples, num_anchors, num_classes + 5, grid, grid).
        targets: Ground truth forwarded unchanged to ``build_targets``;
            ``None`` selects inference mode.
        img_dim: Size of the image fed to the model; stored on ``self.img_dim``.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``, else
        ``(output, total_loss)``. ``output`` has shape
        (num_samples, num_anchors * grid * grid, num_classes + 5).

    Side effects:
        Prints debug information; sets ``self.img_dim``; may call
        ``self.compute_grid_offsets``; in training mode replaces
        ``self.metrics``.
    """
    print("^" * 30)
    print("yolo layer input: ", x.shape)
    # targets is None at inference time, so only read .shape when it exists.
    print("targets: ", None if targets is None else targets.shape)
    print("img_dim: ", img_dim)

    # Typed constructor living on the same device family as the input.
    FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

    self.img_dim = img_dim
    num_samples = x.size(0)
    # Spatial size of the feature map.
    grid_size = x.size(2)

    # e.g. (N, 255, 13, 13) -> (N, 3, 80 + 5, 13, 13) -> (N, 3, 13, 13, 80 + 5)
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # ``[..., k]`` selects column k of the last (channel) dimension.
    x = torch.sigmoid(prediction[..., 0])
    y = torch.sigmoid(prediction[..., 1])
    w = prediction[..., 2]
    h = prediction[..., 3]
    pred_conf = torch.sigmoid(prediction[..., 4])
    pred_cls = torch.sigmoid(prediction[..., 5:])

    # Cached offsets are only valid for the grid size they were built for.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Cell offset for the centre, exponential anchor scaling for the size
    # (per the original author's note, this follows the YOLOv3 paper).
    pred_boxes = FloatTensor(prediction[..., :4].shape)
    pred_boxes[..., 0] = x.data + self.grid_x
    pred_boxes[..., 1] = y.data + self.grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

    output = torch.cat(
        (
            # Boxes are in grid units here; the stride rescales them.
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    # Convert the raw targets into the per-cell tensors the loss needs.
    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )
    # Without this cast, mask indexing emits a deprecation warning.
    obj_mask = obj_mask.bool()
    noobj_mask = noobj_mask.bool()

    # Box terms only look at cells that own an object.
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    # The small epsilon keeps the ratios finite when a denominator is zero.
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss
def forward(self, x, targets=None, img_dim=None):
    """Produce decoded predictions and, when targets are given, the total loss.

    Args:
        x: Raw head output, viewed as
            (num_samples, num_anchors, 5 + num_classes, grid, grid);
            e.g. 255 channels = (80 + 4 + 1) * 3.
        targets: Ground-truth boxes forwarded unchanged to ``build_targets``
            (the original author's note describes them as (num, 6):
            batch index, class, centre x, centre y, width, height - TODO
            confirm). ``None`` selects inference mode.
        img_dim: Image size; stored on ``self.img_dim``.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``, else
        ``(output, total_loss)``. ``output`` has shape
        (num_samples, num_anchors * grid * grid, 5 + num_classes).

    Side effects:
        Sets ``self.img_dim``; may call ``self.compute_grid_offsets``; in
        training mode replaces ``self.metrics``.
    """
    # Typed constructor living on the same device family as the input.
    FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

    self.img_dim = img_dim
    num_samples = x.size(0)  # batch size
    grid_size = x.size(2)  # feature-map size

    # (N, A * (5 + C), G, G) -> (N, A, G, G, 5 + C)
    prediction = (
        x.view(num_samples, self.num_anchors, 5 + self.num_classes, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # x, y, w, h and pred_conf all share the shape (N, A, G, G).
    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    pred_conf = torch.sigmoid(prediction[..., 4])  # Objectness
    pred_cls = torch.sigmoid(prediction[..., 5:])  # Class scores, (N, A, G, G, C)

    # Cached offsets are only valid for the grid size they were built for.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Uninitialised buffer with the same shape as the four box channels.
    pred_boxes = FloatTensor(prediction[..., :4].shape)
    # Each cell has unit length and the sigmoid output lies in (0, 1), so
    # adding the (broadcast) cell index gives the centre in grid units.
    pred_boxes[..., 0] = x.data + self.grid_x
    pred_boxes[..., 1] = y.data + self.grid_y
    # Sizes scale the preset anchors exponentially.
    pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

    output = torch.cat(
        (
            # Multiply by the stride to get back to input-image scale.
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    # Per the original author's notes (TODO confirm against build_targets):
    #   iou_scores - IoU between the matched prediction and its target box
    #   class_mask - 1 where the predicted class is correct
    #   obj_mask   - 1 on the anchor assigned to each target
    #   noobj_mask - 1 on anchors whose IoU stays below the ignore threshold
    #   tx..th     - regression targets; tcls - class targets; tconf - objectness targets
    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )
    # Index with boolean masks; uint8 mask indexing is deprecated in PyTorch.
    obj_mask = obj_mask.bool()
    noobj_mask = noobj_mask.bool()

    # Coordinate and size losses, restricted to cells that own an object.
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    # Objectness losses for positive and negative anchors, mixed by scale.
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    # Classification loss.
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    # Count positives in floating point so the divisions stay float.
    num_obj = obj_mask.float().sum()
    # The small epsilon keeps the ratios finite when a denominator is zero.
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (num_obj + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (num_obj + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss
def forward(self, x, targets=None):
    """Run the network stage by stage and gather the two detection outputs.

    ``self.conv1`` and ``self.maxpool`` run first and are recorded as
    ``"stage_0"``. Every name in ``self.stages`` is then executed in order and
    its result stored under that name, so later stages can read it.

    Args:
        x: Input batch; ``x.shape[2]`` is passed to the detection stages as
            the image dimension.
        targets: Forwarded to the detection stages; ``None`` selects inference.

    Returns:
        The concatenated detection outputs (moved with ``to_cpu``) when
        ``targets`` is ``None``, otherwise ``(loss, outputs)`` where ``loss``
        is the sum of the per-detection-stage losses.
    """
    img_dim = x.shape[2]
    layer_outputs, yolo_outputs = {}, []
    loss = 0

    x = self.maxpool(self.conv1(x))
    layer_outputs["stage_0"] = x

    # Stages that consume a named earlier output instead of the running tensor.
    rewired_inputs = {
        "yolo1_conv1": "conv5",
        "yolo1_conv2": "yolo1_conv1",
        "yolo1_conv3": "yolo1_conv2",
        "yolo2_conv1": "yolo2_route1",
        "yolo2_upsample": "yolo2_conv1",
        "yolo2_conv2": "yolo2_route2",
        "yolo2_conv3": "yolo2_conv2",
    }
    # Detection stages and the output each one reads.
    detection_inputs = {
        "yolo1_detection": "yolo1_conv3",
        "yolo2_detection": "yolo2_conv3",
    }

    for stage_name in self.stages:
        if stage_name == "yolo2_route1":
            # Pure routing: reuse an earlier output, no module involved.
            x = layer_outputs["yolo1_conv1"]
        elif stage_name == "yolo2_route2":
            # Concatenate along the channel axis.
            x = torch.cat(
                (layer_outputs["stage_2"], layer_outputs["yolo2_upsample"]), 1
            )
        elif stage_name in detection_inputs:
            head = getattr(self, stage_name)
            x, layer_loss = head(
                layer_outputs[detection_inputs[stage_name]], targets, img_dim
            )
            loss += layer_loss
            yolo_outputs.append(x)
        else:
            stage = getattr(self, stage_name)
            source = rewired_inputs.get(stage_name)
            # Stages without a rewired input simply continue from the running tensor.
            x = stage(x if source is None else layer_outputs[source])
        layer_outputs[stage_name] = x

    yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
    return yolo_outputs if targets is None else (loss, yolo_outputs)
def forward(self, pred, targets=None):
    """Compute the detection loss for already-decoded predictions.

    Without targets the input is returned untouched and nothing on ``self``
    changes. With targets, the boxes in ``pred`` are mapped back to the
    regression space, compared with the targets from ``build_targets``, and
    ``self.metrics`` is refreshed.

    Args:
        pred: Decoded predictions; reshaped to
            (num_samples, num_anchors, grid, grid, num_classes + 5).
        targets: Ground truth forwarded to ``build_targets``; ``None`` skips
            the loss entirely.

    Returns:
        ``pred`` when ``targets`` is ``None``, otherwise ``(pred, total_loss)``.

    Side effects (training path only):
        Sets ``self.grid_size``, ``self.stride``, ``self.grid_x``,
        ``self.grid_y`` and ``self.metrics``.
    """
    if targets is None:
        return pred

    import math

    # Typed constructor living on the same device family as the input.
    FloatTensor = torch.cuda.FloatTensor if pred.is_cuda else torch.FloatTensor

    num_samples = pred.size(0)
    # NOTE(review): the grid size is derived from pred.size(2); confirm this
    # matches the layout callers actually pass (the view below will raise if
    # the element count does not agree).
    grid_size = int(math.sqrt(pred.size(2) / self.num_anchors))
    self.grid_size = grid_size
    self.stride = self.img_dim / grid_size

    prediction = pred.view(
        num_samples, self.num_anchors, grid_size, grid_size, self.num_classes + 5
    ).contiguous()

    # Column index (grid_x) and row index (grid_y) of every cell.
    cells = range(grid_size)
    grid_shape = [1, 1, grid_size, grid_size]
    self.grid_x = FloatTensor([col for _ in cells for col in cells]).view(grid_shape)
    self.grid_y = FloatTensor([row for row in cells for _ in cells]).view(grid_shape)

    pred_boxes = prediction[..., 0:4]
    # Back from image coordinates to a per-cell offset.
    pred_x = (pred_boxes[..., 0] / self.stride - self.grid_x + 0.5) / 2
    pred_y = (pred_boxes[..., 1] / self.stride - self.grid_y + 0.5) / 2

    # Sizes are normalised by the anchors at their original (unscaled) size.
    raw_anchors = FloatTensor([(a_w, a_h) for a_w, a_h in self.anchors])
    anchor_w = raw_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
    anchor_h = raw_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))
    pred_w = torch.sqrt(pred_boxes[..., 2] / anchor_w) / 2
    pred_h = torch.sqrt(pred_boxes[..., 3] / anchor_h) / 2

    pred_conf = prediction[..., 4]
    pred_cls = prediction[..., 5:]

    # build_targets receives the anchors expressed in grid units.
    scaled_anchors = FloatTensor(
        [(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]
    )
    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=scaled_anchors,
        ignore_thres=self.ignore_thres,
        stride=self.stride,
    )

    # Selection masks, computed once and reused by every term below.
    pos = obj_mask == 1
    neg = noobj_mask == 1

    # Box terms only look at cells that own an object.
    loss_x = self.mse_loss(pred_x[pos], tx[pos])
    loss_y = self.mse_loss(pred_y[pos], ty[pos])
    loss_w = self.mse_loss(pred_w[pos], tw[pos])
    loss_h = self.mse_loss(pred_h[pos], th[pos])
    # obj_mask itself is the objectness target for both groups.
    loss_conf_obj = self.bce_loss(pred_conf[pos], obj_mask[pos])
    loss_conf_noobj = self.bce_loss(pred_conf[neg], obj_mask[neg])
    # The no-object term is weighted 100x.
    loss_conf = loss_conf_obj + 100 * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[pos], tcls[pos])
    total_loss = (
        loss_x * self.lambda_xy + loss_y * self.lambda_xy
        + loss_w * self.lambda_wh + loss_h * self.lambda_wh
        + loss_conf * self.lambda_conf + loss_cls * self.lambda_cls
    )

    # Metrics for model evaluation.
    cls_acc = 100 * class_mask[pos].mean()
    conf_obj = pred_conf[pos].mean()
    conf_noobj = pred_conf[neg].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask
    # The small epsilon keeps the ratios finite when a denominator is zero.
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return pred, total_loss
optimizer.step() # Reset gradients optimizer.zero_grad() # ############ # Log progress # ############ log_str = "" log_str += AsciiTable( [ ["Type", "Value"], ["IoU loss", float(loss_components[0])], ["Object loss", float(loss_components[1])], ["Class loss", float(loss_components[2])], ["Loss", float(loss_components[3])], ["Batch loss", to_cpu(loss).item()], ]).table if batch_i % print_frequency == 0: if verbose: log_and_print(log_file, f'\nEpoch: {epoch}; Batch: {batch_i}; images:{batch_i * batch_size}; {format_timelapsed(time() - start)} elapsed') log_and_print(log_file, f'Learning Rate: {lr}') log_and_print(log_file, log_str) # Tensorboard logging ''' tensorboard_log = [ ("train/iou_loss", float(loss_components[0])), ("train/obj_loss", float(loss_components[1])), ("train/class_loss", float(loss_components[2])),
def forward(self, x, targets=None, img_dim=None):
    """Decode one detection head and, when targets are given, compute its loss.

    The loss has four parts, all restricted by the masks from
    ``build_targets``: box regression on object anchors, objectness on object
    anchors, objectness on no-object anchors, and classification on object
    anchors.

    Args:
        x: Raw head output in (N, C, H, W) layout, viewed as
            (num_samples, num_anchors, num_classes + 5, grid, grid).
        targets: Ground truth forwarded unchanged to ``build_targets`` (the
            original author's note describes it as an (n, 6) tensor whose
            first column is the image index - TODO confirm). ``None`` selects
            inference mode.
        img_dim: Input image size; stored on ``self.img_dim``.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``, else
        ``(output, total_loss)``. ``output`` has shape
        (num_samples, num_anchors * grid * grid, num_classes + 5).

    Side effects:
        Sets ``self.img_dim``; may call ``self.compute_grid_offsets``; in
        training mode replaces ``self.metrics``.
    """
    # Typed constructor living on the same device family as the input.
    FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

    self.img_dim = img_dim
    num_samples = x.size(0)  # number of samples in the batch
    grid_size = x.size(2)  # current feature-map size

    # Reshape to (num_samples, num_anchors, grid, grid, num_classes + 5).
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # ``...`` indexes the innermost (num_classes + 5) axis. x and y are
    # offsets inside the current cell; w and h are log-scale factors applied
    # to the anchors.
    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    pred_conf = torch.sigmoid(prediction[..., 4])  # Objectness
    pred_cls = torch.sigmoid(prediction[..., 5:])  # Class scores

    # Cached offsets are only valid for the grid size they were built for.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Turn the raw (tx, ty, tw, th) values back into box coordinates.
    pred_boxes = FloatTensor(prediction[..., :4].shape)
    pred_boxes[..., 0] = x.data + self.grid_x
    pred_boxes[..., 1] = y.data + self.grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    # Per the original author's note (TODO confirm): obj_mask marks the
    # anchors with the best IoU for each target, noobj_mask everything except
    # anchors whose IoU exceeds the ignore threshold.
    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )
    obj_mask = obj_mask.bool()  # convert int8 to bool
    noobj_mask = noobj_mask.bool()  # convert int8 to bool

    # Part 1: box regression on object anchors.
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    # Part 2: objectness on object anchors.
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    # Part 3: objectness on no-object anchors.
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    # Parts 2 and 3 are mixed with their configured scales.
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    # Part 4: classification on object anchors.
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    # The small epsilon keeps the ratios finite when a denominator is zero.
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss
def forward(self, x, targets=None, img_dim=None):
    """Decode raw YOLO head activations and optionally compute the loss.

    Args:
        x: Raw head output. It is reshaped below to
            ``(batch, num_anchors, num_classes + 5, grid, grid)``.
        targets: Ground-truth tensor forwarded to ``build_targets``. When
            ``None`` only the decoded detections are returned.
        img_dim: Input image size; stored on ``self.img_dim``.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``, otherwise
        ``(output, total_loss)``. ``output`` has shape
        ``(batch, num_anchors * grid * grid, 5 + num_classes)``; its box
        coordinates are multiplied by ``self.stride``.

    Side effects:
        Sets ``self.img_dim`` and, when targets are given, ``self.metrics``.
    """
    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    # (batch, anchors, grid, grid, 5 + classes)
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # Split the last axis into its components.
    x = torch.sigmoid(prediction[..., 0])  # centre x
    y = torch.sigmoid(prediction[..., 1])  # centre y
    w = prediction[..., 2]  # width
    h = prediction[..., 3]  # height
    pred_conf = torch.sigmoid(prediction[..., 4])  # objectness
    pred_cls = torch.sigmoid(prediction[..., 5:])  # per-class scores

    # Recompute the cached offsets when the grid size changes.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Add cell offsets and scale by the anchors. ``.data`` keeps the decoded
    # boxes out of the autograd graph. torch.stack allocates on the inputs'
    # device, unlike the legacy FloatTensor(shape) constructor which used
    # the *current* CUDA device and returned uninitialised memory.
    pred_boxes = torch.stack(
        (
            x.data + self.grid_x,
            y.data + self.grid_y,
            torch.exp(w.data) * self.anchor_w,
            torch.exp(h.data) * self.anchor_h,
        ),
        dim=-1,
    )

    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )
    # Mask indexing requires bool masks on current PyTorch; this is a no-op
    # when build_targets already returns bool tensors.
    obj_mask = obj_mask.bool()
    noobj_mask = noobj_mask.bool()

    # Loss: mask outputs to ignore non-existing objects (except for the
    # confidence loss, which also covers the no-object cells).
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    hits50 = torch.sum(iou50 * detected_mask)
    hits75 = torch.sum(iou75 * detected_mask)
    num_objects = obj_mask.sum()
    precision = hits50 / (conf50.sum() + 1e-16)
    recall50 = hits50 / (num_objects + 1e-16)
    recall75 = hits75 / (num_objects + 1e-16)

    scalars = {
        "loss": total_loss,
        "x": loss_x,
        "y": loss_y,
        "w": loss_w,
        "h": loss_h,
        "conf": loss_conf,
        "cls": loss_cls,
        "cls_acc": cls_acc,
        "recall50": recall50,
        "recall75": recall75,
        "precision": precision,
        "conf_obj": conf_obj,
        "conf_noobj": conf_noobj,
    }
    self.metrics = {name: to_cpu(value).item() for name, value in scalars.items()}
    self.metrics["grid_size"] = grid_size

    return output, total_loss
def forward(self, x, targets=None, img_dim=None):
    """Decode one YOLO detection head and optionally compute its loss.

    Args:
        x: Raw head output, e.g. ``(b, 255, 13, 13)`` for 3 anchors and
            80 classes (per the original author's note); reshaped below to
            ``(b, num_anchors, num_classes + 5, grid, grid)``.
        targets: Ground-truth tensor forwarded to ``build_targets``. When
            ``None`` only the decoded detections are returned.
        img_dim: Input image size; stored on ``self.img_dim``.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``, otherwise
        ``(output, total_loss)``. ``output`` has shape
        ``(b, num_anchors * grid * grid, 5 + num_classes)``; its boxes are
        multiplied by ``self.stride`` to map grid units back to pixels.

    Side effects:
        Sets ``self.img_dim`` and, when targets are given, ``self.metrics``.
    """
    self.img_dim = img_dim
    num_samples = x.size(0)  # batch size
    grid_size = x.size(2)  # feature-map size, e.g. 13, 26 or 52

    # (b, A, 5 + C, g, g) -> (b, A, g, g, 5 + C)
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # Raw network outputs; each of x, y, w, h, pred_conf is (b, A, g, g).
    x = torch.sigmoid(prediction[..., 0])  # centre x, squashed into (0, 1)
    y = torch.sigmoid(prediction[..., 1])  # centre y, squashed into (0, 1)
    w = prediction[..., 2]  # width (decoded with exp below)
    h = prediction[..., 3]  # height (decoded with exp below)
    pred_conf = torch.sigmoid(prediction[..., 4])  # objectness
    pred_cls = torch.sigmoid(prediction[..., 5:])  # (b, A, g, g, C)

    # The cached offsets depend on the grid size, so refresh them whenever
    # it changes. This call is what sets self.grid_x / self.grid_y /
    # self.anchor_w / self.anchor_h used below.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Decode: add the cell offset to the sigmoid-ed centre and scale the
    # anchor by exp(w / h). The result is in grid units. The decoded boxes
    # are the inference output; during training they do not receive
    # gradients (``.data``) and are only handed to build_targets.
    # torch.stack allocates on the inputs' device, unlike the legacy
    # FloatTensor(shape) constructor which used the *current* CUDA device
    # and returned uninitialised memory.
    pred_boxes = torch.stack(
        (
            x.data + self.grid_x,
            y.data + self.grid_y,
            torch.exp(w.data) * self.anchor_w,
            torch.exp(h.data) * self.anchor_h,
        ),
        dim=-1,
    )  # (b, A, g, g, 4)

    output = torch.cat(
        (
            # Multiply by the stride to go from grid units to pixels.
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    # Encode the ground truth into regression targets (t*) and masks.
    # NOTE(review): the mask semantics (obj_mask = positive cells,
    # noobj_mask = negative cells excluding those above ignore_thres) come
    # from the original author's notes; confirm against build_targets.
    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )
    # Mask indexing requires bool masks on current PyTorch; this is a no-op
    # when build_targets already returns bool tensors.
    obj_mask = obj_mask.bool()
    noobj_mask = noobj_mask.bool()

    # Regression loss: computed on the raw offsets against the encoded
    # targets, only at positive cells.
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])

    # Confidence loss: object vs. no-object, with the two terms weighted
    # separately by obj_scale and noobj_scale.
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj

    # Class loss at positive cells.
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])

    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()  # class accuracy at positives
    conf_obj = pred_conf[obj_mask].mean()  # mean confidence at positives
    conf_noobj = pred_conf[noobj_mask].mean()  # mean confidence at negatives
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    # Confident, correctly classified, and at a ground-truth location.
    detected_mask = conf50 * class_mask * tconf
    hits50 = torch.sum(iou50 * detected_mask)
    hits75 = torch.sum(iou75 * detected_mask)
    num_objects = obj_mask.sum()
    precision = hits50 / (conf50.sum() + 1e-16)
    recall50 = hits50 / (num_objects + 1e-16)
    recall75 = hits75 / (num_objects + 1e-16)

    scalars = {
        "loss": total_loss,
        "x": loss_x,
        "y": loss_y,
        "w": loss_w,
        "h": loss_h,
        "conf": loss_conf,
        "cls": loss_cls,
        "cls_acc": cls_acc,
        "recall50": recall50,
        "recall75": recall75,
        "precision": precision,
        "conf_obj": conf_obj,
        "conf_noobj": conf_noobj,
    }
    self.metrics = {name: to_cpu(value).item() for name, value in scalars.items()}
    self.metrics["grid_size"] = grid_size

    return output, total_loss
def yolo_loss(x, y, w, h, xdir, ydir, pred_boxes, pred_conf, pred_cls, targets, scaled_anchors,
              ignore_thres, clf_criterion, reg_criterion, obj_scale, noobj_scale, regr_weights,
              grid_size1):
    """Compute the YOLO loss with a direction (rotation) term, plus metrics.

    Args:
        x, y, w, h: Predicted box offsets, indexed with the object mask.
        xdir, ydir: Predicted direction components, indexed likewise.
        pred_boxes: Decoded boxes forwarded to ``build_targets``.
        pred_conf: Predicted objectness.
        pred_cls: Predicted class scores, or ``None`` when there is no
            class head (the class loss and accuracy are then zero).
        targets: Ground truth forwarded to ``build_targets``.
        scaled_anchors: Anchors forwarded to ``build_targets``.
        ignore_thres: Ignore threshold forwarded to ``build_targets``.
        clf_criterion: Criterion used for the class loss.
        reg_criterion: Criterion used for every regression term.
        obj_scale, noobj_scale: Passed to ``focal_loss`` as the weight pair
            ``(noobj_scale, obj_scale)``.
        regr_weights: Multiplier applied to the summed regression losses.
        grid_size1: Reported verbatim as ``metrics["grid_size"]``.

    Returns:
        ``(total_loss, metrics)`` where ``metrics`` maps names to Python
        scalars.
    """
    (iou_scores, class_mask, obj_mask, noobj_mask,
     tx, ty, tw, th, txdir, tydir, tcls, tconf) = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=scaled_anchors,
        ignore_thres=ignore_thres,
    )

    # Loss: mask outputs to ignore non-existing objects (except for the
    # confidence loss).
    loss_x = reg_criterion(x[obj_mask], tx[obj_mask])
    loss_y = reg_criterion(y[obj_mask], ty[obj_mask])
    loss_w = reg_criterion(w[obj_mask], tw[obj_mask])
    loss_h = reg_criterion(h[obj_mask], th[obj_mask])

    # Loss: rotations
    loss_xdir = reg_criterion(xdir[obj_mask], txdir[obj_mask])
    loss_ydir = reg_criterion(ydir[obj_mask], tydir[obj_mask])

    loss_conf = focal_loss(pred_conf, tconf, (noobj_scale, obj_scale))

    # Decide on the presence of the class head explicitly. Testing
    # ``loss_cls == 0`` would also report zero accuracy whenever the class
    # loss is genuinely zero. Placeholders live on the predictions' device;
    # "cuda if available" breaks when the inputs are on the CPU.
    if pred_cls is not None:
        loss_cls = clf_criterion(pred_cls[obj_mask], tcls[obj_mask])
        cls_acc = 100 * class_mask[obj_mask].mean()
    else:
        loss_cls = torch.zeros((), device=pred_conf.device)
        cls_acc = torch.zeros((), device=pred_conf.device)

    regression_loss = loss_x + loss_y + loss_w + loss_h + loss_xdir + loss_ydir
    total_loss = regr_weights * regression_loss + loss_conf + loss_cls

    # Metrics
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    hits50 = torch.sum(iou50 * detected_mask)
    hits75 = torch.sum(iou75 * detected_mask)
    num_objects = obj_mask.sum()
    precision = hits50 / (conf50.sum() + 1e-16)
    recall50 = hits50 / (num_objects + 1e-16)
    recall75 = hits75 / (num_objects + 1e-16)

    scalars = {
        "loss": total_loss,
        "x": loss_x,
        "y": loss_y,
        "w": loss_w,
        "h": loss_h,
        "conf": loss_conf,
        "cls": loss_cls,
        "cls_acc": cls_acc,
        "recall50": recall50,
        "recall75": recall75,
        "precision": precision,
        "conf_obj": conf_obj,
        "conf_noobj": conf_noobj,
    }
    metrics = {name: to_cpu(value).item() for name, value in scalars.items()}
    metrics["grid_size"] = grid_size1
    metrics['rotation'] = to_cpu(loss_xdir + loss_ydir).item()

    return total_loss, metrics
def forward(self, x, targets=None, img_dim=None):
    """Decode a YOLO head with an extra angle-class output; optionally compute the loss.

    Args:
        x: Raw head output. It is reshaped below to
            ``(batch, num_anchors, num_classes + num_angles + 5, grid, grid)``.
        targets: Ground-truth tensor forwarded to ``build_targets``. When
            ``None`` only the decoded detections are returned.
        img_dim: Input image size; stored on ``self.img_dim``.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``, otherwise
        ``(output, total_loss)``. The last axis of ``output`` is laid out as
        4 box values (multiplied by ``self.stride``), 1 confidence,
        ``num_classes`` class scores and ``num_angles`` angle-class scores.

    Side effects:
        Sets ``self.img_dim`` and, when targets are given, ``self.metrics``.
    """
    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)
    num_classes = self.num_classes

    # (batch, anchors, grid, grid, 5 + classes + angles)
    prediction = (
        x.view(num_samples, self.num_anchors, num_classes + self.num_angles + 5,
               grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # Get outputs
    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 5:5 + num_classes])  # Cls pred.
    pred_angle_cls = torch.sigmoid(prediction[..., 5 + num_classes:])  # Angle cls pred.

    # If grid size does not match current we compute new offsets
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Add offset and scale with anchors. ``.data`` keeps the decoded boxes
    # out of the autograd graph. torch.stack allocates on the inputs'
    # device, unlike the legacy FloatTensor(shape) constructor which used
    # the *current* CUDA device and returned uninitialised memory.
    pred_boxes = torch.stack(
        (
            x.data + self.grid_x,
            y.data + self.grid_y,
            torch.exp(w.data) * self.anchor_w,
            torch.exp(h.data) * self.anchor_h,
        ),
        dim=-1,
    )

    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, num_classes),
            pred_angle_cls.view(num_samples, -1, self.num_angles),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    (iou_scores, class_mask, angle_mask, obj_mask, noobj_mask,
     tx, ty, tw, th, tacls, tcls, tconf) = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        pred_angle_cls=pred_angle_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )
    # Mask indexing requires bool masks on current PyTorch; this is a no-op
    # when build_targets already returns bool tensors.
    obj_mask = obj_mask.bool()
    noobj_mask = noobj_mask.bool()

    # Hard-coded per-angle-class weights (19 entries, broadcast over the
    # last axis of the angle loss). Created on the predictions' device
    # rather than the current CUDA device.
    # NOTE(review): presumably inverse class frequencies; the list length
    # must equal self.num_angles - confirm.
    weights = torch.tensor(
        [1.12424274, 13.3361754, 75.7716263, 50.10983982, 61.6845070,
         71.0974026, 73.73063973, 22.52880658, 8.14052045, 5.87707998,
         25.49243306, 10.36837121, 26.4468599, 77.92882562, 100.44954128,
         82.9469697, 35.20578778, 8.97826978, 1.],
        dtype=torch.float32,
        device=pred_angle_cls.device,
    )

    # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])

    # Element-wise BCE so that each angle class can be weighted before
    # averaging.
    angle_bce = nn.BCELoss(reduction='none')(pred_angle_cls[obj_mask], tacls[obj_mask])
    loss_conf_angle = (angle_bce * weights / 100).mean()

    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls + loss_conf_angle

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    angle_acc = 100 * angle_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    hits50 = torch.sum(iou50 * detected_mask)
    hits75 = torch.sum(iou75 * detected_mask)
    num_objects = obj_mask.sum()
    precision = hits50 / (conf50.sum() + 1e-16)
    recall50 = hits50 / (num_objects + 1e-16)
    recall75 = hits75 / (num_objects + 1e-16)

    scalars = {
        "loss": total_loss,
        "x": loss_x,
        "y": loss_y,
        "w": loss_w,
        "h": loss_h,
        "angle_acc": angle_acc,
        "angle": loss_conf_angle,
        "conf": loss_conf,
        "cls": loss_cls,
        "cls_acc": cls_acc,
        "recall50": recall50,
        "recall75": recall75,
        "precision": precision,
        "conf_obj": conf_obj,
        "conf_noobj": conf_noobj,
    }
    self.metrics = {name: to_cpu(value).item() for name, value in scalars.items()}
    self.metrics["grid_size"] = grid_size

    return output, total_loss
def forward(self, x, targets=None, img_dim=None):
    """Decode raw YOLO head activations and optionally compute the loss.

    Args:
        x: Raw head output, e.g. ``[8, 255, 13, 13]`` where
            ``255 = n_anchors * (5 + n_classes) = 3 * 85`` (per the original
            author's note); reshaped below to
            ``(batch, num_anchors, num_classes + 5, grid, grid)``.
        targets: Ground-truth tensor forwarded to ``build_targets``. When
            ``None`` only the decoded detections are returned.
        img_dim: Input image size; stored on ``self.img_dim``.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``; otherwise the
        3-tuple ``(output, total_loss, self.metrics)``. Note the differing
        arity of the two paths. ``output`` has shape
        ``(batch, num_anchors * grid * grid, 5 + num_classes)``; its box
        coordinates are multiplied by ``self.stride``.

    Side effects:
        Sets ``self.img_dim`` and, when targets are given, ``self.metrics``.
    """
    # CUDA device index of the input (None for CPU tensors); forwarded so
    # the grid offsets are created on the same device.
    device_id = x.device.index

    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    # (bs, A, 5 + C, g, g) -> (bs, A, g, g, 5 + C)
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # Get outputs
    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

    # If grid size does not match current we compute new offsets
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda, device_id=device_id)

    # Add offset and scale with anchors. ``.data`` keeps the decoded boxes
    # out of the autograd graph. torch.stack allocates on the inputs'
    # device, so no legacy FloatTensor(shape, device=...) buffer is needed.
    pred_boxes = torch.stack(
        (
            x.data + self.grid_x,
            y.data + self.grid_y,
            torch.exp(w.data) * self.anchor_w,
            torch.exp(h.data) * self.anchor_h,
        ),
        dim=-1,
    )

    # [bs, A * g * g, 5 + num_classes]
    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )
    # Mask indexing requires bool masks on current PyTorch; this is a no-op
    # when build_targets already returns bool tensors.
    obj_mask = obj_mask.bool()
    noobj_mask = noobj_mask.bool()

    # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    hits50 = torch.sum(iou50 * detected_mask)
    hits75 = torch.sum(iou75 * detected_mask)
    num_objects = obj_mask.sum()
    precision = hits50 / (conf50.sum() + 1e-16)
    recall50 = hits50 / (num_objects + 1e-16)
    recall75 = hits75 / (num_objects + 1e-16)

    scalars = {
        "loss": total_loss,
        "x": loss_x,
        "y": loss_y,
        "w": loss_w,
        "h": loss_h,
        "conf": loss_conf,
        "cls": loss_cls,
        "cls_acc": cls_acc,
        "recall50": recall50,
        "recall75": recall75,
        "precision": precision,
        "conf_obj": conf_obj,
        "conf_noobj": conf_noobj,
    }
    self.metrics = {name: to_cpu(value).item() for name, value in scalars.items()}
    self.metrics["grid_size"] = grid_size

    return output, total_loss, self.metrics
def train_classifier(train_loader, classif_model, optimizer_classif, many_hot_encoder=None,
                     valid_loader=None, state=None, dir_model="model", result_path="res",
                     recompute=True):
    """Train (or reload) the weak classifier and write per-epoch results to a TSV.

    Args:
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        classif_model: Model to train.
        optimizer_classif: Optimizer stepping ``classif_model``.
        many_hot_encoder: Label encoder. Required despite the ``None``
            default: its ``state_dict()`` and ``labels`` are read.
        valid_loader: Optional validation loader; without it the validation
            score used by the callbacks is ``-1``.
        state: Checkpoint dict updated in place. A fresh dict is created per
            call when omitted.
        dir_model: Directory where the model is stored.
        result_path: Path of the tab-separated results file.
        recompute: Train even if a saved model already exists.

    Returns:
        ``(classif_model, state)``.

    Raises:
        ValueError: If ``many_hot_encoder`` is ``None``.
    """
    if many_hot_encoder is None:
        raise ValueError("many_hot_encoder is required to train the classifier")
    # Never share one default dict between calls.
    if state is None:
        state = {}

    criterion_bce = nn.BCELoss()
    classif_model, criterion_bce = to_cuda_if_available(classif_model, criterion_bce)
    print(classif_model)

    early_stopping_call = EarlyStopping(patience=cfg.early_stopping, val_comp="sup",
                                        init_patience=cfg.first_early_wait)
    save_best_call = SaveBest(val_comp="sup")
    print(optimizer_classif)

    # One dict per epoch. DataFrame.append was removed in pandas 2.0, so the
    # frame is built once from the collected rows.
    result_rows = []

    create_folder(dir_model)
    if cfg.save_best:
        model_path_sup1 = os.path.join(dir_model, "best_model")
    else:
        model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
    print("path of model : " + model_path_sup1)

    state['many_hot_encoder'] = many_hot_encoder.state_dict()

    if not os.path.exists(model_path_sup1) or recompute:
        n_class = len(many_hot_encoder.labels)
        for epoch_ in range(cfg.n_epoch_classifier):
            print(classif_model.training)
            start = time.time()
            batch_losses = []
            for i, (inputs, pred_labels) in enumerate(train_loader):
                if i == 0:
                    LOG.debug("classif input shape: {}".format(inputs.shape))

                # zero the parameter gradients
                optimizer_classif.zero_grad()
                inputs = to_cuda_if_available(inputs)

                # forward + backward + optimize; the loss is computed on
                # CPU against the (CPU) labels.
                weak_out = to_cpu(classif_model(inputs))
                loss_bce = criterion_bce(weak_out, pred_labels)
                batch_losses.append(loss_bce.item())
                loss_bce.backward()
                optimizer_classif.step()

            n_batches = len(batch_losses)
            loss_mean_bce = np.mean(batch_losses)

            # Evaluate in eval mode, then switch back for the next epoch.
            classif_model.eval()
            macro_f_measure_train = get_f_measure_by_class(classif_model, n_class, train_loader)
            if valid_loader is not None:
                macro_f_measure = get_f_measure_by_class(classif_model, n_class, valid_loader)
                mean_macro_f_measure = np.mean(macro_f_measure)
                class_macro_valid = np.array_str(macro_f_measure, precision=2)
            else:
                # No validation set: there is no per-class score to format.
                mean_macro_f_measure = -1
                class_macro_valid = ""
            classif_model.train()

            print("Time to train an epoch: {}".format(time.time() - start))
            # print statistics
            print('[%d / %d, %5d] loss: %.3f' %
                  (epoch_ + 1, cfg.n_epoch_classifier, n_batches, loss_mean_bce))

            results = {
                "train_loss": loss_mean_bce,
                "macro_measure_train": np.mean(macro_f_measure_train),
                "class_macro_train": np.array_str(macro_f_measure_train, precision=2),
                "macro_measure_valid": mean_macro_f_measure,
                "class_macro_valid": class_macro_valid,
            }
            for key, value in results.items():
                LOG.info("\t\t ----> {} : {}".format(key, value))
            result_rows.append(results)

            # Callbacks
            state['epoch'] = epoch_ + 1
            # setdefault: a fresh state dict has no "model"/"optimizer" entries.
            state.setdefault("model", {})["state_dict"] = classif_model.state_dict()
            state.setdefault("optimizer", {})["state_dict"] = optimizer_classif.state_dict()
            state["loss"] = loss_mean_bce
            state.update(results)

            if cfg.early_stopping is not None:
                if early_stopping_call.apply(mean_macro_f_measure):
                    print("EARLY STOPPING")
                    break

            if cfg.save_best and save_best_call.apply(mean_macro_f_measure):
                save_model(state, model_path_sup1)

        if cfg.save_best:
            LOG.info(
                "best model at epoch : {} with macro {}".format(save_best_call.best_epoch,
                                                                save_best_call.best_val))
            LOG.info("loading model from: {}".format(model_path_sup1))
            classif_model, state = load_model(model_path_sup1, return_optimizer=False,
                                              return_state=True)
        else:
            # model_path_sup1 already points at "epoch_<n_epoch_classifier>".
            save_model(state, model_path_sup1)
        LOG.debug("model path: {}".format(model_path_sup1))
        LOG.debug('Finished Training')
    else:
        classif_model, state = load_model(model_path_sup1, return_optimizer=False,
                                          return_state=True)
    LOG.info("#### End classif")

    save_results = pd.DataFrame(result_rows)
    save_results.to_csv(result_path, sep="\t", header=True, index=False)

    return classif_model, state
def forward(self, x, targets=None, img_dim=None, cls=None):
    """Decode a YOLO head, derive image-level class scores, and optionally compute the loss.

    Args:
        x: Raw head output. It is reshaped below to
            ``(batch, num_anchors, num_classes + 5, grid, grid)``.
        targets: Ground-truth tensor forwarded to ``build_targets``. When
            ``None`` no loss is computed.
        img_dim: Input image size; stored on ``self.img_dim``.
        cls: Image-level class labels passed to ``self.ce_loss`` together
            with the weighted class scores; required when ``targets`` is
            given.

    Returns:
        ``(output, weighted_class_scores, 0)`` when ``targets`` is ``None``,
        otherwise ``(output, weighted_class_scores, total_loss)``.
        ``weighted_class_scores`` has shape ``(batch, num_classes)``.

    Side effects:
        Sets ``self.img_dim`` and, when targets are given, ``self.metrics``.
        If the batch has no positive cell the detection entries of
        ``self.metrics`` are reported as ``0.0`` instead of being left
        stale or missing.
    """
    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    # (batch, anchors, grid, grid, 5 + classes)
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # Get outputs
    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
    # Class scores stay as raw logits (no sigmoid); they are fed to
    # self.ce_loss below.
    pred_cls = prediction[..., 5:]

    # If grid size does not match current we compute new offsets
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Add offset and scale with anchors. ``.data`` keeps the decoded boxes
    # out of the autograd graph. torch.stack allocates on the inputs'
    # device, unlike the legacy FloatTensor(shape) constructor which used
    # the *current* CUDA device and returned uninitialised memory.
    pred_boxes = torch.stack(
        (
            x.data + self.grid_x,
            y.data + self.grid_y,
            torch.exp(w.data) * self.anchor_w,
            torch.exp(h.data) * self.anchor_h,
        ),
        dim=-1,
    )

    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    # Weight each cell's class logits by its object confidence and sum over
    # anchors and grid cells to get one score vector per image.
    weighted_class_scores = (pred_conf.unsqueeze(dim=-1) * pred_cls).sum(dim=(1, 2, 3))

    if targets is None:
        return output, weighted_class_scores, 0

    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )
    # Mask indexing requires bool masks on current PyTorch; this is a no-op
    # when build_targets already returns bool tensors.
    obj_mask = obj_mask.bool()
    noobj_mask = noobj_mask.bool()

    # The detection terms are only defined when at least one cell is positive.
    num_objects = obj_mask.sum()
    has_objects = bool(num_objects > 0)

    if has_objects:
        # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        loss_cls = self.ce_loss(
            pred_cls[obj_mask].view(-1, self.num_classes),
            tcls[obj_mask].long().view(-1))
        detection_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
    else:
        detection_loss = 0.

    # Classification loss
    classification_loss = self.ce_loss(weighted_class_scores, cls)
    total_loss = detection_loss + classification_loss

    detection_keys = ("x", "y", "w", "h", "conf", "cls", "cls_acc", "recall50",
                      "recall75", "precision", "conf_obj", "conf_noobj")
    metrics = {"loss": to_cpu(total_loss).item()}
    if has_objects:
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        hits50 = torch.sum(iou50 * detected_mask)
        hits75 = torch.sum(iou75 * detected_mask)
        precision = hits50 / (conf50.sum() + 1e-16)
        recall50 = hits50 / (num_objects + 1e-16)
        recall75 = hits75 / (num_objects + 1e-16)
        detection_values = (loss_x, loss_y, loss_w, loss_h, loss_conf, loss_cls, cls_acc,
                            recall50, recall75, precision, conf_obj, conf_noobj)
        for key, value in zip(detection_keys, detection_values):
            metrics[key] = to_cpu(value).item()
    else:
        # No positives: report explicit zeros so the key set stays the same
        # and nothing stale from an earlier batch is logged.
        for key in detection_keys:
            metrics[key] = 0.0
    metrics["grid_size"] = grid_size
    metrics['classification_loss'] = to_cpu(classification_loss).item()
    metrics['batch_acc'] = to_cpu(
        torch.sum(torch.argmax(weighted_class_scores, dim=-1) == cls)).item() / len(cls)
    self.metrics = metrics

    return output, weighted_class_scores, total_loss
def forward(self, x, targets=None, img_dim=None):
    """Decode the raw head output into boxes and, given targets, the loss.

    Args:
        x: Raw feature map. It is viewed as
            ``(num_samples, num_anchors, num_classes + 7, grid, grid)``;
            per anchor the 7 leading channels are centre x, centre y,
            width, height, angle imaginary part, angle real part and
            objectness, followed by the class scores.
        targets: Ground truth forwarded unchanged to ``build_targets``.
            ``None`` selects inference mode (no loss, no metrics).
        img_dim: Stored on ``self.img_dim``; not otherwise used here.

    Returns:
        A tuple ``(output, loss)``. ``output`` has shape
        ``(num_samples, num_anchors * grid * grid, num_classes + 7)``
        with the four box values multiplied by ``self.stride``. ``loss``
        is the int ``0`` when ``targets`` is ``None`` or when a
        ``RuntimeError`` was raised while building targets / computing
        the loss; otherwise it is the summed loss tensor.

    Side effects:
        Sets ``self.img_dim``; calls ``self.compute_grid_offsets`` when the
        grid size differs from ``self.grid_size``; replaces ``self.metrics``
        after a successful loss computation (it is left untouched on the
        ``RuntimeError`` fallback path).
    """
    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 7,
               grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # Get outputs
    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    im = prediction[..., 4]  # Angle, imaginary part
    re = prediction[..., 5]  # Angle, real part
    pred_conf = torch.sigmoid(prediction[..., 6])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 7:])  # Cls pred.

    # If grid size does not match current we compute new offsets
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Add offset and scale with anchors. The buffer is allocated on the
    # device of the prediction itself (not the process-wide "current" CUDA
    # device), so the concatenation below never mixes devices. float32
    # matches what the legacy FloatTensor constructor produced.
    pred_boxes = torch.empty(
        prediction[..., :6].shape,
        dtype=torch.float32,
        device=prediction.device,
    )
    pred_boxes[..., 0] = x.data + self.grid_x
    pred_boxes[..., 1] = y.data + self.grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
    pred_boxes[..., 4] = im
    pred_boxes[..., 5] = re

    # Only x, y, w, h are rescaled by the stride; the angle components
    # are passed through unscaled.
    output = torch.cat(
        (
            pred_boxes[..., :4].view(num_samples, -1, 4) * self.stride,
            pred_boxes[..., 4:].view(num_samples, -1, 2),
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    # Kevin: this try/except makes sure that when ious is empty in
    # build_targets (see utils/utils.py) we fall back to (output, 0).
    # NOTE(review): it also hides every other RuntimeError raised below
    # (they are only printed) and leaves self.metrics at its previous value.
    try:
        (iou_scores, class_mask, obj_mask, noobj_mask,
         tx, ty, tw, th, tim, tre, tcls, tconf) = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
        )

        # Loss: mask outputs to ignore non-existing objects
        # (except with conf. loss)
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_im = self.mse_loss(im[obj_mask], tim[obj_mask])
        loss_re = self.mse_loss(re[obj_mask], tre[obj_mask])
        loss_euler = loss_im + loss_re
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(
            pred_conf[noobj_mask], tconf[noobj_mask])
        loss_conf = (self.obj_scale * loss_conf_obj
                     + self.noobj_scale * loss_conf_noobj)
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = (loss_x + loss_y + loss_w + loss_h
                      + loss_euler + loss_conf + loss_cls)

        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        # The 1e-16 terms guard against division by zero.
        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        self.metrics = {
            "loss": to_cpu(total_loss).item(),
            "x": to_cpu(loss_x).item(),
            "y": to_cpu(loss_y).item(),
            "w": to_cpu(loss_w).item(),
            "h": to_cpu(loss_h).item(),
            "im": to_cpu(loss_im).item(),
            "re": to_cpu(loss_re).item(),
            "conf": to_cpu(loss_conf).item(),
            "cls": to_cpu(loss_cls).item(),
            "cls_acc": to_cpu(cls_acc).item(),
            "recall50": to_cpu(recall50).item(),
            "recall75": to_cpu(recall75).item(),
            "precision": to_cpu(precision).item(),
            "conf_obj": to_cpu(conf_obj).item(),
            "conf_noobj": to_cpu(conf_noobj).item(),
            "grid_size": grid_size,
        }

        return output, total_loss
    except RuntimeError as err:
        print(err)
        return output, 0