def get_ignore(self, prediction, target, scaled_anchors, in_w, in_h, noobj_mask):
    bs = len(target)
    anchor_index = [[0, 1, 2], [3, 4, 5], [6, 7, 8]][self.feature_length.index(in_w)]
    scaled_anchors = np.array(scaled_anchors)[anchor_index]
    # Adjustment parameters for the anchor box centres
    x = torch.sigmoid(prediction[..., 0])
    y = torch.sigmoid(prediction[..., 1])
    # Width/height adjustment parameters for the anchor boxes
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
    LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
    # Build the grid; anchor centres sit at the top-left corner of each cell
    grid_x = torch.linspace(0, in_w - 1, in_w).repeat(in_w, 1).repeat(
        int(bs * self.num_anchors / 3), 1, 1).view(x.shape).type(FloatTensor)
    grid_y = torch.linspace(0, in_h - 1, in_h).repeat(in_h, 1).t().repeat(
        int(bs * self.num_anchors / 3), 1, 1).view(y.shape).type(FloatTensor)
    # Generate the anchor widths and heights
    anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
    anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
    anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(w.shape)
    anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(h.shape)
    # Decode the adjusted anchor centres and sizes
    pred_boxes = FloatTensor(prediction[..., :4].shape)
    pred_boxes[..., 0] = x.data + grid_x
    pred_boxes[..., 1] = y.data + grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * anchor_h
    for i in range(bs):
        pred_boxes_for_ignore = pred_boxes[i]
        pred_boxes_for_ignore = pred_boxes_for_ignore.view(-1, 4)
        for t in range(target[i].shape[0]):
            gx = target[i][t, 0] * in_w
            gy = target[i][t, 1] * in_h
            gw = target[i][t, 2] * in_w
            gh = target[i][t, 3] * in_h
            gt_box = torch.FloatTensor(np.array(
                [gx, gy, gw, gh])).unsqueeze(0).type(FloatTensor)
            anch_ious = bbox_iou(gt_box, pred_boxes_for_ignore, x1y1x2y2=False)
            anch_ious = anch_ious.view(pred_boxes[i].size()[:3])
            noobj_mask[i][anch_ious > self.ignore_threshold] = 0
    return noobj_mask
def _first_loss(self, pred, target):
    """
    :param pred: type: tensor: tensor.size([image_num, anchor_num, grid_j, grid_i, 5+class_num])
    :param target: type: list: [[image_num, x, y, w, h, cls], ...]
    :return: ignore_mask which ignores iou(pred, truth) > ignore_thres_first_loss
    """
    # Init ignore_mask which ignores iou(pred, truth) > ignore_thres_first_loss
    ignore_mask = self.ByteTensor(self.batch_size, self.num_anchors, self.grid_h, self.grid_w).fill_(1)
    if len(target):
        index_start = target[0][0]
        for i, pi0 in enumerate(pred):
            t = target[target[..., 0] == (i + index_start)]  # Targets for image i of the batch
            if len(t):
                p_boxes = torch.zeros_like(pi0)
                # transform pred to yolo box
                p_boxes[..., 0] = (torch.sigmoid(pi0[..., 0]) + self.grid_x[i]) / self.grid_w
                p_boxes[..., 1] = (torch.sigmoid(pi0[..., 1]) + self.grid_y[i]) / self.grid_h
                p_boxes[..., 2] = (torch.exp(pi0[..., 2]) * self.anchor_w[i]) / self.grid_w
                p_boxes[..., 3] = (torch.exp(pi0[..., 3]) * self.anchor_h[i]) / self.grid_h
                p_boxes = p_boxes.view(pi0.size()[0] * pi0.size()[1] * pi0.size()[2], 6)
                # compute iou for each pred grid cell and all targets
                ious = torch.stack(tuple([bbox_iou(x, p_boxes[:, :4], False) for x in t[:, 1:5]]))
                best_ious, best_index = ious.max(0)
                best_ious, best_index = best_ious.view(pi0.size()[0], pi0.size()[1], pi0.size()[2], 1), \
                    best_index.view(pi0.size()[0], pi0.size()[1], pi0.size()[2], 1)
                ignore_mask[i][torch.squeeze(best_ious > self.ignore_thres_first_loss, 3)] = 0
    return ignore_mask
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
    """
    Removes detections with lower object confidence score than 'conf_thres'
    and performs Non-Maximum Suppression to further filter detections.
    Args:
        prediction.shape(batch_size, num_yolo*num_anchors*grid_size*grid_size, 85)
    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)
    """
    # From center (xywh) to corner (xyxy)
    prediction[..., :4] = xywh2xyxy(prediction[..., :4])
    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
        # Filter out confidence scores below threshold
        image_pred = image_pred[image_pred[:, 4] >= conf_thres]
        # If none are remaining => process next image
        if not image_pred.size(0):
            continue
        # score = object_conf. * max_class_pred_prob.
        score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
        # Sort by it
        image_pred = image_pred[np.argsort(-score)]
        class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
        # detections.shape(unknown, 7_vals)
        # 7_vals=(x1, y1, x2, y2, object_conf., class_score, class_pred_label)
        detections = torch.cat(
            (image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
        # Perform non-maximum suppression
        keep_boxes = []
        while detections.size(0):
            # === Indices of boxes with large IOUs and matching labels ===
            large_overlap = bbox_iou(detections[0, :4].unsqueeze(0),
                                     detections[:, :4]) > nms_thres
            label_match = detections[0, -1] == detections[:, -1]
            invalid = large_overlap & label_match
            # === Merge overlapping bboxes weighted by their confidence ===
            weights = detections[invalid, 4:5]
            detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum()
            keep_boxes += [detections[0]]
            # === Remove the suppressed boxes ===
            detections = detections[~invalid]
        if keep_boxes:
            output[image_i] = torch.stack(keep_boxes)
    return output
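A minimal usage sketch for the merge-style NMS above. It assumes `xywh2xyxy`, `bbox_iou`, and `numpy as np` are available from the same codebase, and the random tensor is only a stand-in for real network output with 80 classes (5 + 80 = 85 values per box); it is not part of the original code.

# Hypothetical smoke test for non_max_suppression (assumes xywh2xyxy / bbox_iou exist).
import torch

dummy_pred = torch.rand(2, 10647, 85)   # (batch, anchors*grid*grid, 85)
dummy_pred[..., :4] *= 416               # fake xywh boxes in pixel units
detections = non_max_suppression(dummy_pred, conf_thres=0.5, nms_thres=0.4)
for i, det in enumerate(detections):
    if det is None:
        print(f"image {i}: no boxes above conf_thres")
    else:
        # each row: x1, y1, x2, y2, object_conf, class_score, class_pred
        print(f"image {i}: kept {det.shape[0]} boxes")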
def build_target(raw_coord, pred, anchors_full, args): coord = Variable(torch.zeros(raw_coord.size(0), raw_coord.size(1)).cuda()) batch, grid = raw_coord.size(0), args.size // args.gsize coord[:, 0] = (raw_coord[:, 0] + raw_coord[:, 2]) / (2 * args.size) coord[:, 1] = (raw_coord[:, 1] + raw_coord[:, 3]) / (2 * args.size) coord[:, 2] = (raw_coord[:, 2] - raw_coord[:, 0]) / (args.size) coord[:, 3] = (raw_coord[:, 3] - raw_coord[:, 1]) / (args.size) coord = coord * grid bbox = torch.zeros(coord.size(0), 9, 5, grid, grid) best_n_list, best_gi, best_gj = [], [], [] for ii in range(batch): batch, grid = raw_coord.size(0), args.size // args.gsize gi = coord[ii, 0].long() gj = coord[ii, 1].long() tx = coord[ii, 0] - gi.float() ty = coord[ii, 1] - gj.float() gw = coord[ii, 2] gh = coord[ii, 3] anchor_idxs = range(9) anchors = [anchors_full[i] for i in anchor_idxs] scaled_anchors = [ (x[0] / (args.anchor_imsize/grid), \ x[1] / (args.anchor_imsize/grid)) for x in anchors] ## Get shape of gt box gt_box = torch.FloatTensor(np.array([0, 0, gw, gh], dtype=np.float32)).unsqueeze(0) ## Get shape of anchor box anchor_shapes = torch.FloatTensor( np.concatenate((np.zeros( (len(scaled_anchors), 2)), np.array(scaled_anchors)), 1)) ## Calculate iou between gt and anchor shapes # anch_ious = list(bbox_iou(gt_box, anchor_shapes)) anch_ious = list(bbox_iou(gt_box, anchor_shapes, x1y1x2y2=False)) ## Find the best matching anchor box best_n = np.argmax(np.array(anch_ious)) tw = torch.log(gw / scaled_anchors[best_n][0] + 1e-16) th = torch.log(gh / scaled_anchors[best_n][1] + 1e-16) bbox[ii, best_n, :, gj, gi] = torch.stack( [tx, ty, tw, th, torch.ones(1).cuda().squeeze()]) best_n_list.append(int(best_n)) best_gi.append(gi) best_gj.append(gj) bbox = Variable(bbox.cuda()) return bbox, best_gi, best_gj, best_n_list
def get_batch_statistics(outputs, targets, iou_threshold): """ Compute true positives, predicted scores and predicted labels per sample. Args: targets.shape(num_bboxes, 6_vals), 6_vals=(idx, labels, x1,y1,x2,y2) """ batch_metrics = [] for sample_i in range(len(outputs)): # if prediction detections is None if outputs[sample_i] is None: continue # output.shape(num_bboxes, 7_vals) output = outputs[sample_i] pred_boxes = output[:, :4] # pred_(x1, y1, x2, y2) pred_scores = output[:, 4] # obj_conf. pred_labels = output[:, -1] # class_pred_labels true_positives = np.zeros(pred_boxes.shape[0]) # targets for i-th sample annotations = targets[targets[:, 0] == sample_i] # labels for i-th sample target_labels = annotations[:, 1] if len(annotations) else [] if len(annotations): annotations = annotations[:, 1:] detected_boxes = [] target_boxes = annotations[:, 1:] for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): # If targets are found break if len(detected_boxes) == len(annotations): break # Ignore if label is not one of the target labels if pred_label not in target_labels: continue iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) if iou >= iou_threshold and box_index not in detected_boxes: true_positives[pred_i] = 1 detected_boxes += [box_index] batch_metrics.append([true_positives, pred_scores, pred_labels]) return batch_metrics
def inter_cls_nms(detections, threshold=0.8):
    """
    An NMS function similar to the one found in utils.utils, except that it
    performs suppression regardless of the class.
    :param detections: output from the network / previous class-level NMS stage
    :param threshold: NMS threshold level
    :return: non-overlapping detection regions
    """
    # detections are expected to be sorted (max to min) since the previous stage does this
    # (x1, y1, x2, y2, object_conf, class_conf, class)
    det_max = []
    while detections.shape[0]:
        det_max.append(detections[0, :])
        iou = bbox_iou(detections[0, :], detections[:, :])
        detections = detections[iou < threshold, :]
    det_max = torch.cat(det_max).reshape((-1, 7))
    return det_max
def compute_loss_for_MFCP(output, targets, aux_model):
    ft = torch.cuda.FloatTensor if output[0].is_cuda else torch.Tensor
    lcls, lbox = ft([0]), ft([0])
    if type(aux_model) in (nn.parallel.DataParallel,
                           nn.parallel.DistributedDataParallel):
        # The aux hyperparameters live inside the model itself, so unwrap the
        # (distributed) data-parallel shell first
        aux_model = aux_model.module
    hyp = aux_model.hyp
    ft = torch.cuda.FloatTensor if output.is_cuda else torch.Tensor
    BCEcls = nn.BCEWithLogitsLoss(pos_weight=ft([hyp['cls_pw']]), reduction='sum')
    txy, twh, tcls, tbox, index, anchors_vec = build_targets_for_MFCP(
        aux_model, targets)
    b, a, j, i = index
    nb = len(b)
    if nb:
        pn = output[b, a, j, i]  # predictions that are needed
        pxy = torch.sigmoid(pn[:, 0:2])
        pbox = torch.cat(
            [pxy, torch.exp(pn[:, 2:4]).clamp(max=1E3) * anchors_vec], dim=1)
        DIoU = bbox_iou(pbox.t(), tbox, x1y1x2y2=False, DIoU=True)
        lbox += (1 - DIoU).sum()
        tclsm = torch.zeros_like(pn[:, 4:])
        tclsm[range(len(b)), tcls] = 1.0
        lcls += BCEcls(pn[:, 4:], tclsm)
    lbox *= hyp['diou']
    lcls *= hyp['cls']
    if nb:
        lbox /= nb
        lcls /= (nb * aux_model.nc)
    loss = lbox + lcls
    return loss, torch.cat((lbox, lcls, loss)).clone().detach()
def on_batch_end(self, last_output, last_target, **kwargs): bs = last_output[0].shape[0] iou_thres = torch.tensor((0.5, )) niou = iou_thres.numel() for batch_idx in range(0, bs): target_boxes = last_target[0][batch_idx].cpu() target_classes = last_target[1][batch_idx].cpu() - 1.0 people_idxs = (torch.LongTensor( (0, )) == target_classes).nonzero().view(-1) target_boxes = target_boxes[people_idxs] target_classes = target_classes[people_idxs] yolo_out = grab_idx(last_output, batch_idx) pred = YoloCategoryList.yolo2pred( yolo_out) # list([[x1, y1, x2, y2, conf, cls]]) detections = pred[0] if detections is None: # bs=1, first and only result if len(target_classes): self.stats.append((torch.zeros(0, 1), torch.Tensor(), torch.Tensor(), target_classes)) continue boxes = YoloCategoryList.bbox2fai(detections) correct = torch.zeros(len(detections), niou) if len(target_classes): for det_idx, det in enumerate( detections): # detections per image # Break if all targets already located in image pbox = boxes[det_idx] iou, j = bbox_iou(pbox, target_boxes).max(0) correct[det_idx] = iou > iou_thres conf = detections[:, 4] clazz = detections[:, 5] self.stats.append((correct, conf, clazz, target_classes)) stats = [np.concatenate(x, 0) for x in list(zip(*self.stats))] # to numpy p, r, ap, f1, ap_class = ap_per_class(*stats) self.apAt50 = ap.item()
def get_LCP_area(self, targets, predictions, anchors, feature_w, feature_h):
    pred_info = []
    # Convert anchors to normalized form
    anchors = torch.from_numpy(anchors).to(targets.dtype).to(
        targets.device) / 416.0
    # anchors_vec are the anchors expressed on the feature map
    anchors_vec = anchors * torch.tensor(
        [feature_h, feature_w], dtype=anchors.dtype, device=anchors.device)
    # Compute the various indices
    gwh = targets[:, 4:6]
    iou_anchors = wh_iou(anchors, gwh)
    _, idx_a = iou_anchors.max(0)
    idx_p = idx_a // 3
    idx_a = idx_a % 3
    idx_b = targets[:, 0].long()
    # Select the matched boxes
    for i, p in enumerate(predictions):
        mask = idx_p == i
        idx_x, idx_y = (targets[:, 2] * p.size(-2)).long(), (targets[:, 3] * p.size(-3)).long()
        pred_info.append(p[idx_b[mask], idx_a[mask], idx_y[mask], idx_x[mask]])
    pred_info = torch.cat(pred_info, dim=0)
    # From here on pred_info_detach is only used to pick boxes; no backprop is needed
    pred_info_detach = pred_info.clone().detach()
    pred_info_detach[:, 0:2] = torch.sigmoid(
        pred_info_detach[:, 0:2]) + torch.stack([idx_x, idx_y], dim=0).t()
    pred_info_detach[:, 2:4] = torch.exp(pred_info_detach[:, 2:4]).clamp(
        max=1E3) * anchors_vec[(idx_p + idx_a)]
    # Scale the labels (previously normalized) to feature-map coordinates
    targets[:, [2, 4]] *= feature_w
    targets[:, [3, 5]] *= feature_h
    # Work out which boxes have IoU greater than 0.5
    targets[:, 2:] = xywh2xyxy(targets[:, 2:])
    pred_info_detach[:, :4] = xywh2xyxy(pred_info_detach[:, :4])
    boxes_union, boxes_gt = torch.zeros(
        [len(targets), 5], device=targets.device), torch.zeros(
        [len(targets), 5], device=targets.device)
    boxes_gt[:, 0] = idx_b
    boxes_gt[:, 1:] = targets[:, 2:]
    boxes_union[:, 0] = idx_b
    boxes_union[:, 1:3] = torch.min(pred_info_detach[:, :2], targets[:, 2:4])
    boxes_union[:, 3:5] = torch.max(pred_info_detach[:, 2:4], targets[:, 4:6])
    giou = bbox_iou(torch.cat([
        torch.sigmoid(pred_info[:, 0:2]) + torch.stack([idx_x, idx_y], dim=0).t(),
        torch.exp(pred_info[:, 2:4]).clamp(max=1E3) * anchors_vec[(idx_p + idx_a)]
    ], dim=1).t(), targets[:, 2:6], x1y1x2y2=False, GIoU=True)
    return boxes_gt, boxes_union, (1 - giou).mean()
def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4): """ Removes detections with lower object confidence score than 'conf_thres' and performs Non-Maximum Suppression to further filter detections. Returns detections with shape: (x1, y1, x2, y2, object_conf, class_score, class_pred) """ # From (center x, center y, width, height) to (x1, y1, x2, y2) box_corner = prediction.new(prediction.shape) box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 prediction[:, :, :4] = box_corner[:, :, :4] output = [None for _ in range(len(prediction))] for image_i, image_pred in enumerate(prediction): # Filter out confidence scores below threshold conf_mask = (image_pred[:, 4] >= conf_thres).squeeze() image_pred = image_pred[conf_mask] # If none are remaining => process next image if not image_pred.size(0): continue # Get score and class with highest confidence class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True) # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) detections = torch.cat( (image_pred[:, :5], class_conf.float(), class_pred.float()), 1) # Iterate through all predicted classes unique_labels = detections[:, -1].cpu().unique() if prediction.is_cuda: unique_labels = unique_labels.cuda() for c in unique_labels: # Get the detections with the particular class detections_class = detections[detections[:, -1] == c] # Sort the detections by maximum objectness confidence _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True) detections_class = detections_class[conf_sort_index] # Perform non-maximum suppression max_detections = [] while detections_class.size(0): # Get detection with highest confidence and save as max detection max_detections.append(detections_class[0].unsqueeze(0)) # Stop if we're at the last detection if len(detections_class) == 1: break # Get the IOUs for all boxes with lower confidence ious = bbox_iou(max_detections[-1], detections_class[1:]) # Remove detections with IoU >= NMS threshold detections_class = detections_class[1:][ious < nms_thres] max_detections = torch.cat(max_detections).data # Add max detections to outputs output[image_i] = (max_detections if output[image_i] is None else torch.cat((output[image_i], max_detections))) return output
def validate(*, dataloader, model, device, step=-1, bbox_all=False, debug_mode): # result = open("logs/result.txt", "w" ) with torch.no_grad(): t_start = time.time() conf_thres, nms_thres, iou_thres = model.get_threshs() width, height = model.img_size() model.eval() print("Calculating mAP - Model in evaluation mode") n_images = len(dataloader.dataset) mAPs = [] mR = [] mP = [] for batch_i, (img_uris, imgs, targets) in enumerate( tqdm(dataloader, desc='Computing mAP')): imgs = imgs.to(device, non_blocking=True) targets = targets.to(device, non_blocking=True) # output,_,_,_ = model(imgs) output = model(imgs) for sample_i, (labels, detections) in enumerate(zip(targets, output)): detections = detections[detections[:, 4] > conf_thres] if detections.size()[0] == 0: predictions = torch.tensor([]) else: predictions = torch.argmax(detections[:, 5:], dim=1) # From (center x, center y, width, height) to (x1, y1, x2, y2) box_corner = torch.zeros((detections.shape[0], 4), device=detections.device) xy = detections[:, 0:2] wh = detections[:, 2:4] / 2 box_corner[:, 0:2] = xy - wh box_corner[:, 2:4] = xy + wh probabilities = detections[:, 4] nms_indices = nms(box_corner, probabilities, nms_thres) box_corner = box_corner[nms_indices] probabilities = probabilities[nms_indices] predictions = predictions[nms_indices] if nms_indices.shape[ 0] == 0: # there should always be at least one label continue # Get detections sorted by decreasing confidence scores _, inds = torch.sort(-probabilities) box_corner = box_corner[inds] probabilities = probabilities[inds] predictions = predictions[inds] labels = labels[(labels[:, 1:5] <= 0).sum( dim=1 ) == 0] # remove the 0-padding added by the dataloader # Extract target boxes as (x1, y1, x2, y2) target_boxes = xywh2xyxy(labels[:, 1:5]) target_boxes[:, (0, 2)] *= width target_boxes[:, (1, 3)] *= height detected = torch.zeros(target_boxes.shape[0], device=target_boxes.device, dtype=torch.uint8) correct = torch.zeros(nms_indices.shape[0], device=box_corner.device, dtype=torch.uint8) # 0th dim is the detection # (repeat in the 1st dim) # 2nd dim is the coord ious = bbox_iou( box_corner.unsqueeze(1).expand(-1, target_boxes.shape[0], -1), target_boxes.unsqueeze(0).expand(box_corner.shape[0], -1, -1)) # ious is 2d -- 0th dim is the detected box, 1st dim is the target box, value is iou ####################################################### ##### skip images without label ##### if [] in ious.data.tolist(): continue ####################################################### best_is = torch.argmax(ious, dim=1) # TODO fix for multi-class. Need to use predictions somehow? 
for i, iou in enumerate(ious): best_i = best_is[i] if ious[i, best_i] > iou_thres and detected[best_i] == 0: correct[i] = 1 detected[best_i] = 1 # Compute Average Precision (AP) per class ap, r, p = average_precision(tp=correct, conf=probabilities, n_gt=labels.shape[0]) # Compute mean AP across all classes in this image, and append to image list mAPs.append(ap) mR.append(r) mP.append(p) if bbox_all or sample_i < 2: # log the first two images in every batch img_filepath = img_uris[sample_i] if img_filepath is None: print( "NULL image filepath for image uri: {uri}".format( uri=img_uris[sample_i])) orig_img = Image.open(img_filepath) # draw = ImageDraw.Draw(img_with_boxes) w, h = orig_img.size pad_h, pad_w, scale_factor = calculate_padding( h, w, height, width) ################################## detect_box = copy.deepcopy(box_corner) ################################## box_corner /= scale_factor box_corner[:, (0, 2)] -= pad_w box_corner[:, (1, 3)] -= pad_h ####################################################################################### if debug_mode: pil_img = transforms.ToPILImage()(imgs.squeeze()) ##### getting the image's name ##### img_path = img_uris[0] img_name = ("_".join(map(str, img_path.split("_")[-5:]))) tmp_path = os.path.join( visualization_tmp_path, img_name[:-4] + "_predicted_vis.jpg") vis_label = add_class_dimension_to_labels(detect_box) visualize_and_save_to_local(pil_img, vis_label, tmp_path, box_color="red") print("Prediction visualization uploaded") ####################################################################################### mean_mAP = torch.tensor(mAPs, dtype=torch.float).mean().item() mean_R = torch.tensor(mR, dtype=torch.float).mean().item() mean_P = torch.tensor(mP, dtype=torch.float).mean().item() # Means of all images mean_mAP = torch.tensor(mAPs, dtype=torch.float).mean().item() mean_R = torch.tensor(mR, dtype=torch.float).mean().item() mean_P = torch.tensor(mP, dtype=torch.float).mean().item() dt = time.time() - t_start print('mAP: {0:5.2%}, Recall: {1:5.2%}, Precision: {2:5.2%}'.format( mean_mAP, mean_R, mean_P)) # result.write(str(1-mean_mAP)) # result.close() return mean_mAP, mean_R, mean_P, dt / (n_images + 1e-12)
def image_augmentation(f_rgb, f_label, width, height, jitter, hue, saturation, exposure):
    rgb_imgs = []
    ious = []
    org_imgs = []
    label = np.array([line for line in open(f_label, 'r').readlines()])
    gt_box2d = label_to_gt_box2d(np.array(label)[np.newaxis, :],
                                 cls=cfg.DETECT_OBJ,
                                 coordinate='lidar')[0]  # (N', 4) x_min, y_min, x_max, y_max
    img = cv2.imread(f_rgb)
    warn("img value: {}".format(img[:3, :3, :3]))
    img_height, img_width = img.shape[:2]
    for idx in range(len(gt_box2d)):
        box = gt_box2d[idx]
        x_min, y_min, x_max, y_max = box
        x_min = int(x_min)
        y_min = int(y_min)
        x_max = int(x_max)
        y_max = int(y_max)
        ori_img = cv2.resize(cv2.imread(f_rgb)[y_min:y_max, x_min:x_max], (64, 64))
        org_imgs.append(ori_img)
        box_height = y_max - y_min
        box_width = x_max - x_min
        dx = int(jitter * box_width) + 1
        dy = int(jitter * box_height) + 1
        lx = np.random.randint(-dx, dx)
        ly = np.random.randint(-dy, dy)
        lw = np.random.randint(-dx, dx)
        lh = np.random.randint(-dy, dy)
        x = (x_max + x_min) / 2.0 + lx
        y = (y_max + y_min) / 2.0 + ly
        box_height = box_height + lh
        box_width = box_width + lw
        x_min = int(max(0, x - box_width / 2.0))
        x_max = int(min(img_width, x + box_width / 2.0))
        y_min = int(max(0, y - box_height / 2.0))
        y_max = int(min(img_height, y + box_height / 2.0))
        flip = np.random.randint(1, 10000) % 2
        img = cv2.resize(cv2.imread(f_rgb)[y_min:y_max, x_min:x_max], (width, height))
        if flip:
            img = cv2.flip(img, 1)
        img = random_distort_image(img, hue, saturation, exposure)
        # for the ground-truth crop, calculate iou with its original location and size
        iou = bbox_iou(box, (x_min, y_min, x_max, y_max), x1y1x2y2=True)
        rgb_imgs.append(img)
        ious.append(iou)
    # Randomly generate the same number of background candidates, which will have low or zero iou.
    # After generating the new boxes, the iou against each of gt_box2d is calculated and used as the label.
    # If that iou is low, the bounding box is empty, background, or falsely located.
    # If that iou is high, the bounding box is correctly inferred from the 3D bounding boxes.
    # This is the strategy taken for a simple, mini 2D classifier.
    for idx in range(len(gt_box2d) * 4):
        x = np.random.randint(0, img_width)
        y = np.random.randint(0, img_height)
        h = np.random.randint(40, 200)
        w = np.random.randint(40, 200)
        x_min = int(max(0, x - w / 2.0))
        x_max = int(min(img_width, x + w / 2.0))
        y_min = int(max(0, y - h / 2.0))
        y_max = int(min(img_height, y + h / 2.0))
        max_iou = 0
        for gt_idx in range(len(gt_box2d)):
            box = gt_box2d[gt_idx]
            iou = bbox_iou(box, (x_min, y_min, x_max, y_max), x1y1x2y2=True)
            if iou > max_iou:
                max_iou = iou
        img = cv2.resize(cv2.imread(f_rgb)[y_min:y_max, x_min:x_max], (width, height))
        if flip:
            img = cv2.flip(img, 1)
        img = random_distort_image(img, hue, saturation, exposure)
        rgb_imgs.append(img)
        ious.append(max_iou)  # label the random crop with its best overlap against any GT box
    return org_imgs, rgb_imgs, ious
def test(model, fetcher, conf_thres=1e-3, nms_thres=0.5): model.eval() val_loss = 0 classes = fetcher.loader.dataset.classes num_classes = len(classes) seen = 0 s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1') p, r, f1, mp, mr, mAP, mf1 = 0., 0., 0., 0., 0., 0., 0. jdict, stats, ap, ap_class = [], [], [], [] pbar = tqdm(enumerate(fetcher), total=len(fetcher)) for idx, (imgs, targets) in pbar: _, _, height, width = imgs.shape # batch size, channels, height, width # Run model inf_out, train_out = model(imgs) # inference and training outputs # Compute loss val_loss += compute_loss(train_out, targets, model).item() # GIoU, obj, cls # Run NMS output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres) # Plot images with bounding boxes if idx == 0: show_batch(imgs, output) # Statistics per image for si, pred in enumerate(output): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class seen += 1 if pred is None: if nl: stats.append(([], torch.Tensor(), torch.Tensor(), tcls)) continue # Clip boxes to image bounds clip_coords(pred, (height, width)) # Assign all predictions as incorrect correct = [0] * len(pred) if nl: detected = [] tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) tbox[:, [0, 2]] *= width tbox[:, [1, 3]] *= height # Search for correct predictions for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred): # Break if all targets already located in image if len(detected) == nl: break # Continue if predicted class not among image classes if pcls.item() not in tcls: continue # Best iou, index between pred and targets m = (pcls == tcls_tensor).nonzero().view(-1) iou, bi = bbox_iou(pbox, tbox[m]).max(0) # If iou > threshold and class is correct mark as correct if iou > 0.5 and m[ bi] not in detected: # and pcls == tcls[bi]: correct[i] = 1 detected.append(m[bi]) # Append statistics (correct, conf, pcls, tcls) stats.append( (correct, pred[:, 4].cpu().numpy(), pred[:, 6].cpu().numpy(), tcls)) pbar.set_description('loss: %8g' % (val_loss / (idx + 1))) # Compute statistics stats = [np.concatenate(x, 0) for x in list(zip(*stats))] # sync stats if dist.is_initialized(): for i in range(len(stats)): stat = torch.FloatTensor(stats[i]).to(device) ls = torch.IntTensor([len(stat)]).to(device) ls_list = [ torch.IntTensor([0]).to(device) for _ in range(dist.get_world_size()) ] dist.all_gather(ls_list, ls) ls_list = [ls_item.item() for ls_item in ls_list] max_ls = max(ls_list) if len(stat) < max_ls: stat = torch.cat( [stat, torch.zeros(max_ls - len(stat)).to(device)]) stat_list = [ torch.zeros(max_ls).to(device) for _ in range(dist.get_world_size()) ] dist.all_gather(stat_list, stat) stat_list = [ stat_list[si][:ls_list[si]] for si in range(dist.get_world_size()) if ls_list[si] > 0 ] stat = torch.cat(stat_list) stats[i] = stat.cpu().numpy() if len(stats): p, r, ap, f1, ap_class = ap_per_class(*stats) mp, mr, mAP, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=num_classes) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%10.3g' * 6 # print format print(pf % ('all', seen, nt.sum(), mp, mr, mAP, mf1)) # Print results per class for i, c in enumerate(ap_class): print(pf % (classes[c], seen, nt[c], p[i], r[i], ap[i], f1[i])) # Return results mAPs = np.zeros(num_classes) + mAP for i, c in enumerate(ap_class): mAPs[c] = ap[i] # return (mp, mr, mAP, mf1, *(loss / 
len(dataloader)).tolist()), mAPs return mAP
def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
    # Number of images in the batch
    bs = len(target)
    # Pick the anchors for this scale
    # self.feature_length holds the feature-map sizes after 32x, 16x and 8x downsampling of the input
    # anchor_index maps the current feature-map size to its anchors, e.g. 32x downsampling uses anchors 0, 1, 2
    anchor_index = [[0, 1, 2], [3, 4, 5], [6, 7, 8]][self.feature_length.index(in_w)]
    subtract_index = [0, 3, 6][self.feature_length.index(in_w)]
    # Create all-zero / all-one arrays
    mask = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
    noobj_mask = torch.ones(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
    tx = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
    ty = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
    tw = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
    th = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
    t_box = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, 4, requires_grad=False)
    tconf = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
    tcls = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, self.num_classes, requires_grad=False)
    box_loss_scale_x = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
    box_loss_scale_y = torch.zeros(bs, int(self.num_anchors / 3), in_h, in_w, requires_grad=False)
    for b in range(bs):
        # target[b].shape[0] is the number of ground-truth boxes in this image
        for t in range(target[b].shape[0]):
            # Compute the position on this feature map:
            # target stores normalized GT box centers and w, h,
            # so convert them to coordinates at the current scale
            gx = target[b][t, 0] * in_w
            gy = target[b][t, 1] * in_h
            gw = target[b][t, 2] * in_w
            gh = target[b][t, 3] * in_h
            # Work out which grid cell the GT box falls into
            gi = int(gx)
            gj = int(gy)
            gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
            # Positions of all anchor boxes
            anchor_shapes = torch.FloatTensor(
                np.concatenate((np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))
            # Compute the overlap: IoU between one GT box and the 9 anchors
            anch_ious = bbox_iou(gt_box, anchor_shapes)
            # Find the best matching anchor box
            best_n = np.argmax(anch_ious)
            if best_n not in anchor_index:
                # If the best-IoU anchor does not belong to this scale, move on to the next GT box
                continue
            # Masks
            # in_h, in_w are the feature-map size; if the best anchor belongs to this scale and the
            # GT center cell (gj, gi) lies inside the feature map, mark that cell as containing an object
            if (gj < in_h) and (gi < in_w):
                best_n = best_n - subtract_index
                # Decide which anchors actually contain an object:
                # no-object mask is set to 0 at this cell
                noobj_mask[b, best_n, gj, gi] = 0
                # object mask is set to 1 at this cell
                mask[b, best_n, gj, gi] = 1
                # Anchor center targets
                tx[b, best_n, gj, gi] = gx
                ty[b, best_n, gj, gi] = gy
                # Anchor width/height targets
                tw[b, best_n, gj, gi] = gw
                th[b, best_n, gj, gi] = gh
                # Used to obtain the xywh loss scale
                box_loss_scale_x[b, best_n, gj, gi] = target[b][t, 2]
                box_loss_scale_y[b, best_n, gj, gi] = target[b][t, 3]
                # Objectness confidence
                tconf[b, best_n, gj, gi] = 1
                # Class
                tcls[b, best_n, gj, gi, int(target[b][t, 4])] = 1
            else:
                print('Step {0} out of bound'.format(b))
                print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(
                    gj, in_h, gi, in_w))
                continue
    t_box[..., 0] = tx
    t_box[..., 1] = ty
    t_box[..., 2] = tw
    t_box[..., 3] = th
    # Returns the encoded ground-truth data for the whole batch:
    # mask       (bs, num_anchors/3, in_h, in_w) which grid cells contain an object
    # noobj_mask (bs, num_anchors/3, in_h, in_w) which grid cells contain no object
    # t_box      (bs, num_anchors/3, in_h, in_w, 4) GT box center and w, h at the current scale
    # tcls       (bs, num_anchors/3, in_h, in_w, num_classes) one-hot class for cells that contain an object
    # tconf      (bs, num_anchors/3, in_h, in_w) objectness confidence for cells that contain an object
    return mask, noobj_mask, t_box, tconf, tcls, box_loss_scale_x, box_loss_scale_y
def __getitem__(self, idx): l_bound = idx * self.config['BATCH_SIZE'] r_bound = (idx + 1) * self.config['BATCH_SIZE'] if r_bound > len(self.images): r_bound = len(self.images) l_bound = r_bound - self.config['BATCH_SIZE'] instance_count = 0 x_batch = np.zeros((r_bound - l_bound, self.config['IMAGE_H'], self.config['IMAGE_W'], 3)) # input images b_batch = np.zeros( (r_bound - l_bound, 1, 1, 1, self.config['TRUE_BOX_BUFFER'], 4) ) # list of self.config['TRUE_self.config['BOX']_BUFFER'] GT boxes y_batch = np.zeros( (r_bound - l_bound, self.config['GRID_H'], self.config['GRID_W'], self.config['BOX'], 4 + 1 + len(self.config['LABELS']))) # desired network output for train_instance in self.images[l_bound:r_bound]: # augment input image and fix object's position and size img, all_objs = self.aug_image(train_instance, jitter=self.jitter) # construct output from object's x, y, w, h true_box_index = 0 for obj in all_objs: if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj[ 'ymin'] and obj['name'] in self.config['LABELS']: center_x = .5 * (obj['xmin'] + obj['xmax']) center_x = center_x / (float(self.config['IMAGE_W']) / self.config['GRID_W']) center_y = .5 * (obj['ymin'] + obj['ymax']) center_y = center_y / (float(self.config['IMAGE_H']) / self.config['GRID_H']) grid_x = int(np.floor(center_x)) grid_y = int(np.floor(center_y)) if grid_x < self.config['GRID_W'] and grid_y < self.config[ 'GRID_H']: obj_indx = self.config['LABELS'].index(obj['name']) center_w = (obj['xmax'] - obj['xmin']) / ( float(self.config['IMAGE_W']) / self.config['GRID_W']) # unit: grid cell center_h = (obj['ymax'] - obj['ymin']) / ( float(self.config['IMAGE_H']) / self.config['GRID_H']) # unit: grid cell box = [center_x, center_y, center_w, center_h] # find the anchor that best predicts this box best_anchor = -1 max_iou = -1 shifted_box = BoundBox(0, 0, center_w, center_h) for i in range(len(self.anchors)): anchor = self.anchors[i] iou = bbox_iou(shifted_box, anchor) if max_iou < iou: best_anchor = i max_iou = iou # assign ground truth x, y, w, h, confidence and class probs to y_batch y_batch[instance_count, grid_y, grid_x, best_anchor, 0:4] = box y_batch[instance_count, grid_y, grid_x, best_anchor, 4] = 1. y_batch[instance_count, grid_y, grid_x, best_anchor, 5 + obj_indx] = 1 # assign the true box to b_batch b_batch[instance_count, 0, 0, 0, true_box_index] = box true_box_index += 1 true_box_index = true_box_index % self.config[ 'TRUE_BOX_BUFFER'] # assign input image to x_batch if self.norm != None: x_batch[instance_count] = self.norm(img) else: # plot image and bounding boxes for sanity check for obj in all_objs: if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin']: cv2.rectangle(img[:, :, ::-1], (obj['xmin'], obj['ymin']), (obj['xmax'], obj['ymax']), (255, 0, 0), 3) cv2.putText(img[:, :, ::-1], obj['name'], (obj['xmin'] + 2, obj['ymin'] + 12), 0, 1.2e-3 * img.shape[0], (0, 255, 0), 2) x_batch[instance_count] = img # increase instance counter in current batch instance_count += 1 #print(' new batch created', idx) return [x_batch, b_batch], y_batch
def batch_statistics(outputs, targets, iou_threshold):
    """
    Compute true positives, predicted scores and predicted labels per sample
    :param outputs: List of Tensors of predictions [x0, y0, x1, y1, confidence, class label]
    :param targets: List of Dicts of Tensors (Dict keys: 'boxes', 'labels', 'image_id', 'area')
    :param iou_threshold:
    :return: list of true positives, pred_scores and pred_labels for each image in the batch
    """
    batch_metrics = []
    # outputs[sample_i] is one tensor of several detections, corresponding to one image in the batch
    for sample_i in range(len(outputs)):
        if outputs[sample_i] is None:
            continue
        output = outputs[sample_i]
        pred_boxes = output[:, :4]
        pred_scores = output[:, 4].cpu()
        pred_labels = output[:, -1].cpu()
        true_positives = np.zeros(pred_boxes.shape[0])
        annotations = targets[sample_i]['boxes']
        target_labels = targets[sample_i]['labels'].cpu() if len(annotations) > 0 else []
        if len(annotations) > 0:
            detected_boxes = []
            target_boxes = annotations
            for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
                # If all targets are already found, break
                if len(detected_boxes) == len(annotations):
                    break
                # Ignore if label is not one of the target labels
                if pred_label.item() not in target_labels:
                    continue
                iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0)
                if iou >= iou_threshold and box_index not in detected_boxes:
                    true_positives[pred_i] = 1
                    detected_boxes += [box_index]
        batch_metrics.append([true_positives, pred_scores, pred_labels])
    return batch_metrics
def test_det( opt, batch_size=12, img_size=(1088, 608), iou_thres=0.5, print_interval=40, ): data_cfg = opt.data_cfg f = open(data_cfg) data_cfg_dict = json.load(f) f.close() nC = 1 test_path = data_cfg_dict['test'] dataset_root = data_cfg_dict['root'] if opt.gpus[0] >= 0: opt.device = torch.device('cuda') else: opt.device = torch.device('cpu') print('Creating model...') model = create_model(opt.arch, opt.heads, opt.head_conv) model = load_model(model, opt.load_model) #model = torch.nn.DataParallel(model) model = model.to(opt.device) model.eval() # Get dataloader transforms = T.Compose([T.ToTensor()]) dataset = DetDataset(dataset_root, test_path, img_size, augment=False, transforms=transforms) dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=8, drop_last=False, collate_fn=collate_fn) mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0 print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP')) outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \ [], [], [], [], [], [], [], [], [] AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC) for batch_i, (imgs, targets, paths, shapes, targets_len) in enumerate(dataloader): t = time.time() #seen += batch_size output = model(imgs.cuda())[-1] origin_shape = shapes[0] width = origin_shape[1] height = origin_shape[0] inp_height = img_size[1] inp_width = img_size[0] c = np.array([width / 2., height / 2.], dtype=np.float32) s = max(float(inp_width) / float(inp_height) * height, width) * 1.0 meta = { 'c': c, 's': s, 'out_height': inp_height // opt.down_ratio, 'out_width': inp_width // opt.down_ratio } hm = output['hm'].sigmoid_() wh = output['wh'] reg = output['reg'] if opt.reg_offset else None opt.K = 200 detections, inds = mot_decode(hm, wh, reg=reg, cat_spec_wh=opt.cat_spec_wh, K=opt.K) # Compute average precision for each sample targets = [targets[i][:int(l)] for i, l in enumerate(targets_len)] for si, labels in enumerate(targets): seen += 1 #path = paths[si] #img0 = cv2.imread(path) dets = detections[si] dets = dets.unsqueeze(0) dets = post_process(opt, dets, meta) dets = merge_outputs(opt, [dets])[1] #remain_inds = dets[:, 4] > opt.det_thres #dets = dets[remain_inds] if dets is None: # If there are labels but no detections mark as zero AP if labels.size(0) != 0: mAPs.append(0), mR.append(0), mP.append(0) continue # If no labels add number of detections as incorrect correct = [] if labels.size(0) == 0: # correct.extend([0 for _ in range(len(detections))]) mAPs.append(0), mR.append(0), mP.append(0) continue else: target_cls = labels[:, 0] # Extract target boxes as (x1, y1, x2, y2) target_boxes = xywh2xyxy(labels[:, 2:6]) target_boxes[:, 0] *= width target_boxes[:, 2] *= width target_boxes[:, 1] *= height target_boxes[:, 3] *= height ''' path = paths[si] img0 = cv2.imread(path) img1 = cv2.imread(path) for t in range(len(target_boxes)): x1 = target_boxes[t, 0] y1 = target_boxes[t, 1] x2 = target_boxes[t, 2] y2 = target_boxes[t, 3] cv2.rectangle(img0, (x1, y1), (x2, y2), (0, 255, 0), 4) cv2.imwrite('gt.jpg', img0) for t in range(len(dets)): x1 = dets[t, 0] y1 = dets[t, 1] x2 = dets[t, 2] y2 = dets[t, 3] cv2.rectangle(img1, (x1, y1), (x2, y2), (0, 255, 0), 4) cv2.imwrite('pred.jpg', img1) abc = ace ''' detected = [] for *pred_bbox, conf in dets: obj_pred = 0 pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1) # Compute iou with target boxes iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0] # Extract index of largest overlap best_i = np.argmax(iou) # If overlap exceeds threshold and 
classification is correct mark as correct if iou[best_i] > iou_thres and obj_pred == labels[ best_i, 0] and best_i not in detected: correct.append(1) detected.append(best_i) else: correct.append(0) # Compute Average Precision (AP) per class AP, AP_class, R, P = ap_per_class( tp=correct, conf=dets[:, 4], pred_cls=np.zeros_like(dets[:, 4]), # detections[:, 6] target_cls=target_cls) # Accumulate AP per class AP_accum_count += np.bincount(AP_class, minlength=nC) AP_accum += np.bincount(AP_class, minlength=nC, weights=AP) # Compute mean AP across all classes in this image, and append to image list mAPs.append(AP.mean()) mR.append(R.mean()) mP.append(P.mean()) # Means of all images mean_mAP = np.sum(mAPs) / (AP_accum_count + 1E-16) mean_R = np.sum(mR) / (AP_accum_count + 1E-16) mean_P = np.sum(mP) / (AP_accum_count + 1E-16) if batch_i % print_interval == 0: # Print image mAP and running mean mAP print(('%11s%11s' + '%11.3g' * 4 + 's') % (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP, time.time() - t)) # Print mAP per class print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP')) print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16))) # Return mAP return mean_mAP, mean_R, mean_P
def __call__(self, y_pred, y_true):
    device = y_pred[0].device
    loss_cls = torch.zeros(1, device=device)  # Tensor(0)
    loss_box = torch.zeros(1, device=device)  # Tensor(0)
    loss_obj = torch.zeros(1, device=device)  # Tensor(0)
    target_cls, target_box, indices, anchors = self.build_targets(y_pred, y_true)  # targets

    # Define criteria
    reduction = 'mean'  # Loss reduction (sum or mean)
    BCE_cls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([self.hyp['cls_pw']], device=device),
                                   reduction=reduction)
    BCE_obj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([self.hyp['obj_pw']], device=device),
                                   reduction=reduction)

    # class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
    label_pos, label_neg = smooth_BCE(eps=0.0)

    # focal loss
    fl_gamma = self.hyp['fl_gamma']  # focal loss gamma
    if fl_gamma > 0:
        BCE_cls, BCE_obj = FocalLoss(BCE_cls, fl_gamma), FocalLoss(BCE_obj, fl_gamma)

    # per output
    count_targets = 0  # targets
    for jdx, pred in enumerate(y_pred):  # layer index, layer predictions
        idx_img, idx_anchor, grid_y, grid_x = indices[jdx]  # image, anchor, grid_y, grid_x
        target_obj = torch.zeros_like(pred[..., 0], device=device)  # target obj

        num_target = idx_img.shape[0]  # number of targets
        if num_target:
            count_targets += num_target  # cumulative targets
            # prediction subset corresponding to the matched positive targets
            pred_sub = pred[idx_img, idx_anchor, grid_y, grid_x]

            # GIoU
            pred_xy = pred_sub[..., :2].sigmoid()
            pred_wh = pred_sub[..., 2:4].exp().clamp(max=1E3) * anchors[jdx]
            pred_box = torch.cat((pred_xy, pred_wh), 1)  # predicted box
            giou = bbox_iou(pred_box.t(), target_box[jdx].t(), ltrb=False, iou_type='GIoU')  # giou(prediction, target)
            loss_box += (1.0 - giou).mean()  # giou loss

            # Obj giou ratio
            target_obj[idx_img, idx_anchor, grid_y, grid_x] = \
                (1.0 - self.giou_ratio) + self.giou_ratio * giou.detach().clamp(0).type(target_obj.dtype)

            # Class
            if self.num_cls > 1:  # cls loss (only if multiple classes)
                pred_tar = torch.full_like(pred_sub[:, 5:], label_neg, device=device)  # targets
                pred_tar[range(num_target), target_cls[jdx]] = label_pos
                loss_cls += BCE_cls(pred_sub[:, 5:], pred_tar)  # BCE

        loss_obj += BCE_obj(pred[..., 4], target_obj)  # obj loss

    # Multiply each loss by its corresponding weight
    loss_box *= self.hyp['giou']
    loss_obj *= self.hyp['obj']
    loss_cls *= self.hyp['cls']

    # loss = loss_box + loss_obj + loss_cls
    return {"box_loss": loss_box, "obj_loss": loss_obj, "class_loss": loss_cls}
def nms_suppress(self, pred):
    '''
    'OR'   : the ordinary NMS that is usually meant by "NMS".
    'AND'  : similar to OR, except that a class that ends up with only a single box is treated
             as invalid; an object usually produces several candidate boxes, so a lone box is
             likely a false detection.
    'MERGE': uses all boxes above the threshold, weights each by its conf value and takes the
             weighted average of the x1y1x2y2 coordinates as the final box; this weighted
             mixture box is more accurate but somewhat slower.
    'SOFT' : soft-NMS https://arxiv.org/abs/1704.04503
    :param pred:
    :param nms_thres:
    :param nms_style: NMS method, one of 'OR' (default), 'AND', 'MERGE' (experimental), 'SOFT'
    :return: list[tensor[], tensor[]]
    '''
    if len(pred) == 0:
        return torch.tensor([])
    det_max = []
    for c in pred[:, -1].unique():  # iterate only over the classes that occur, more efficient than all 80
        dc = pred[pred[:, -1] == c]  # select class c, e.g. torch.Size([21, 7]) means 21 boxes of this class
        n = len(dc)  # number of predicted boxes for this class
        if n == 1:
            det_max.append(dc)  # No NMS required if only 1 prediction
            continue
        elif n > 100:
            # Too many boxes: keep only the first 100 (usually fine, may need changing for crowded scenes)
            dc = dc[:100]  # limit to first 100 boxes: https://github.com/ultralytics/yolov3/issues/117

        # Non-maximum suppression
        if self.nms_style == 'OR':  # default
            # dc.shape[0] is the number of remaining boxes; loop until none remain, e.g. 21 -> 14 -> 9 -> 3 -> 0
            while dc.shape[0]:
                det_max.append(dc[:1])  # keep the box with the highest conf
                if len(dc) == 1:  # only one box left, stop
                    break
                iou = bbox_iou(dc[0], dc[1:])  # IoU of the highest-conf box with the others
                dc = dc[1:][iou < self.nms_thres]  # remove boxes whose IoU with it exceeds the threshold
        elif self.nms_style == 'AND':  # requires overlap, single boxes erased
            while len(dc) > 1:
                iou = bbox_iou(dc[0], dc[1:])  # IoU with the other boxes
                if iou.max() > 0.5:
                    # only if something overlaps the highest-conf box strongly is it added to det_max
                    det_max.append(dc[:1])
                dc = dc[1:][iou < self.nms_thres]  # remove ious > threshold
        elif self.nms_style == 'MERGE':  # weighted mixture box: higher accuracy, somewhat slower
            while len(dc):
                if len(dc) == 1:
                    det_max.append(dc)
                    break
                i = bbox_iou(dc[0], dc) > self.nms_thres  # boxes above the NMS threshold
                weights = dc[i, 4:5]  # their conf values are used as weights, e.g. torch.Size([7, 1])
                dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
                det_max.append(dc[:1])
                dc = dc[i == 0]  # keep only the boxes that were not merged
        elif self.nms_style == 'SOFT':  # soft-NMS https://arxiv.org/abs/1704.04503
            sigma = 0.5  # soft-nms sigma parameter
            while len(dc):
                if len(dc) == 1:
                    det_max.append(dc)
                    break
                det_max.append(dc[:1])
                iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                dc = dc[1:]
                dc[:, 4] *= torch.exp(-iou ** 2 / sigma)  # decay confidences
                dc = dc[dc[:, 4] > self.nms_thres]  # new line per https://github.com/ultralytics/yolov3/issues/362
    if len(det_max) > 0:
        det_max_tensor = det_max[0]
        for det in det_max[1:]:
            det_max_tensor = torch.cat((det_max_tensor, det))
    else:
        det_max_tensor = torch.Tensor([])
    return det_max_tensor
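As a small, standalone illustration of the 'SOFT' branch above (not part of the original code), the sketch below applies the same Gaussian decay, conf *= exp(-iou^2 / sigma), to a few hand-made IoU values so the effect of sigma is easy to see.

# Standalone illustration (assumption) of the Gaussian decay used by soft-NMS.
import torch

sigma = 0.5
confidences = torch.tensor([0.9, 0.8, 0.7, 0.6])
ious = torch.tensor([0.0, 0.3, 0.6, 0.9])   # overlap with the current top box
decayed = confidences * torch.exp(-ious ** 2 / sigma)
print(decayed)  # boxes that heavily overlap the top box lose most of their confidence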
def test( model, dataloader, iou_thres=0.5, conf_thres=0.3, nms_thres=0.45, print_interval=40, ): nC = 1 mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0 print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP')) outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \ [], [], [], [], [], [], [], [], [] AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC) for batch_i, (imgs, targets, paths, shapes, targets_len) in enumerate(dataloader): t = time.time() out = model(imgs.cuda()) # out = model(imgs) output = [] for i,o in enumerate(out): boxes = xyxy2xywh(o['boxes']).cpu() scores = o['scores'].cpu().view(-1,1) labels = o['labels'].cpu().view(-1,1).float() output.append(torch.Tensor(torch.cat((boxes,scores,scores,labels),dim=1))) output = non_max_suppression(output, conf_thres=conf_thres, nms_thres=nms_thres) for i, o in enumerate(output): if o is not None: output[i] = o[:, :6] # Compute average precision for each sample targets = [targets[i][:int(l)] for i,l in enumerate(targets_len)] for si, (labels, detections) in enumerate(zip(targets, output)): seen += 1 if detections is None: # If there are labels but no detections mark as zero AP if labels.size(0) != 0: mAPs.append(0), mR.append(0), mP.append(0) continue # Get detections sorted by decreasing confidence scores detections = detections.cpu().numpy() detections = detections[np.argsort(-detections[:, 4])] # If no labels add number of detections as incorrect correct = [] if labels.size(0) == 0: # correct.extend([0 for _ in range(len(detections))]) mAPs.append(0), mR.append(0), mP.append(0) continue else: target_cls = torch.zeros_like(labels[:, 0]) target_boxes = labels[:, 2:6] detected = [] for *pred_bbox, conf, obj_conf in detections: obj_pred = 0 pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1) # Compute iou with target boxes iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0] # Extract index of largest overlap best_i = np.argmax(iou) # If overlap exceeds threshold and classification is correct mark as correct if iou[best_i] > iou_thres and best_i not in detected: correct.append(1) detected.append(best_i) else: correct.append(0) # Compute Average Precision (AP) per class AP, AP_class, R, P = ap_per_class(tp=correct, conf=detections[:, 4], pred_cls=np.zeros_like(detections[:, 5]), # detections[:, 6] target_cls=target_cls) # Accumulate AP per class AP_accum_count += np.bincount(AP_class, minlength=nC) AP_accum += np.bincount(AP_class, minlength=nC, weights=AP) # Compute mean AP across all classes in this image, and append to image list mAPs.append(AP.mean()) mR.append(R.mean()) mP.append(P.mean()) # Means of all images mean_mAP = np.sum(mAPs) / ( AP_accum_count + 1E-16) mean_R = np.sum(mR) / ( AP_accum_count + 1E-16) mean_P = np.sum(mP) / (AP_accum_count + 1E-16) if batch_i % print_interval==0: # Print image mAP and running mean mAP print(('%11s%11s' + '%11.3g' * 4 + 's') % (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP, time.time() - t)) # Print mAP per class print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP')) print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16))) # Return mAP return mean_mAP, mean_R, mean_P
def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres, stride):
    '''
    :param pred_boxes: predicted box positions and sizes (num_samples, self.num_anchors, grid_size, grid_size, 4)
    :param pred_cls: predicted class probabilities
    :param target: ground truth
    :param anchors: anchors, stored as a matrix
    :param ignore_thres: defaults to 0.5
    :return:
    '''
    BoolTensor = torch.cuda.BoolTensor if pred_boxes.is_cuda else torch.BoolTensor
    FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor
    device = torch.device("cuda") if pred_boxes.is_cuda else torch.device("cpu")

    nB = pred_boxes.size(0)  # number of samples
    nA = pred_boxes.size(1)  # number of anchors: 3
    nC = pred_cls.size(-1)   # number of classes
    nG = pred_boxes.size(2)  # grid size, e.g. 13

    # Output tensors
    obj_mask = torch.zeros(nB, nA, nG, nG, requires_grad=False).to(device)    # object mask (bool): whether a grid cell holds a target, zero-filled
    noobj_mask = torch.ones(nB, nA, nG, nG, requires_grad=False).to(device)   # no-object mask
    class_mask = torch.zeros(nB, nA, nG, nG, requires_grad=False).to(device)  # class mask
    iou_scores = torch.zeros(nB, nA, nG, nG, requires_grad=False).to(device)  # IoU scores
    tx = torch.zeros(nB, nA, nG, nG, requires_grad=True).to(device)  # GT offset relative to the grid cell
    ty = torch.zeros(nB, nA, nG, nG, requires_grad=True).to(device)  # GT offset relative to the grid cell
    tw = torch.zeros(nB, nA, nG, nG, requires_grad=True).to(device)  # GT offset relative to the grid cell
    th = torch.zeros(nB, nA, nG, nG, requires_grad=True).to(device)  # GT offset relative to the grid cell
    tcls = torch.zeros(nB, nA, nG, nG, nC, requires_grad=False).to(device)  # class targets

    # Convert to position relative to the grid.
    # target holds the ground truth for every sample: object positions and their classes.
    # target has 6 columns: sample index, label, then position coordinates and width/height.
    # Because one sample may contain several objects, a column records which sample each box belongs to.
    target_boxes = target[:, 2:6] * nG  # positions are stored normalized, so scale by the feature-map size
    gxy = target_boxes[:, :2]  # GT centers
    gwh = target_boxes[:, 2:]  # GT widths/heights
    # Get anchors with best iou
    ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])
    best_ious, best_n = ious.max(0)  # IoU between each anchor and the GT; keep the best anchor
    # Separate target values
    b, target_labels = target[:, :2].long().t()  # .long() converts the dtype; .t() transposes (deep copy)
    gx, gy = gxy.t()  # GT centers
    gw, gh = gwh.t()  # GT widths/heights
    gi, gj = gxy.long().t()
    # ########## TODO(arthur77wang):
    gi[gi < 0] = 0
    gj[gj < 0] = 0
    gi[gi > nG - 1] = nG - 1
    gj[gj > nG - 1] = nG - 1
    # Set masks
    obj_mask[b, best_n, gj, gi] = 1    # a 1 marks a cell that holds a target, at its best-matching anchor
    noobj_mask[b, best_n, gj, gi] = 0  # in noobj_mask a 1 marks a cell with no target
    # Set noobj mask to zero where iou exceeds ignore threshold
    for i, anchor_ious in enumerate(ious.t()):
        noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0  # anchors over the threshold are not treated as background
    # Coordinates: compute the offsets
    tx[b, best_n, gj, gi] = gx - gx.floor()
    ty[b, best_n, gj, gi] = (gy - gy.floor() + 0.5) / 2
    # Width and height
    # tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
    # th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
    tw[b, best_n, gj, gi] = torch.sqrt(gw / anchors[best_n][:, 0]) / 2
    th[b, best_n, gj, gi] = torch.sqrt(gh / anchors[best_n][:, 1]) / 2
    # One-hot encoding of label
    tcls[b, best_n, gj, gi, target_labels] = 1  # GT class probabilities
    # Compute label correctness and iou at best anchor
    class_mask[b, best_n, gj, gi] = (
        pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()  # class-correctness mask
    iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi],
                                             target_boxes,
                                             x1y1x2y2=False)  # IoU between the prediction at the best anchor and the GT

    tconf = obj_mask  # GT confidence
    return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf
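The anchor assignment above hinges on bbox_wh_iou, which compares widths and heights only (both boxes assumed centred at the same point). Below is a self-contained sketch of that idea; the wh_iou_toy helper and the example values are assumptions for illustration, not the repo's own implementation.

# Toy, self-contained version (assumption) of width/height-only anchor matching,
# mirroring the ious.max(0) step in build_targets above.
import torch

def wh_iou_toy(anchor_wh: torch.Tensor, gwh: torch.Tensor) -> torch.Tensor:
    """IoU between one anchor (2,) and N ground-truth sizes (N, 2), centers aligned."""
    inter = torch.min(anchor_wh, gwh).prod(dim=1)        # overlap area
    union = anchor_wh.prod() + gwh.prod(dim=1) - inter   # combined area
    return inter / union

anchors = torch.tensor([[10., 13.], [16., 30.], [33., 23.]])  # example anchor sizes
gwh = torch.tensor([[12., 20.], [30., 28.]])                   # ground-truth sizes (grid units)
ious = torch.stack([wh_iou_toy(a, gwh) for a in anchors])      # (num_anchors, num_targets)
best_ious, best_n = ious.max(0)                                # best anchor index per target
print(best_n, best_ious)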
def test(cfg, data, weights=None, batch_size=16, img_size=608, iou_thres=0.5, conf_thres=0.001, nms_thres=0.5, save_json=True, hyp=None, model=None, single_cls=False): """test the metrics of the trained model :param str cfg: model cfg file :param str data: data dict :param str weights: weights path :param int batch_size: batch size :param int img_size: image size :param float iou_thres: iou threshold :param float conf_thres: confidence threshold :param float nms_thres: nms threshold :param bool save_json: Whether to save the model :param str hyp: hyperparameter :param str model: yolov4 model :param bool single_cls: only one class :return: results """ if model is None: device = select_device(opt.device) verbose = False # Initialize model model = Model(cfg, img_size).to(device) # Load weights if weights.endswith('.pt'): checkpoint = torch.load(weights, map_location=device) state_dict = intersect_dicts(checkpoint['model'], model.state_dict()) model.load_state_dict(state_dict, strict=False) elif len(weights) > 0: load_darknet_weights(model, weights) print(f'Loaded weights from {weights}!') if torch.cuda.device_count() > 1: model = nn.DataParallel(model) else: device = next(model.parameters()).device verbose = False test_path = data['valid'] num_classes, names = (1, ['item']) if single_cls else (int( data['num_classes']), data['names']) # Dataloader dataset = LoadImagesAndLabels(test_path, img_size, batch_size, hyp=hyp) dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=8, pin_memory=True, collate_fn=dataset.collate_fn) seen = 0 model.eval() coco91class = coco80_to_coco91_class() output_format = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'Pre', 'Rec', 'mAP', 'F1') precision, recall, f_1, mean_pre, mean_rec, mean_ap, mf1 = 0., 0., 0., 0., 0., 0., 0. loss = torch.zeros(3) json_dict, stats, aver_pre, ap_class = [], [], [], [] for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=output_format)): targets = targets.to(device) imgs = imgs.to(device) / 255.0 _, _, height, width = imgs.shape # batch size, channels, height, width # Plot images with bounding boxes if batch_i == 0 and not os.path.exists('test_batch0.jpg'): plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg') with torch.no_grad(): inference_output, train_output = model(imgs) if hasattr(model, 'hyp'): # if model has loss hyperparameters loss += compute_loss(train_output, targets, model)[1][:3].cpu() # GIoU, obj, cls output = non_max_suppression(inference_output, conf_thres=conf_thres, nms_thres=nms_thres) # Statistics per image for i, pred in enumerate(output): labels = targets[targets[:, 0] == i, 1:] num_labels = len(labels) target_class = labels[:, 0].tolist() if num_labels else [] seen += 1 if pred is None: if num_labels: stats.append( ([], torch.Tensor(), torch.Tensor(), target_class)) continue # Append to pycocotools JSON dictionary if save_json: # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
image_id = int(Path(paths[i]).stem.split('_')[-1]) box = pred[:, :4].clone() # xyxy scale_coords(imgs[i].shape[1:], box, shapes[i][0]) # to original shape box = xyxy2xywh(box) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for det_i, det in enumerate(pred): json_dict.append({ 'image_id': image_id, 'category_id': coco91class[int(det[6])], 'bbox': [float(format(x, '.%gf' % 3)) for x in box[det_i]], 'score': float(format(det[4], '.%gf' % 5)) }) # Clip boxes to image bounds clip_coords(pred, (height, width)) # Assign all predictions as incorrect correct = [0] * len(pred) if num_labels: detected = [] tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) tbox[:, [0, 2]] *= width tbox[:, [1, 3]] *= height # Search for correct predictions for j, (*pbox, _, _, pcls) in enumerate(pred): # Break if all targets already located in image if len(detected) == num_labels: break # Continue if predicted class not among image classes if pcls.item() not in target_class: continue # Best iou, index between pred and targets mask = (pcls == tcls_tensor).nonzero( as_tuple=False).view(-1) iou, best_iou = bbox_iou(pbox, tbox[mask]).max(0) # If iou > threshold and class is correct mark as correct if iou > iou_thres and mask[ best_iou] not in detected: # and pcls == target_class[bi]: correct[j] = 1 detected.append(mask[best_iou]) # Append statistics (correct, conf, pcls, target_class) stats.append( (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), target_class)) # Compute statistics stats = [np.concatenate(x, 0) for x in list(zip(*stats))] if len(stats): precision, recall, aver_pre, f_1, ap_class = ap_per_class(*stats) mean_pre, mean_rec, mean_ap, mf1 = precision.mean(), recall.mean( ), aver_pre.mean(), f_1.mean() num_targets = np.bincount( stats[3].astype(np.int64), minlength=num_classes) # number of targets per class else: num_targets = torch.zeros(1) # Print results print_format = '%20s' + '%10.3g' * 6 print(print_format % ('all', seen, num_targets.sum(), mean_pre, mean_rec, mean_ap, mf1)) # Print results per class if verbose and num_classes > 1 and stats: for i, class_ in enumerate(ap_class): print(print_format % (names[class_], seen, num_targets[class_], precision[i], recall[i], aver_pre[i], f_1[i])) # Save JSON if save_json and mean_ap and json_dict: try: img_ids = [ int(Path(x).stem.split('_')[-1]) for x in dataset.img_files ] with open('results.json', 'w') as file: json.dump(json_dict, file) # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb cocogt = COCO('data/coco/annotations/instances_val2017.json' ) # initialize COCO ground truth api cocodt = cocogt.loadRes('results.json') # initialize COCO pred api cocoeval = COCOeval(cocogt, cocodt, 'bbox') cocoeval.params.imgIds = img_ids # [:32] # only evaluate these images cocoeval.evaluate() cocoeval.accumulate() cocoeval.summarize() mean_ap = cocoeval.stats[1] # update mAP to pycocotools mAP except ImportError: print( 'WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.' ) # Return results maps = np.zeros(num_classes) + mean_ap for i, class_ in enumerate(ap_class): maps[class_] = aver_pre[i] return (mean_pre, mean_rec, mean_ap, mf1, *(loss / len(dataloader)).tolist()), maps
def new_detection(self, frame, detections, xywh=False):
    if detections is None or not len(detections):
        self.logger.info("No detections added")
        return
    detections = detections.cpu().numpy()
    if not xywh:
        # convert corner (x1, y1, x2, y2) boxes to (x1, y1, w, h)
        detections[:, 2] = detections[:, 2] - detections[:, 0]
        detections[:, 3] = detections[:, 3] - detections[:, 1]
    to_remove = []
    self.logger.info("Adding new detections")
    for i, obj in enumerate(self.objects):
        if detections is None or not len(detections):
            # all detections have been matched; stop matching, but still drop
            # stale objects and log the tracker state below
            break
        cur_bbox = np.array(obj.bbox)
        bboxes_array = detections[:, :4]
        ious = bbox_iou(cur_bbox, bboxes_array)
        relevant_idx = np.where(
            obj.class_type == detections[:, 5])[0]  # detections with a matching class
        if relevant_idx.size == 0:
            continue
        greatest_overlap = relevant_idx[ious[relevant_idx].argmax(
        )]  # max overlap among matching-class detections
        # check for greatest intersection over union detection
        if ious[greatest_overlap] > self.iou_thres:
            self.logger.debug(
                f"Reinitialize Object id: {obj.id} of type: {obj.class_type} due to large overlap"
            )
            bbox = tuple(map(int, bboxes_array[greatest_overlap]))
            # bbox = tuple(np.around(bboxes_array[greatest_overlap]).astype(int))  # conversion for opencv
            obj.reinitialize(frame, bbox)
            detections = np.delete(detections, greatest_overlap, axis=0)
            continue
        # check for closest detection
        # TODO: make distance threshold relative to speed and frames without detection.
        distances = bbox_distance(cur_bbox, bboxes_array)
        closest_box = relevant_idx[distances[relevant_idx].argmin(
        )]  # min distance among matching-class detections
        if distances[closest_box] < self.dist_thres:
            self.logger.debug(
                f"Reinitialize Object id: {obj.id} of type: {obj.class_type} due to close distance"
            )
            bbox = tuple(map(int, bboxes_array[closest_box]))  # use the closest box, not the max-IoU one
            # bbox = tuple(np.around(bboxes_array[closest_box]).astype(int))  # conversion for opencv
            obj.reinitialize(frame, bbox)
            detections = np.delete(detections, closest_box, axis=0)
            continue
        if obj.frames_without_detection > self.no_detection_thres:
            to_remove.append(i)
    # remove undetected objects
    self.objects = [
        self.objects[i] for i in range(len(self.objects))
        if i not in to_remove
    ]
    # add new detections
    for det in detections:
        self.add(self.default_tracker,
                 frame,
                 tuple(det[:4]),
                 int(det[5]),
                 xyxy=False)
    self.logger.info(self.get_objects_metadata())
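# new_detection() above relies on a bbox_distance() helper that is not shown in
# this snippet. Below is a minimal sketch of one plausible implementation,
# assuming boxes are in (x, y, w, h) format with (x, y) the top-left corner, as
# produced by the xyxy -> xywh conversion at the top of new_detection(). The
# repository's actual helper may differ.
import numpy as np

def bbox_distance(box, boxes):
    """Euclidean distance between the center of `box` (shape (4,)) and the
    centers of each row of `boxes` (shape (N, 4)), both in (x, y, w, h)."""
    cx = box[0] + box[2] / 2.0
    cy = box[1] + box[3] / 2.0
    cxs = boxes[:, 0] + boxes[:, 2] / 2.0
    cys = boxes[:, 1] + boxes[:, 3] / 2.0
    return np.sqrt((cxs - cx) ** 2 + (cys - cy) ** 2)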
def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
    # number of images in the batch
    bs = len(target)
    # anchors assigned to this feature map scale
    anchor_index = [[0, 1, 2], [3, 4, 5], [6, 7, 8]][[13, 26, 52].index(in_w)]
    subtract_index = [0, 3, 6][[13, 26, 52].index(in_w)]
    # allocate the target tensors (all zeros; noobj_mask starts as all ones)
    mask = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
    noobj_mask = torch.ones(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)

    tx = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
    ty = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
    tw = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
    th = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
    tconf = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
    tcls = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, self.num_classes, requires_grad=False)

    box_loss_scale_x = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
    box_loss_scale_y = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
    for b in range(bs):
        for t in range(target[b].shape[0]):
            # ground-truth box position on this feature map
            gx = target[b][t, 0] * in_w
            gy = target[b][t, 1] * in_h
            gw = target[b][t, 2] * in_w
            gh = target[b][t, 3] * in_h

            # grid cell that contains the box center
            gi = int(gx)
            gj = int(gy)

            # ground-truth box centered at the origin (shape-only comparison)
            gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)

            # all anchor boxes, also centered at the origin
            anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((self.num_anchors, 2)),
                                                              np.array(anchors)), 1))
            # overlap between the ground-truth shape and each anchor shape
            anch_ious = bbox_iou(gt_box, anchor_shapes)

            # Find the best matching anchor box
            best_n = np.argmax(anch_ious)
            if best_n not in anchor_index:
                continue

            # Masks
            if (gj < in_h) and (gi < in_w):
                best_n = best_n - subtract_index

                # mark which anchor cells actually contain an object
                noobj_mask[b, best_n, gj, gi] = 0
                mask[b, best_n, gj, gi] = 1
                # center offset targets within the grid cell
                tx[b, best_n, gj, gi] = gx - gi
                ty[b, best_n, gj, gi] = gy - gj
                # width/height targets relative to the matched anchor
                tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n+subtract_index][0])
                th[b, best_n, gj, gi] = math.log(gh / anchors[best_n+subtract_index][1])
                # per-box scale used to reweight the xywh loss
                box_loss_scale_x[b, best_n, gj, gi] = target[b][t, 2]
                box_loss_scale_y[b, best_n, gj, gi] = target[b][t, 3]
                # objectness target
                tconf[b, best_n, gj, gi] = 1
                # class target (one-hot)
                tcls[b, best_n, gj, gi, int(target[b][t, 4])] = 1
            else:
                print('Step {0} out of bound'.format(b))
                print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(gj, in_h, gi, in_w))
                continue

    return mask, noobj_mask, tx, ty, tw, th, tconf, tcls, box_loss_scale_x, box_loss_scale_y
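# A small standalone sketch of the anchor-matching step used in get_target()
# above: both the ground-truth box and every anchor are placed at the origin, so
# the IoU depends only on width and height, and the anchor with the highest IoU
# is assigned to the target. This is equivalent to calling bbox_iou on
# zero-centered xywh boxes as the code does. Anchor and box values below are
# illustrative, not the repository's configured anchors.
import numpy as np

def wh_iou(wh1, wh2):
    """IoU of boxes described only by (w, h), i.e. all centered at the origin."""
    inter = np.minimum(wh1[0], wh2[:, 0]) * np.minimum(wh1[1], wh2[:, 1])
    union = wh1[0] * wh1[1] + wh2[:, 0] * wh2[:, 1] - inter
    return inter / union

scaled_anchors = np.array([[1.25, 1.625], [2.0, 3.75], [4.125, 2.875]])  # example values
gt_wh = np.array([3.1, 3.4])  # ground-truth width/height on the feature map
best_n = int(np.argmax(wh_iou(gt_wh, scaled_anchors)))
print('best anchor index:', best_n)  # -> 2 for these example values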
def test(cfg,
         data,
         batch_size,
         img_size,
         conf_thres,
         iou_thres,
         nms_thres,
         src_txt_path,
         weights,
         log_file_path=None,
         model=None):

    # 0. Initialize parameters
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    names = load_classes(data['names'])

    # 1. Load the network
    if model is None:
        device = select_device('0')
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: also support the darknet .weights format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model']
            )  # 20200704_50epoch_modify_noobj   # TODO: is map_location=device needed?
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # clw note: multi-GPU
    else:
        device = next(model.parameters()).device  # get model device
    model.to(device).eval()

    # 2. Load the dataset
    test_dataset = VocDataset(src_txt_path,
                              img_size,
                              with_label=True,
                              is_training=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn,  # TODO
        pin_memory=True)

    # 3. Inference (forward pass)
    image_nums = 0
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@{}'.format(iou_thres), 'F1')
    #s = ('%20s' + '%10s' * 6) % ('Class', 'ImgNum', 'Target', 'P', 'R', 'mAP@0.5', 'F1')
    p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []

    pbar = tqdm(dataloader)
    for i, (img_tensor, target_tensor, _, _) in enumerate(pbar):
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)
        target_tensor = target_tensor.to(device)
        height, width = img_tensor.shape[2:]
        start = time.time()

        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            output = model(
                img_tensor)  # (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres, nms_thres)
            s = 'time use per batch: %.3fs' % (time.time() - start)
            pbar.set_description(s)

        for batch_idx, pred in enumerate(nms_output):  # pred: (num_boxes, 7)
            labels = target_tensor[target_tensor[:, 0] == batch_idx, 1:]
            nl = len(labels)  # number of labels
            tcls = labels[:, 0].tolist() if nl else []  # target classes
            image_nums += 1

            # Handle images with no predicted boxes at all, e.g. when conf_thres is too high
            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds
            # TODO: necessary, because the labels are clipped; removing this clip should lower the mAP somewhat
            clip_coords(pred, (height, width))  # mAP is the same

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= img_tensor[batch_idx].size()[2]  # w
                tbox[:, [1, 3]] *= img_tensor[batch_idx].size()[1]  # h

                # Search for correct predictions
                for j, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):
                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break
                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue
                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)
                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and m[bi] not in detected:  # and pcls == tcls[bi]:
                        correct[j] = 1
                        detected.append(m[bi])

            # print('stats.append: ', (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
            '''
            example of one appended entry:
                pred flag     [1, 0, 1, 0, 0, 1, 0, 0, 1],
                pred conf     tensor([0.17245, 0.14642, 0.07215, 0.07138, 0.07069, 0.06449, 0.06222, 0.05580, 0.05452]),
                pred cls      tensor([2., 2., 2., 2., 2., 2., 2., 2., 2.]),
                label cls     [2.0, 2.0, 2.0, 2.0, 2.0]
            stats starts out as an empty list []
            '''
            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
    # after stats have been gathered for all images ...
    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    # time.sleep(0.01)  # clw note: give tqdm a chance to flush its output before printing
    #pf = '%20s' + '%10.3g' * 6  # print format
    pf = '%20s' + '%10s' + '%10.3g' * 5
    pf_value = pf % ('all', str(image_nums), nt.sum(), mp, mr, map, mf1)
    print(pf_value)
    if __name__ != '__main__':
        # note: s was overwritten in the loop, so this logs the last per-batch timing string
        write_to_file(s, log_file_path)
        write_to_file(pf_value, log_file_path)

    results = []
    results.append({"all": (mp, mr, map, mf1)})

    # Print results per class
    #if verbose and nc > 1 and len(stats):
    if nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            #print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
            print(pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]))
            if __name__ != '__main__':
                write_to_file(
                    pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]),
                    log_file_path)
            results.append({names[c]: (p[i], r[i], ap[i], f1[i])})

    # Return results
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1), maps
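# ap_per_class() is imported from the repository's utils and is not shown in
# this snippet. Below is a minimal, illustrative sketch of the standard
# computation it is assumed to perform for a single class: sort detections by
# confidence, accumulate TP/FP counts, build the precision-recall curve, and
# integrate it to obtain AP. The real implementation may differ in
# interpolation details.
import numpy as np

def average_precision(correct, conf, num_gt):
    """correct: 0/1 flag per detection, conf: confidences, num_gt: number of ground truths."""
    order = np.argsort(-conf)                         # highest confidence first
    tp = np.cumsum(np.array(correct)[order])          # cumulative true positives
    fp = np.cumsum(1 - np.array(correct)[order])      # cumulative false positives
    recall = tp / max(num_gt, 1e-16)
    precision = tp / np.maximum(tp + fp, 1e-16)
    # integrate precision over recall using the "all points" interpolation
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]    # precision envelope
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return float(np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1]))

# e.g. three detections, two of them true positives, three ground-truth boxes
print(average_precision([1, 0, 1], np.array([0.9, 0.8, 0.6]), num_gt=3))  # ~0.56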