def plot_images(imgs, targets, paths=None, fname='images.jpg'):
    # Plots training images overlaid with targets
    imgs = imgs.cpu().numpy()
    targets = targets.cpu().numpy()
    # targets = targets[targets[:, 1] == 21]  # plot only one class

    fig = plt.figure(figsize=(10, 10))
    bs, _, h, w = imgs.shape  # batch size, _, height, width
    bs = min(bs, 16)  # limit plot to 16 images
    ns = int(np.ceil(bs ** 0.5))  # number of subplots per row/column (plt.subplot needs an int)

    for i in range(bs):
        boxes = xywh2xyxy(targets[targets[:, 0] == i, 2:6]).T
        boxes[[0, 2]] *= w
        boxes[[1, 3]] *= h
        plt.subplot(ns, ns, i + 1).imshow(imgs[i].transpose(1, 2, 0))
        plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]], '.-')
        plt.axis('off')
        if paths is not None:
            s = Path(paths[i]).name
            plt.title(s[:min(len(s), 40)], fontdict={'size': 8})  # limit to 40 characters

    fig.tight_layout()
    fig.savefig(fname, dpi=200)
    plt.close()
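# Note: every snippet in this section leans on a shared xywh2xyxy helper that
# converts boxes from (center-x, center-y, width, height) to corner format
# (x1, y1, x2, y2). The helper itself is not part of this section; the version
# below is a minimal sketch consistent with the call sites, which pass both
# torch.Tensor and np.ndarray inputs.
import numpy as np
import torch


def xywh2xyxy(x):
    # Works on the last dimension for torch.Tensor and np.ndarray alike
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top-left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top-left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom-right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom-right y
    return y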
def nms_filter(self, pred, min_wh=5):
    """
    1. Discard boxes whose object confidence score is below `self.conf_thres`.
    2. Use NMS to further filter the remaining boxes.
    :param pred: post-processed YOLO predictions relative to the original image,
                 e.g. torch.Size([1, 8190, 85])
    :param min_wh: (pixels) minimum box width and height
    :return: detections with shape (x1, y1, x2, y2, object_conf, class_conf, class)
    """
    class_conf, class_pred = pred[:, :, 5:].max(dim=-1)
    pred[:, :, 4] *= class_conf
    i = ((class_pred == 0) & (pred[:, :, 4] > self.conf_thres)
         & (pred[:, :, 2:4] > min_wh).all(2) & torch.isfinite(pred).all(2))
    pred = pred[i]  # e.g. 29 boxes survive filtering: torch.Size([29, 85])
    class_conf = class_conf[i]
    class_pred = class_pred[i].unsqueeze(1).float()

    # Box (center x, center y, width, height) to (x1, y1, x2, y2)
    pred[:, :4] = xywh2xyxy(pred[:, :4])  # torch.Size([29, 85])

    # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
    pred = torch.cat((pred[:, :5], class_conf.unsqueeze(1), class_pred), 1)
    # e.g. torch.Size([29, 7]) after filtering
    return pred
def evaluate(model, path, iou_thres, conf_thres, nms_thres, img_size, batch_size):
    model.eval()

    # Get dataloader
    dataset = ListDataset(path, img_size=img_size, augment=False, multiscale=False)
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=False, num_workers=1,
        collate_fn=dataset.collate_fn)

    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    labels = []
    sample_metrics = []  # List of tuples (TP, confs, pred)
    for batch_i, (_, imgs, targets) in enumerate(tqdm.tqdm(dataloader, desc="Detecting objects")):
        # Extract labels
        labels += targets[:, 1].tolist()
        # Rescale target from normalized xywh to xyxy in pixels
        targets[:, 2:] = xywh2xyxy(targets[:, 2:])
        targets[:, 2:] *= img_size

        imgs = Variable(imgs.type(Tensor), requires_grad=False)

        with torch.no_grad():
            outputs = model(imgs)
            outputs = non_max_suppression(outputs, conf_thres=conf_thres, nms_thres=nms_thres)

        sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=iou_thres)

    # Concatenate sample statistics
    true_positives, pred_scores, pred_labels = [
        np.concatenate(x, 0) for x in list(zip(*sample_metrics))]
    precision, recall, AP, f1, ap_class = ap_per_class(
        true_positives, pred_scores, pred_labels, labels)

    return precision, recall, AP, f1, ap_class
def evaluate(model, path, iou_thres, conf_thres, nms_thres, img_size, batch_size):
    model.eval()

    # Get dataloader
    dataset = ListDataset(path, img_size=img_size, augment=False, multiscale=False)
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=False, num_workers=0,
        collate_fn=dataset.collate_fn)

    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    labels = []
    sample_metrics = []  # List of tuples (TP, confs, pred)
    for batch_i, (_, _, imgs, targets) in enumerate(tqdm.tqdm(dataloader, desc="Detecting objects")):
        if targets is None:
            continue

        # Extract labels
        labels += targets[:, 1].tolist()
        # Rescale targets to x1y1x2y2; YOLO outputs xywh, which gets converted in the IoU script later
        targets[:, 2:] = xywh2xyxy(targets[:, 2:])
        targets[:, 2:] *= img_size

        imgs = Variable(imgs.type(Tensor), requires_grad=False)

        with torch.no_grad():
            outputs = model(imgs)
            outputs = non_max_suppression(outputs, conf_thres=conf_thres, nms_thres=nms_thres)

        sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=iou_thres)

    # Concatenate sample statistics
    try:
        true_positives, pred_scores, pred_labels = [
            np.concatenate(x, 0) for x in list(zip(*sample_metrics))]
        precision, recall, AP, f1, ap_class = ap_per_class(
            true_positives, pred_scores, pred_labels, labels)
    except ValueError as error:
        print('-----------------------------------------------')
        print(error)
        print('Model failed to detect any boxes in validation above threshold')
        print('Zeros passed for all metrics')
        print('-----------------------------------------------')
        precision, recall, f1 = (None, None, None)
        AP = np.array([0] * len(np.unique(labels)))
        ap_class = np.unique(labels).astype("int32")

    return precision, recall, AP, f1, ap_class
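# The ValueError fallback above fires when ap_per_class receives empty
# statistics. For reference, a compact, hedged sketch of the interface those
# calls assume; real implementations additionally smooth the precision-recall
# curve before integrating, so treat the exact numbers as approximate.
import numpy as np


def ap_per_class(tp, conf, pred_cls, target_cls):
    i = np.argsort(-np.asarray(conf))  # sort predictions by descending confidence
    tp, pred_cls = np.asarray(tp)[i], np.asarray(pred_cls)[i]
    target_cls = np.asarray(target_cls)
    classes = np.unique(target_cls)
    grid = np.linspace(0, 1, 101)  # 101-point recall grid
    p, r, ap = [], [], []
    for c in classes:
        m = pred_cls == c
        n_gt = (target_cls == c).sum()  # ground-truth boxes of this class
        if m.sum() == 0 or n_gt == 0:
            p.append(0.), r.append(0.), ap.append(0.)
            continue
        tpc = tp[m].cumsum()        # cumulative true positives
        fpc = (1 - tp[m]).cumsum()  # cumulative false positives
        recall = tpc / (n_gt + 1e-16)
        precision = tpc / (tpc + fpc)
        r.append(recall[-1])
        p.append(precision[-1])
        # integrate precision over the interpolated recall grid
        ap.append(np.trapz(np.interp(grid, recall, precision, left=1., right=0.), grid))
    p, r, ap = np.array(p), np.array(r), np.array(ap)
    f1 = 2 * p * r / (p + r + 1e-16)
    return p, r, ap, f1, classes.astype("int32")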
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
    """
    Removes detections with lower object confidence score than 'conf_thres'
    and performs Non-Maximum Suppression to further filter detections.
    Args:
        prediction.shape(batch_size, num_yolo*num_anchors*grid_size*grid_size, 85)
    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)
    """
    # From center (xywh) to corner (xyxy)
    prediction[..., :4] = xywh2xyxy(prediction[..., :4])

    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
        # Filter out confidence scores below threshold
        image_pred = image_pred[image_pred[:, 4] >= conf_thres]
        # If none are remaining => process next image
        if not image_pred.size(0):
            continue
        # score = object_conf * max class prob; sort by it
        # (torch argsort instead of np.argsort so this also works on CUDA tensors)
        score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
        image_pred = image_pred[(-score).argsort()]
        class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
        # detections.shape(num_boxes, 7), the 7 values being
        # (x1, y1, x2, y2, object_conf, class_score, class_pred_label)
        detections = torch.cat(
            (image_pred[:, :5], class_confs.float(), class_preds.float()), 1)

        # Perform non-maximum suppression
        keep_boxes = []
        while detections.size(0):
            # Indices of boxes with large IoU and matching labels
            large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres
            label_match = detections[0, -1] == detections[:, -1]
            invalid = large_overlap & label_match
            # Merge overlapping bboxes weighted by their confidence
            weights = detections[invalid, 4:5]
            detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum()
            keep_boxes += [detections[0]]
            # Remove the suppressed boxes
            detections = detections[~invalid]
        if keep_boxes:
            output[image_i] = torch.stack(keep_boxes)

    return output
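# A minimal smoke test for non_max_suppression above. The box values and
# thresholds are made up for illustration, and bbox_iou is assumed to be the
# one-vs-many IoU helper the suppression loop already relies on.
pred = torch.zeros(1, 2, 85)
pred[0, 0, :5] = torch.tensor([100., 100., 50., 50., 0.9])  # (cx, cy, w, h, obj_conf)
pred[0, 1, :5] = torch.tensor([102., 101., 50., 50., 0.8])  # heavily overlapping duplicate
pred[0, :, 5] = 0.99  # both boxes strongly predict class 0

out = non_max_suppression(pred.clone(), conf_thres=0.5, nms_thres=0.4)
# The two boxes overlap above nms_thres and share a label, so they are merged
# into a single confidence-weighted box: out[0] has shape (1, 7).
print(out[0].shape)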
def val(epoch, args, model, val_dataloader, iou_thresh, conf_thresh, nms_thresh,
        img_size, batch_size=8):
    global best_mAP
    print("begin to val the datasets...")
    model.eval()

    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    labels = []
    sample_metrics = []
    for batch_i, (_, imgs, targets) in enumerate(tqdm(val_dataloader, desc="Detecting objects")):
        labels += targets[:, 1].tolist()
        targets[:, 2:] = xywh2xyxy(targets[:, 2:])
        targets[:, 2:] *= img_size

        imgs = Variable(imgs.type(Tensor), requires_grad=False)

        with torch.no_grad():
            outputs = model(imgs)
            outputs = non_max_suppression(outputs, conf_thres=conf_thresh, nms_thres=nms_thresh)

        sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=iou_thresh)

    tp, pred_scores, pred_labels = [np.concatenate(x, 0) for x in list(zip(*sample_metrics))]
    precision, recall, AP, f1, ap_class = ap_per_class(tp, pred_scores, pred_labels, labels)

    val_precision = precision.mean()
    val_recall = recall.mean()
    val_f1 = f1.mean()
    val_mAP = AP.mean()
    print("precision: %.3f, recall: %.3f, f1: %.3f, mAP: %.3f" %
          (val_precision, val_recall, val_f1, val_mAP))

    if val_mAP > best_mAP:
        best_mAP = val_mAP
        save_name = os.path.join(args.save_dir, "best_model_%.6f.pth" % best_mAP)
        state_dict = model.state_dict()
        for key in state_dict.keys():
            state_dict[key] = state_dict[key].cpu()
        torch.save({"model": state_dict, "epoch": epoch + 1}, save_name)
        print("model has been saved in %s" % save_name, end="")

    return precision, recall, AP, f1, ap_class
def evaluate(model, path, iou_thres, conf_thres, nms_thres, img_size, batch_size):
    model.eval()

    # Get dataloader
    dataset = ListDataset(path, img_size=img_size, augment=False)
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=False, num_workers=1,
        collate_fn=dataset.collate_fn)
    if TQDM_USE:
        dataloader = tqdm(dataloader)

    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    labels = []
    sample_metrics = []  # List of tuples (TP, confs, pred)
    for batch_i, (_, imgs, targets) in enumerate(dataloader):
        # imgs.shape(batch_size, 3, img_size, img_size), img_size is 416 by default
        # targets.shape(num_bbox, 6), the 6 values being (idx, label, x, y, w, h)

        # Extract labels
        labels += targets[:, 1].tolist()
        # Rescale target
        targets[:, 2:] = xywh2xyxy(targets[:, 2:])  # convert the coordinates from xywh to xyxy
        targets[:, 2:] *= img_size  # scale the normalized targets to image size

        imgs = Variable(imgs.type(Tensor), requires_grad=False)

        with torch.no_grad():
            # outputs.shape(batch_size, num_yolo*num_anchors*grid_size*grid_size, 85)
            outputs = model(imgs)
            # The output of NMS is a list with batch_size elements; each element
            # holds the predicted bboxes of one sample, and each bbox includes
            # (x1, y1, x2, y2, object_conf, class_score, class_pred_label)
            outputs = non_max_suppression(outputs, conf_thres=conf_thres, nms_thres=nms_thres)

        sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=iou_thres)

    # Concatenate sample statistics
    true_positives, pred_scores, pred_labels = [
        np.concatenate(x, 0) for x in list(zip(*sample_metrics))]
    precision, recall, AP, f1, ap_class = ap_per_class(
        true_positives, pred_scores, pred_labels, labels)

    return precision, recall, AP, f1, ap_class
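# Every evaluate variant above hands the NMS output and the rescaled targets to
# get_batch_statistics, which is not shown in this section. The sketch below is
# consistent with those call sites: outputs is a per-image list of
# (x1, y1, x2, y2, object_conf, class_score, class_pred) tensors, targets holds
# (sample_idx, label, x1, y1, x2, y2) rows, and bbox_iou is the one-vs-many IoU
# helper used elsewhere in this section.
def get_batch_statistics(outputs, targets, iou_threshold):
    """Compute true positives, predicted scores and predicted labels per sample."""
    batch_metrics = []
    for sample_i in range(len(outputs)):
        if outputs[sample_i] is None:
            continue
        output = outputs[sample_i]
        pred_boxes = output[:, :4]
        pred_scores = output[:, 4]
        pred_labels = output[:, -1]

        true_positives = np.zeros(pred_boxes.shape[0])

        annotations = targets[targets[:, 0] == sample_i][:, 1:]
        target_labels = annotations[:, 0] if len(annotations) else []
        if len(annotations):
            detected_boxes = []
            target_boxes = annotations[:, 1:]
            for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
                # Stop once every target has been matched
                if len(detected_boxes) == len(annotations):
                    break
                # Skip predictions whose class never occurs among the targets
                if pred_label not in target_labels:
                    continue
                iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0)
                if iou >= iou_threshold and box_index not in detected_boxes:
                    true_positives[pred_i] = 1
                    detected_boxes += [box_index]
        batch_metrics.append([true_positives, pred_scores, pred_labels])
    return batch_metrics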
def forward(self, input, targets):
    boxes = targets[:, [0, 2, 3, 4, 5]]  # (batch_idx, x, y, w, h)
    _, _, h, w = input.shape
    boxes[:, [2, 4]] *= h
    boxes[:, [1, 3]] *= w
    # roi_align expects an integer output size; use the mean target height/width
    o_h, o_w = int(torch.mean(boxes[:, 4])), int(torch.mean(boxes[:, 3]))
    boxes[:, 1:] = xywh2xyxy(boxes[:, 1:])
    feat = roi_align(input, boxes, output_size=(o_h, o_w))
    out = self.bn(feat)
    out = self.act(out)
    out = self.pooling(out)
    out = self.linear(out.squeeze())
    return out
def __call__(self, bboxes, img_W, img_H, xywh=True):
    """
    Args:
        bboxes (Tensor): bboxes of shape (n, 5): (x, y, x/w, y/h, label),
            with coordinates normalized to [0, 1]
        img_W (int): image width in pixels
        img_H (int): image height in pixels
        xywh (bool): if True, the input is center format (cx, cy, w, h) and is
            converted to corner format
    Returns:
        bboxes: bboxes rescaled to absolute pixel coordinates
    """
    bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * img_W
    bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * img_H
    if xywh:
        bboxes = xywh2xyxy(bboxes)
    return bboxes
def evaluate(model, path, iou_thres, conf_thres, nms_thres, img_size, batch_size, device):
    model.eval()

    # Get dataloader
    dataset = ListDataset(path, img_size=img_size, augment=False, multiscale=False)
    dataloader = tc.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=False, num_workers=1,
        collate_fn=dataset.collate_fn)

    labels = []
    sample_metrics = []  # List of tuples (TP, confs, pred)
    tq = tqdm.tqdm(dataloader, desc='Detecting objects', ncols=100)
    try:
        for _, imgs, targets in tq:
            imgs = imgs.to(device)
            # Extract labels
            labels += targets[:, 1].tolist()
            # Rescale target
            targets[:, 2:] = xywh2xyxy(targets[:, 2:])
            targets[:, 2:] *= img_size

            with tc.no_grad():
                outputs = model(imgs)
                outputs = non_max_suppression(outputs, conf_thres=conf_thres, nms_thres=nms_thres)

            sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=iou_thres)
    finally:
        tq.close()

    # Concatenate sample statistics
    true_positives, pred_scores, pred_labels = [
        np.concatenate(x, 0) for x in list(zip(*sample_metrics))]
    precision, recall, AP, f1, ap_class = ap_per_class(
        true_positives, pred_scores, pred_labels, labels)

    return precision, recall, AP, f1, ap_class
def get_data_crops(self, img: torch.Tensor, label: np.ndarray) -> torch.Tensor:
    """
    Use torchvision.ops.roi_align to crop all previous tracklets at once
    :param img: [3, self.height, self.width]
    :param label: [N, 6] -> (frame_idx, person_id, x, y, w, h) in normalized coordinates
    :return: cropped patches, or the normalized corner boxes themselves when
        self.use_roi_align is set (so the caller can run roi_align later)
    """
    _, h, w = img.shape
    boxes = label[:, 2:6].copy()
    boxes = xywh2xyxy(boxes)
    if self.use_roi_align:
        return torch.from_numpy(boxes).float()
    boxes = boxes * np.array([w, h, w, h])
    crops = torchvision.ops.roi_align(
        input=img.unsqueeze(0),
        boxes=[torch.from_numpy(boxes).float()],
        output_size=self.crop_size)
    return crops
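# torchvision.ops.roi_align accepts boxes in two formats, and both appear in
# this section: the forward pass a few snippets above hands it a single
# Tensor[K, 5] whose first column is the batch index, while get_data_crops
# passes a list of per-image Tensor[K, 4]. A small sketch with made-up shapes
# contrasting the two:
import torch
import torchvision

feat = torch.randn(2, 3, 64, 64)  # (batch, channels, H, W)

# Format 1: Tensor[K, 5] with the batch index in column 0
boxes_idx = torch.tensor([[0., 4., 4., 20., 20.],
                          [1., 8., 8., 30., 30.]])
crops1 = torchvision.ops.roi_align(feat, boxes_idx, output_size=(7, 7))

# Format 2: a list of per-image Tensor[K, 4]
boxes_list = [torch.tensor([[4., 4., 20., 20.]]),
              torch.tensor([[8., 8., 30., 30.]])]
crops2 = torchvision.ops.roi_align(feat, boxes_list, output_size=(7, 7))

print(crops1.shape, crops2.shape)  # both torch.Size([2, 3, 7, 7])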
def evaluate(args, model, model_cfg, test_loader, iou_thres=0.5, conf_thres=0.5, nms_thres=0.5):
    img_size = int(model_cfg[0]['width'])
    model.eval()

    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    labels, sample_metrics = [], []
    for batch_i, (_, imgs, targets) in enumerate(tqdm.tqdm(test_loader, desc="Detecting objects")):
        labels += targets[:, 1].tolist()
        targets[:, 2:] = xywh2xyxy(targets[:, 2:])
        targets[:, 2:] *= img_size

        imgs = Variable(imgs.type(Tensor), requires_grad=False)

        with torch.no_grad():
            outputs = model(imgs)
            outputs = non_max_suppression(outputs, conf_thres=conf_thres, nms_thres=nms_thres)

        sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=iou_thres)

    true_positives, pred_scores, pred_labels = [
        np.concatenate(x, 0) for x in list(zip(*sample_metrics))]
    precision, recall, AP, f1, ap_class = ap_per_class(
        true_positives, pred_scores, pred_labels, labels)

    return precision, recall, AP, f1, ap_class
def test_det(opt, batch_size=12, img_size=(1088, 608), iou_thres=0.5, print_interval=40):
    data_cfg = opt.data_cfg
    f = open(data_cfg)
    data_cfg_dict = json.load(f)
    f.close()
    nC = 1
    test_path = data_cfg_dict['test']
    dataset_root = data_cfg_dict['root']
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    model = load_model(model, opt.load_model)
    # model = torch.nn.DataParallel(model)
    model = model.to(opt.device)
    model.eval()

    # Get dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = DetDataset(dataset_root, test_path, img_size, augment=False, transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False,
                                             num_workers=8, drop_last=False, collate_fn=collate_fn)
    mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \
        [], [], [], [], [], [], [], [], []
    AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
    for batch_i, (imgs, targets, paths, shapes, targets_len) in enumerate(dataloader):
        t = time.time()
        output = model(imgs.cuda())[-1]
        origin_shape = shapes[0]
        width = origin_shape[1]
        height = origin_shape[0]
        inp_height = img_size[1]
        inp_width = img_size[0]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
        meta = {'c': c, 's': s,
                'out_height': inp_height // opt.down_ratio,
                'out_width': inp_width // opt.down_ratio}
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = output['reg'] if opt.reg_offset else None
        opt.K = 200
        detections, inds = mot_decode(hm, wh, reg=reg, cat_spec_wh=opt.cat_spec_wh, K=opt.K)

        # Compute average precision for each sample
        targets = [targets[i][:int(l)] for i, l in enumerate(targets_len)]
        for si, labels in enumerate(targets):
            seen += 1
            dets = detections[si]
            dets = dets.unsqueeze(0)
            dets = post_process(opt, dets, meta)
            dets = merge_outputs(opt, [dets])[1]
            if dets is None:
                # If there are labels but no detections, mark as zero AP
                if labels.size(0) != 0:
                    mAPs.append(0), mR.append(0), mP.append(0)
                continue

            # If there are no labels, also record zero metrics
            correct = []
            if labels.size(0) == 0:
                mAPs.append(0), mR.append(0), mP.append(0)
                continue
            else:
                target_cls = labels[:, 0]

                # Extract target boxes as (x1, y1, x2, y2)
                target_boxes = xywh2xyxy(labels[:, 2:6])
                target_boxes[:, 0] *= width
                target_boxes[:, 2] *= width
                target_boxes[:, 1] *= height
                target_boxes[:, 3] *= height

                detected = []
                for *pred_bbox, conf in dets:
                    obj_pred = 0
                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                    # Compute iou with target boxes
                    iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0]
                    # Extract index of largest overlap
                    best_i = np.argmax(iou)
                    # If overlap exceeds threshold and classification is correct, mark as correct
                    if iou[best_i] > iou_thres and obj_pred == labels[best_i, 0] and best_i not in detected:
                        correct.append(1)
                        detected.append(best_i)
                    else:
                        correct.append(0)

            # Compute Average Precision (AP) per class
            AP, AP_class, R, P = ap_per_class(
                tp=correct,
                conf=dets[:, 4],
                pred_cls=np.zeros_like(dets[:, 4]),  # detections[:, 6]
                target_cls=target_cls)

            # Accumulate AP per class
            AP_accum_count += np.bincount(AP_class, minlength=nC)
            AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)

            # Compute mean AP across all classes in this image, and append to image list
            mAPs.append(AP.mean())
            mR.append(R.mean())
            mP.append(P.mean())

            # Means over all images seen so far
            mean_mAP = np.sum(mAPs) / (AP_accum_count + 1E-16)
            mean_R = np.sum(mR) / (AP_accum_count + 1E-16)
            mean_P = np.sum(mP) / (AP_accum_count + 1E-16)

        if batch_i % print_interval == 0:
            # Print image mAP and running mean mAP
            print(('%11s%11s' + '%11.3g' * 4 + 's') %
                  (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP, time.time() - t))

    # Print mAP per class
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16)))

    # Return mAP
    return mean_mAP, mean_R, mean_P
def test(cfg, data, weights=None, batch_size=16, img_size=608, iou_thres=0.5,
         conf_thres=0.001, nms_thres=0.5, save_json=True, hyp=None, model=None,
         single_cls=False):
    """test the metrics of the trained model

    :param str cfg: model cfg file
    :param str data: data dict
    :param str weights: weights path
    :param int batch_size: batch size
    :param int img_size: image size
    :param float iou_thres: iou threshold
    :param float conf_thres: confidence threshold
    :param float nms_thres: nms threshold
    :param bool save_json: whether to save results as COCO-format JSON
    :param str hyp: hyperparameter
    :param str model: yolov4 model
    :param bool single_cls: only one class
    :return: results
    """
    if model is None:
        device = select_device(opt.device)
        verbose = False
        # Initialize model
        model = Model(cfg, img_size).to(device)
        # Load weights
        if weights.endswith('.pt'):
            checkpoint = torch.load(weights, map_location=device)
            state_dict = intersect_dicts(checkpoint['model'], model.state_dict())
            model.load_state_dict(state_dict, strict=False)
        elif len(weights) > 0:
            load_darknet_weights(model, weights)
        print(f'Loaded weights from {weights}!')
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device
        verbose = False

    test_path = data['valid']
    num_classes, names = (1, ['item']) if single_cls else (int(data['num_classes']), data['names'])

    # Dataloader
    dataset = LoadImagesAndLabels(test_path, img_size, batch_size, hyp=hyp)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=8,
                                             pin_memory=True, collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    output_format = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'Pre', 'Rec', 'mAP', 'F1')
    precision, recall, f_1, mean_pre, mean_rec, mean_ap, mf1 = 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    json_dict, stats, aver_pre, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=output_format)):
        targets = targets.to(device)
        imgs = imgs.to(device) / 255.0
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Plot images with bounding boxes
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg')

        with torch.no_grad():
            inference_output, train_output = model(imgs)

            if hasattr(model, 'hyp'):  # if model has loss hyperparameters
                loss += compute_loss(train_output, targets, model)[1][:3].cpu()  # GIoU, obj, cls

            output = non_max_suppression(inference_output, conf_thres=conf_thres, nms_thres=nms_thres)

        # Statistics per image
        for i, pred in enumerate(output):
            labels = targets[targets[:, 0] == i, 1:]
            num_labels = len(labels)
            target_class = labels[:, 0].tolist() if num_labels else []
            seen += 1

            if pred is None:
                if num_labels:
                    stats.append(([], torch.Tensor(), torch.Tensor(), target_class))
                continue

            # Append to pycocotools JSON dictionary, e.g.
            # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...]
            if save_json:
                image_id = int(Path(paths[i]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[i].shape[1:], box, shapes[i][0])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for det_i, det in enumerate(pred):
                    json_dict.append({
                        'image_id': image_id,
                        'category_id': coco91class[int(det[6])],
                        'bbox': [float(format(x, '.%gf' % 3)) for x in box[det_i]],
                        'score': float(format(det[4], '.%gf' % 5))
                    })

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if num_labels:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for j, (*pbox, _, _, pcls) in enumerate(pred):
                    # Break if all targets already located in image
                    if len(detected) == num_labels:
                        break
                    # Continue if predicted class not among image classes
                    if pcls.item() not in target_class:
                        continue
                    # Best iou, index between pred and targets
                    mask = (pcls == tcls_tensor).nonzero(as_tuple=False).view(-1)
                    iou, best_iou = bbox_iou(pbox, tbox[mask]).max(0)
                    # If iou > threshold and class is correct, mark as correct
                    if iou > iou_thres and mask[best_iou] not in detected:  # and pcls == target_class[bi]:
                        correct[j] = 1
                        detected.append(mask[best_iou])

            # Append statistics (correct, conf, pcls, target_class)
            stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), target_class))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]
    if len(stats):
        precision, recall, aver_pre, f_1, ap_class = ap_per_class(*stats)
        mean_pre, mean_rec, mean_ap, mf1 = (precision.mean(), recall.mean(),
                                            aver_pre.mean(), f_1.mean())
        num_targets = np.bincount(stats[3].astype(np.int64), minlength=num_classes)  # targets per class
    else:
        num_targets = torch.zeros(1)

    # Print results
    print_format = '%20s' + '%10.3g' * 6
    print(print_format % ('all', seen, num_targets.sum(), mean_pre, mean_rec, mean_ap, mf1))

    # Print results per class
    if verbose and num_classes > 1 and stats:
        for i, class_ in enumerate(ap_class):
            print(print_format % (names[class_], seen, num_targets[class_],
                                  precision[i], recall[i], aver_pre[i], f_1[i]))

    # Save JSON
    if save_json and mean_ap and json_dict:
        try:
            img_ids = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files]
            with open('results.json', 'w') as file:
                json.dump(json_dict, file)

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocogt = COCO('data/coco/annotations/instances_val2017.json')  # initialize COCO ground truth api
            cocodt = cocogt.loadRes('results.json')  # initialize COCO pred api

            cocoeval = COCOeval(cocogt, cocodt, 'bbox')
            cocoeval.params.imgIds = img_ids  # only evaluate these images
            cocoeval.evaluate()
            cocoeval.accumulate()
            cocoeval.summarize()
            mean_ap = cocoeval.stats[1]  # update mAP to pycocotools mAP
        except ImportError:
            print('WARNING: missing dependency pycocotools from requirements.txt. '
                  'Can not compute official COCO mAP.')

    # Return results
    maps = np.zeros(num_classes) + mean_ap
    for i, class_ in enumerate(ap_class):
        maps[class_] = aver_pre[i]
    return (mean_pre, mean_rec, mean_ap, mf1, *(loss / len(dataloader)).tolist()), maps
def __init__(self, img_files, img_size=416, batch_size=16, augment=False, hyp=None,
             rect=False, image_weights=False, cache_labels=True, cache_images=False,
             single_cls=False):
    self.img_files = img_files
    n = len(self.img_files)
    assert n > 0, 'No images found. See %s' % help_url
    bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
    nb = bi[-1] + 1  # number of batches

    self.n = n
    self.batch = bi  # batch index of image
    self.img_size = img_size
    self.augment = augment
    self.hyp = hyp
    self.image_weights = image_weights
    self.rect = False if image_weights else rect
    self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)

    # Define labels
    self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
                        for x in self.img_files]

    # Rectangular Training  https://github.com/ultralytics/yolov3/issues/232
    if self.rect:
        # Sort by aspect ratio (shapes are assumed fixed at 1280x720 here)
        s = np.stack((np.repeat(1280., n).T, np.repeat(720., n).T), axis=1)
        ar = s[:, 1] / s[:, 0]  # aspect ratio
        irect = ar.argsort()
        self.img_files = [self.img_files[i] for i in irect]
        self.label_files = [self.label_files[i] for i in irect]
        self.shapes = s[irect]  # wh
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]
        self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32.).astype(np.int) * 32

    # Preload labels (required for weighted CE training)
    self.imgs = [None] * n
    self.labels = [None] * n
    if cache_labels or image_weights:  # cache labels for faster training
        self.labels = [np.zeros((0, 5))] * n
        extract_bounding_boxes = False
        create_datasubset = False
        pbar = tqdm(self.label_files, desc='Caching labels')
        nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
        for i, file in enumerate(pbar):
            try:
                with open(file, 'r') as f:
                    l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
            except:
                nm += 1  # file missing
                continue

            if l.shape[0]:
                assert l.shape[1] == 5, '> 5 label columns: %s' % file
                assert (l >= 0).all(), 'negative labels: %s' % file
                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                    nd += 1
                if single_cls:
                    l[:, 0] = 0  # force dataset into single-class mode
                self.labels[i] = l
                nf += 1  # file found

                # Create subdataset (a smaller dataset)
                if create_datasubset and ns < 1E4:
                    if ns == 0:
                        create_folder(path='./datasubset')
                        os.makedirs('./datasubset/images')
                    exclude_classes = 43
                    if exclude_classes not in l[:, 0]:
                        ns += 1
                        # shutil.copy(src=self.img_files[i], dst='./datasubset/images/')  # copy image
                        with open('./datasubset/images.txt', 'a') as f:
                            f.write(self.img_files[i] + '\n')

                # Extract object detection boxes for a second stage classifier
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w = img.shape[:2]
                    for j, x in enumerate(l):
                        f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(f).parent):
                            os.makedirs(Path(f).parent)  # make new output folder
                        b = x[1:] * [w, h, w, h]  # box
                        b[2:] = b[2:].max()  # rectangle to square
                        b[2:] = b[2:] * 1.3 + 30  # pad
                        b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
                        b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                        b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                        assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
            else:
                ne += 1  # file empty
                # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i]))  # remove

            pbar.desc = 'Caching labels (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
                nf, nm, ne, nd, n)
        assert nf > 0, 'No labels found. See %s' % help_url

    # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
    if cache_images:  # if training
        gb = 0  # Gigabytes of cached images
        pbar = tqdm(range(len(self.img_files)), desc='Caching images')
        self.img_hw0, self.img_hw = [None] * n, [None] * n
        for i in pbar:  # max 10k images
            self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized
            gb += self.imgs[i].nbytes
            pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)

    # Detect corrupted images  https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
    detect_corrupted_images = False
    if detect_corrupted_images:
        from skimage import io  # conda install -c conda-forge scikit-image
        for file in tqdm(self.img_files, desc='Detecting corrupted images'):
            try:
                _ = io.imread(file)
            except:
                print('Corrupted image detected: %s' % file)
def get_LCP_area(self, targets, predictions, anchors, feature_w, feature_h):
    pred_info = []
    # Convert the anchors to normalized form (they are given in pixels on a 416x416 input)
    anchors = torch.from_numpy(anchors).to(targets.dtype).to(targets.device) / 416.0
    # anchors_vec are the anchors on the feature map
    anchors_vec = anchors * torch.tensor([feature_h, feature_w],
                                         dtype=anchors.dtype, device=anchors.device)

    # Compute the matching indices
    gwh = targets[:, 4:6]
    iou_anchors = wh_iou(anchors, gwh)
    _, idx_a = iou_anchors.max(0)
    idx_p = idx_a // 3
    idx_a = idx_a % 3
    idx_b = targets[:, 0].long()

    # Select the matched boxes from each prediction layer
    for i, p in enumerate(predictions):
        mask = idx_p == i
        idx_x, idx_y = (targets[:, 2] * p.size(-2)).long(), (targets[:, 3] * p.size(-3)).long()
        pred_info.append(p[idx_b[mask], idx_a[mask], idx_y[mask], idx_x[mask]])
    pred_info = torch.cat(pred_info, dim=0)

    # From here on, pred_info_detach is only used for box selection,
    # so it no longer needs gradients
    pred_info_detach = pred_info.clone().detach()
    pred_info_detach[:, 0:2] = (torch.sigmoid(pred_info_detach[:, 0:2])
                                + torch.stack([idx_x, idx_y], dim=0).t())
    pred_info_detach[:, 2:4] = (torch.exp(pred_info_detach[:, 2:4]).clamp(max=1E3)
                                * anchors_vec[(idx_p + idx_a)])

    # Scale the (previously normalized) labels to feature-map coordinates
    targets[:, [2, 4]] *= feature_w
    targets[:, [3, 5]] *= feature_h

    # Compute those with IoU greater than 0.5
    targets[:, 2:] = xywh2xyxy(targets[:, 2:])
    pred_info_detach[:, :4] = xywh2xyxy(pred_info_detach[:, :4])
    boxes_union = torch.zeros([len(targets), 5], device=targets.device)
    boxes_gt = torch.zeros([len(targets), 5], device=targets.device)
    boxes_gt[:, 0] = idx_b
    boxes_gt[:, 1:] = targets[:, 2:]
    boxes_union[:, 0] = idx_b
    boxes_union[:, 1:3] = torch.min(pred_info_detach[:, :2], targets[:, 2:4])
    boxes_union[:, 3:5] = torch.max(pred_info_detach[:, 2:4], targets[:, 4:6])

    giou = bbox_iou(torch.cat([
        torch.sigmoid(pred_info[:, 0:2]) + torch.stack([idx_x, idx_y], dim=0).t(),
        torch.exp(pred_info[:, 2:4]).clamp(max=1E3) * anchors_vec[(idx_p + idx_a)]
    ], dim=1).t(), targets[:, 2:6], x1y1x2y2=False, GIoU=True)

    return boxes_gt, boxes_union, (1 - giou).mean()
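# get_LCP_area matches targets to anchors through wh_iou, which is not defined
# in this section. The usual formulation treats both boxes as concentric, so
# the intersection is min(w)*min(h); a sketch consistent with the call above,
# where wh_iou(anchors, gwh) yields an (n_anchors, n_targets) matrix:
def wh_iou(wh1, wh2):
    # wh1: (n, 2), wh2: (m, 2); returns the (n, m) IoU matrix
    wh1 = wh1[:, None]  # (n, 1, 2)
    wh2 = wh2[None]     # (1, m, 2)
    inter = torch.min(wh1, wh2).prod(2)  # (n, m) intersection areas
    return inter / (wh1.prod(2) + wh2.prod(2) - inter)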
def test(cfg, data, batch_size, img_size, conf_thres, iou_thres, nms_thres,
         src_txt_path, weights, log_file_path=None, model=None):
    # 0. Initialize parameters
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    names = load_classes(data['names'])

    # 1. Load the network
    if model is None:
        device = select_device('0')
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: support the .weights format; map_location=device?
            model.load_state_dict(torch.load(weights, map_location=device)['model'])
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # multi-GPU
    else:
        device = next(model.parameters()).device  # get model device
    model.to(device).eval()

    # 2. Load the dataset
    test_dataset = VocDataset(src_txt_path, img_size, with_label=True, is_training=False)
    dataloader = DataLoader(test_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=8,  # TODO
                            collate_fn=test_dataset.test_collate_fn,  # TODO
                            pin_memory=True)

    # 3. Predict (forward pass)
    image_nums = 0
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@{}'.format(iou_thres), 'F1')
    p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []

    pbar = tqdm(dataloader)
    for i, (img_tensor, target_tensor, _, _) in enumerate(pbar):
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)
        target_tensor = target_tensor.to(device)
        height, width = img_tensor.shape[2:]

        start = time.time()
        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            output = model(img_tensor)  # (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres, nms_thres)
            s = 'time use per batch: %.3fs' % (time.time() - start)
        pbar.set_description(s)

        for batch_idx, pred in enumerate(nms_output):  # pred: (bs, 7)
            labels = target_tensor[target_tensor[:, 0] == batch_idx, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            image_nums += 1

            # Handle the case where no box is predicted at all, e.g. conf_thres too high
            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds. TODO: necessary because the labels are
            # clipped too, so removing this clip should lower mAP slightly
            clip_coords(pred, (height, width))  # mAP is the same

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= img_tensor[batch_idx].size()[2]  # w
                tbox[:, [1, 3]] *= img_tensor[batch_idx].size()[1]  # h

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):
                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break
                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue
                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)
                    # If iou > threshold and class is correct, mark as correct
                    if iou > iou_thres and m[bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # Each stats entry is (correct, pred_conf, pred_cls, target_cls), e.g.
            #   ([1, 0, 1, 0, 0, 1, 0, 0, 1],
            #    tensor([0.17245, 0.14642, 0.07215, ...]),
            #    tensor([2., 2., 2., 2., 2., 2., 2., 2., 2.]),
            #    [2.0, 2.0, 2.0, 2.0, 2.0])
            stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))

    # After collecting stats for all images, compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10s' + '%10.3g' * 5  # print format
    pf_value = pf % ('all', str(image_nums), nt.sum(), mp, mr, map, mf1)
    print(pf_value)

    if __name__ != '__main__':
        write_to_file(s, log_file_path)
        write_to_file(pf_value, log_file_path)

    results = []
    results.append({"all": (mp, mr, map, mf1)})

    # Print results per class
    if nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]))
            if __name__ != '__main__':
                write_to_file(pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]),
                              log_file_path)
            results.append({names[c]: (p[i], r[i], ap[i], f1[i])})

    # Return results
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1), maps
def __init__(self, path, img_size=416, batch_size=16, augment=False, rect=True,
             image_weights=False):
    with open(path, 'r') as f:
        img_files = f.read().splitlines()
        self.img_files = [x for x in img_files if os.path.splitext(x)[-1].lower() in img_formats]

    n = len(self.img_files)
    bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
    nb = bi[-1] + 1  # number of batches
    assert n > 0, 'No images found in %s' % path

    self.n = n
    self.batch = bi  # batch index of image
    self.img_size = img_size
    self.augment = augment
    self.image_weights = image_weights
    self.rect = False if image_weights else rect

    # Define labels
    self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
                        for x in self.img_files]

    # Rectangular Training  https://github.com/ultralytics/yolov3/issues/232
    if self.rect:
        # Read image shapes
        sp = 'data' + os.sep + path.replace('.txt', '.shapes').split(os.sep)[-1]  # shapefile path
        if not os.path.exists(sp):  # read shapes using PIL and write shapefile for next time (faster)
            s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')]
            np.savetxt(sp, s, fmt='%g')

        with open(sp, 'r') as f:  # read existing shapefile
            s = np.array([x.split() for x in f.read().splitlines()], dtype=np.float64)
            assert len(s) == n, 'Shapefile error. Please delete %s and rerun' % sp  # TODO: auto-delete shapefile

        # Sort by aspect ratio
        ar = s[:, 1] / s[:, 0]  # aspect ratio
        irect = ar.argsort()
        self.img_files = [self.img_files[i] for i in irect]
        self.label_files = [self.label_files[i] for i in irect]
        self.shapes = s[irect]  # wh
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]
        self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32.).astype(np.int) * 32

    # Preload labels (required for weighted CE training)
    self.imgs = [None] * n
    self.labels = [None] * n
    preload_labels = False
    if preload_labels:
        self.labels = [np.zeros((0, 5))] * n
        label_iter = tqdm(self.label_files, desc='Reading labels') if n > 10 else self.label_files
        extract_bounding_boxes = False
        for i, file in enumerate(label_iter):
            try:
                with open(file, 'r') as f:
                    l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
                    if l.shape[0]:
                        assert l.shape[1] == 5, '> 5 label columns: %s' % file
                        assert (l >= 0).all(), 'negative labels: %s' % file
                        assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                        self.labels[i] = l

                        # Extract object detection boxes for a second stage classifier
                        if extract_bounding_boxes:
                            p = Path(self.img_files[i])
                            img = cv2.imread(str(p))
                            h, w, _ = img.shape
                            for j, x in enumerate(l):
                                f = '%s%sclassification%s%g_%g_%s' % (
                                    p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                                if not os.path.exists(Path(f).parent):
                                    os.makedirs(Path(f).parent)  # make new output folder
                                box = xywh2xyxy(x[1:].reshape(-1, 4)).ravel()
                                box = np.clip(box, 0, 1)  # clip boxes outside of image
                                result = cv2.imwrite(f, img[int(box[1] * h):int(box[3] * h),
                                                            int(box[0] * w):int(box[2] * w)])
                                if not result:
                                    print('stop')
            except:
                pass  # print('Warning: missing labels for %s' % self.img_files[i])  # missing label file

        assert len(np.concatenate(self.labels, 0)) > 0, 'No labels found. Incorrect label paths provided.'

    # Detect corrupted images  https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
    detect_corrupted_images = False
    if detect_corrupted_images:
        from skimage import io  # conda install -c conda-forge scikit-image
        for file in tqdm(self.img_files, desc='Detecting corrupted images'):
            try:
                _ = io.imread(file)
            except:
                print('Corrupted image detected: %s' % file)
def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None,
             rect=False, image_weights=False, cache_images=False, single_cls=False,
             pad=0.0):
    try:
        path = str(Path(path))
        parent = str(Path(path).parent) + os.sep
        if os.path.isfile(path):  # file
            with open(path, "r") as f:
                f = f.read().splitlines()
                # local to global path
                f = [x.replace("./", parent) if x.startswith("./") else x for x in f]
        elif os.path.isdir(path):  # folder
            f = glob.iglob(path + os.sep + "*.*")
        else:
            raise Exception("%s does not exist" % path)
        # local to global path
        self.img_files = [x.replace("./", parent) for x in f
                          if os.path.splitext(x)[-1].lower() in img_formats]
    except:
        raise Exception("Error loading data from %s. See %s" % (path, help_url))

    n = len(self.img_files)
    assert n > 0, "No images found in %s. See %s" % (path, help_url)

    # batch index
    bi = np.floor(np.arange(n) / batch_size).astype(np.int)
    nb = bi[-1] + 1  # number of batches

    self.n = n  # number of images
    self.batch = bi  # batch index of image
    self.img_size = img_size
    self.augment = augment
    self.hyp = hyp
    self.image_weights = image_weights
    self.rect = False if image_weights else rect
    self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)

    # Define labels
    self.label_files = [x.replace("images", "labels").replace(os.path.splitext(x)[-1], ".txt")
                        for x in self.img_files]

    # Read image shapes (wh)
    sp = path.replace(".txt", "") + ".shapes"  # shapefile path
    try:
        with open(sp, "r") as f:  # read existing shapefile
            s = [x.split() for x in f.read().splitlines()]
            # If the row count (image count) in the existing shapefile differs from
            # the current dataset, assume it is a different dataset and rebuild it
            assert len(s) == n, "shapefile out of sync"
    except:
        # tqdm displays the processing progress
        s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc="Reading image shapes")]
        # Save every image's shape to the .shapes file
        np.savetxt(sp, s, fmt="%g")  # overwrite existing (if any)

    self.shapes = np.array(s, dtype=np.float64)

    # Rectangular Training  https://github.com/ultralytics/yolov3/issues/232
    # If True, training uses rectangles close to the original aspect ratio
    # (longest side equal to img_size) instead of img_size x img_size squares
    if self.rect:
        # Sort by aspect ratio
        s = self.shapes  # wh
        ar = s[:, 1] / s[:, 0]  # aspect ratio
        # argsort returns the indices that would sort the array ascending
        irect = ar.argsort()
        self.img_files = [self.img_files[i] for i in irect]
        self.label_files = [self.label_files[i] for i in irect]
        self.shapes = s[irect]  # wh
        ar = ar[irect]

        # set training image shapes: one shared shape per batch
        shapes = [[1, 1]] * nb  # nb: number of batches
        for i in range(nb):
            ari = ar[bi == i]  # bi: batch index
            mini, maxi = ari.min(), ari.max()
            # if h/w < 1 (w > h), set w to img_size
            if maxi < 1:
                shapes[i] = [maxi, 1]
            # if h/w > 1 (w < h), set h to img_size
            elif mini > 1:
                shapes[i] = [1, 1 / mini]
        # Compute the input shape of each batch (rounded up to a multiple of 32)
        self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32. + pad).astype(np.int) * 32

    # cache labels
    self.imgs = [None] * n
    # label: [class, x, y, w, h]
    self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
    create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
    nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
    np_labels_path = str(Path(self.label_files[0]).parent) + ".npy"  # saved labels in *.npy file
    if os.path.isfile(np_labels_path):
        s = np_labels_path  # print string
        x = np.load(np_labels_path, allow_pickle=True)
        if len(x) == n:
            self.labels = x
            labels_loaded = True
    else:
        s = path.replace("images", "labels")

    pbar = tqdm(self.label_files)
    for i, file in enumerate(pbar):
        if labels_loaded is True:
            l = self.labels[i]
        else:
            try:
                with open(file, "r") as f:
                    l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
            except:
                nm += 1  # file missing
                continue

        if l.shape[0]:
            assert l.shape[1] == 5, "> 5 label columns: %s" % file
            assert (l >= 0).all(), "negative labels: %s" % file
            assert (l[:, 1:] <= 1).all(), "non-normalized or out of bounds coordinate labels: %s" % file
            # Check every row for duplicate entries
            if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                nd += 1
            if single_cls:
                l[:, 0] = 0  # force dataset into single-class mode
            self.labels[i] = l
            nf += 1  # file found

            # create subdataset (a smaller dataset)
            if create_datasubset and ns < 1E4:
                if ns == 0:
                    create_folder(path="./datasubset")
                    os.makedirs("./datasubset/images")
                exclude_classes = 43
                if exclude_classes not in l[:, 0]:
                    ns += 1
                    with open("./datasubset/images.txt", "a") as f:
                        f.write(self.img_files[i] + "\n")

            # Extract object detection boxes for a second stage classifier
            if extract_bounding_boxes:
                p = Path(self.img_files[i])
                img = cv2.imread(str(p))
                h, w = img.shape[:2]
                for j, x in enumerate(l):
                    f = "%s%sclassifier%s%g_%g_%s" % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                    if not os.path.exists(Path(f).parent):
                        os.makedirs(Path(f).parent)  # make new output folder

                    # Convert relative to absolute coordinates
                    # b: x, y, w, h
                    b = x[1:] * [w, h, w, h]  # box
                    # Set width and height to the larger of the two
                    b[2:] = b[2:].max()  # rectangle to square
                    # Add padding in both directions
                    b[2:] = b[2:] * 1.3 + 30  # pad
                    # Convert coordinates from x,y,w,h to xmin,ymin,xmax,ymax
                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
                    # Clip bbox coordinates to the image
                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)
                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                    assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), "Failure extracting classifier boxes"
        else:
            ne += 1  # file empty

        pbar.desc = "Caching labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)" % (
            s, nf, nm, ne, nd, n)
    assert nf > 0 or n == 20288, "No labels found in %s. See %s" % (
        os.path.dirname(file) + os.sep, help_url)

    # If the labels were not already cached as numpy and there are more than
    # 1000 samples, save them in numpy format for faster future loading
    if not labels_loaded and n > 1000:
        print("Saving labels to %s for faster future loading" % np_labels_path)
        np.save(np_labels_path, self.labels)  # save for next time

    # Cache images into memory for faster training (Warning: large datasets may exceed system RAM)
    if cache_images:  # if training
        gb = 0  # Gigabytes of cached images
        pbar = tqdm(range(len(self.img_files)), desc="Caching images")
        self.img_hw0, self.img_hw = [None] * n, [None] * n
        for i in pbar:  # max 10k images
            self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized
            gb += self.imgs[i].nbytes  # track RAM used by the cache
            pbar.desc = "Caching images (%.1fGB)" % (gb / 1E9)

    # Detect corrupted images  https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
    detect_corrupted_images = False
    if detect_corrupted_images:
        from skimage import io  # conda install -c conda-forge scikit-image
        for file in tqdm(self.img_files, desc="Detecting corrupted images"):
            try:
                _ = io.imread(file)
            except:
                print("Corrupted image detected: %s" % file)
def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None,
             rect=True, image_weights=False, cache_labels=False, cache_images=False):
    path = str(Path(path))  # os-agnostic
    with open(path, 'r') as f:
        self.img_files = [x.replace('/', os.sep) for x in f.read().splitlines()  # os-agnostic
                          if os.path.splitext(x)[-1].lower() in img_formats]

    n = len(self.img_files)
    bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
    nb = bi[-1] + 1  # number of batches
    assert n > 0, 'No images found in %s' % path

    self.n = n
    self.batch = bi  # batch index of image
    self.img_size = img_size
    self.augment = augment
    self.hyp = hyp
    self.image_weights = image_weights
    self.rect = False if image_weights else rect

    # Define labels
    self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
                        for x in self.img_files]

    # Rectangular Training  https://github.com/ultralytics/yolov3/issues/232
    if self.rect:
        # Read image shapes
        sp = 'data' + os.sep + path.replace('.txt', '.shapes').split(os.sep)[-1]  # shapefile path
        try:
            with open(sp, 'r') as f:  # read existing shapefile
                s = [x.split() for x in f.read().splitlines()]
                assert len(s) == n, 'Shapefile out of sync'
        except:
            s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')]
            np.savetxt(sp, s, fmt='%g')  # overwrites existing (if any)

        # Sort by aspect ratio
        s = np.array(s, dtype=np.float64)
        ar = s[:, 1] / s[:, 0]  # aspect ratio
        irect = ar.argsort()
        self.img_files = [self.img_files[i] for i in irect]
        self.label_files = [self.label_files[i] for i in irect]
        self.shapes = s[irect]
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]
        self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32.).astype(np.int) * 32

    # Preload labels (required for weighted CE training)
    self.imgs = [None] * n
    self.labels = [None] * n
    if cache_labels or image_weights:  # cache labels for faster training
        self.labels = [np.zeros((0, 5))] * n
        extract_bounding_boxes = False
        create_datasubset = False
        pbar = tqdm(self.label_files, desc='Reading labels')
        nm, nf, ne, ns = 0, 0, 0, 0  # number missing, number found, number empty, number datasubset
        for i, file in enumerate(pbar):
            try:
                with open(file, 'r') as f:
                    l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
            except:
                nm += 1  # file missing
                continue

            if l.shape[0]:
                assert l.shape[1] == 5, '> 5 label columns: %s' % file
                assert (l >= 0).all(), 'negative labels: %s' % file
                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                self.labels[i] = l
                nf += 1  # file found

                # Create subdataset (a smaller dataset)
                if create_datasubset and ns < 1E4:
                    if ns == 0:
                        create_folder(path='./datasubset')
                        os.makedirs('./datasubset/images')
                    exclude_classes = 43
                    if exclude_classes not in l[:, 0]:
                        ns += 1
                        # shutil.copy(src=self.img_files[i], dst='./datasubset/images/')  # copy image
                        with open('./datasubset/images.txt', 'a') as f:
                            f.write(self.img_files[i] + '\n')

                # Extract object detection boxes for a second stage classifier
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w = img.shape[:2]
                    for j, x in enumerate(l):
                        f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(f).parent):
                            os.makedirs(Path(f).parent)  # make new output folder
                        b = x[1:] * np.array([w, h, w, h])  # box
                        b[2:] = b[2:].max()  # rectangle to square
                        b[2:] = b[2:] * 1.3 + 30  # pad
                        b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
                        b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                        b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                        assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
            else:
                ne += 1  # file empty
                # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i]))  # remove

            pbar.desc = 'Reading labels (%g found, %g missing, %g empty for %g images)' % (nf, nm, ne, n)
        assert nf > 0, 'No labels found. Recommend correcting image and label paths.'

    # Cache images into memory for faster training (~5GB)
    if cache_images and augment:  # if training
        for i in tqdm(range(min(len(self.img_files), 10000)), desc='Reading images'):  # max 10k images
            img_path = self.img_files[i]
            img = cv2.imread(img_path)  # BGR
            assert img is not None, 'Image Not Found ' + img_path
            r = self.img_size / max(img.shape)  # size ratio
            if self.augment and r < 1:  # if training (NOT testing), downsize to inference shape
                h, w = img.shape[:2]
                img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR)  # or INTER_AREA
            self.imgs[i] = img

    # Detect corrupted images  https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
    detect_corrupted_images = False
    if detect_corrupted_images:
        from skimage import io  # conda install -c conda-forge scikit-image
        for file in tqdm(self.img_files, desc='Detecting corrupted images'):
            try:
                _ = io.imread(file)
            except:
                print('Corrupted image detected: %s' % file)
def run_eval():
    torch.backends.quantized.engine = 'qnnpack'
    detection_model = torch.jit.load(os.path.join(PATH_TO_DETECTION_MODEL, 'model_quantized.pt'))
    detection_model.to('cpu').eval()
    classification_model = torch.jit.load(os.path.join(PATH_TO_CLASSIFICATION_MODEL, 'model_quantized.pt'))
    classification_model.to('cpu').eval()

    cap = cv2.VideoCapture(0)  # Start video capturing
    while cap.isOpened():
        ret, image = cap.read()  # original image
        orig_shape = image.shape[:2]  # (H, W)
        start = time.time()
        with torch.no_grad():
            detection_image = cv2.resize(image, DETECTION_SIZE)
            detection_image = ImageToTensor()(detection_image)
            detection_image = detection_image.unsqueeze(0)
            output = detection_model(detection_image)  # Prediction

        x, y, z = get_most_confident_bbox(output, 2)
        pred_xywh = transform_bbox_coords(output, x, y, z, DETECTION_SIZE, GRID_SIZE)
        pred_xyxy = xywh2xyxy(pred_xywh)

        if output[0, z + 4, x, y].item() > DETECTION_THRESHOLD:  # prediction confidence threshold
            # Transform bbox coords from DETECTION_SIZE back to orig_shape
            bbox_l_y = int(pred_xyxy[1] * (orig_shape[0] / DETECTION_SIZE[1]))
            bbox_r_y = int(pred_xyxy[3] * (orig_shape[0] / DETECTION_SIZE[1]))
            bbox_l_x = int(pred_xyxy[0] * (orig_shape[1] / DETECTION_SIZE[0]))
            bbox_r_x = int(pred_xyxy[2] * (orig_shape[1] / DETECTION_SIZE[0]))

            # Make bbox square with sides equal to bbox_h
            bbox_x_c = (bbox_l_x + bbox_r_x) // 2
            bbox_h = bbox_r_y - bbox_l_y
            bbox_l_x = bbox_x_c - bbox_h // 2
            bbox_r_x = bbox_x_c + bbox_h // 2

            # Clip coordinates that cross the image borders
            bbox_l_y = np.clip(bbox_l_y, 0, orig_shape[0])
            bbox_r_y = np.clip(bbox_r_y, 0, orig_shape[0])
            bbox_l_x = np.clip(bbox_l_x, 0, orig_shape[1])
            bbox_r_x = np.clip(bbox_r_x, 0, orig_shape[1])

            # Convert the crop to the format and shape required by the recognition model
            cl_image = image[bbox_l_y:bbox_r_y, bbox_l_x:bbox_r_x, :]
            cl_image = cv2.resize(cl_image, CLASSIFICATION_SIZE)  # dsize is the second positional arg
            cl_image = cv2.cvtColor(cl_image, cv2.COLOR_BGR2GRAY)
            cl_image = ToTensor()(cl_image).unsqueeze(0)

            # Paint bbox and emotion prediction
            pred_emo = EMOTIONS_LIST[classification_model(cl_image).argmax(dim=1).item()]
            image = cv2.rectangle(image, (bbox_l_x, bbox_l_y), (bbox_r_x, bbox_r_y),
                                  color=(0, 255, 0), thickness=2)
            image = cv2.putText(image, pred_emo, (bbox_l_x, bbox_l_y - 10), cv2.FONT_HERSHEY_SIMPLEX,
                                color=(0, 0, 255), fontScale=1.1, thickness=2)
            fps = 1. / (time.time() - start)  # Count fps
            image = cv2.putText(image, 'FPS: ' + str(fps), (20, 20), cv2.FONT_HERSHEY_SIMPLEX,
                                color=(255, 255, 0), fontScale=0.5, thickness=2)
            cv2.imshow('image', image)
        else:
            cv2.imshow('image', image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def test(model, fetcher, conf_thres=1e-3, nms_thres=0.5):
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    seen = 0
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1')
    p, r, f1, mp, mr, mAP, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []
    pbar = tqdm(enumerate(fetcher), total=len(fetcher))
    for idx, (imgs, targets) in pbar:
        _, _, height, width = imgs.shape  # batch size, channels, height, width
        # Run model
        inf_out, train_out = model(imgs)  # inference and training outputs
        # Compute loss
        val_loss += compute_loss(train_out, targets, model).item()  # GIoU, obj, cls
        # Run NMS
        output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres)
        # Plot images with bounding boxes
        if idx == 0:
            show_batch(imgs, output)
        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1
            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue
            # Clip boxes to image bounds
            clip_coords(pred, (height, width))
            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]
                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height
                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):
                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break
                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue
                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)
                    # If iou > threshold and class is correct, mark as correct
                    if iou > 0.5 and m[bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])
            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct, pred[:, 4].cpu().numpy(), pred[:, 6].cpu().numpy(), tcls))
        pbar.set_description('loss: %8g' % (val_loss / (idx + 1)))
    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]
    # Sync stats across distributed workers
    if dist.is_initialized():
        for i in range(len(stats)):
            stat = torch.FloatTensor(stats[i]).to(device)
            ls = torch.IntTensor([len(stat)]).to(device)
            ls_list = [torch.IntTensor([0]).to(device) for _ in range(dist.get_world_size())]
            dist.all_gather(ls_list, ls)
            ls_list = [ls_item.item() for ls_item in ls_list]
            max_ls = max(ls_list)
            if len(stat) < max_ls:
                stat = torch.cat([stat, torch.zeros(max_ls - len(stat)).to(device)])
            stat_list = [torch.zeros(max_ls).to(device) for _ in range(dist.get_world_size())]
            dist.all_gather(stat_list, stat)
            stat_list = [stat_list[si][:ls_list[si]] for si in range(dist.get_world_size()) if ls_list[si] > 0]
            stat = torch.cat(stat_list)
            stats[i] = stat.cpu().numpy()
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, mAP, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=num_classes)  # number of targets per class
    else:
        nt = torch.zeros(1)
    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, mAP, mf1))
    # Print results per class
    for i, c in enumerate(ap_class):
        print(pf % (classes[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
    # Return results
    mAPs = np.zeros(num_classes) + mAP
    for i, c in enumerate(ap_class):
        mAPs[c] = ap[i]
    # return (mp, mr, mAP, mf1, *(loss / len(dataloader)).tolist()), mAPs
    return mAP
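# Note: clip_coords is called in test() above (and in validation_step below) but is not
# defined in this file. A minimal sketch, assuming it clamps xyxy boxes in place to the
# image bounds:
def clip_coords(boxes, img_shape):
    # boxes: torch tensor (n, >=4) in xyxy order; img_shape: (height, width)
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2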
def validation_step(self, opt, outputs, batch, batch_idx, epoch):
    imgs, targets, paths, shapes, pad = batch
    _, _, height, width = imgs.shape
    inf_out, train_out = outputs
    whwh = torch.Tensor([width, height, width, height]).to(imgs.device)
    losses = compute_loss(train_out, targets, self.model)[1][:3]  # GIoU, obj, cls
    output = non_max_suppression(inf_out,
                                 conf_thres=opt.conf_thres,
                                 iou_thres=opt.iou_thres,
                                 multi_label=self.calc_ni(batch_idx, epoch) > self.n_burn)
    # Statistics per image
    for si, pred in enumerate(output):
        labels = targets[targets[:, 0] == si, 1:]
        nl = len(labels)
        tcls = labels[:, 0].tolist() if nl else []  # target class
        self.seen += 1
        if pred is None:
            if nl:
                self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool),
                                   torch.Tensor(), torch.Tensor(), tcls))
            continue
        # Append to text file
        # with open('test.txt', 'a') as file:
        #     [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]
        # Clip boxes to image bounds
        clip_coords(pred, (height, width))
        # Assign all predictions as incorrect
        correct = torch.zeros(pred.shape[0], self.niou, dtype=torch.bool, device=imgs.device)
        if nl:
            detected = []  # target indices
            tcls_tensor = labels[:, 0]
            # target boxes
            tbox = xywh2xyxy(labels[:, 1:5]) * whwh
            # Per target class
            for cls in torch.unique(tcls_tensor):
                ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices
                # Search for detections
                if pi.shape[0]:
                    # Prediction to target ious
                    ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices
                    # Append detections
                    for j in (ious > self.iouv[0].to(ious.device)).nonzero():
                        d = ti[i[j]]  # detected target
                        if d not in detected:
                            detected.append(d)
                            correct[pi[j]] = ious[j] > self.iouv  # iou_thres is 1xn
                            if len(detected) == nl:  # all targets already located in image
                                break
        # Append statistics (correct, conf, pcls, tcls)
        self.stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
    return losses
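# Note: box_iou used by validation_step() is not defined here. The sketch below is the
# standard pairwise IoU on xyxy boxes (same contract as torchvision.ops.box_iou); an
# assumption about the helper, not its original source.
import torch

def box_iou(box1, box2):
    # box1: (n, 4), box2: (m, 4), both xyxy; returns an (n, m) IoU matrix
    def area(box):
        return (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1])
    # intersection: overlap of every box1 row with every box2 row
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) -
             torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area(box1)[:, None] + area(box2) - inter)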
def __init__(self,
             path,  # points to data/my_train_data.txt or data/my_val_data.txt
             # Output image size after preprocessing:
             # for the training set, the maximum size used during (multi-scale) training;
             # for the validation set, the final network input size
             img_size=416,
             batch_size=16,
             augment=False,  # True for the training set (enables augment_hsv), False for validation
             hyp=None,  # hyperparameter dict, includes the augmentation hyperparameters
             rect=False,  # whether to use rectangular training
             cache_images=False,  # whether to cache images in memory
             single_cls=False,
             pad=0.0,
             rank=-1):
    try:
        path = str(Path(path))
        # parent = str(Path(path).parent) + os.sep
        if os.path.isfile(path):  # file
            # read my_train/val_data.txt and collect the image path on each line
            with open(path, "r") as f:
                f = f.read().splitlines()
        else:
            raise Exception("%s does not exist" % path)
        # keep only images whose extension is in the supported list
        # img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
        self.img_files = [x for x in f if os.path.splitext(x)[-1].lower() in img_formats]
    except Exception as e:
        raise FileNotFoundError("Error loading data from {}. {}".format(path, e))

    # abort if the image list is empty
    n = len(self.img_files)
    assert n > 0, "No images found in %s. See %s" % (path, help_url)

    # batch index: assign every image to a batch
    bi = np.floor(np.arange(n) / batch_size).astype(np.int)
    nb = bi[-1] + 1  # number of batches

    self.n = n  # number of images
    self.batch = bi  # batch index of each image
    self.img_size = img_size  # output image size after preprocessing
    self.augment = augment  # whether augment_hsv is enabled
    self.hyp = hyp  # hyperparameter dict, includes the augmentation hyperparameters
    self.rect = rect  # whether to use rectangular training
    # note: enabling rect disables mosaic
    self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)

    # Define labels: derive the label path for every image, e.g.
    # (./my_yolo_dataset/train/images/2009_004012.jpg) -> (./my_yolo_dataset/train/labels/2009_004012.txt)
    self.label_files = [x.replace("images", "labels").replace(os.path.splitext(x)[-1], ".txt")
                        for x in self.img_files]

    # Read image shapes (wh)
    # check whether the data folder caches a .shapes file storing every image's width and height
    sp = path.replace(".txt", ".shapes")  # shapefile path
    try:
        with open(sp, "r") as f:  # read existing shapefile
            s = [x.split() for x in f.read().splitlines()]
            # if the number of rows (images) in the shapefile differs from the current image
            # count, treat it as a different dataset and rebuild the shapefile
            assert len(s) == n, "shapefile out of sync"
    except Exception as e:
        # print("read {} failed [{}], rebuild {}.".format(sp, e, sp))
        # read every image's size; tqdm shows progress, only in the first process
        if rank in [-1, 0]:
            image_files = tqdm(self.img_files, desc="Reading image shapes")
        else:
            image_files = self.img_files
        s = [exif_size(Image.open(f)) for f in image_files]
        # save all image shapes into the .shapes file
        np.savetxt(sp, s, fmt="%g")  # overwrite existing (if any)

    # record the original size of every image
    self.shapes = np.array(s, dtype=np.float64)

    # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
    # if True, training uses rectangles close to the original aspect ratio
    # (longest side = img_size) instead of img_size x img_size
    # note: enabling rect disables mosaic
    if self.rect:
        # Sort by aspect ratio
        s = self.shapes  # wh
        ar = s[:, 1] / s[:, 0]  # aspect ratio (height / width)
        # argsort returns the indices that sort the array ascending; sorting by aspect
        # ratio groups images of similar shape into the same batch
        irect = ar.argsort()
        # reorder images, labels and shapes accordingly
        self.img_files = [self.img_files[i] for i in irect]
        self.label_files = [self.label_files[i] for i in irect]
        self.shapes = s[irect]  # wh
        ar = ar[irect]

        # set training image shapes: compute one common shape per batch
        shapes = [[1, 1]] * nb  # nb: number of batches
        for i in range(nb):
            ari = ar[bi == i]  # bi: batch index
            # min and max aspect ratio within the i-th batch
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:  # h/w < 1 (w > h): set w to img_size
                shapes[i] = [maxi, 1]
            elif mini > 1:  # h/w > 1 (w < h): set h to img_size
                shapes[i] = [1, 1 / mini]
        # network input shape per batch, rounded up to a multiple of 32
        self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32. + pad).astype(np.int) * 32

    # Cache labels
    self.imgs = [None] * n  # n is the number of images
    # label: [class, x, y, w, h] where xywh are normalized
    self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
    extract_bounding_boxes, labels_loaded = False, False
    nm, nf, ne, nd = 0, 0, 0, 0  # number missing, found, empty, duplicate
    # separate cache names prevent mixing rect=False/True caches, which would corrupt the mAP
    # (when rect is True, self.img_files and self.labels are re-sorted)
    if rect is True:
        np_labels_path = str(Path(self.label_files[0]).parent) + ".rect.npy"  # saved labels in *.npy file
    else:
        np_labels_path = str(Path(self.label_files[0]).parent) + ".norect.npy"

    if os.path.isfile(np_labels_path):
        x = np.load(np_labels_path, allow_pickle=True)
        if len(x) == n:
            # if the cached label count equals the current image count, assume it is the
            # same dataset and read the labels from the cache
            self.labels = x
            labels_loaded = True

    # the progress bar is only shown in the first process
    if rank in [-1, 0]:
        pbar = tqdm(self.label_files)
    else:
        pbar = self.label_files

    # iterate over the label files
    for i, file in enumerate(pbar):
        if labels_loaded is True:
            # read from the cache if it exists
            l = self.labels[i]
        else:
            # read the label info from file
            try:
                with open(file, "r") as f:
                    # read every label line and split on whitespace
                    l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
            except Exception as e:
                print("An error occurred while loading the file {}: {}".format(file, e))
                nm += 1  # file missing
                continue

        # if the annotation is not empty
        if l.shape[0]:
            # every label row must contain five values [class, x, y, w, h]
            assert l.shape[1] == 5, "> 5 label columns: %s" % file
            assert (l >= 0).all(), "negative labels: %s" % file
            assert (l[:, 1:] <= 1).all(), "non-normalized or out of bounds coordinate labels: %s" % file

            # check every row for duplicates
            if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                nd += 1
            if single_cls:
                l[:, 0] = 0  # force dataset into single-class mode

            self.labels[i] = l
            nf += 1  # file found

            # Extract object detection boxes for a second stage classifier
            if extract_bounding_boxes:
                p = Path(self.img_files[i])
                img = cv2.imread(str(p))
                h, w = img.shape[:2]
                for j, x in enumerate(l):
                    f = "%s%sclassifier%s%g_%g_%s" % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                    if not os.path.exists(Path(f).parent):
                        os.makedirs(Path(f).parent)  # make new output folder

                    # convert normalized coordinates to absolute coordinates
                    # b: x, y, w, h
                    b = x[1:] * [w, h, w, h]  # box
                    # set both width and height to the larger of the two
                    b[2:] = b[2:].max()  # rectangle to square
                    # enlarge the cropped target
                    b[2:] = b[2:] * 1.3 + 30  # pad
                    # convert from x,y,w,h to xmin,ymin,xmax,ymax
                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)

                    # clip the bbox coordinates to the image
                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)
                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                    assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), "Failure extracting classifier boxes"
        else:
            ne += 1  # file empty

        # the progress bar is only shown in the first process
        if rank in [-1, 0]:
            # update the progress bar description
            pbar.desc = "Caching labels (%g found, %g missing, %g empty, %g duplicate, for %g images)" % (
                nf, nm, ne, nd, n)
    assert nf > 0, "No labels found in %s." % os.path.dirname(self.label_files[0]) + os.sep

    # if the labels are not cached as numpy yet and there are more than 1000 samples,
    # save them in numpy format for next time
    if not labels_loaded and n > 1000:
        print("Saving labels to %s for faster future loading" % np_labels_path)
        np.save(np_labels_path, self.labels)  # save for next time

    # Cache images into memory for faster training (Warning: large datasets may exceed system RAM)
    if cache_images:  # if training
        gb = 0  # gigabytes of cached images (RAM usage)
        if rank in [-1, 0]:
            pbar = tqdm(range(len(self.img_files)), desc="Caching images")
        else:
            pbar = range(len(self.img_files))

        self.img_hw0, self.img_hw = [None] * n, [None] * n
        for i in pbar:  # max 10k images
            self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized
            gb += self.imgs[i].nbytes  # track RAM usage of the cached images
            if rank in [-1, 0]:
                pbar.desc = "Caching images (%.1fGB)" % (gb / 1E9)

    # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
    detect_corrupted_images = False
    if detect_corrupted_images:
        from skimage import io  # conda install -c conda-forge scikit-image
        for file in tqdm(self.img_files, desc="Detecting corrupted images"):
            try:
                _ = io.imread(file)
            except Exception as e:
                print("Corrupted image detected: {}, {}".format(file, e))
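# Note: exif_size is used when rebuilding the .shapes cache above but is not defined in
# this file. Sketch of the usual helper: return the PIL image size, swapping width and
# height when the EXIF orientation tag says the image is stored rotated. An assumed
# implementation, not the original.
from PIL import ExifTags, Image

for _orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[_orientation] == 'Orientation':
        break

def exif_size(img):
    s = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[_orientation]
        if rotation in (6, 8):  # rotated 90 or 270 degrees
            s = (s[1], s[0])
    except Exception:
        pass  # no EXIF data; keep the reported size
    return s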
def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=True,
             image_weights=False, cache_images=False):
    """
    :param path: path to the file listing the training image IDs, e.g. 'data/train.txt'
    :param img_size: network input resolution, 416
    :param batch_size: 2
    :param augment: whether to apply data augmentation
    :param hyp: hyperparameters for data augmentation
    :param rect: whether to use rectangular training
    :param image_weights: False
    :param cache_images: True
    """
    path = str(Path(path))  # os-agnostic
    # read the contents of the train/val txt file
    with open(path, 'r') as f:
        self.img_files = [x.replace('/', os.sep) for x in f.read().splitlines()  # os-agnostic
                          if os.path.splitext(x)[-1].lower() in img_formats]

    n = len(self.img_files)  # number of images, e.g. 4807
    bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index [0 0 1 1 2 2 ...]
    nb = bi[-1] + 1  # number of batches, e.g. 2404
    assert n > 0, 'No images found in %s' % path
    self.n = n
    self.batch = bi  # batch index of image
    self.img_size = img_size
    self.augment = augment
    self.hyp = hyp
    self.image_weights = image_weights
    self.rect = False if image_weights else rect

    # Pair every image listed in train.txt with its annotation: replace the 'images'
    # directory with 'labels' and the .png/.jpg extension with .txt
    self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
                        for x in self.img_files]

    # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
    if self.rect:
        # Read image shapes
        sp = 'data' + os.sep + path.replace('.txt', '.shapes').split(os.sep)[-1]  # shapefile path
        try:
            with open(sp, 'r') as f:  # read existing shapefile
                s = [x.split() for x in f.read().splitlines()]
                assert len(s) == n, 'Shapefile out of sync'
        except:
            s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')]
            np.savetxt(sp, s, fmt='%g')  # overwrites existing (if any)

        # Sort by aspect ratio
        s = np.array(s, dtype=np.float64)
        ar = s[:, 1] / s[:, 0]  # aspect ratio
        irect = ar.argsort()  # renamed from 'i' to avoid shadowing the comprehension variable
        self.img_files = [self.img_files[i] for i in irect]
        self.label_files = [self.label_files[i] for i in irect]
        self.shapes = s[irect]
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]
        self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32.).astype(np.int) * 32

    # Preload labels (required for weighted CE training)
    self.imgs = [None] * n
    self.labels = [None] * n
    if augment or image_weights:  # cache labels for faster training
        self.labels = [np.zeros((0, 5))] * n
        extract_bounding_boxes = False
        pbar = tqdm(self.label_files, desc='Reading labels')
        nm, nf, ne = 0, 0, 0  # number missing, number found, number empty
        for i, file in enumerate(pbar):
            try:
                with open(file, 'r') as f:  # e.g. 'data\\labels\\train\\Inria_319.txt'
                    # one row per object, shape (num_objects, 5)
                    l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
            except:
                nm += 1
                # print('missing labels for image %s' % self.img_files[i])  # file missing
                continue
            if l.shape[0]:
                assert l.shape[1] == 5, '> 5 label columns: %s' % file
                assert (l >= 0).all(), 'negative labels: %s' % file
                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                self.labels[i] = l
                nf += 1  # file found

                # Extract object detection boxes for a second stage classifier
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w, _ = img.shape
                    for j, x in enumerate(l):
                        f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(f).parent):
                            os.makedirs(Path(f).parent)  # make new output folder
                        box = xywh2xyxy(x[1:].reshape(-1, 4)).ravel()
                        b = np.clip(box, 0, 1)  # clip boxes outside of image
                        ret_val = cv2.imwrite(f, img[int(b[1] * h):int(b[3] * h),
                                                     int(b[0] * w):int(b[2] * w)])
                        assert ret_val, 'Failure extracting classifier boxes'
            else:
                ne += 1  # file empty
            pbar.desc = 'Reading labels (%g found, %g missing, %g empty for %g images)' % (nf, nm, ne, n)
        assert nf > 0, 'No labels found. Recommend correcting image and label paths.'

    # Cache images into memory for faster training (~5GB)
    # cv2.imread is slow, so pre-read at most 10000 images (about 5GB) to speed up training
    if cache_images and augment:  # if training
        for i in tqdm(range(min(len(self.img_files), 10000)), desc='Reading images'):  # max 10k images
            img_path = self.img_files[i]
            img = cv2.imread(img_path)  # BGR
            assert img is not None, 'Image Not Found ' + img_path
            r = self.img_size / max(img.shape)  # ratio that scales the longest side to img_size
            if self.augment and r < 1:  # if training (NOT testing), downsize to inference shape
                h, w, _ = img.shape
                img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR)  # or INTER_AREA
            self.imgs[i] = img  # store the proportionally resized image

    # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
    # sanity-check that every image was downloaded intact, printing any that fail to load
    detect_corrupted_images = False
    if detect_corrupted_images:
        from skimage import io  # conda install -c conda-forge scikit-image
        for file in tqdm(self.img_files, desc='Detecting corrupted images'):
            try:
                _ = io.imread(file)
            except:
                print('Corrupted image detected: %s' % file)
def validate(*, dataloader, model, device, step=-1, bbox_all=False, debug_mode):
    # result = open("logs/result.txt", "w")
    with torch.no_grad():
        t_start = time.time()
        conf_thres, nms_thres, iou_thres = model.get_threshs()
        width, height = model.img_size()
        model.eval()
        print("Calculating mAP - Model in evaluation mode")
        n_images = len(dataloader.dataset)
        mAPs = []
        mR = []
        mP = []
        for batch_i, (img_uris, imgs, targets) in enumerate(tqdm(dataloader, desc='Computing mAP')):
            imgs = imgs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            # output, _, _, _ = model(imgs)
            output = model(imgs)
            for sample_i, (labels, detections) in enumerate(zip(targets, output)):
                detections = detections[detections[:, 4] > conf_thres]
                if detections.size()[0] == 0:
                    predictions = torch.tensor([])
                else:
                    predictions = torch.argmax(detections[:, 5:], dim=1)
                # From (center x, center y, width, height) to (x1, y1, x2, y2)
                box_corner = torch.zeros((detections.shape[0], 4), device=detections.device)
                xy = detections[:, 0:2]
                wh = detections[:, 2:4] / 2
                box_corner[:, 0:2] = xy - wh
                box_corner[:, 2:4] = xy + wh
                probabilities = detections[:, 4]
                nms_indices = nms(box_corner, probabilities, nms_thres)
                box_corner = box_corner[nms_indices]
                probabilities = probabilities[nms_indices]
                predictions = predictions[nms_indices]
                if nms_indices.shape[0] == 0:  # there should always be at least one label
                    continue
                # Get detections sorted by decreasing confidence scores
                _, inds = torch.sort(-probabilities)
                box_corner = box_corner[inds]
                probabilities = probabilities[inds]
                predictions = predictions[inds]
                labels = labels[(labels[:, 1:5] <= 0).sum(dim=1) == 0]  # remove the 0-padding added by the dataloader
                # Extract target boxes as (x1, y1, x2, y2)
                target_boxes = xywh2xyxy(labels[:, 1:5])
                target_boxes[:, (0, 2)] *= width
                target_boxes[:, (1, 3)] *= height
                detected = torch.zeros(target_boxes.shape[0], device=target_boxes.device, dtype=torch.uint8)
                correct = torch.zeros(nms_indices.shape[0], device=box_corner.device, dtype=torch.uint8)
                # 0th dim is the detection (repeated along the 1st dim); 2nd dim is the coord
                ious = bbox_iou(box_corner.unsqueeze(1).expand(-1, target_boxes.shape[0], -1),
                                target_boxes.unsqueeze(0).expand(box_corner.shape[0], -1, -1))
                # ious is 2d -- 0th dim is the detected box, 1st dim is the target box, value is iou
                # skip images without labels
                if [] in ious.data.tolist():
                    continue
                best_is = torch.argmax(ious, dim=1)
                # TODO fix for multi-class. Need to use predictions somehow?
                for i, iou in enumerate(ious):
                    best_i = best_is[i]
                    if ious[i, best_i] > iou_thres and detected[best_i] == 0:
                        correct[i] = 1
                        detected[best_i] = 1

                # Compute Average Precision (AP) per class
                ap, r, p = average_precision(tp=correct, conf=probabilities, n_gt=labels.shape[0])

                # Compute mean AP across all classes in this image, and append to image list
                mAPs.append(ap)
                mR.append(r)
                mP.append(p)

                if bbox_all or sample_i < 2:  # log the first two images in every batch
                    img_filepath = img_uris[sample_i]
                    if img_filepath is None:
                        print("NULL image filepath for image uri: {uri}".format(uri=img_uris[sample_i]))
                    orig_img = Image.open(img_filepath)
                    # draw = ImageDraw.Draw(img_with_boxes)
                    w, h = orig_img.size
                    pad_h, pad_w, scale_factor = calculate_padding(h, w, height, width)
                    detect_box = copy.deepcopy(box_corner)
                    box_corner /= scale_factor
                    box_corner[:, (0, 2)] -= pad_w
                    box_corner[:, (1, 3)] -= pad_h
                    if debug_mode:
                        pil_img = transforms.ToPILImage()(imgs.squeeze())
                        # get the image's name
                        img_path = img_uris[0]
                        img_name = "_".join(map(str, img_path.split("_")[-5:]))
                        tmp_path = os.path.join(visualization_tmp_path, img_name[:-4] + "_predicted_vis.jpg")
                        vis_label = add_class_dimension_to_labels(detect_box)
                        visualize_and_save_to_local(pil_img, vis_label, tmp_path, box_color="red")
                        print("Prediction visualization uploaded")

        # Means of all images
        mean_mAP = torch.tensor(mAPs, dtype=torch.float).mean().item()
        mean_R = torch.tensor(mR, dtype=torch.float).mean().item()
        mean_P = torch.tensor(mP, dtype=torch.float).mean().item()
        dt = time.time() - t_start
        print('mAP: {0:5.2%}, Recall: {1:5.2%}, Precision: {2:5.2%}'.format(mean_mAP, mean_R, mean_P))
        # result.write(str(1 - mean_mAP))
        # result.close()
        return mean_mAP, mean_R, mean_P, dt / (n_images + 1e-12)
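# Note: average_precision as called in validate() above (per image, single class) is not
# defined in this file. A plausible sketch, assuming it sorts detections by confidence,
# accumulates TP/FP, and integrates the precision-recall curve; the argument names and
# the (ap, recall, precision) return follow the call site, but the body is an assumption.
import numpy as np

def average_precision(tp, conf, n_gt):
    # tp: 1/0 per detection; conf: confidence per detection; n_gt: ground-truth count
    tp = tp.cpu().numpy() if hasattr(tp, 'cpu') else np.asarray(tp)
    conf = conf.cpu().numpy() if hasattr(conf, 'cpu') else np.asarray(conf)
    if n_gt == 0 or len(tp) == 0:
        return 0.0, 0.0, 0.0
    order = np.argsort(-conf)  # sort by decreasing confidence
    tp = tp[order].astype(np.float64)
    tpc = np.cumsum(tp)  # cumulative true positives
    fpc = np.cumsum(1 - tp)  # cumulative false positives
    recall = tpc / n_gt
    precision = tpc / (tpc + fpc + 1e-16)
    # integrate precision over recall (trapezoid over the PR points)
    ap = np.trapz(np.concatenate(([1.0], precision)), np.concatenate(([0.0], recall)))
    return float(ap), float(recall[-1]), float(precision[-1])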
def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False,
             image_weights=False, cache_images=False, single_cls=False, stride=32, pad=0.0):
    try:
        f = []  # image files
        for p in path if isinstance(path, list) else [path]:
            p = str(Path(p))  # os-agnostic
            parent = str(Path(p).parent) + os.sep
            if os.path.isfile(p):  # file
                with open(p, 'r') as t:
                    t = t.read().splitlines()
                    f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
            elif os.path.isdir(p):  # folder
                f += glob.iglob(p + os.sep + '*.*')
            else:
                raise Exception('%s does not exist' % p)
        self.img_files = [x.replace('/', os.sep) for x in f
                          if os.path.splitext(x)[-1].lower() in img_formats]
    except Exception as e:
        raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))

    n = len(self.img_files)
    assert n > 0, 'No images found in %s. See %s' % (path, help_url)
    bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
    nb = bi[-1] + 1  # number of batches

    self.n = n  # number of images
    self.batch = bi  # batch index of image
    self.img_size = img_size
    self.augment = augment
    self.hyp = hyp
    self.image_weights = image_weights
    self.rect = False if image_weights else rect
    self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
    self.mosaic_border = [-img_size // 2, -img_size // 2]
    self.stride = stride

    # Define labels
    self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
                        for x in self.img_files]

    # Check cache
    cache_path = str(Path(self.label_files[0]).parent) + '.cache'  # cached labels
    if os.path.isfile(cache_path):
        cache = torch.load(cache_path)  # load
        if cache['hash'] != get_hash(self.label_files + self.img_files):  # dataset changed
            cache = self.cache_labels(cache_path)  # re-cache
    else:
        cache = self.cache_labels(cache_path)  # cache

    # Get labels
    labels, shapes = zip(*[cache[x] for x in self.img_files])
    self.shapes = np.array(shapes, dtype=np.float64)
    self.labels = list(labels)

    # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
    if self.rect:
        # Sort by aspect ratio
        s = self.shapes  # wh
        ar = s[:, 1] / s[:, 0]  # aspect ratio
        irect = ar.argsort()
        self.img_files = [self.img_files[i] for i in irect]
        self.label_files = [self.label_files[i] for i in irect]
        self.labels = [self.labels[i] for i in irect]
        self.shapes = s[irect]  # wh
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]
        self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride

    # Cache labels
    create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
    nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
    pbar = tqdm(self.label_files)
    for i, file in enumerate(pbar):
        l = self.labels[i]  # label
        if l.shape[0]:
            assert l.shape[1] == 5, '> 5 label columns: %s' % file
            assert (l >= 0).all(), 'negative labels: %s' % file
            assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
            if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                nd += 1  # print('WARNING: duplicate rows in %s' % self.label_files[i])
            if single_cls:
                l[:, 0] = 0  # force dataset into single-class mode
            self.labels[i] = l
            nf += 1  # file found

            # Create subdataset (a smaller dataset)
            if create_datasubset and ns < 1E4:
                if ns == 0:
                    create_folder(path='./datasubset')
                    os.makedirs('./datasubset/images')
                exclude_classes = 43
                if exclude_classes not in l[:, 0]:
                    ns += 1
                    # shutil.copy(src=self.img_files[i], dst='./datasubset/images/')  # copy image
                    with open('./datasubset/images.txt', 'a') as f:
                        f.write(self.img_files[i] + '\n')

            # Extract object detection boxes for a second stage classifier
            if extract_bounding_boxes:
                p = Path(self.img_files[i])
                img = cv2.imread(str(p))
                h, w = img.shape[:2]
                for j, x in enumerate(l):
                    f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                    if not os.path.exists(Path(f).parent):
                        os.makedirs(Path(f).parent)  # make new output folder
                    b = x[1:] * [w, h, w, h]  # box
                    b[2:] = b[2:].max()  # rectangle to square
                    b[2:] = b[2:] * 1.3 + 30  # pad
                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                    assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
        else:
            ne += 1  # print('empty labels for image %s' % self.img_files[i])  # file empty
            # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i]))  # remove

        pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
            cache_path, nf, nm, ne, nd, n)

    if nf == 0:
        s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
        print(s)
        assert not augment, '%s. Cannot train without labels.' % s

    # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
    self.imgs = [None] * n
    if cache_images:
        gb = 0  # Gigabytes of cached images
        pbar = tqdm(range(len(self.img_files)), desc='Caching images')
        self.img_hw0, self.img_hw = [None] * n, [None] * n
        for i in pbar:  # max 10k images
            self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized
            gb += self.imgs[i].nbytes
            pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
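# Note: get_hash guards the label cache in the loader above but is not defined in this
# file. In ultralytics-style loaders it is typically just the summed byte size of the
# label and image files, so any change invalidates the cache; a minimal sketch under
# that assumption:
import os

def get_hash(files):
    # cheap change-detector: total size of all existing files in the list
    return sum(os.path.getsize(f) for f in files if os.path.isfile(f))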
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = './yolov3.onnx'
    engine_file_path = "yolov3.trt"
    data_path = "./data/unrel.data"
    data = parse_data_cfg(data_path)
    nc = int(data['classes'])  # number of classes
    path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names
    iouv = torch.linspace(0.5, 0.95, 1, dtype=torch.float32)  # iou vector for mAP@0.5:0.95
    niou = 1
    conf_thres = 0.001
    iou_thres = 0.6
    verbose = True

    # Generate custom dataloader
    img_size = 448  # copied from the PyTorch source
    batch_size = 16
    dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True)
    batch_size = min(batch_size, len(dataset))
    dataloader = data_loader(dataset, batch_size, img_size)

    # Output shapes expected by the post-processor
    output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)]

    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@0.5', 'F1')
        p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
        pbar = tqdm.tqdm(dataloader, desc=s)
        stats, ap, ap_class = [], [], []
        seen = 0
        for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar):
            imgs = imgs.astype(np.float32) / 255.0
            nb, _, height, width = imgs.shape  # batch size, channels, height, width
            whwh = np.array([width, height, width, height])
            inputs[0].host = imgs
            postprocessor_args = {
                # A list of 3 three-dimensional tuples for the YOLO masks
                "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                # A list of 9 two-dimensional tuples for the YOLO anchors
                "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                                 (59, 119), (116, 90), (156, 198), (373, 326)],
                "num_classes": 37,
                "stride": [32, 16, 8]
            }
            postprocessor = PostprocessYOLO(**postprocessor_args)

            # Do layers before yolo
            t = time.time()
            trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs,
                                                 outputs=outputs, stream=stream)
            trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
            trt_outputs = [np.ascontiguousarray(
                otpt[:, :, :int(imgs.shape[2] * (2 ** i) / 32), :int(imgs.shape[3] * (2 ** i) / 32)],
                dtype=np.float32) for i, otpt in enumerate(trt_outputs)]
            output_list = postprocessor.process(trt_outputs)
            t0 += time.time() - t
            inf_out = torch.cat(output_list, 1)
            t = time.time()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres)  # nms
            t1 += time.time() - t

            # Statistics per image
            for si, pred in enumerate(output):
                labels = targets[targets[:, 0] == si, 1:]
                nl = len(labels)
                tcls = labels[:, 0].tolist() if nl else []  # target class
                seen += 1
                if pred is None:
                    if nl:
                        stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                      torch.Tensor(), torch.Tensor(), tcls))
                    continue
                # Assign all predictions as incorrect
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
                if nl:
                    detected = []  # target indices
                    tcls_tensor = labels[:, 0]
                    # target boxes
                    tbox = xywh2xyxy(labels[:, 1:5]) * whwh
                    tbox = tbox.type(torch.float32)
                    # Per target class
                    for cls in torch.unique(tcls_tensor):
                        ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                        pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices
                        # Search for detections
                        if pi.shape[0]:
                            # Prediction to target ious
                            ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices
                            # Append detections
                            for j in (ious > iouv[0]).nonzero():
                                d = ti[i[j]]  # detected target
                                if d not in detected:
                                    detected.append(d)
                                    correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                    if len(detected) == nl:  # all targets already located in image
                                        break
                # Append statistics (correct, conf, pcls, tcls)
                stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

            # Plot images
            if batch_i < 1:
                f = 'test_batch%g_gt.jpg' % batch_i  # filename
                plot_images(imgs, targets, paths=paths, names=names, fname=f)  # ground truth
                f = 'test_batch%g_pred.jpg' % batch_i
                plot_images(imgs, output_to_target(output, width, height),
                            paths=paths, names=names, fname=f)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        if niou > 1:
            p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(1), ap[:, 0]  # [P, R, mAP@0.5:0.95, mAP@0.5]
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Print speeds
    if verbose:
        t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (img_size, img_size, batch_size)
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
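# Note: a quick sanity check (not part of the original code) relating the hard-coded
# output_shapes above to the input resolution: each YOLO head predicts on a grid of
# img_size / stride cells, so a 448x448 input yields 14x14, 28x28 and 56x56 maps.
img_size = 448
for stride in (32, 16, 8):
    assert img_size % stride == 0
    print('stride %2d -> %2dx%2d grid' % (stride, img_size // stride, img_size // stride))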
def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False,
             image_weights=False, cache_images=False, single_cls=False, pad=0.0):
    try:
        path = str(Path(path))  # os-agnostic
        parent = str(Path(path).parent) + os.sep
        if os.path.isfile(path):  # file
            with open(path, 'r') as f:
                f = f.read().splitlines()
                f = [x.replace('./', parent) if x.startswith('./') else x for x in f]  # local to global path
        elif os.path.isdir(path):  # folder
            f = glob.iglob(path + os.sep + '*.*')
        else:
            raise Exception('%s does not exist' % path)
        self.img_files = [x.replace('/', os.sep) for x in f
                          if os.path.splitext(x)[-1].lower() in img_formats]
    except:
        raise Exception('Error loading data from %s. See %s' % (path, help_url))

    n = len(self.img_files)
    assert n > 0, 'No images found in %s. See %s' % (path, help_url)
    bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
    nb = bi[-1] + 1  # number of batches

    self.n = n  # number of images
    self.batch = bi  # batch index of image
    self.img_size = img_size
    self.augment = augment
    self.hyp = hyp
    self.image_weights = image_weights
    self.rect = False if image_weights else rect
    self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)

    # Define labels
    self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
                        for x in self.img_files]

    # Read image shapes (wh)
    sp = path.replace('.txt', '') + '.shapes'  # shapefile path
    try:
        with open(sp, 'r') as f:  # read existing shapefile
            s = [x.split() for x in f.read().splitlines()]
            assert len(s) == n, 'Shapefile out of sync'
    except:
        s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')]
        np.savetxt(sp, s, fmt='%g')  # overwrites existing (if any)
    self.shapes = np.array(s, dtype=np.float64)

    # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
    if self.rect:
        # Sort by aspect ratio
        s = self.shapes  # wh
        ar = s[:, 1] / s[:, 0]  # aspect ratio
        irect = ar.argsort()
        self.img_files = [self.img_files[i] for i in irect]
        self.label_files = [self.label_files[i] for i in irect]
        self.shapes = s[irect]  # wh
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]
        self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32. + pad).astype(np.int) * 32

    # Cache labels
    self.imgs = [None] * n
    self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
    create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
    nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
    np_labels_path = str(Path(self.label_files[0]).parent) + '.npy'  # saved labels in *.npy file
    if os.path.isfile(np_labels_path):
        s = np_labels_path  # print string
        x = np.load(np_labels_path, allow_pickle=True)
        if len(x) == n:
            self.labels = x
            labels_loaded = True
    else:
        s = path.replace('images', 'labels')

    pbar = tqdm(self.label_files)
    for i, file in enumerate(pbar):
        if labels_loaded:
            l = self.labels[i]
            # np.savetxt(file, l, '%g')  # save *.txt from *.npy file
        else:
            try:
                with open(file, 'r') as f:
                    l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
            except:
                nm += 1  # print('missing labels for image %s' % self.img_files[i])  # file missing
                continue

        if l.shape[0]:
            assert l.shape[1] == 5, '> 5 label columns: %s' % file
            assert (l >= 0).all(), 'negative labels: %s' % file
            assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
            if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                nd += 1  # print('WARNING: duplicate rows in %s' % self.label_files[i])
            if single_cls:
                l[:, 0] = 0  # force dataset into single-class mode
            self.labels[i] = l
            nf += 1  # file found

            # Create subdataset (a smaller dataset)
            if create_datasubset and ns < 1E4:
                if ns == 0:
                    create_folder(path='./datasubset')
                    os.makedirs('./datasubset/images')
                exclude_classes = 43
                if exclude_classes not in l[:, 0]:
                    ns += 1
                    # shutil.copy(src=self.img_files[i], dst='./datasubset/images/')  # copy image
                    with open('./datasubset/images.txt', 'a') as f:
                        f.write(self.img_files[i] + '\n')

            # Extract object detection boxes for a second stage classifier
            if extract_bounding_boxes:
                p = Path(self.img_files[i])
                img = cv2.imread(str(p))
                h, w = img.shape[:2]
                for j, x in enumerate(l):
                    f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                    if not os.path.exists(Path(f).parent):
                        os.makedirs(Path(f).parent)  # make new output folder
                    b = x[1:] * [w, h, w, h]  # box
                    b[2:] = b[2:].max()  # rectangle to square
                    b[2:] = b[2:] * 1.3 + 30  # pad
                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                    assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
        else:
            ne += 1  # print('empty labels for image %s' % self.img_files[i])  # file empty
            # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i]))  # remove

        pbar.desc = 'Caching labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
            s, nf, nm, ne, nd, n)
    assert nf > 0 or n == 20288, 'No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)

    if not labels_loaded and n > 1000:
        print('Saving labels to %s for faster future loading' % np_labels_path)
        np.save(np_labels_path, self.labels)  # save for next time

    # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
    if cache_images:  # if training
        gb = 0  # Gigabytes of cached images
        pbar = tqdm(range(len(self.img_files)), desc='Caching images')
        self.img_hw0, self.img_hw = [None] * n, [None] * n
        for i in pbar:  # max 10k images
            self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized
            gb += self.imgs[i].nbytes
            pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)

    # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
    detect_corrupted_images = False
    if detect_corrupted_images:
        from skimage import io  # conda install -c conda-forge scikit-image
        for file in tqdm(self.img_files, desc='Detecting corrupted images'):
            try:
                _ = io.imread(file)
            except:
                print('Corrupted image detected: %s' % file)
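# Note: load_image(self, i) is used for image caching above but is not defined in this
# file. Sketch of the usual helper: read one image with OpenCV, scale its longest side
# to self.img_size, and return the image plus original/resized (h, w); an assumed
# implementation matching the 3-tuple unpacking at the call sites above.
import cv2

def load_image(self, index):
    img = self.imgs[index]
    if img is not None:  # already cached
        return img, self.img_hw0[index], self.img_hw[index]
    path = self.img_files[index]
    img = cv2.imread(path)  # BGR
    assert img is not None, 'Image Not Found ' + path
    h0, w0 = img.shape[:2]  # original hw
    r = self.img_size / max(h0, w0)  # resize longest side to img_size
    if r != 1:
        interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
        img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
    return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized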
def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False,
             image_weights=False, cache_labels=False, cache_images=False):
    path = str(Path(path))  # os-agnostic absolute path to train.txt
    assert os.path.isfile(path), 'File not found %s. ' % path
    with open(path, 'r') as f:
        self.img_files = [x.replace('/', os.sep)  # os-agnostic list of absolute image paths
                          for x in f.read().splitlines()
                          if os.path.splitext(x)[-1].lower() in img_formats]

    n = len(self.img_files)  # number of images
    # batch index: number the images in batches of size batch_size (len(bi) == n)
    bi = np.floor(np.arange(n) / batch_size).astype(np.int)
    nb = bi[-1] + 1  # total number of batches
    assert n > 0, 'No images found in %s' % path

    self.n = n
    self.batch = bi  # batch index of image
    self.img_size = img_size
    self.augment = augment
    self.hyp = hyp
    self.image_weights = image_weights
    self.rect = False if image_weights else rect

    # Define labels: list of absolute label txt paths
    self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
                        for x in self.img_files]

    # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
    # like inference, non-square training is supported (best kept in sync)
    # if self.rect:
    #     # Read image shapes
    #     sp = path.replace('.txt', '.shapes')  # shapefile path
    #     try:
    #         with open(sp, 'r') as f:  # read existing shapefile
    #             s = [x.split() for x in f.read().splitlines()]
    #             assert len(s) == n, 'Shapefile out of sync'
    #     except:
    #         s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')]
    #         np.savetxt(sp, s, fmt='%g')  # overwrites existing (if any)
    #     # Sort by aspect ratio
    #     s = np.array(s, dtype=np.float64)
    #     ar = s[:, 1] / s[:, 0]  # aspect ratio
    #     i = ar.argsort()
    #     self.img_files = [self.img_files[i] for i in i]
    #     self.label_files = [self.label_files[i] for i in i]
    #     self.shapes = s[i]
    #     ar = ar[i]
    #     # Set training image shapes
    #     shapes = [[1, 1]] * nb
    #     for i in range(nb):
    #         ari = ar[bi == i]
    #         mini, maxi = ari.min(), ari.max()
    #         if maxi < 1:
    #             shapes[i] = [maxi, 1]
    #         elif mini > 1:
    #             shapes[i] = [1, 1 / mini]
    #     self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32.).astype(np.int) * 32

    # Preload labels (required for weighted CE training)
    self.imgs = [None] * n
    self.labels = [None] * n
    if cache_labels or image_weights:  # cache labels for faster training
        self.labels = [np.zeros((0, 6))] * n  # placeholder matches the 6-column rotated-label format
        extract_bounding_boxes = False
        create_datasubset = False
        pbar = tqdm(self.label_files, desc='Reading labels')  # tqdm object shows progress while loading labels
        nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
        for i, file in enumerate(pbar):
            try:
                with open(file, 'r') as f:
                    # l is the info matrix of the current label file, shape (num_boxes, 6)
                    # 6 = class, cx, cy, w, h, angle
                    l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
            except:
                nm += 1  # print('missing labels for image %s' % self.img_files[i])  # file missing
                continue
            if l.shape[0]:
                # reject malformed labels: 1. wrong row length 2. negative values
                # 3. coordinates not normalized (yolo format) 4. angle out of bounds
                assert l.shape[1] == 6, 'labels require 6 columns (class, x, y, w, h, angle): %s' % file
                assert (l[:, 1:-1] >= 0).all(), 'negative labels: %s' % file
                assert (l[:, 1:-1] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                assert (l[:, 5] < math.pi / 2).all() and (l[:, 5] > -math.pi / 2).all(), \
                    'out of angle bounds (-0.5pi, 0.5pi)'
                if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                    nd += 1  # print('WARNING: duplicate rows in %s' % self.label_files[i])
                self.labels[i] = l
                nf += 1  # file found

                # Create subdataset (a smaller dataset)
                if create_datasubset and ns < 1E4:
                    if ns == 0:
                        create_folder(path='./datasubset')
                        os.makedirs('./datasubset/images')
                    exclude_classes = 43
                    if exclude_classes not in l[:, 0]:
                        ns += 1
                        # shutil.copy(src=self.img_files[i], dst='./datasubset/images/')  # copy image
                        with open('./datasubset/images.txt', 'a') as f:
                            f.write(self.img_files[i] + '\n')

                # Extract object detection boxes for a second stage classifier (rarely useful)
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w, _ = img.shape
                    for j, x in enumerate(l):
                        f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(f).parent):
                            os.makedirs(Path(f).parent)  # make new output folder
                        box = xywh2xyxy(x[1:].reshape(-1, 4) * np.array([1, 1, 1.5, 1.5])).ravel()
                        b = np.clip(box, 0, 1)  # clip boxes outside of image
                        ret_val = cv2.imwrite(f, img[int(b[1] * h):int(b[3] * h),
                                                     int(b[0] * w):int(b[2] * w)])
                        assert ret_val, 'Failure extracting classifier boxes'
            else:
                ne += 1  # file empty
                # print('empty labels for image %s' % self.img_files[i])

            pbar.desc = 'Caching labels (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
                nf, nm, ne, nd, n)
        assert nf > 0, 'No labels found. Recommend correcting image and label paths.'

    # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
    if cache_images:  # if training
        gb = 0  # Gigabytes of cached images
        pbar = tqdm(range(len(self.img_files)), desc='Caching images')
        for i in pbar:  # max 10k images
            self.imgs[i] = load_image(self, i)
            gb += self.imgs[i].nbytes
            pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)

    # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
    detect_corrupted_images = False
    if detect_corrupted_images:
        from skimage import io  # conda install -c conda-forge scikit-image
        for file in tqdm(self.img_files, desc='Detecting corrupted images'):
            try:
                _ = io.imread(file)
            except:
                print('Corrupted image detected: %s' % file)
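# Note: a hypothetical usage sketch for the rotated-box loader above. The class name
# LoadImagesAndLabelsRotated and the file layout are assumptions for illustration; only
# the 6-column (class, x, y, w, h, angle) label format comes from the code above.
if __name__ == '__main__':
    dataset = LoadImagesAndLabelsRotated('data/train.txt',  # hypothetical list of image paths
                                         img_size=416,
                                         batch_size=16,
                                         cache_labels=True)
    print('%d images, %d label files' % (dataset.n, len(dataset.label_files)))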