def postprocess(self, pred, img):
    """Filter raw proposals by confidence, run NMS and rescale the survivors.

    Args:
        pred: Raw network output. Column 4 of the last axis is compared with
            ``self.opt.conf_thres`` (presumably the object confidence of a
            (batch, proposals, features) tensor -- confirm).
        img: Reference image; only ``img.shape`` is read, as the target shape
            handed to ``scale_coords``.

    Returns:
        ``(dets[:, :5], dets[:, 6:])`` when at least one proposal survives the
        confidence filter, otherwise ``([], [])``.
    """
    opt = self.opt

    # Reject proposals on the basis of the object confidence score.
    confident = pred[pred[:, :, 4] > opt.conf_thres]
    if len(confident) == 0:
        return [], []

    # Final proposals: bounding box information plus the trailing columns.
    nms_result = non_max_suppression(confident.unsqueeze(0), opt.conf_thres, opt.nms_thres)
    dets = nms_result[0].cpu()

    # Change the detection scale. The rounded tensor returned here is
    # discarded, so only what scale_coords does to its argument is kept.
    scale_coords(opt.img_size, dets[:, :4], img.shape).round()

    # Detections are (x1, y1, x2, y2, object_conf, class_score, class_pred);
    # the original comments describe class_pred as the embeddings.
    return dets[:, :5], dets[:, 6:]
def forward_one(model, bgr_mat, checked_imgsz, device, half, opt):
    """Run the detector on one BGR image and return boxes in its pixel frame.

    Args:
        model: Callable detector, invoked as ``model(img, augment=...)``; the
            first element of its result is passed to NMS.
        bgr_mat: Source image as an H x W x C array in BGR channel order.
        checked_imgsz: Target size forwarded to ``letterbox`` as ``new_shape``.
        device: Torch device the input tensor is moved to.
        half: When true the input is converted to fp16, otherwise fp32.
        opt: Options object; ``augment``, ``conf_thres``, ``iou_thres``,
            ``classes`` and ``agnostic_nms`` are read.

    Returns:
        The list returned by ``non_max_suppression``; every non-empty entry
        has its first four columns rescaled to ``bgr_mat.shape`` and rounded.
    """
    img = letterbox(bgr_mat, new_shape=checked_imgsz)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR -> RGB, HWC -> CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Pure inference: do not record an autograd graph for the forward pass.
    with torch.no_grad():
        pred = model(img, augment=opt.augment)[0]

    # Apply NMS
    pred = non_max_suppression(pred,
                               opt.conf_thres,
                               opt.iou_thres,
                               classes=opt.classes,
                               agnostic=opt.agnostic_nms)

    # Map the boxes of every image back onto the original image.
    for det in pred:
        if det is not None and len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], bgr_mat.shape).round()
    return pred
def detect(config, save_img=False):
    """Run the detector over every item of ``config.SOURCE``.

    Args:
        config: Settings object; ``WEIGHTS``, ``IMG_SIZE``, ``SOURCE``,
            ``DEVICE``, ``AUGMENT``, ``NMS_CONF_THR``, ``NMS_IOU_THR``,
            ``CLASSES`` and ``AGNOSTIC_NMS`` are read.
        save_img: Unused; kept for interface compatibility.

    Returns:
        ``(all_path, all_score, all_bboxex)``: three lists with one entry per
        dataset item. Each score/bbox entry is a one-element list wrapping the
        per-item list of confidences / ``[x1, y1, x2, y2]`` numpy arrays.
    """
    weights = config.WEIGHTS
    source = config.SOURCE

    # Initialize
    device = torch_utils.select_device(config.DEVICE)

    # Load model.
    # NOTE(review): torch.load unpickles the checkpoint; only load trusted files.
    model = torch.load(weights, map_location=device)['model'].to(device).float().eval()
    dataset = LoadImages(source, img_size=config.IMG_SIZE)

    all_path = []
    all_bboxex = []
    all_score = []
    for path, img, im0s, _ in dataset:
        print(im0s.shape)
        img = torch.from_numpy(img).to(device).float()  # uint8 to fp32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference without autograd bookkeeping, so the confidence tensors
        # collected below do not hold on to a computation graph.
        with torch.no_grad():
            pred = model(img, augment=config.AUGMENT)[0]
        pred = non_max_suppression(pred,
                                   config.NMS_CONF_THR,
                                   config.NMS_IOU_THR,
                                   classes=config.CLASSES,
                                   agnostic=config.AGNOSTIC_NMS)

        bboxes = []
        score = []
        # Process detections (one entry of ``pred`` per image in the batch).
        for det in pred:
            if det is None or not len(det):
                continue
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0s.shape).round()
            for *xyxy, conf, _cls in det:
                # Corner coordinates in the pixel frame of the original image.
                bboxes.append(torch.tensor(xyxy).view(-1).numpy())
                score.append(conf)

        # NOTE(review): the original indentation was lost; this assumes the
        # per-item lists are appended once per dataset item, even when nothing
        # was detected -- confirm against callers.
        all_path.append(path)
        all_score.append([score])
        all_bboxex.append([bboxes])
    return all_path, all_score, all_bboxex
def rescale(self, pred, processed_image, ref_image):
    """Map detection boxes from the network input frame to a reference image.

    Args:
        pred: Iterable of per-image detections; entries may be ``None`` or
            empty, in which case they are passed through untouched.
        processed_image: Network input; ``shape[2:]`` is used as the source
            size.
        ref_image: Image whose ``shape`` is the target size.

    Returns:
        List of the entries of ``pred`` with the first four columns of every
        non-empty entry rescaled and rounded in place.
    """
    # NOTE(review): the original indentation was lost; this assumes every
    # entry is appended so the result stays index-aligned with ``pred``.
    rescaled = []
    for det in pred:
        has_boxes = det is not None and len(det)
        if has_boxes:
            scaled = scale_coords(processed_image.shape[2:], det[:, :4], ref_image.shape)
            det[:, :4] = scaled.round()
        rescaled.append(det)
    return rescaled
def bbox2fai(cls, det):
    """Normalise detections to [-1, 1] and swap each x/y coordinate pair.

    The first four columns are passed through ``scale_coords`` (with identical
    source and target size ``cls.img_size``), rounded, divided by the image
    size, mapped with ``v * 2 - 1``, and finally reordered from columns
    (0, 1, 2, 3) to (1, 0, 3, 2).

    Args:
        det: Tensor with six columns; it is modified in place by the scaling
            steps (presumably x1, y1, x2, y2 plus two extra columns -- confirm).

    Returns:
        A new tensor holding the four reordered coordinate columns.
    """
    sz = cls.img_size
    det[:, :4] = scale_coords(sz, det[:, :4], sz).round()

    # Build the constants on det's device so the in-place arithmetic also
    # works when det does not live on the CPU.
    dev = det.device
    det /= torch.tensor((sz[1], sz[0], sz[1], sz[0], 1, 1), device=dev)
    det *= torch.tensor((2, 2, 2, 2, 1, 1), device=dev)
    det -= torch.tensor((1, 1, 1, 1, 0, 0), device=dev)

    order = torch.tensor((1, 0, 3, 2), dtype=torch.long, device=dev)
    return torch.index_select(det, 1, order)
def main():
    """Run YOLOv3-SPP on ``test.jpg``, draw the boxes and save the result.

    Loads the Darknet model from hard-coded paths, runs one warm-up pass and
    one timed inference, applies NMS, rescales the boxes to the original image,
    shows the annotated image and writes it to ``test_result.jpg``.
    """
    img_size = 512  # must be a multiple of 32, e.g. 416, 512, 608
    cfg = "cfg/yolov3-spp.cfg"
    weights = "weights/yolov3-spp-ultralytics-{}.pt".format(img_size)
    img_path = "test.jpg"
    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)
    model.eval()

    img_o = cv2.imread(img_path)  # BGR
    assert img_o is not None, "Image Not Found " + img_path

    img = img_utils.letterbox(img_o, new_shape=input_size, auto=True, color=(0, 0, 0))[0]
    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device).float()
    img /= 255.0  # scale (0, 255) to (0, 1)
    img = img.unsqueeze(0)  # add batch dimension

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        # Warm-up pass on an empty image.
        model(torch.zeros((1, 3, img_size, img_size), device=device))

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get inference result
        t2 = torch_utils.time_synchronized()
        print(t2 - t1)

        pred = utils.non_max_suppression(pred, conf_thres=0.3, iou_thres=0.6, multi_label=True)[0]
        t3 = time.time()
        print(t3 - t2)

    if pred is None:
        # Nothing survived NMS; avoid indexing ``None`` below.
        print("No target detected.")
        return

    # process detections
    pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4], img_o.shape).round()
    print(pred.shape)

    bboxes = pred[:, :4].detach().cpu().numpy()
    scores = pred[:, 4].detach().cpu().numpy()
    # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
    classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1

    category_index = {i: str(i) for i in range(1, 91)}
    img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores, category_index)
    plt.imshow(img_o)
    plt.show()

    img_o.save("test_result.jpg")
def detect():
    """Run YOLOv4 on ``./data/bus.png`` and show boxes with confidence > 0.7.

    The image is resized to 608x608, pushed through a CPU Darknet model, the
    first four prediction columns are multiplied by the input size, NMS is
    applied and every sufficiently confident box is drawn in a random colour.
    """
    img_size = (608, 608)
    cfg = './cfg/yolov4.cfg'
    weights = './weights/yolov4.pt'
    path = './data/bus.png'
    device = torch.device('cpu')

    im0 = cv2.imread(path)  # BGR
    assert im0 is not None, 'Image Not Found ' + path

    # Plain resize (no letterbox); the resized frame is also the draw target.
    draw_img = cv2.resize(im0, (608, 608))
    chw = draw_img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    chw = np.ascontiguousarray(chw)

    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)['model'])
    model.to(device).eval()

    img = torch.from_numpy(chw).to(device)
    img = img.float()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    with torch.no_grad():
        pred = model(img)
        pred[:, :, :4] *= torch.Tensor(img_size * 2)
        # Apply NMS
        pred = non_max_suppression(pred)

    for det in pred:
        if det is None or not len(det):
            continue
        # NOTE(review): boxes are rescaled to im0.shape but drawn on the
        # 608x608 ``draw_img`` -- looks inconsistent unless the source image
        # is already 608x608; confirm the intended frame.
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
        for *xyxy, conf, cls in det:
            if not conf > 0.7:
                continue
            c1 = (int(xyxy[0].item()), int(xyxy[1].item()))
            c2 = (int(xyxy[2].item()), int(xyxy[3].item()))
            color = tuple(random.randint(0, 255) for _ in range(3))
            cv2.rectangle(draw_img, c1, c2, color)
            print(conf.item(), cls.item())

    cv2.imshow("123", draw_img)
    cv2.waitKey(10000)
def process_img(img):
    """Detect objects in ``img``, draw labelled boxes on it and display it.

    Relies on the module-level names ``device``, ``yolov3spp`` and
    ``obj_list``.

    Args:
        img: BGR image array; it is drawn on in place.
    """
    input_shape = (416, 416)

    resized_img, _, _ = letterbox(img, input_shape, auto=False, scaleFill=False)
    resized = resized_img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    resized = np.ascontiguousarray(resized)

    input_data = torch.from_numpy(resized).to(device)
    input_data = input_data.float()
    input_data /= 255.
    if input_data.ndimension() == 3:
        input_data = input_data.unsqueeze(0)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        out = yolov3spp(input_data)
    out = non_max_suppression(out, conf_thres=0.2)

    for det in out:
        if det is None or not len(det):
            continue
        det[:, :4] = scale_coords(input_shape, det[:, :4], img.shape).round()
        for x1, y1, x2, y2, conf, cls in det:
            # OpenCV drawing functions expect plain Python ints, not tensors.
            top_left = (int(x1), int(y1))
            bottom_right = (int(x2), int(y2))
            cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 2)
            cls_num = int(cls)
            cv2.putText(img, obj_list[cls_num], top_left,
                        cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 0, 255), 0)

    cv2.imshow("i", img)
    cv2.waitKey(0)
def analyse_rgb():
    """Grab one frame, detect risky objects and describe them as a string.

    Relies on the module-level names ``model``, ``classes``, ``colors``,
    ``image_size``, ``RISKY_CLASSES``, ``CAMERA_FOV``, ``get_bgr`` and
    ``get_rgb``.

    Returns:
        dict with keys ``raw``, ``done`` and ``objects``. ``objects`` is a
        ``|``-separated list of ``name;phi;-1`` records (empty when nothing
        was detected). When detections exist, ``raw`` is a fresh ``get_rgb()``
        frame and ``done`` the annotated BGR frame; otherwise both are the
        channel-reversed input frame.
    """
    img0 = get_bgr()
    img_org = img0[:, :, ::-1]

    img, _, _, _ = letterbox(img0, new_shape=image_size)
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img, dtype=np.float32)
    img /= 255.0
    img = torch.from_numpy(img).unsqueeze(0).to('cpu')

    pred, _ = model(img)
    det = non_max_suppression(pred, 0.6, 0.5)[0]
    if det is None or not len(det) > 0:
        return {'raw': img_org, 'done': img_org, 'objects': ''}

    print('+ Rescaling model')
    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

    records = []
    for *coordinates, conf, cls_conf, cls in det:
        name = classes[int(cls)]
        if name not in RISKY_CLASSES:
            continue
        label = '%s %.2f' % (name, conf)
        plot_one_box(coordinates, img0, label=label, color=colors[int(cls)])
        print(f"+ Detected {name}")

        # Horizontal angle of the box's first x coordinate relative to the
        # image centre, spanning +/- CAMERA_FOV / 2 across the image width.
        # NOTE(review): phi is formatted straight from a tensor element, so
        # the record contains its repr -- confirm the consumer expects that.
        width = img0.shape[1]
        phi = (coordinates[0] / width * 2 - 1) * (CAMERA_FOV / 2)
        records.append(f"{name};{phi};-1")

    s = '|'.join(records)
    return {"raw": get_rgb(), "done": img0, "objects": s}
def test(cfg,
         data,
         weights=None,
         batch_size=16,
         img_size=608,
         iou_thres=0.5,
         conf_thres=0.001,
         nms_thres=0.5,
         save_json=True,
         hyp=None,
         model=None,
         single_cls=False):
    """Evaluate a detection model on the validation set and print its metrics.

    :param str cfg: model cfg file (used only when ``model`` is None)
    :param data: data dict; ``data['valid']``, ``data['num_classes']`` and
        ``data['names']`` are read
    :param str weights: weights path, used only when ``model`` is None.
        NOTE(review): with the default ``None`` the ``weights.endswith`` call
        below would fail -- confirm callers always pass a path in that case.
    :param int batch_size: batch size
    :param int img_size: image size
    :param float iou_thres: IoU above which a prediction counts as correct
    :param float conf_thres: confidence threshold passed to NMS
    :param float nms_thres: NMS threshold
    :param bool save_json: collect COCO-style detections, write them to
        ``results.json`` and evaluate them with pycocotools
    :param hyp: hyperparameters forwarded to ``LoadImagesAndLabels``
    :param model: an already constructed model; when None a model is built
        from ``cfg`` and ``weights`` on the device named by the global ``opt``
    :param bool single_cls: treat the data as one class named ``'item'``
    :return: ``((mean_pre, mean_rec, mean_ap, mf1, *mean_losses), maps)``
        where ``maps`` holds one AP value per class
    """
    if model is None:
        device = select_device(opt.device)
        verbose = False

        # Initialize model
        model = Model(cfg, img_size).to(device)

        # Load weights
        if weights.endswith('.pt'):
            checkpoint = torch.load(weights, map_location=device)
            # Only the entries selected by intersect_dicts are loaded.
            state_dict = intersect_dicts(checkpoint['model'],
                                         model.state_dict())
            model.load_state_dict(state_dict, strict=False)
        elif len(weights) > 0:
            load_darknet_weights(model, weights)
        print(f'Loaded weights from {weights}!')

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device
        verbose = False

    test_path = data['valid']
    num_classes, names = (1, ['item']) if single_cls else (int(
        data['num_classes']), data['names'])

    # Dataloader
    dataset = LoadImagesAndLabels(test_path, img_size, batch_size, hyp=hyp)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=8,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    output_format = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets',
                                              'Pre', 'Rec', 'mAP', 'F1')
    precision, recall, f_1, mean_pre, mean_rec, mean_ap, mf1 = 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    json_dict, stats, aver_pre, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=output_format)):
        targets = targets.to(device)
        imgs = imgs.to(device) / 255.0
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Plot images with bounding boxes
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs,
                        targets=targets,
                        paths=paths,
                        fname='test_batch0.jpg')

        # NOTE(review): original indentation was lost; the extent of the
        # no_grad block is reconstructed -- confirm.
        with torch.no_grad():
            inference_output, train_output = model(imgs)

            if hasattr(model, 'hyp'):  # if model has loss hyperparameters
                loss += compute_loss(train_output, targets,
                                     model)[1][:3].cpu()  # GIoU, obj, cls

            output = non_max_suppression(inference_output,
                                         conf_thres=conf_thres,
                                         nms_thres=nms_thres)

        # Statistics per image
        for i, pred in enumerate(output):
            # Rows of ``targets`` whose first column equals the image index.
            labels = targets[targets[:, 0] == i, 1:]
            num_labels = len(labels)
            target_class = labels[:, 0].tolist() if num_labels else []
            seen += 1

            if pred is None:
                # No predictions: record the missed targets, if any.
                if num_labels:
                    stats.append(
                        ([], torch.Tensor(), torch.Tensor(), target_class))
                continue

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[i]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[i].shape[1:], box,
                             shapes[i][0])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for det_i, det in enumerate(pred):
                    json_dict.append({
                        'image_id':
                        image_id,
                        'category_id':
                        coco91class[int(det[6])],
                        'bbox':
                        [float(format(x, '.%gf' % 3)) for x in box[det_i]],
                        'score':
                        float(format(det[4], '.%gf' % 5))
                    })

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if num_labels:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes, scaled from fractions of the batch image size
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for j, (*pbox, _, _, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == num_labels:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in target_class:
                        continue

                    # Best iou, index between pred and targets
                    mask = (pcls == tcls_tensor).nonzero(
                        as_tuple=False).view(-1)
                    iou, best_iou = bbox_iou(pbox, tbox[mask]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and mask[
                            best_iou] not in detected:  # and pcls == target_class[bi]:
                        correct[j] = 1
                        detected.append(mask[best_iou])

            # Append statistics (correct, conf, pcls, target_class)
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), target_class))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]
    if len(stats):
        precision, recall, aver_pre, f_1, ap_class = ap_per_class(*stats)
        mean_pre, mean_rec, mean_ap, mf1 = precision.mean(), recall.mean(
        ), aver_pre.mean(), f_1.mean()
        num_targets = np.bincount(
            stats[3].astype(np.int64),
            minlength=num_classes)  # number of targets per class
    else:
        num_targets = torch.zeros(1)

    # Print results
    print_format = '%20s' + '%10.3g' * 6
    print(print_format %
          ('all', seen, num_targets.sum(), mean_pre, mean_rec, mean_ap, mf1))

    # Print results per class
    if verbose and num_classes > 1 and stats:
        for i, class_ in enumerate(ap_class):
            print(print_format %
                  (names[class_], seen, num_targets[class_], precision[i],
                   recall[i], aver_pre[i], f_1[i]))

    # Save JSON
    if save_json and mean_ap and json_dict:
        try:
            img_ids = [
                int(Path(x).stem.split('_')[-1]) for x in dataset.img_files
            ]
            with open('results.json', 'w') as file:
                json.dump(json_dict, file)

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocogt = COCO('data/coco/annotations/instances_val2017.json'
                          )  # initialize COCO ground truth api
            cocodt = cocogt.loadRes('results.json')  # initialize COCO pred api

            cocoeval = COCOeval(cocogt, cocodt, 'bbox')
            cocoeval.params.imgIds = img_ids  # [:32]  # only evaluate these images
            cocoeval.evaluate()
            cocoeval.accumulate()
            cocoeval.summarize()
            mean_ap = cocoeval.stats[1]  # update mAP to pycocotools mAP
        except ImportError:
            # NOTE(review): nothing is imported inside this try block, so this
            # handler may never trigger -- confirm where COCO/COCOeval are
            # imported.
            print(
                'WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.'
            )

    # Return results
    maps = np.zeros(num_classes) + mean_ap
    for i, class_ in enumerate(ap_class):
        maps[class_] = aver_pre[i]
    return (mean_pre, mean_rec, mean_ap, mf1,
            *(loss / len(dataloader)).tolist()), maps
def stream(cfg,
           classes_file,
           weights,
           socket_ip,
           socket_port,
           image_size=128,
           confidence_threshold=0.6,
           nms_thres=0.5):
    """Detect risky objects on a Kinect stream and send them over a TCP socket.

    For every frame the model is run on the CPU, detections are filtered by
    NMS and, for classes listed in ``RISKY_CLASSES``, a ``name;phi;z`` record
    is sent to the remote peer. The annotated frame is shown with matplotlib.

    Args:
        cfg: Darknet cfg file.
        classes_file: File passed to ``load_classes``.
        weights: Darknet weights passed to ``load_darknet_weights``.
        socket_ip: Remote host to connect to.
        socket_port: Remote port to connect to.
        image_size: Size handed to ``Darknet`` and ``LoadKinect``.
        confidence_threshold: Confidence threshold passed to NMS.
        nms_thres: NMS threshold.

    Side effects:
        Rebinds the module-level ``sock``; the socket is never closed here.
    """
    print('+ Initializing model')
    model = Darknet(cfg, image_size)
    print('+ Loading model')
    load_darknet_weights(model, weights)
    print('+ Fusing model')
    model.fuse()
    print('+ Loading model to CPU')
    model.to('cpu').eval()
    print('+ Loading webcam')
    cap = LoadKinect(img_size=image_size)
    print('+ Loading classes')
    classes = load_classes(classes_file)
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(classes))]
    print('+ Connecting to remote socket')
    global sock
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((socket_ip, socket_port))
    print('+ Enumerating cam')
    for counter, (path, img, im0, vid_cap) in enumerate(cap):
        t = time.time()

        print('+ Loading image to CPU')
        img = torch.from_numpy(img).unsqueeze(0).to('cpu')
        pred, _ = model(img)
        print('+ Detecting objects')
        det = non_max_suppression(pred, confidence_threshold, nms_thres)[0]

        # NOTE(review): original indentation was lost; everything that uses
        # ``det`` / ``detected_classes`` is assumed to sit inside this branch.
        if det is not None and len(det) > 0:
            detected_classes = []
            print('+ Rescaling model')
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0.shape).round()

            print('+ Reading depth')
            depth = get_depth()
            depth_swap = np.swapaxes(depth, 0, 1)

            # Per stripe of the swapped depth map: the value at sorted
            # position 100.
            depth_strip1d = np.array([
                np.sort(stripe)[100] for stripe in depth_swap
            ]).astype(np.uint8)
            depth_strip2d_swap = np.array([
                np.ones(depth_swap.shape[1]) * depth for depth in depth_strip1d
            ]).astype(np.uint8)
            depth_strip2d = np.swapaxes(depth_strip2d_swap, 0, 1)

            depth_edge1d = np.zeros(depth_strip1d.shape)

            # NOTE(review): these inner loops reuse the name ``counter`` of
            # the outer frame loop.
            state = False
            for counter, _ in np.ndenumerate(depth_edge1d[:-1]):
                state = True if not state and depth_strip1d[
                    counter] < 230 else False
                depth_edge1d[counter[0]] = not state

            state = False
            state_cnt = 0
            for counter, _ in np.ndenumerate(depth_edge1d[:-1]):
                counter = counter[0]
                if depth_edge1d[counter] == state:
                    state_cnt += 1
                else:
                    if state_cnt < 10:
                        # NOTE(review): writes index ``counter`` on every
                        # iteration, never ``r`` -- possibly meant
                        # depth_edge1d[r]; confirm.
                        for r in range(max(0, counter - 10), counter):
                            depth_edge1d[counter] = state
                    # NOTE(review): placement of the next two lines relative
                    # to the ``if`` above is reconstructed -- confirm.
                    state_cnt = 0
                    state = depth_edge1d[counter]

            depth_edge1d = depth_edge1d * 255

            depth_edge2d_swap = np.array([
                np.ones(100) * awddawd for awddawd in depth_edge1d
            ]).astype(np.uint8)
            depth_edge2d = np.swapaxes(depth_edge2d_swap, 0, 1)

            for *coordinates, conf, cls_conf, cls in det:
                if classes[int(cls)] in RISKY_CLASSES:
                    label = '%s %.2f' % (classes[int(cls)], conf)
                    plot_one_box(coordinates,
                                 im0,
                                 label=label,
                                 color=colors[int(cls)])
                    print(f"+ Detected {classes[int(cls)]}")
                    # NOTE(review): both slices index the first axis of
                    # ``depth`` with tensor-valued coordinates -- confirm this
                    # is the intended depth lookup.
                    x_avg_depth = np.mean(depth[coordinates[0] -
                                                5:coordinates[0] + 5])
                    y_avg_depth = np.mean(depth[coordinates[1] -
                                                5:coordinates[1] + 5])
                    detected_classes.append({
                        classes[int(cls)]: {
                            'x':
                            coordinates[0],
                            'y':
                            coordinates[1],
                            'z':
                            np.average(np.array([x_avg_depth, y_avg_depth]))
                        }
                    })

            # Build one "name;phi;z|" record per detection and send them
            # joined together without the trailing "|".
            n = []
            for counter in detected_classes:
                width = im0.shape[1]
                x, y, z = counter[list(counter.keys())[0]].values()
                phi = (x / width * 2 - 1) * (CAMERA_FOV / 2)
                n.append(f"{list(counter.keys())[0]};{phi};{z}|")
            sock.send(''.join(str(x) for x in n)[:-1].encode('utf-8'))

        print('+ Cycle took %.3fs' % (time.time() - t))
        plt.imshow(bgr_to_rgb(im0))
        plt.show(block=False)
        plt.pause(.001)
def main():
    """Run the VOC-trained YOLOv3-SPP model on ``test.jpg`` and save the result.

    Checks that the cfg, weights, class JSON and image exist, runs one warm-up
    pass and one timed inference, applies NMS, rescales the boxes to the
    original image, shows the annotated image and writes ``test_result.jpg``.
    Returns early after printing a message when nothing is detected.
    """
    img_size = 512  # must be a multiple of 32, e.g. 416, 512, 608
    cfg = "cfg/my_yolov3.cfg"  # change to the generated .cfg file
    weights = "weights/yolov3spp-voc-512.pth"  # change to your own trained weights
    json_path = "./data/pascal_voc_classes.json"  # class label file
    img_path = "test.jpg"
    assert os.path.exists(cfg), "cfg file {} dose not exist.".format(cfg)
    assert os.path.exists(weights), "weights file {} dose not exist.".format(weights)
    assert os.path.exists(json_path), "json file {} dose not exist.".format(json_path)
    assert os.path.exists(img_path), "image file {} dose not exist.".format(img_path)

    # Close the label file deterministically once it has been parsed.
    with open(json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    category_index = {v: k for k, v in class_dict.items()}

    input_size = (img_size, img_size)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfg, img_size)
    model.load_state_dict(torch.load(weights, map_location=device)["model"])
    model.to(device)
    model.eval()

    img_o = cv2.imread(img_path)  # BGR
    assert img_o is not None, "Image Not Found " + img_path

    img = img_utils.letterbox(img_o, new_shape=input_size, auto=True, color=(0, 0, 0))[0]
    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device).float()
    img /= 255.0  # scale (0, 255) to (0, 1)
    img = img.unsqueeze(0)  # add batch dimension

    with torch.no_grad():
        # Warm-up pass on an empty image.
        model(torch.zeros((1, 3, img_size, img_size), device=device))

        t1 = torch_utils.time_synchronized()
        pred = model(img)[0]  # only get inference result
        t2 = torch_utils.time_synchronized()
        print(t2 - t1)

        pred = utils.non_max_suppression(pred, conf_thres=0.1, iou_thres=0.6, multi_label=True)[0]
        t3 = time.time()
        print(t3 - t2)

    if pred is None:
        print("No target detected.")
        return

    # process detections
    pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4], img_o.shape).round()
    print(pred.shape)

    bboxes = pred[:, :4].detach().cpu().numpy()
    scores = pred[:, 4].detach().cpu().numpy()
    # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
    classes = pred[:, 5].detach().cpu().numpy().astype(int) + 1

    img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores, category_index)
    plt.imshow(img_o)
    plt.show()

    img_o.save("test_result.jpg")
# Padded resize img, *_ = letterbox( img0, new_shape=640) # img经过padding后的最小输入矩形图: (416, 320, 3) # Normalize RGB img = img[:, :, ::-1].transpose(2, 0, 1) # BGR2RGB HWC2CHW: (3, 416, 320) # ascontiguousarray函数将一个内存不连续存储的数组转换为内存连续存储的数组,使得运行速度更快。 img = np.ascontiguousarray(img, np.float32) # uint8 to fp32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 img = torch.from_numpy(img).unsqueeze(0).to(device) # 获取预测框,从80类别概率中取最大值作为预测类别,将预测框还原到原图尺寸 pred = detector.predict(img) # 得到初始预测结果 pred = bridge.nms_filter(pred) # 根据置信度,简单处理,并且压缩类别概率维度 pred[:, 0:4] = scale_coords(img.shape[2:], pred[:, :4], img0.shape).round() # 参数还原到原图尺寸 # 传统nms后的结果 naive_nms_pred = bridge.nms_suppress(pred) # 传统的nms抑制后的预测结果 # 计算预测结果与标准类别的距离,删除异常框后nms crop_imgs, crop_locs = crop_boxes(pred, img0) # 从原图中裁剪预测框 gallery_feats = reid.extract_feats(crop_imgs) # 将图片转换为特征 query_feats = gallery_feats # Delete dist_query_gallery = calcu_dist(query_feats, gallery_feats) # 计算两个集合的距离 anomaly_ids = bridge.detect_anomaly(dist_query_gallery) # 检测异常框 pred = bridge.remove_amomaly(pred, anomaly_ids) # 删除异常框 dist_rank_nms_pred = bridge.nms_suppress(pred) # 非极大值抑制
def test(cfg,
         data,
         batch_size,
         img_size,
         conf_thres,
         iou_thres,
         nms_thres,
         src_txt_path='./valid.txt',
         dst_path='./output',
         weights=None,
         model=None,
         log_file_path='log.txt'):
    """Run the model over a VOC-style list and write per-class detection files.

    Every detection is appended as ``path score xmin ymin xmax ymax`` to
    ``comp4_det_test_<class>.txt`` under the module-level ``result_path``;
    the function then returns ``calc_APs()``.

    Args:
        cfg: Darknet cfg file (used only when ``model`` is None).
        data: Path handed to ``parse_data_cfg``; ``classes`` and ``names``
            are read from the result.
        batch_size: Batch size of the data loader.
        img_size: Image size handed to ``VocDataset``.
        conf_thres: Confidence threshold passed to NMS.
        iou_thres: Only used to label the mAP column of the header string.
        nms_thres: NMS threshold.
        src_txt_path: Image list handed to ``VocDataset``.
        dst_path: Directory created if it does not exist.
        weights: Checkpoint path (used only when ``model`` is None).
        model: Already constructed model, or None to build one.
        log_file_path: Not used in this function.

    Returns:
        The result of ``calc_APs()``.
    """
    # 0. Initialise some parameters
    if not os.path.exists(dst_path):
        os.mkdir(dst_path)
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    class_names = load_classes(data['names'])

    # 1. Load the network
    if model is None:
        device = select_device(opt.device)
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: support the .weights format
            model.load_state_dict(
                torch.load(weights)['model'])  # TODO:map_location=device ?
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # clw note: multi-GPU
    else:
        device = next(model.parameters()).device  # get model device
    # NOTE(review): original indentation was lost; this call is assumed to
    # apply to both branches above -- confirm.
    model.to(device).eval()

    # 2. Load the dataset
    test_dataset = VocDataset(src_txt_path,
                              img_size,
                              with_label=True,
                              is_training=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn,  # TODO
        pin_memory=True)

    # 3. Predict (forward pass)
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@{}'.format(iou_thres), 'F1')

    pbar = tqdm(dataloader)
    for i, (img_tensor, _, img_path, shapes) in enumerate(pbar):
        start = time.time()
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)

        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            output = model(img_tensor)  # [0]

            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres,
                                             nms_thres)  # list (64,)
        s = 'time use per batch: %.3fs' % (time.time() - start)

        pbar.set_description(s)

        for batch_idx, pred in enumerate(
                nms_output
        ):  # pred: (bs, 7) -> xyxy, obj_conf*class_conf, class_conf, cls_idx
            ################################################
            if pred is None:
                continue
            # Keep columns 0-4 and column 6 as a numpy array.
            bboxes_prd = torch.cat((pred[:, 0:5], pred[:, 6].unsqueeze(1)),
                                   dim=1).cpu().numpy()

            ###### clw note: coord transform to origin size(because of resize and so on....) is really important !!!
            scale_coords(img_tensor[batch_idx].shape[1:], bboxes_prd,
                         shapes[batch_idx][0],
                         shapes[batch_idx][1])  # to original shape
            ######

            for bbox in bboxes_prd:
                coor = np.array(bbox[:4], dtype=np.int32)
                score = bbox[4]
                class_ind = int(bbox[5])

                class_name = class_names[class_ind]
                # ``classes_pred`` and ``result_path`` are not defined in this
                # function; presumably module-level -- verify.
                classes_pred.add(class_name)
                score = '%.4f' % score
                xmin, ymin, xmax, ymax = map(str, coor)
                s = ' '.join([
                    str(img_path[batch_idx]),
                    str(score), xmin, ymin, xmax, ymax
                ]) + '\n'

                with open(
                        os.path.join(result_path,
                                     'comp4_det_test_' + class_name + '.txt'),
                        'a') as f:
                    f.write(s)
            ################################################
    return calc_APs()
def test(
        data,
        weights=None,
        batch_size=16,
        imgsz=640,
        conf_thres=0.001,
        iou_thres=0.6,  # for NMS
        save_json=False,
        verbose=False,
        model=None,
        dataloader=None,
        logdir='./runs',
        merge=False):
    """Evaluate a detector and report precision, recall and mAP.

    When ``model`` is None the checkpoint at ``weights`` is loaded and the
    function runs stand-alone (reading the global ``opt``); otherwise it is
    treated as being called from training and the loss is accumulated too.

    Args:
        data: Path to a YAML file; ``num_classes``, ``names`` and ``val`` /
            ``test`` are read from it.
        weights: Checkpoint path (used only when ``model`` is None).
        batch_size: Batch size for the data loader built here.
        imgsz: Image size for the warm-up pass and the data loader.
        conf_thres: Confidence threshold passed to NMS.
        iou_thres: IoU threshold passed to NMS.
        save_json: Collect detections, dump them to JSON and evaluate them
            with pycocotools.
        verbose: Also print per-class results.
        model: Already constructed model, or None to load one.
        dataloader: Existing data loader, or None to create one.
        logdir: Directory for the ``test_batch*`` images.
        merge: Merge-NMS flag; overwritten by ``opt.merge`` when no data
            loader is supplied.

    Returns:
        ``((mp, mr, map50, map, *mean_losses), maps, t)`` where ``maps`` holds
        one AP value per class and ``t`` the timing tuple
        (inference, NMS, total ms per image, imgsz, imgsz, batch_size).
    """
    # Initialize/load model and set device
    if model is None:
        training = False
        device = torch_utils.select_device(opt.device, batch_size=batch_size)

        # Remove previous
        for f in glob.glob(os.path.join(logdir, 'test_batch*.jpg')):
            os.remove(f)

        # Load model
        model = torch.load(
            weights, map_location=device)['model'].float()  # load to FP32
        torch_utils.model_info(model)
        model.fuse()
        model.to(device)

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    else:  # called by train.py
        training = True
        device = next(model.parameters()).device  # get model device

    # Half
    half = device.type != 'cpu' and torch.cuda.device_count(
    ) == 1  # half precision only supported on single-GPU
    half = False  # overrides the line above: half precision is always off
    if half:
        model.half()  # to FP16

    # Configure
    model.eval()
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    nc = int(data['num_classes'])  # number of classes
    iouv = torch.linspace(0.5, 0.95,
                          10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()
    losser = YoloLoss(model)

    # Dataloader
    if dataloader is None:  # not training
        merge = opt.merge  # use Merge NMS
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img
                  ) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data[
            'val']  # path to val/test images
        dataloader = kitti.create_dataloader(path,
                                             imgsz,
                                             batch_size,
                                             int(max(model.stride)),
                                             config=None,
                                             augment=False,
                                             cache=False,
                                             pad=0.5,
                                             rect=True)[0]

    seen = 0
    names = data['names']
    kitti8class = data_utils.kitti8_classes()
    # NOTE(review): the last two header labels look garbled by e-mail
    # obfuscation (presumably 'mAP@.5' and 'mAP@.5:.95') -- confirm and fix.
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 '[email protected]', '[email protected]:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths,
                  shapes) in enumerate(tqdm.tqdm(dataloader, desc=s)):
        targets.delete_by_mask()
        targets.to_float32()
        # Keep a copy of the structured targets for the loss.
        targ = ParamList(targets.size, True)
        targ.copy_from(targets)
        img_id = targets.get_field('img_id')
        classes = targets.get_field('class')
        bboxes = targets.get_field('bbox')
        # Flatten the targets to rows of (img_id, class, bbox...).
        targets = torch.cat(
            [img_id.unsqueeze(-1),
             classes.unsqueeze(-1), bboxes], dim=-1)
        img = img.to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        # img /= 1.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = torch_utils.time_synchronized()
            inf_out, train_out = model(img)  # inference and training outputs
            t0 += torch_utils.time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                # loss += calc_loss([x.float() for x in train_out], targets, model)[1][:3]  # GIoU, obj, cls
                loss += losser([x.float() for x in train_out], targ)[1][:3]

            # Run NMS
            t = torch_utils.time_synchronized()
            output = postprocess.apply_nms(inf_out,
                                           nc,
                                           conf_thres=conf_thres,
                                           iou_thres=iou_thres,
                                           merge=merge)
            t1 += torch_utils.time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                # No predictions: record the missed targets, if any.
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # with open('test.txt', 'a') as file:
            #    [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

            # Clip boxes to image bounds
            utils.clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                utils.scale_coords(img[si].shape[1:], box, shapes[si][0],
                                   shapes[si][1])  # to original shape
                box = data_utils.xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                # NOTE: this loop rebinds ``p``, the precision placeholder.
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id': image_id,
                        'category_id': kitti8class[int(p[5])],
                        'bbox': [round(x, 3) for x in b],
                        'score': round(p[4], 5)
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0],
                                  niou,
                                  dtype=torch.bool,
                                  device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = data_utils.xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(
                        -1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(
                        -1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = metrics_utils.box_iou(
                            pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[
                                    pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(
                                        detected
                                ) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if batch_i < 1:
            f = os.path.join(logdir,
                             'test_batch%g_gt.jpg' % batch_i)  # filename
            visual_utils.plot_images(img, targets, paths, f,
                                     names)  # ground truth
            f = os.path.join(logdir, 'test_batch%g_pred.jpg' % batch_i)
            visual_utils.plot_images(img,
                                     utils.output_to_target(
                                         output, width, height), paths, f,
                                     names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = metrics_utils.ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(
            1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3
              for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print(
            'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
            % t)

    # Save JSON
    if save_json and map50 and len(jdict):
        imgIds = [
            int(Path(x).stem.split('_')[-1])
            for x in dataloader.dataset.img_files
        ]
        f = 'detections_val2017_%s_results.json' % \
            (weights.split(os.sep)[-1].replace('.pt', '') if weights else '')  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)

        try:
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocoGt = COCO(
                glob.glob('../coco/annotations/instances_val*.json')
                [0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api

            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:
                                        2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except:
            # NOTE(review): the bare except reports every failure (not just a
            # missing or incompatible pycocotools) with this message.
            print(
                'WARNING: pycocotools must be installed with numpy==1.17 to run correctly. '
                'See https://github.com/cocodataset/cocoapi/issues/356')

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map,
            *(loss.cpu() / len(dataloader)).tolist()), maps, t
def _build_sgd_optimizer(model, args):
    """Create the SGD optimizer with three parameter groups.

    Group 0 holds every parameter that is neither a bias nor a
    ``Conv2d.weight``; group 1 holds the ``Conv2d.weight`` parameters and is
    the only group given ``hyp['weight_decay']``; group 2 holds the biases
    and runs at twice the base learning rate.
    """
    biases, conv_weights, others = [], [], []
    for name, param in model.named_parameters():
        if '.bias' in name:
            biases.append(param)
        elif 'Conv2d.weight' in name:
            conv_weights.append(param)  # apply weight decay
        else:
            others.append(param)

    optimizer = optim.SGD(others, lr=args.lr, momentum=hyp['momentum'])
    optimizer.add_param_group({
        'params': conv_weights,
        'weight_decay': hyp['weight_decay']
    })
    optimizer.add_param_group({'params': biases})
    optimizer.param_groups[2]['lr'] *= 2.0  # bias learning rate
    return optimizer


def _evaluate_on_coco(model, test_loader, args, device):
    """Run the model over ``test_loader`` and score the detections.

    Detections are written to ``../results/val2017_bbox_results.json`` and
    evaluated against ``annotations/instances_val2017.json`` under
    ``args.coco_dir``.

    Returns:
        ``(results, test_status)``: the list of per-detection records that
        was written to disk and whatever ``get_coco_eval`` returned.
    """
    model.eval()
    test_input_shape = (416, 416)
    thres_out = 0.05
    results = []
    processed_ids = []
    coco91cls = coco80_to_coco91_class()

    tbar = tqdm(test_loader, total=len(test_loader))
    for imgs, targets, img_id_tuple, orig_shape_tuple in tbar:
        if len(targets) == 0:
            continue
        imgs = imgs.to(device).float() / 255.

        with torch.no_grad():
            yolo_outs = model(imgs)
            outputs = non_max_suppression(yolo_outs, conf_thres=thres_out)

        for i, det in enumerate(outputs):
            # Images without detections do not yield a tensor.
            if not isinstance(det, torch.Tensor):
                continue
            det[:, :4] = scale_coords(test_input_shape, det[:, :4],
                                      orig_shape_tuple[i]).round()
            img_result = convert_out_format(img_id_tuple[i], det, coco91cls,
                                            thres_out)
            if img_result:
                processed_ids.append(img_id_tuple[i])
                results.extend(img_result)

    # save results for new test
    pred_file = "../results/val2017_bbox_results.json"
    save_json(pred_file, results)
    val_file = os.path.join(args.coco_dir,
                            'annotations/instances_val2017.json')
    test_status = get_coco_eval(val_file, pred_file, processed_ids)
    return results, test_status


def train_yolo(gpu, args):
    """Train (or only evaluate) a YOLO model on one GPU of a possibly
    distributed job.

    Args:
        gpu: Index of the CUDA device on this node used by this process.
        args: Run configuration. Fields read here: ``node_rank``, ``gpus``,
            ``log_path``, ``net``, ``num_classes``, ``pretrained``, ``lr``,
            ``mixed_precision``, ``world_size``, ``resume``, ``test_only``,
            ``epoches``, ``accumulate`` and ``coco_dir``.

    Returns:
        ``None`` on normal completion. If a non-finite loss is encountered,
        training stops and the most recent ``results`` value is returned
        (the initial all-zero tuple, or the detection list produced by the
        last evaluation).
    """
    # NOTE: despite its name this is the global rank across all nodes; it is
    # passed as ``rank`` to ``init_process_group`` below.
    local_rank = args.node_rank * args.gpus + gpu
    writer = None
    if local_rank == 0:
        writer = SummaryWriter(args.log_path)
    if not torch.cuda.is_available():
        print("CUDA is not available")
        exit(0)
    torch.manual_seed(0)

    # get dataloader
    train_loader, test_loader = get_dataloader(args, local_rank)

    # model initialization
    m = net_dict[args.net]
    model = m(num_classes=args.num_classes, pretrained=args.pretrained)
    torch.cuda.set_device(gpu)
    model.cuda(gpu)
    # Device strings must use the per-node index ``gpu``; the global rank is
    # not a valid local device index on nodes with node_rank > 0.
    device = "cuda:{}".format(gpu)

    optimizer = _build_sgd_optimizer(model, args)

    if args.mixed_precision:
        model, optimizer = apex.amp.initialize(model,
                                               optimizer,
                                               opt_level='O1',
                                               verbosity=0)

    if args.world_size > 1:
        dist.init_process_group(
            backend='nccl',  # distributed backend
            init_method='env://',
            world_size=args.world_size,  # number of processes in the job
            rank=local_rank  # rank of this process
        )
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[gpu], find_unused_parameters=True)

    start_epoch = 0
    # resume
    if args.resume and os.path.exists(args.resume):
        checkpoint = torch.load(args.resume, map_location=device)
        if args.world_size > 1:
            model.module.load_state_dict(checkpoint['model'])
        else:
            model.load_state_dict(checkpoint['model'])
        start_epoch = checkpoint['epoch'] + 1
        if checkpoint['optimizer'] is not None:
            optimizer.load_state_dict(checkpoint['optimizer'])

    if not args.test_only:

        def lr_func(x):
            # Cosine decay of the LR multiplier from 1.0 down to 0.01.
            return (1 + math.cos(x * math.pi / args.epoches)) / 2 * 0.99 + 0.01

        # LambdaLR requires 'initial_lr' in every param group whenever
        # last_epoch != -1. A checkpoint saved without optimizer state does
        # not provide it, so fall back to the freshly configured rates.
        for group in optimizer.param_groups:
            group.setdefault('initial_lr', group['lr'])
        scheduler = lr_scheduler.LambdaLR(optimizer,
                                          lr_lambda=lr_func,
                                          last_epoch=start_epoch - 1)

    results = (0, 0, 0, 0, 0, 0, 0)
    model.hyp = hyp
    best_mAP = -1.

    epochs = range(start_epoch, args.epoches)
    if args.test_only:
        # Evaluate exactly once, even when the checkpoint already reached the
        # final epoch (which would otherwise leave the range empty).
        epochs = range(start_epoch, start_epoch + 1)
    batches_per_epoch = len(train_loader)

    # start training
    for epoch in epochs:
        if not args.test_only:
            model.train()
            # Ramps from 0 to 1 over the first half of training, then stays 1.
            model.iou_ratio = 1. - (1 + math.cos(
                min(epoch * 2, args.epoches) * math.pi / args.epoches)) / 2

            mean_loss = torch.zeros(4)
            if local_rank == 0:
                print(('\n' + '%10s' * 8) %
                      ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total',
                       'targets', 'img_size'))
            pbar = tqdm(enumerate(train_loader), total=batches_per_epoch)
            for idx, (imgs, targets, _, _) in pbar:
                if len(targets) == 0:
                    continue

                ni = idx + epoch * batches_per_epoch  # global batch index
                imgs = imgs.cuda(gpu, non_blocking=True).float() / 255.
                targets = targets.cuda(gpu, non_blocking=True)

                # run model
                yolo_outs = model(imgs)
                loss, loss_items = get_yolo_loss(model,
                                                 yolo_outs,
                                                 targets,
                                                 regression_loss_type='GIoU')
                if not torch.isfinite(loss):
                    print('WARNING: non-finite loss, ending training ',
                          loss_items)
                    return results

                if args.mixed_precision:
                    with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                # Gradients accumulate until every ``accumulate``-th batch.
                if ni % args.accumulate == 0:
                    optimizer.step()
                    optimizer.zero_grad()

                # mean loss
                mean_loss = (mean_loss * idx + loss_items.cpu()) / (idx + 1)

                # tensorboard
                if local_rank == 0:
                    writer.add_scalar("lr : ", scheduler.get_last_lr()[0], ni)
                    writer.add_scalar("Mean Loss : ", mean_loss[3], ni)
                    writer.add_scalar("IOU Loss : ", mean_loss[0], ni)
                    writer.add_scalar("Obj Loss : ", mean_loss[1], ni)
                    writer.add_scalar("Cls Loss : ", mean_loss[2], ni)

                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9
                                 if torch.cuda.is_available() else 0)  # (GB)
                # Use the batch tensor's spatial dims; indexing a single
                # (C, H, W) image with shape[2:] would only see the last one.
                s = ('%10s' * 2 + '%10.3g' * 6) % (
                    '%g/%g' % (epoch, args.epoches - 1), mem,
                    *loss_items.cpu(), len(targets), max(imgs.shape[2:]))
                pbar.set_description(s)

            # update scheduler
            scheduler.step()

        # Finished one epoch: test COCO mAP, then save the model.
        if local_rank == 0:
            results, test_status = _evaluate_on_coco(model, test_loader, args,
                                                     device)

        if args.test_only:
            break

        # save model
        if local_rank == 0:
            if args.world_size > 1:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            state = {
                'model': state_dict,
                'optimizer': optimizer.state_dict(),
                'epoch': epoch,
            }
            torch.save(state, "../weights/" + args.net + "_last.pth")
            if test_status[0] > best_mAP:
                torch.save(state, "../weights/" + args.net + "_best.pth")
                best_mAP = test_status[0]
            writer.add_scalar("best [email protected]:0.95 : ", best_mAP, epoch)